]> Git Repo - linux.git/blob - drivers/gpu/drm/radeon/cik.c
Merge tag 'asoc-fix-v4.13-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/broon...
[linux.git] / drivers / gpu / drm / radeon / cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37
38 #define SH_MEM_CONFIG_GFX_DEFAULT \
39         ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
40
41 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
49 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
50
51 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
52 MODULE_FIRMWARE("radeon/bonaire_me.bin");
53 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
55 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
57 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
58 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
59 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
60
61 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
69 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
70
71 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
72 MODULE_FIRMWARE("radeon/hawaii_me.bin");
73 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
75 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
77 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
78 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
79 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
80
81 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
86 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
87
88 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
89 MODULE_FIRMWARE("radeon/kaveri_me.bin");
90 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
92 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
93 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
94 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
95
96 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
97 MODULE_FIRMWARE("radeon/KABINI_me.bin");
98 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
99 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
100 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
101 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
102
103 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
104 MODULE_FIRMWARE("radeon/kabini_me.bin");
105 MODULE_FIRMWARE("radeon/kabini_ce.bin");
106 MODULE_FIRMWARE("radeon/kabini_mec.bin");
107 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
108 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
109
110 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
115 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
116
117 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
118 MODULE_FIRMWARE("radeon/mullins_me.bin");
119 MODULE_FIRMWARE("radeon/mullins_ce.bin");
120 MODULE_FIRMWARE("radeon/mullins_mec.bin");
121 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
122 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
123
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
127 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
129 extern void sumo_rlc_fini(struct radeon_device *rdev);
130 extern int sumo_rlc_init(struct radeon_device *rdev);
131 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
132 extern void si_rlc_reset(struct radeon_device *rdev);
133 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
134 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
135 extern int cik_sdma_resume(struct radeon_device *rdev);
136 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
137 extern void cik_sdma_fini(struct radeon_device *rdev);
138 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
139 static void cik_rlc_stop(struct radeon_device *rdev);
140 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
141 static void cik_program_aspm(struct radeon_device *rdev);
142 static void cik_init_pg(struct radeon_device *rdev);
143 static void cik_init_cg(struct radeon_device *rdev);
144 static void cik_fini_pg(struct radeon_device *rdev);
145 static void cik_fini_cg(struct radeon_device *rdev);
146 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
147                                           bool enable);
148
149 /**
150  * cik_get_allowed_info_register - fetch the register for the info ioctl
151  *
152  * @rdev: radeon_device pointer
153  * @reg: register offset in bytes
154  * @val: register value
155  *
156  * Returns 0 for success or -EINVAL for an invalid register
157  *
158  */
159 int cik_get_allowed_info_register(struct radeon_device *rdev,
160                                   u32 reg, u32 *val)
161 {
162         switch (reg) {
163         case GRBM_STATUS:
164         case GRBM_STATUS2:
165         case GRBM_STATUS_SE0:
166         case GRBM_STATUS_SE1:
167         case GRBM_STATUS_SE2:
168         case GRBM_STATUS_SE3:
169         case SRBM_STATUS:
170         case SRBM_STATUS2:
171         case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
172         case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
173         case UVD_STATUS:
174         /* TODO VCE */
175                 *val = RREG32(reg);
176                 return 0;
177         default:
178                 return -EINVAL;
179         }
180 }
181
182 /*
183  * Indirect registers accessor
184  */
185 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
186 {
187         unsigned long flags;
188         u32 r;
189
190         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
191         WREG32(CIK_DIDT_IND_INDEX, (reg));
192         r = RREG32(CIK_DIDT_IND_DATA);
193         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
194         return r;
195 }
196
197 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
198 {
199         unsigned long flags;
200
201         spin_lock_irqsave(&rdev->didt_idx_lock, flags);
202         WREG32(CIK_DIDT_IND_INDEX, (reg));
203         WREG32(CIK_DIDT_IND_DATA, (v));
204         spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
205 }
206
207 /* get temperature in millidegrees */
208 int ci_get_temp(struct radeon_device *rdev)
209 {
210         u32 temp;
211         int actual_temp = 0;
212
213         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
214                 CTF_TEMP_SHIFT;
215
216         if (temp & 0x200)
217                 actual_temp = 255;
218         else
219                 actual_temp = temp & 0x1ff;
220
221         actual_temp = actual_temp * 1000;
222
223         return actual_temp;
224 }
225
226 /* get temperature in millidegrees */
227 int kv_get_temp(struct radeon_device *rdev)
228 {
229         u32 temp;
230         int actual_temp = 0;
231
232         temp = RREG32_SMC(0xC0300E0C);
233
234         if (temp)
235                 actual_temp = (temp / 8) - 49;
236         else
237                 actual_temp = 0;
238
239         actual_temp = actual_temp * 1000;
240
241         return actual_temp;
242 }
243
244 /*
245  * Indirect registers accessor
246  */
247 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
248 {
249         unsigned long flags;
250         u32 r;
251
252         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
253         WREG32(PCIE_INDEX, reg);
254         (void)RREG32(PCIE_INDEX);
255         r = RREG32(PCIE_DATA);
256         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
257         return r;
258 }
259
260 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
261 {
262         unsigned long flags;
263
264         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
265         WREG32(PCIE_INDEX, reg);
266         (void)RREG32(PCIE_INDEX);
267         WREG32(PCIE_DATA, v);
268         (void)RREG32(PCIE_DATA);
269         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
270 }
271
/*
 * RLC save/restore register list for Spectre (Kaveri-class) parts.
 *
 * The table is mostly (selector << 16 | dword_offset) entries, each
 * followed by a 0x00000000 placeholder word.  The register byte offset
 * is pre-shifted (>> 2) into a dword offset.
 * NOTE(review): the high-half selector values (0x0e00, 0x4e00, ...,
 * 0x0600, 0x0400, 0x0000/0x0001) presumably encode a broadcast vs.
 * per-instance select consumed by the RLC ucode - confirm against the
 * RLC microcode format documentation.  The bare 0x3 and 0x5 words below
 * look like section markers/counts in that format - confirm likewise.
 * Do not edit values here without the hardware register list in hand.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* NOTE(review): bare marker word - meaning defined by RLC ucode */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* NOTE(review): bare marker word; the entries after it carry no
	 * placeholder value words - format defined by the RLC ucode */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
718
719 static const u32 kalindi_rlc_save_restore_register_list[] =
720 {
721         (0x0e00 << 16) | (0xc12c >> 2),
722         0x00000000,
723         (0x0e00 << 16) | (0xc140 >> 2),
724         0x00000000,
725         (0x0e00 << 16) | (0xc150 >> 2),
726         0x00000000,
727         (0x0e00 << 16) | (0xc15c >> 2),
728         0x00000000,
729         (0x0e00 << 16) | (0xc168 >> 2),
730         0x00000000,
731         (0x0e00 << 16) | (0xc170 >> 2),
732         0x00000000,
733         (0x0e00 << 16) | (0xc204 >> 2),
734         0x00000000,
735         (0x0e00 << 16) | (0xc2b4 >> 2),
736         0x00000000,
737         (0x0e00 << 16) | (0xc2b8 >> 2),
738         0x00000000,
739         (0x0e00 << 16) | (0xc2bc >> 2),
740         0x00000000,
741         (0x0e00 << 16) | (0xc2c0 >> 2),
742         0x00000000,
743         (0x0e00 << 16) | (0x8228 >> 2),
744         0x00000000,
745         (0x0e00 << 16) | (0x829c >> 2),
746         0x00000000,
747         (0x0e00 << 16) | (0x869c >> 2),
748         0x00000000,
749         (0x0600 << 16) | (0x98f4 >> 2),
750         0x00000000,
751         (0x0e00 << 16) | (0x98f8 >> 2),
752         0x00000000,
753         (0x0e00 << 16) | (0x9900 >> 2),
754         0x00000000,
755         (0x0e00 << 16) | (0xc260 >> 2),
756         0x00000000,
757         (0x0e00 << 16) | (0x90e8 >> 2),
758         0x00000000,
759         (0x0e00 << 16) | (0x3c000 >> 2),
760         0x00000000,
761         (0x0e00 << 16) | (0x3c00c >> 2),
762         0x00000000,
763         (0x0e00 << 16) | (0x8c1c >> 2),
764         0x00000000,
765         (0x0e00 << 16) | (0x9700 >> 2),
766         0x00000000,
767         (0x0e00 << 16) | (0xcd20 >> 2),
768         0x00000000,
769         (0x4e00 << 16) | (0xcd20 >> 2),
770         0x00000000,
771         (0x5e00 << 16) | (0xcd20 >> 2),
772         0x00000000,
773         (0x6e00 << 16) | (0xcd20 >> 2),
774         0x00000000,
775         (0x7e00 << 16) | (0xcd20 >> 2),
776         0x00000000,
777         (0x0e00 << 16) | (0x89bc >> 2),
778         0x00000000,
779         (0x0e00 << 16) | (0x8900 >> 2),
780         0x00000000,
781         0x3,
782         (0x0e00 << 16) | (0xc130 >> 2),
783         0x00000000,
784         (0x0e00 << 16) | (0xc134 >> 2),
785         0x00000000,
786         (0x0e00 << 16) | (0xc1fc >> 2),
787         0x00000000,
788         (0x0e00 << 16) | (0xc208 >> 2),
789         0x00000000,
790         (0x0e00 << 16) | (0xc264 >> 2),
791         0x00000000,
792         (0x0e00 << 16) | (0xc268 >> 2),
793         0x00000000,
794         (0x0e00 << 16) | (0xc26c >> 2),
795         0x00000000,
796         (0x0e00 << 16) | (0xc270 >> 2),
797         0x00000000,
798         (0x0e00 << 16) | (0xc274 >> 2),
799         0x00000000,
800         (0x0e00 << 16) | (0xc28c >> 2),
801         0x00000000,
802         (0x0e00 << 16) | (0xc290 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xc294 >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0xc298 >> 2),
807         0x00000000,
808         (0x0e00 << 16) | (0xc2a0 >> 2),
809         0x00000000,
810         (0x0e00 << 16) | (0xc2a4 >> 2),
811         0x00000000,
812         (0x0e00 << 16) | (0xc2a8 >> 2),
813         0x00000000,
814         (0x0e00 << 16) | (0xc2ac >> 2),
815         0x00000000,
816         (0x0e00 << 16) | (0x301d0 >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0x30238 >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0x30250 >> 2),
821         0x00000000,
822         (0x0e00 << 16) | (0x30254 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0x30258 >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0x3025c >> 2),
827         0x00000000,
828         (0x4e00 << 16) | (0xc900 >> 2),
829         0x00000000,
830         (0x5e00 << 16) | (0xc900 >> 2),
831         0x00000000,
832         (0x6e00 << 16) | (0xc900 >> 2),
833         0x00000000,
834         (0x7e00 << 16) | (0xc900 >> 2),
835         0x00000000,
836         (0x4e00 << 16) | (0xc904 >> 2),
837         0x00000000,
838         (0x5e00 << 16) | (0xc904 >> 2),
839         0x00000000,
840         (0x6e00 << 16) | (0xc904 >> 2),
841         0x00000000,
842         (0x7e00 << 16) | (0xc904 >> 2),
843         0x00000000,
844         (0x4e00 << 16) | (0xc908 >> 2),
845         0x00000000,
846         (0x5e00 << 16) | (0xc908 >> 2),
847         0x00000000,
848         (0x6e00 << 16) | (0xc908 >> 2),
849         0x00000000,
850         (0x7e00 << 16) | (0xc908 >> 2),
851         0x00000000,
852         (0x4e00 << 16) | (0xc90c >> 2),
853         0x00000000,
854         (0x5e00 << 16) | (0xc90c >> 2),
855         0x00000000,
856         (0x6e00 << 16) | (0xc90c >> 2),
857         0x00000000,
858         (0x7e00 << 16) | (0xc90c >> 2),
859         0x00000000,
860         (0x4e00 << 16) | (0xc910 >> 2),
861         0x00000000,
862         (0x5e00 << 16) | (0xc910 >> 2),
863         0x00000000,
864         (0x6e00 << 16) | (0xc910 >> 2),
865         0x00000000,
866         (0x7e00 << 16) | (0xc910 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0xc99c >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0x9834 >> 2),
871         0x00000000,
872         (0x0000 << 16) | (0x30f00 >> 2),
873         0x00000000,
874         (0x0000 << 16) | (0x30f04 >> 2),
875         0x00000000,
876         (0x0000 << 16) | (0x30f08 >> 2),
877         0x00000000,
878         (0x0000 << 16) | (0x30f0c >> 2),
879         0x00000000,
880         (0x0600 << 16) | (0x9b7c >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0x8a14 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0x8a18 >> 2),
885         0x00000000,
886         (0x0600 << 16) | (0x30a00 >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0x8bf0 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0x8bcc >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0x8b24 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0x30a04 >> 2),
895         0x00000000,
896         (0x0600 << 16) | (0x30a10 >> 2),
897         0x00000000,
898         (0x0600 << 16) | (0x30a14 >> 2),
899         0x00000000,
900         (0x0600 << 16) | (0x30a18 >> 2),
901         0x00000000,
902         (0x0600 << 16) | (0x30a2c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0xc700 >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0xc704 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0xc708 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0xc768 >> 2),
911         0x00000000,
912         (0x0400 << 16) | (0xc770 >> 2),
913         0x00000000,
914         (0x0400 << 16) | (0xc774 >> 2),
915         0x00000000,
916         (0x0400 << 16) | (0xc798 >> 2),
917         0x00000000,
918         (0x0400 << 16) | (0xc79c >> 2),
919         0x00000000,
920         (0x0e00 << 16) | (0x9100 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0x3c010 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0x8c00 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0x8c04 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0x8c20 >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0x8c38 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0x8c3c >> 2),
933         0x00000000,
934         (0x0e00 << 16) | (0xae00 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0x9604 >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0xac08 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0xac0c >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0xac10 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0xac14 >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0xac58 >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0xac68 >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0xac6c >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0xac70 >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0xac74 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0xac78 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0xac7c >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0xac80 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0xac84 >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0xac88 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0xac8c >> 2),
967         0x00000000,
968         (0x0e00 << 16) | (0x970c >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x9714 >> 2),
971         0x00000000,
972         (0x0e00 << 16) | (0x9718 >> 2),
973         0x00000000,
974         (0x0e00 << 16) | (0x971c >> 2),
975         0x00000000,
976         (0x0e00 << 16) | (0x31068 >> 2),
977         0x00000000,
978         (0x4e00 << 16) | (0x31068 >> 2),
979         0x00000000,
980         (0x5e00 << 16) | (0x31068 >> 2),
981         0x00000000,
982         (0x6e00 << 16) | (0x31068 >> 2),
983         0x00000000,
984         (0x7e00 << 16) | (0x31068 >> 2),
985         0x00000000,
986         (0x0e00 << 16) | (0xcd10 >> 2),
987         0x00000000,
988         (0x0e00 << 16) | (0xcd14 >> 2),
989         0x00000000,
990         (0x0e00 << 16) | (0x88b0 >> 2),
991         0x00000000,
992         (0x0e00 << 16) | (0x88b4 >> 2),
993         0x00000000,
994         (0x0e00 << 16) | (0x88b8 >> 2),
995         0x00000000,
996         (0x0e00 << 16) | (0x88bc >> 2),
997         0x00000000,
998         (0x0400 << 16) | (0x89c0 >> 2),
999         0x00000000,
1000         (0x0e00 << 16) | (0x88c4 >> 2),
1001         0x00000000,
1002         (0x0e00 << 16) | (0x88c8 >> 2),
1003         0x00000000,
1004         (0x0e00 << 16) | (0x88d0 >> 2),
1005         0x00000000,
1006         (0x0e00 << 16) | (0x88d4 >> 2),
1007         0x00000000,
1008         (0x0e00 << 16) | (0x88d8 >> 2),
1009         0x00000000,
1010         (0x0e00 << 16) | (0x8980 >> 2),
1011         0x00000000,
1012         (0x0e00 << 16) | (0x30938 >> 2),
1013         0x00000000,
1014         (0x0e00 << 16) | (0x3093c >> 2),
1015         0x00000000,
1016         (0x0e00 << 16) | (0x30940 >> 2),
1017         0x00000000,
1018         (0x0e00 << 16) | (0x89a0 >> 2),
1019         0x00000000,
1020         (0x0e00 << 16) | (0x30900 >> 2),
1021         0x00000000,
1022         (0x0e00 << 16) | (0x30904 >> 2),
1023         0x00000000,
1024         (0x0e00 << 16) | (0x89b4 >> 2),
1025         0x00000000,
1026         (0x0e00 << 16) | (0x3e1fc >> 2),
1027         0x00000000,
1028         (0x0e00 << 16) | (0x3c210 >> 2),
1029         0x00000000,
1030         (0x0e00 << 16) | (0x3c214 >> 2),
1031         0x00000000,
1032         (0x0e00 << 16) | (0x3c218 >> 2),
1033         0x00000000,
1034         (0x0e00 << 16) | (0x8904 >> 2),
1035         0x00000000,
1036         0x5,
1037         (0x0e00 << 16) | (0x8c28 >> 2),
1038         (0x0e00 << 16) | (0x8c2c >> 2),
1039         (0x0e00 << 16) | (0x8c30 >> 2),
1040         (0x0e00 << 16) | (0x8c34 >> 2),
1041         (0x0e00 << 16) | (0x9600 >> 2),
1042 };
1043
/* Bonaire SPM "golden" register fixup.  Rows are {offset, AND mask, OR value}
 * triplets consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers() for CHIP_BONAIRE. */
static const u32 bonaire_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1048
/* Bonaire common "golden" register settings ({offset, mask, value} triplets),
 * applied via radeon_program_register_sequence() for CHIP_BONAIRE. */
static const u32 bonaire_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1056
/* Bonaire chip-specific "golden" register settings.  Each row is an
 * {offset, AND mask, OR value} triplet programmed by
 * radeon_program_register_sequence() during cik_init_golden_registers().
 * Values come from AMD-validated hardware defaults; do not edit without
 * the CIK register specification. */
static const u32 bonaire_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x3350, 0x000c0fc0, 0x00040200,
        0x9a10, 0x00010000, 0x00058208,
        0x3c000, 0xffff1fff, 0x00140000,
        0x3c200, 0xfdfc0fff, 0x00000100,
        0x3c234, 0x40000000, 0x40000200,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x220c, 0x00007fb6, 0x0021a1b1,
        0x2210, 0x00007fb6, 0x002021b1,
        0x2180, 0x00007fb6, 0x00002191,
        0x2218, 0x00007fb6, 0x002121b1,
        0x221c, 0x00007fb6, 0x002021b1,
        0x21dc, 0x00007fb6, 0x00002191,
        0x21e0, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8a14, 0xf000003f, 0x00000007,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0x9100, 0x03000000, 0x0362c688,
        0x8c00, 0x000000ff, 0x00000001,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xac14, 0x000003ff, 0x000000f3,
        0xac0c, 0xffffffff, 0x00001032
};
1101
/* Bonaire medium-grain / coarse-grain clock-gating (MGCG/CGCG) init table.
 * Rows are {offset, AND mask, OR value} triplets applied through
 * radeon_program_register_sequence() before the golden-register tables
 * (see cik_init_golden_registers(), CHIP_BONAIRE case). */
static const u32 bonaire_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0xc0000100,
        0x3c2c8, 0xffffffff, 0xc0000100,
        0x3c2c4, 0xffffffff, 0xc0000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* 0x3c020-0x3c0a8: repeating 5-entry pattern, stride 4 */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1187
/* Spectre (Kaveri) SPM "golden" register fixup ({offset, mask, value}),
 * applied for CHIP_KAVERI in cik_init_golden_registers(). */
static const u32 spectre_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1192
/* Spectre (Kaveri) common "golden" register settings ({offset, mask, value}).
 * Identical content to the Bonaire/Kalindi common tables. */
static const u32 spectre_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1200
/* Spectre (Kaveri) chip-specific "golden" register settings.
 * {offset, AND mask, OR value} triplets programmed by
 * radeon_program_register_sequence() for CHIP_KAVERI. */
static const u32 spectre_golden_registers[] =
{
        0x3c000, 0xffff1fff, 0x96940200,
        0x3c00c, 0xffff0001, 0xff000000,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0xfffffffc, 0x00020200,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x2f48, 0x73773777, 0x12010001,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x28350, 0x3f3f3fff, 0x00000082,
        0x28354, 0x0000003f, 0x00000000,
        0x3e78, 0x00000001, 0x00000002,
        0x913c, 0xffff03df, 0x00000004,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000008ff, 0x00000800,
        0x9508, 0x00010000, 0x00010000,
        0xac0c, 0xffffffff, 0x54763210,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x30934, 0xffffffff, 0x00000001
};
1229
/* Spectre (Kaveri) MGCG/CGCG clock-gating init table
 * ({offset, AND mask, OR value} triplets for
 * radeon_program_register_sequence(), CHIP_KAVERI case). */
static const u32 spectre_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* 0x3c020-0x3c0bc: repeating 5-entry pattern, stride 4 */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1320
/* Kalindi (Kabini; also reused for Mullins) SPM "golden" register fixup
 * ({offset, mask, value}), see cik_init_golden_registers(). */
static const u32 kalindi_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1325
/* Kalindi (Kabini/Mullins) common "golden" register settings
 * ({offset, mask, value} triplets). */
static const u32 kalindi_golden_common_registers[] =
{
        0xc770, 0xffffffff, 0x00000800,
        0xc774, 0xffffffff, 0x00000800,
        0xc798, 0xffffffff, 0x00007fbf,
        0xc79c, 0xffffffff, 0x00007faf
};
1333
/* Kalindi (Kabini) chip-specific "golden" register settings.
 * {offset, AND mask, OR value} triplets programmed by
 * radeon_program_register_sequence() for CHIP_KABINI. */
static const u32 kalindi_golden_registers[] =
{
        0x3c000, 0xffffdfff, 0x6e944040,
        0x55e4, 0xff607fff, 0xfc000100,
        0x3c220, 0xff000fff, 0x00000100,
        0x3c224, 0xff000fff, 0x00000100,
        0x3c200, 0xfffc0fff, 0x00000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x9b7c, 0x00ff0000, 0x00fc0000,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0x3fff3fff, 0x00ffcfff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0x8c00, 0x000000ff, 0x00000003,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1367
/* Kalindi MGCG/CGCG clock-gating init table, shared by CHIP_KABINI and
 * CHIP_MULLINS ({offset, AND mask, OR value} triplets for
 * radeon_program_register_sequence()). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffc,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00600100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* 0x3c020-0x3c044: repeating 5-entry pattern, stride 4 */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c000, 0xffffffff, 0x96e00200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xC060000C,
        0x224, 0xc0000fff, 0x00000100,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1426
/* Hawaii SPM "golden" register fixup ({offset, mask, value}).
 * Presumably applied in the CHIP_HAWAII branch of
 * cik_init_golden_registers(), which continues past this excerpt. */
static const u32 hawaii_golden_spm_registers[] =
{
        0x30800, 0xe0ffffff, 0xe0000000
};
1431
/* Hawaii common "golden" register settings ({offset, mask, value} triplets).
 * Note: uses full 0xffffffff masks, unlike the other chips' common tables. */
static const u32 hawaii_golden_common_registers[] =
{
        0x30800, 0xffffffff, 0xe0000000,
        0x28350, 0xffffffff, 0x3a00161a,
        0x28354, 0xffffffff, 0x0000002e,
        0x9a10, 0xffffffff, 0x00018208,
        0x98f8, 0xffffffff, 0x12011003
};
1440
/* Hawaii chip-specific "golden" register settings
 * ({offset, AND mask, OR value} triplets). */
static const u32 hawaii_golden_registers[] =
{
        0x3354, 0x00000333, 0x00000333,
        0x9a10, 0x00010000, 0x00058208,
        0x9830, 0xffffffff, 0x00000000,
        0x9834, 0xf00fffff, 0x00000400,
        0x9838, 0x0002021c, 0x00020200,
        0xc78, 0x00000080, 0x00000000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x350c, 0x00810000, 0x408af000,
        0x7030, 0x31000111, 0x00000011,
        0x2f48, 0x73773777, 0x12010001,
        0x2120, 0x0000007f, 0x0000001b,
        0x21dc, 0x00007fb6, 0x00002191,
        0x3628, 0x0000003f, 0x0000000a,
        0x362c, 0x0000003f, 0x0000000a,
        0x2ae4, 0x00073ffe, 0x000022a2,
        0x240c, 0x000007ff, 0x00000000,
        0x8bf0, 0x00002001, 0x00000001,
        0x8b24, 0xffffffff, 0x00ffffff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000800,
        0xe40, 0x00001fff, 0x00001fff,
        0x9060, 0x0000007f, 0x00000020,
        0x9508, 0x00010000, 0x00010000,
        0xae00, 0x00100000, 0x000ff07c,
        0xac14, 0x000003ff, 0x0000000f,
        0xac10, 0xffffffff, 0x7564fdec,
        0xac0c, 0xffffffff, 0x3120b9a8,
        0xac08, 0x20000000, 0x0f9c0000
};
1480
/* Hawaii MGCG/CGCG clock-gating init table ({offset, AND mask, OR value}
 * triplets), applied for CHIP_HAWAII in cik_init_golden_registers().
 * Note the first entry's value is 0xfffffffd here vs. 0xfffffffc on the
 * other CIK parts, and the repeating 0x3c0xx pattern extends to 0x3c0f8. */
static const u32 hawaii_mgcg_cgcg_init[] =
{
        0xc420, 0xffffffff, 0xfffffffd,
        0x30800, 0xffffffff, 0xe0000000,
        0x3c2a0, 0xffffffff, 0x00000100,
        0x3c208, 0xffffffff, 0x00000100,
        0x3c2c0, 0xffffffff, 0x00000100,
        0x3c2c8, 0xffffffff, 0x00000100,
        0x3c2c4, 0xffffffff, 0x00000100,
        0x55e4, 0xffffffff, 0x00200100,
        0x3c280, 0xffffffff, 0x00000100,
        0x3c214, 0xffffffff, 0x06000100,
        0x3c220, 0xffffffff, 0x00000100,
        0x3c218, 0xffffffff, 0x06000100,
        0x3c204, 0xffffffff, 0x00000100,
        0x3c2e0, 0xffffffff, 0x00000100,
        0x3c224, 0xffffffff, 0x00000100,
        0x3c200, 0xffffffff, 0x00000100,
        0x3c230, 0xffffffff, 0x00000100,
        0x3c234, 0xffffffff, 0x00000100,
        0x3c250, 0xffffffff, 0x00000100,
        0x3c254, 0xffffffff, 0x00000100,
        0x3c258, 0xffffffff, 0x00000100,
        0x3c25c, 0xffffffff, 0x00000100,
        0x3c260, 0xffffffff, 0x00000100,
        0x3c27c, 0xffffffff, 0x00000100,
        0x3c278, 0xffffffff, 0x00000100,
        0x3c210, 0xffffffff, 0x06000100,
        0x3c290, 0xffffffff, 0x00000100,
        0x3c274, 0xffffffff, 0x00000100,
        0x3c2b4, 0xffffffff, 0x00000100,
        0x3c2b0, 0xffffffff, 0x00000100,
        0x3c270, 0xffffffff, 0x00000100,
        0x30800, 0xffffffff, 0xe0000000,
        /* 0x3c020-0x3c0f8: repeating 5-entry pattern, stride 4 */
        0x3c020, 0xffffffff, 0x00010000,
        0x3c024, 0xffffffff, 0x00030002,
        0x3c028, 0xffffffff, 0x00040007,
        0x3c02c, 0xffffffff, 0x00060005,
        0x3c030, 0xffffffff, 0x00090008,
        0x3c034, 0xffffffff, 0x00010000,
        0x3c038, 0xffffffff, 0x00030002,
        0x3c03c, 0xffffffff, 0x00040007,
        0x3c040, 0xffffffff, 0x00060005,
        0x3c044, 0xffffffff, 0x00090008,
        0x3c048, 0xffffffff, 0x00010000,
        0x3c04c, 0xffffffff, 0x00030002,
        0x3c050, 0xffffffff, 0x00040007,
        0x3c054, 0xffffffff, 0x00060005,
        0x3c058, 0xffffffff, 0x00090008,
        0x3c05c, 0xffffffff, 0x00010000,
        0x3c060, 0xffffffff, 0x00030002,
        0x3c064, 0xffffffff, 0x00040007,
        0x3c068, 0xffffffff, 0x00060005,
        0x3c06c, 0xffffffff, 0x00090008,
        0x3c070, 0xffffffff, 0x00010000,
        0x3c074, 0xffffffff, 0x00030002,
        0x3c078, 0xffffffff, 0x00040007,
        0x3c07c, 0xffffffff, 0x00060005,
        0x3c080, 0xffffffff, 0x00090008,
        0x3c084, 0xffffffff, 0x00010000,
        0x3c088, 0xffffffff, 0x00030002,
        0x3c08c, 0xffffffff, 0x00040007,
        0x3c090, 0xffffffff, 0x00060005,
        0x3c094, 0xffffffff, 0x00090008,
        0x3c098, 0xffffffff, 0x00010000,
        0x3c09c, 0xffffffff, 0x00030002,
        0x3c0a0, 0xffffffff, 0x00040007,
        0x3c0a4, 0xffffffff, 0x00060005,
        0x3c0a8, 0xffffffff, 0x00090008,
        0x3c0ac, 0xffffffff, 0x00010000,
        0x3c0b0, 0xffffffff, 0x00030002,
        0x3c0b4, 0xffffffff, 0x00040007,
        0x3c0b8, 0xffffffff, 0x00060005,
        0x3c0bc, 0xffffffff, 0x00090008,
        0x3c0c0, 0xffffffff, 0x00010000,
        0x3c0c4, 0xffffffff, 0x00030002,
        0x3c0c8, 0xffffffff, 0x00040007,
        0x3c0cc, 0xffffffff, 0x00060005,
        0x3c0d0, 0xffffffff, 0x00090008,
        0x3c0d4, 0xffffffff, 0x00010000,
        0x3c0d8, 0xffffffff, 0x00030002,
        0x3c0dc, 0xffffffff, 0x00040007,
        0x3c0e0, 0xffffffff, 0x00060005,
        0x3c0e4, 0xffffffff, 0x00090008,
        0x3c0e8, 0xffffffff, 0x00010000,
        0x3c0ec, 0xffffffff, 0x00030002,
        0x3c0f0, 0xffffffff, 0x00040007,
        0x3c0f4, 0xffffffff, 0x00060005,
        0x3c0f8, 0xffffffff, 0x00090008,
        0xc318, 0xffffffff, 0x00020200,
        0x3350, 0xffffffff, 0x00000200,
        0x15c0, 0xffffffff, 0x00000400,
        0x55e8, 0xffffffff, 0x00000000,
        0x2f50, 0xffffffff, 0x00000902,
        0x3c000, 0xffffffff, 0x96940200,
        0x8708, 0xffffffff, 0x00900100,
        0xc424, 0xffffffff, 0x0020003f,
        0x38, 0xffffffff, 0x0140001c,
        0x3c, 0x000f0000, 0x000f0000,
        0x220, 0xffffffff, 0xc060000c,
        0x224, 0xc0000fff, 0x00000100,
        0xf90, 0xffffffff, 0x00000100,
        0xf98, 0x00000101, 0x00000000,
        0x20a8, 0xffffffff, 0x00000104,
        0x55e4, 0xff000fff, 0x00000100,
        0x30cc, 0xc0000fff, 0x00000104,
        0xc1e4, 0x00000001, 0x00000001,
        0xd00c, 0xff000ff0, 0x00000100,
        0xd80c, 0xff000ff0, 0x00000100
};
1591
/* Godavari (Mullins) chip-specific "golden" register settings.
 * {offset, AND mask, OR value} triplets programmed by
 * radeon_program_register_sequence() for CHIP_MULLINS. */
static const u32 godavari_golden_registers[] =
{
        0x55e4, 0xff607fff, 0xfc000100,
        0x6ed8, 0x00010101, 0x00010000,
        0x9830, 0xffffffff, 0x00000000,
        /* NOTE(review): offset 0x98302 is not dword-aligned, and every other
         * CIK table pairs this mask/value with 0x9834 (e.g. kalindi above) —
         * looks like a typo for 0x9834; verify against the CIK register spec
         * before changing. */
        0x98302, 0xf00fffff, 0x00000400,
        0x6130, 0xffffffff, 0x00010000,
        0x5bb0, 0x000000f0, 0x00000070,
        0x5bc0, 0xf0311fff, 0x80300000,
        0x98f8, 0x73773777, 0x12010001,
        0x98fc, 0xffffffff, 0x00000010,
        0x8030, 0x00001f0f, 0x0000100a,
        0x2f48, 0x73773777, 0x12010001,
        0x2408, 0x000fffff, 0x000c007f,
        0x8a14, 0xf000003f, 0x00000007,
        0x8b24, 0xffffffff, 0x00ff0fff,
        0x30a04, 0x0000ff0f, 0x00000000,
        0x28a4c, 0x07ffffff, 0x06000000,
        0x4d8, 0x00000fff, 0x00000100,
        0xd014, 0x00010000, 0x00810001,
        0xd814, 0x00010000, 0x00810001,
        0x3e78, 0x00000001, 0x00000002,
        0xc768, 0x00000008, 0x00000008,
        0xc770, 0x00000f00, 0x00000800,
        0xc774, 0x00000f00, 0x00000800,
        0xc798, 0x00ffffff, 0x00ff7fbf,
        0xc79c, 0x00ffffff, 0x00ff7faf,
        0x8c00, 0x000000ff, 0x00000001,
        0x214f8, 0x01ff01ff, 0x00000002,
        0x21498, 0x007ff800, 0x00200000,
        0x2015c, 0xffffffff, 0x00000f40,
        0x88c4, 0x001f3ae3, 0x00000082,
        0x88d4, 0x0000001f, 0x00000010,
        0x30934, 0xffffffff, 0x00000000
};
1627
1628
1629 static void cik_init_golden_registers(struct radeon_device *rdev)
1630 {
1631         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1632         mutex_lock(&rdev->grbm_idx_mutex);
1633         switch (rdev->family) {
1634         case CHIP_BONAIRE:
1635                 radeon_program_register_sequence(rdev,
1636                                                  bonaire_mgcg_cgcg_init,
1637                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1638                 radeon_program_register_sequence(rdev,
1639                                                  bonaire_golden_registers,
1640                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1641                 radeon_program_register_sequence(rdev,
1642                                                  bonaire_golden_common_registers,
1643                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1644                 radeon_program_register_sequence(rdev,
1645                                                  bonaire_golden_spm_registers,
1646                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1647                 break;
1648         case CHIP_KABINI:
1649                 radeon_program_register_sequence(rdev,
1650                                                  kalindi_mgcg_cgcg_init,
1651                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1652                 radeon_program_register_sequence(rdev,
1653                                                  kalindi_golden_registers,
1654                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1655                 radeon_program_register_sequence(rdev,
1656                                                  kalindi_golden_common_registers,
1657                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1658                 radeon_program_register_sequence(rdev,
1659                                                  kalindi_golden_spm_registers,
1660                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1661                 break;
1662         case CHIP_MULLINS:
1663                 radeon_program_register_sequence(rdev,
1664                                                  kalindi_mgcg_cgcg_init,
1665                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1666                 radeon_program_register_sequence(rdev,
1667                                                  godavari_golden_registers,
1668                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1669                 radeon_program_register_sequence(rdev,
1670                                                  kalindi_golden_common_registers,
1671                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1672                 radeon_program_register_sequence(rdev,
1673                                                  kalindi_golden_spm_registers,
1674                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1675                 break;
1676         case CHIP_KAVERI:
1677                 radeon_program_register_sequence(rdev,
1678                                                  spectre_mgcg_cgcg_init,
1679                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1680                 radeon_program_register_sequence(rdev,
1681                                                  spectre_golden_registers,
1682                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1683                 radeon_program_register_sequence(rdev,
1684                                                  spectre_golden_common_registers,
1685                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1686                 radeon_program_register_sequence(rdev,
1687                                                  spectre_golden_spm_registers,
1688                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1689                 break;
1690         case CHIP_HAWAII:
1691                 radeon_program_register_sequence(rdev,
1692                                                  hawaii_mgcg_cgcg_init,
1693                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1694                 radeon_program_register_sequence(rdev,
1695                                                  hawaii_golden_registers,
1696                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1697                 radeon_program_register_sequence(rdev,
1698                                                  hawaii_golden_common_registers,
1699                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1700                 radeon_program_register_sequence(rdev,
1701                                                  hawaii_golden_spm_registers,
1702                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1703                 break;
1704         default:
1705                 break;
1706         }
1707         mutex_unlock(&rdev->grbm_idx_mutex);
1708 }
1709
1710 /**
1711  * cik_get_xclk - get the xclk
1712  *
1713  * @rdev: radeon_device pointer
1714  *
1715  * Returns the reference clock used by the gfx engine
1716  * (CIK).
1717  */
1718 u32 cik_get_xclk(struct radeon_device *rdev)
1719 {
1720         u32 reference_clock = rdev->clock.spll.reference_freq;
1721
1722         if (rdev->flags & RADEON_IS_IGP) {
1723                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724                         return reference_clock / 2;
1725         } else {
1726                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727                         return reference_clock / 4;
1728         }
1729         return reference_clock;
1730 }
1731
1732 /**
1733  * cik_mm_rdoorbell - read a doorbell dword
1734  *
1735  * @rdev: radeon_device pointer
1736  * @index: doorbell index
1737  *
1738  * Returns the value in the doorbell aperture at the
1739  * requested doorbell index (CIK).
1740  */
1741 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742 {
1743         if (index < rdev->doorbell.num_doorbells) {
1744                 return readl(rdev->doorbell.ptr + index);
1745         } else {
1746                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747                 return 0;
1748         }
1749 }
1750
1751 /**
1752  * cik_mm_wdoorbell - write a doorbell dword
1753  *
1754  * @rdev: radeon_device pointer
1755  * @index: doorbell index
1756  * @v: value to write
1757  *
1758  * Writes @v to the doorbell aperture at the
1759  * requested doorbell index (CIK).
1760  */
1761 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762 {
1763         if (index < rdev->doorbell.num_doorbells) {
1764                 writel(v, rdev->doorbell.ptr + index);
1765         } else {
1766                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767         }
1768 }
1769
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug register table: {index, value} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before loading the legacy (headerless) MC ucode image.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1811
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug register table: {index, value} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before loading the legacy (headerless) MC ucode image.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1839
1840
1841 /**
1842  * cik_srbm_select - select specific register instances
1843  *
1844  * @rdev: radeon_device pointer
1845  * @me: selected ME (micro engine)
1846  * @pipe: pipe
1847  * @queue: queue
1848  * @vmid: VMID
1849  *
1850  * Switches the currently active registers instances.  Some
1851  * registers are instanced per VMID, others are instanced per
1852  * me/pipe/queue combination.
1853  */
1854 static void cik_srbm_select(struct radeon_device *rdev,
1855                             u32 me, u32 pipe, u32 queue, u32 vmid)
1856 {
1857         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858                              MEID(me & 0x3) |
1859                              VMID(vmid & 0xf) |
1860                              QUEUEID(queue & 0x7));
1861         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862 }
1863
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).  Supports both the
 * new validated firmware format (sizes/offsets read from the
 * image header, little-endian payload) and the legacy format
 * (raw big-endian dwords with io regs from the static per-asic
 * tables above).  A no-op if the MC engine is already running.
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* validated image: layout described by its own header */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io debug array is {index, value} dword pairs, hence /(4*2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy image: whole file is the ucode payload; io regs
		 * come from the static per-asic tables above */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only program the engine if it is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				/* pairs stream consecutively: index then value */
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* extra overrides for PCI device 0x6649 when MC_SEQ_MISC0
		 * reports revision 0x56xx */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
		/* NOTE(review): a training timeout is not reported — the loops
		 * simply expire and 0 is still returned; confirm intentional */
	}

	return 0;
}
1969
1970 /**
1971  * cik_init_microcode - load ucode images from disk
1972  *
1973  * @rdev: radeon_device pointer
1974  *
1975  * Use the firmware interface to load the ucode images into
1976  * the driver (not loaded into hw).
1977  * Returns 0 on success, error on failure.
1978  */
1979 static int cik_init_microcode(struct radeon_device *rdev)
1980 {
1981         const char *chip_name;
1982         const char *new_chip_name;
1983         size_t pfp_req_size, me_req_size, ce_req_size,
1984                 mec_req_size, rlc_req_size, mc_req_size = 0,
1985                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986         char fw_name[30];
1987         int new_fw = 0;
1988         int err;
1989         int num_fw;
1990         bool new_smc = false;
1991
1992         DRM_DEBUG("\n");
1993
1994         switch (rdev->family) {
1995         case CHIP_BONAIRE:
1996                 chip_name = "BONAIRE";
1997                 if ((rdev->pdev->revision == 0x80) ||
1998                     (rdev->pdev->revision == 0x81) ||
1999                     (rdev->pdev->device == 0x665f))
2000                         new_smc = true;
2001                 new_chip_name = "bonaire";
2002                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2004                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011                 num_fw = 8;
2012                 break;
2013         case CHIP_HAWAII:
2014                 chip_name = "HAWAII";
2015                 if (rdev->pdev->revision == 0x80)
2016                         new_smc = true;
2017                 new_chip_name = "hawaii";
2018                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2020                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027                 num_fw = 8;
2028                 break;
2029         case CHIP_KAVERI:
2030                 chip_name = "KAVERI";
2031                 new_chip_name = "kaveri";
2032                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2034                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038                 num_fw = 7;
2039                 break;
2040         case CHIP_KABINI:
2041                 chip_name = "KABINI";
2042                 new_chip_name = "kabini";
2043                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2045                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049                 num_fw = 6;
2050                 break;
2051         case CHIP_MULLINS:
2052                 chip_name = "MULLINS";
2053                 new_chip_name = "mullins";
2054                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055                 me_req_size = CIK_ME_UCODE_SIZE * 4;
2056                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060                 num_fw = 6;
2061                 break;
2062         default: BUG();
2063         }
2064
2065         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066
2067         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069         if (err) {
2070                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072                 if (err)
2073                         goto out;
2074                 if (rdev->pfp_fw->size != pfp_req_size) {
2075                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076                                rdev->pfp_fw->size, fw_name);
2077                         err = -EINVAL;
2078                         goto out;
2079                 }
2080         } else {
2081                 err = radeon_ucode_validate(rdev->pfp_fw);
2082                 if (err) {
2083                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084                                fw_name);
2085                         goto out;
2086                 } else {
2087                         new_fw++;
2088                 }
2089         }
2090
2091         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093         if (err) {
2094                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096                 if (err)
2097                         goto out;
2098                 if (rdev->me_fw->size != me_req_size) {
2099                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100                                rdev->me_fw->size, fw_name);
2101                         err = -EINVAL;
2102                 }
2103         } else {
2104                 err = radeon_ucode_validate(rdev->me_fw);
2105                 if (err) {
2106                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107                                fw_name);
2108                         goto out;
2109                 } else {
2110                         new_fw++;
2111                 }
2112         }
2113
2114         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116         if (err) {
2117                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119                 if (err)
2120                         goto out;
2121                 if (rdev->ce_fw->size != ce_req_size) {
2122                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123                                rdev->ce_fw->size, fw_name);
2124                         err = -EINVAL;
2125                 }
2126         } else {
2127                 err = radeon_ucode_validate(rdev->ce_fw);
2128                 if (err) {
2129                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130                                fw_name);
2131                         goto out;
2132                 } else {
2133                         new_fw++;
2134                 }
2135         }
2136
2137         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139         if (err) {
2140                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142                 if (err)
2143                         goto out;
2144                 if (rdev->mec_fw->size != mec_req_size) {
2145                         pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146                                rdev->mec_fw->size, fw_name);
2147                         err = -EINVAL;
2148                 }
2149         } else {
2150                 err = radeon_ucode_validate(rdev->mec_fw);
2151                 if (err) {
2152                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153                                fw_name);
2154                         goto out;
2155                 } else {
2156                         new_fw++;
2157                 }
2158         }
2159
2160         if (rdev->family == CHIP_KAVERI) {
2161                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163                 if (err) {
2164                         goto out;
2165                 } else {
2166                         err = radeon_ucode_validate(rdev->mec2_fw);
2167                         if (err) {
2168                                 goto out;
2169                         } else {
2170                                 new_fw++;
2171                         }
2172                 }
2173         }
2174
2175         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177         if (err) {
2178                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180                 if (err)
2181                         goto out;
2182                 if (rdev->rlc_fw->size != rlc_req_size) {
2183                         pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184                                rdev->rlc_fw->size, fw_name);
2185                         err = -EINVAL;
2186                 }
2187         } else {
2188                 err = radeon_ucode_validate(rdev->rlc_fw);
2189                 if (err) {
2190                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191                                fw_name);
2192                         goto out;
2193                 } else {
2194                         new_fw++;
2195                 }
2196         }
2197
2198         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200         if (err) {
2201                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203                 if (err)
2204                         goto out;
2205                 if (rdev->sdma_fw->size != sdma_req_size) {
2206                         pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207                                rdev->sdma_fw->size, fw_name);
2208                         err = -EINVAL;
2209                 }
2210         } else {
2211                 err = radeon_ucode_validate(rdev->sdma_fw);
2212                 if (err) {
2213                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214                                fw_name);
2215                         goto out;
2216                 } else {
2217                         new_fw++;
2218                 }
2219         }
2220
2221         /* No SMC, MC ucode on APUs */
2222         if (!(rdev->flags & RADEON_IS_IGP)) {
2223                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225                 if (err) {
2226                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228                         if (err) {
2229                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231                                 if (err)
2232                                         goto out;
2233                         }
2234                         if ((rdev->mc_fw->size != mc_req_size) &&
2235                             (rdev->mc_fw->size != mc2_req_size)){
2236                                 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237                                        rdev->mc_fw->size, fw_name);
2238                                 err = -EINVAL;
2239                         }
2240                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241                 } else {
2242                         err = radeon_ucode_validate(rdev->mc_fw);
2243                         if (err) {
2244                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245                                        fw_name);
2246                                 goto out;
2247                         } else {
2248                                 new_fw++;
2249                         }
2250                 }
2251
2252                 if (new_smc)
2253                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254                 else
2255                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257                 if (err) {
2258                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260                         if (err) {
2261                                 pr_err("smc: error loading firmware \"%s\"\n",
2262                                        fw_name);
2263                                 release_firmware(rdev->smc_fw);
2264                                 rdev->smc_fw = NULL;
2265                                 err = 0;
2266                         } else if (rdev->smc_fw->size != smc_req_size) {
2267                                 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268                                        rdev->smc_fw->size, fw_name);
2269                                 err = -EINVAL;
2270                         }
2271                 } else {
2272                         err = radeon_ucode_validate(rdev->smc_fw);
2273                         if (err) {
2274                                 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275                                        fw_name);
2276                                 goto out;
2277                         } else {
2278                                 new_fw++;
2279                         }
2280                 }
2281         }
2282
2283         if (new_fw == 0) {
2284                 rdev->new_fw = false;
2285         } else if (new_fw < num_fw) {
2286                 pr_err("ci_fw: mixing new and old firmware!\n");
2287                 err = -EINVAL;
2288         } else {
2289                 rdev->new_fw = true;
2290         }
2291
2292 out:
2293         if (err) {
2294                 if (err != -EINVAL)
2295                         pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296                                fw_name);
2297                 release_firmware(rdev->pfp_fw);
2298                 rdev->pfp_fw = NULL;
2299                 release_firmware(rdev->me_fw);
2300                 rdev->me_fw = NULL;
2301                 release_firmware(rdev->ce_fw);
2302                 rdev->ce_fw = NULL;
2303                 release_firmware(rdev->mec_fw);
2304                 rdev->mec_fw = NULL;
2305                 release_firmware(rdev->mec2_fw);
2306                 rdev->mec2_fw = NULL;
2307                 release_firmware(rdev->rlc_fw);
2308                 rdev->rlc_fw = NULL;
2309                 release_firmware(rdev->sdma_fw);
2310                 rdev->sdma_fw = NULL;
2311                 release_firmware(rdev->mc_fw);
2312                 rdev->mc_fw = NULL;
2313                 release_firmware(rdev->smc_fw);
2314                 rdev->smc_fw = NULL;
2315         }
2316         return err;
2317 }
2318
2319 /*
2320  * Core functions
2321  */
2322 /**
2323  * cik_tiling_mode_table_init - init the hw tiling table
2324  *
2325  * @rdev: radeon_device pointer
2326  *
2327  * Starting with SI, the tiling setup is done globally in a
2328  * set of 32 tiling modes.  Rather than selecting each set of
2329  * parameters per surface as on older asics, we just select
2330  * which index in the tiling table we want to use, and the
2331  * surface uses those parameters (CIK).
2332  */
2333 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334 {
2335         u32 *tile = rdev->config.cik.tile_mode_array;
2336         u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337         const u32 num_tile_mode_states =
2338                         ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339         const u32 num_secondary_tile_mode_states =
2340                         ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341         u32 reg_offset, split_equal_to_row_size;
2342         u32 num_pipe_configs;
2343         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344                 rdev->config.cik.max_shader_engines;
2345
2346         switch (rdev->config.cik.mem_row_size_in_kb) {
2347         case 1:
2348                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349                 break;
2350         case 2:
2351         default:
2352                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353                 break;
2354         case 4:
2355                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356                 break;
2357         }
2358
2359         num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360         if (num_pipe_configs > 8)
2361                 num_pipe_configs = 16;
2362
2363         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364                 tile[reg_offset] = 0;
2365         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366                 macrotile[reg_offset] = 0;
2367
2368         switch(num_pipe_configs) {
2369         case 16:
2370                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389                            TILE_SPLIT(split_equal_to_row_size));
2390                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                            TILE_SPLIT(split_equal_to_row_size));
2401                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                            PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                             PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446                             PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448
2449                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                            NUM_BANKS(ADDR_SURF_16_BANK));
2453                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                            NUM_BANKS(ADDR_SURF_16_BANK));
2457                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                            NUM_BANKS(ADDR_SURF_16_BANK));
2461                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464                            NUM_BANKS(ADDR_SURF_16_BANK));
2465                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468                            NUM_BANKS(ADDR_SURF_8_BANK));
2469                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472                            NUM_BANKS(ADDR_SURF_4_BANK));
2473                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476                            NUM_BANKS(ADDR_SURF_2_BANK));
2477                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480                            NUM_BANKS(ADDR_SURF_16_BANK));
2481                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484                            NUM_BANKS(ADDR_SURF_16_BANK));
2485                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488                             NUM_BANKS(ADDR_SURF_16_BANK));
2489                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492                             NUM_BANKS(ADDR_SURF_8_BANK));
2493                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496                             NUM_BANKS(ADDR_SURF_4_BANK));
2497                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500                             NUM_BANKS(ADDR_SURF_2_BANK));
2501                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502                             BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503                             MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504                             NUM_BANKS(ADDR_SURF_2_BANK));
2505
2506                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510                 break;
2511
2512         case 8:
2513                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                            TILE_SPLIT(split_equal_to_row_size));
2533                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                            TILE_SPLIT(split_equal_to_row_size));
2544                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                            PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579                 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585                             PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589                             PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591
2592                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595                                 NUM_BANKS(ADDR_SURF_16_BANK));
2596                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599                                 NUM_BANKS(ADDR_SURF_16_BANK));
2600                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603                                 NUM_BANKS(ADDR_SURF_16_BANK));
2604                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607                                 NUM_BANKS(ADDR_SURF_16_BANK));
2608                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611                                 NUM_BANKS(ADDR_SURF_8_BANK));
2612                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615                                 NUM_BANKS(ADDR_SURF_4_BANK));
2616                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619                                 NUM_BANKS(ADDR_SURF_2_BANK));
2620                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623                                 NUM_BANKS(ADDR_SURF_16_BANK));
2624                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627                                 NUM_BANKS(ADDR_SURF_16_BANK));
2628                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631                                 NUM_BANKS(ADDR_SURF_16_BANK));
2632                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635                                 NUM_BANKS(ADDR_SURF_16_BANK));
2636                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639                                 NUM_BANKS(ADDR_SURF_8_BANK));
2640                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643                                 NUM_BANKS(ADDR_SURF_4_BANK));
2644                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647                                 NUM_BANKS(ADDR_SURF_2_BANK));
2648
2649                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653                 break;
2654
2655         case 4:
2656                 if (num_rbs == 4) {
2657                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676                            TILE_SPLIT(split_equal_to_row_size));
2677                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                            TILE_SPLIT(split_equal_to_row_size));
2688                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689                            PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691                            PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733                             PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735
2736                 } else if (num_rbs < 4) {
2737                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756                            TILE_SPLIT(split_equal_to_row_size));
2757                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                            TILE_SPLIT(split_equal_to_row_size));
2768                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769                            PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813                             PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 }
2816
2817                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820                                 NUM_BANKS(ADDR_SURF_16_BANK));
2821                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840                                 NUM_BANKS(ADDR_SURF_8_BANK));
2841                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844                                 NUM_BANKS(ADDR_SURF_4_BANK));
2845                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852                                 NUM_BANKS(ADDR_SURF_16_BANK));
2853                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864                                 NUM_BANKS(ADDR_SURF_16_BANK));
2865                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868                                 NUM_BANKS(ADDR_SURF_8_BANK));
2869                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872                                 NUM_BANKS(ADDR_SURF_4_BANK));
2873
2874                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878                 break;
2879
2880         case 2:
2881                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883                            PIPE_CONFIG(ADDR_SURF_P2) |
2884                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887                            PIPE_CONFIG(ADDR_SURF_P2) |
2888                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891                            PIPE_CONFIG(ADDR_SURF_P2) |
2892                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895                            PIPE_CONFIG(ADDR_SURF_P2) |
2896                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897                 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899                            PIPE_CONFIG(ADDR_SURF_P2) |
2900                            TILE_SPLIT(split_equal_to_row_size));
2901                 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902                            PIPE_CONFIG(ADDR_SURF_P2) |
2903                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                            PIPE_CONFIG(ADDR_SURF_P2) |
2907                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909                            MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                            PIPE_CONFIG(ADDR_SURF_P2) |
2911                            TILE_SPLIT(split_equal_to_row_size));
2912                 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913                            PIPE_CONFIG(ADDR_SURF_P2);
2914                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915                            MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916                            PIPE_CONFIG(ADDR_SURF_P2));
2917                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919                             PIPE_CONFIG(ADDR_SURF_P2) |
2920                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923                             PIPE_CONFIG(ADDR_SURF_P2) |
2924                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926                             MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927                             PIPE_CONFIG(ADDR_SURF_P2) |
2928                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                             PIPE_CONFIG(ADDR_SURF_P2) |
2931                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934                             PIPE_CONFIG(ADDR_SURF_P2) |
2935                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938                             PIPE_CONFIG(ADDR_SURF_P2) |
2939                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                             MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942                             PIPE_CONFIG(ADDR_SURF_P2) |
2943                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946                             PIPE_CONFIG(ADDR_SURF_P2));
2947                 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949                             PIPE_CONFIG(ADDR_SURF_P2) |
2950                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953                             PIPE_CONFIG(ADDR_SURF_P2) |
2954                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                             MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957                             PIPE_CONFIG(ADDR_SURF_P2) |
2958                             SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959
2960                 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964                 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967                                 NUM_BANKS(ADDR_SURF_16_BANK));
2968                 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971                                 NUM_BANKS(ADDR_SURF_16_BANK));
2972                 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975                                 NUM_BANKS(ADDR_SURF_16_BANK));
2976                 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979                                 NUM_BANKS(ADDR_SURF_16_BANK));
2980                 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983                                 NUM_BANKS(ADDR_SURF_16_BANK));
2984                 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987                                 NUM_BANKS(ADDR_SURF_8_BANK));
2988                 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991                                 NUM_BANKS(ADDR_SURF_16_BANK));
2992                 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995                                 NUM_BANKS(ADDR_SURF_16_BANK));
2996                 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000                 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003                                 NUM_BANKS(ADDR_SURF_16_BANK));
3004                 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007                                 NUM_BANKS(ADDR_SURF_16_BANK));
3008                 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011                                 NUM_BANKS(ADDR_SURF_16_BANK));
3012                 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015                                 NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021                 break;
3022
3023         default:
3024                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025         }
3026 }
3027
3028 /**
3029  * cik_select_se_sh - select which SE, SH to address
3030  *
3031  * @rdev: radeon_device pointer
3032  * @se_num: shader engine to address
3033  * @sh_num: sh block to address
3034  *
3035  * Select which SE, SH combinations to address. Certain
3036  * registers are instanced per SE or SH.  0xffffffff means
3037  * broadcast to all SEs or SHs (CIK).
3038  */
3039 static void cik_select_se_sh(struct radeon_device *rdev,
3040                              u32 se_num, u32 sh_num)
3041 {
3042         u32 data = INSTANCE_BROADCAST_WRITES;
3043
3044         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046         else if (se_num == 0xffffffff)
3047                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048         else if (sh_num == 0xffffffff)
3049                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050         else
3051                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052         WREG32(GRBM_GFX_INDEX, data);
3053 }
3054
3055 /**
3056  * cik_create_bitmask - create a bitmask
3057  *
3058  * @bit_width: length of the mask
3059  *
3060  * create a variable length bit mask (CIK).
3061  * Returns the bitmask.
3062  */
3063 static u32 cik_create_bitmask(u32 bit_width)
3064 {
3065         u32 i, mask = 0;
3066
3067         for (i = 0; i < bit_width; i++) {
3068                 mask <<= 1;
3069                 mask |= 1;
3070         }
3071         return mask;
3072 }
3073
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs for the currently selected
 * SE/SH instance (see cik_select_se_sh()) by combining the hardware
 * disable field (CC_RB_BACKEND_DISABLE) with the user-requested
 * disables (GC_USER_RB_BACKEND_DISABLE) (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	/* NOTE(review): bit 0 appears to gate whether the hardware disable
	 * field is valid - confirm against the register spec */
	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	/* fold in user-disabled backends on top of the hardware ones */
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	/* one mask bit per RB belonging to this SH */
	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
3104
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).  Gathers the disabled-RB
 * mask from every SE/SH, caches the resulting enable mask in
 * rdev->config.cik.backend_enable_mask, then programs
 * PA_SC_RASTER_CONFIG per SE accordingly.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* GRBM_GFX_INDEX is shared state; serialize SE/SH selection */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii packs more RB bits per SH than the other CIK parts */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast mode before releasing the selector */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* invert the disabled mask into an enabled mask */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* cached for userspace queries */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		/* map each SH's pair of enable bits to a raster config field;
		 * enabled_rbs is consumed two bits at a time below */
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3177
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader engine / pipe / backend limits */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		/* Kaveri SKUs are binned by PCI device id: the groups below
		 * differ in active CU count and backend count */
		if ((rdev->pdev->device == 0x1304) ||
		    (rdev->pdev->device == 0x1305) ||
		    (rdev->pdev->device == 0x130C) ||
		    (rdev->pdev->device == 0x130F) ||
		    (rdev->pdev->device == 0x1310) ||
		    (rdev->pdev->device == 0x1311) ||
		    (rdev->pdev->device == 0x131C)) {
			rdev->config.cik.max_cu_per_sh = 8;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1309) ||
			   (rdev->pdev->device == 0x130A) ||
			   (rdev->pdev->device == 0x130D) ||
			   (rdev->pdev->device == 0x1313) ||
			   (rdev->pdev->device == 0x131D)) {
			rdev->config.cik.max_cu_per_sh = 6;
			rdev->config.cik.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x1306) ||
			   (rdev->pdev->device == 0x1307) ||
			   (rdev->pdev->device == 0x130B) ||
			   (rdev->pdev->device == 0x130E) ||
			   (rdev->pdev->device == 0x1315) ||
			   (rdev->pdev->device == 0x1318) ||
			   (rdev->pdev->device == 0x131B)) {
			rdev->config.cik.max_cu_per_sh = 4;
			rdev->config.cik.max_backends_per_se = 1;
		} else {
			rdev->config.cik.max_cu_per_sh = 3;
			rdev->config.cik.max_backends_per_se = 1;
		}
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read here but never used in this
	 * function - candidate for removal (verify no later use) */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the memory controller's
	 * column-count field, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the CUs actually fused on across all SE/SH */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	mutex_lock(&rdev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* read-modify-write of debug registers; the magic masks/values
	 * come from the hardware defaults for this asic family */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* write-back of the current value to latch it */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* let the hardware settle after the burst of config writes */
	udelay(50);
}
3460
3461 /*
 * GPU scratch register helper functions.
3463  */
3464 /**
3465  * cik_scratch_init - setup driver info for CP scratch regs
3466  *
3467  * @rdev: radeon_device pointer
3468  *
3469  * Set up the number and offset of the CP scratch registers.
3470  * NOTE: use of CP scratch registers is a legacy inferface and
3471  * is not used by default on newer asics (r6xx+).  On newer asics,
3472  * memory buffers are used for fences rather than scratch regs.
3473  */
3474 static void cik_scratch_init(struct radeon_device *rdev)
3475 {
3476         int i;
3477
3478         rdev->scratch.num_reg = 7;
3479         rdev->scratch.reg_base = SCRATCH_REG0;
3480         for (i = 0; i < rdev->scratch.num_reg; i++) {
3481                 rdev->scratch.free[i] = true;
3482                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3483         }
3484 }
3485
3486 /**
3487  * cik_ring_test - basic gfx ring test
3488  *
3489  * @rdev: radeon_device pointer
3490  * @ring: radeon_ring structure holding ring information
3491  *
3492  * Allocate a scratch register and write to it using the gfx ring (CIK).
3493  * Provides a basic gfx ring test to verify that the ring is working.
3494  * Used by cik_cp_gfx_resume();
3495  * Returns 0 on success, error on failure.
3496  */
3497 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3498 {
3499         uint32_t scratch;
3500         uint32_t tmp = 0;
3501         unsigned i;
3502         int r;
3503
3504         r = radeon_scratch_get(rdev, &scratch);
3505         if (r) {
3506                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3507                 return r;
3508         }
3509         WREG32(scratch, 0xCAFEDEAD);
3510         r = radeon_ring_lock(rdev, ring, 3);
3511         if (r) {
3512                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3513                 radeon_scratch_free(rdev, scratch);
3514                 return r;
3515         }
3516         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3517         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3518         radeon_ring_write(ring, 0xDEADBEEF);
3519         radeon_ring_unlock_commit(rdev, ring, false);
3520
3521         for (i = 0; i < rdev->usec_timeout; i++) {
3522                 tmp = RREG32(scratch);
3523                 if (tmp == 0xDEADBEEF)
3524                         break;
3525                 DRM_UDELAY(1);
3526         }
3527         if (i < rdev->usec_timeout) {
3528                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3529         } else {
3530                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3531                           ring->idx, scratch, tmp);
3532                 r = -EINVAL;
3533         }
3534         radeon_scratch_free(rdev, scratch);
3535         return r;
3536 }
3537
3538 /**
3539  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3540  *
3541  * @rdev: radeon_device pointer
3542  * @ridx: radeon ring index
3543  *
3544  * Emits an hdp flush on the cp.
3545  */
3546 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3547                                        int ridx)
3548 {
3549         struct radeon_ring *ring = &rdev->ring[ridx];
3550         u32 ref_and_mask;
3551
3552         switch (ring->idx) {
3553         case CAYMAN_RING_TYPE_CP1_INDEX:
3554         case CAYMAN_RING_TYPE_CP2_INDEX:
3555         default:
3556                 switch (ring->me) {
3557                 case 0:
3558                         ref_and_mask = CP2 << ring->pipe;
3559                         break;
3560                 case 1:
3561                         ref_and_mask = CP6 << ring->pipe;
3562                         break;
3563                 default:
3564                         return;
3565                 }
3566                 break;
3567         case RADEON_RING_TYPE_GFX_INDEX:
3568                 ref_and_mask = CP0;
3569                 break;
3570         }
3571
3572         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3573         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3574                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3575                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3576         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3577         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3578         radeon_ring_write(ring, ref_and_mask);
3579         radeon_ring_write(ring, ref_and_mask);
3580         radeon_ring_write(ring, 0x20); /* poll interval */
3581 }
3582
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written back */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* dummy event uses INT_SEL(0), unlike the real one below */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* NOTE(review): INT_SEL(2) presumably raises the fence interrupt
	 * once seq is written - confirm against the PM4 packet spec */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3623
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written back */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3651
3652 /**
3653  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3654  *
3655  * @rdev: radeon_device pointer
3656  * @ring: radeon ring buffer object
3657  * @semaphore: radeon semaphore object
3658  * @emit_wait: Is this a sempahore wait?
3659  *
3660  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3661  * from running ahead of semaphore waits.
3662  */
3663 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3664                              struct radeon_ring *ring,
3665                              struct radeon_semaphore *semaphore,
3666                              bool emit_wait)
3667 {
3668         uint64_t addr = semaphore->gpu_addr;
3669         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3670
3671         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3672         radeon_ring_write(ring, lower_32_bits(addr));
3673         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3674
3675         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3676                 /* Prevent the PFP from running ahead of the semaphore wait */
3677                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3678                 radeon_ring_write(ring, 0x0);
3679         }
3680
3681         return true;
3682 }
3683
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* each DMA_DATA packet below moves at most 0x1fffff bytes, so the
	 * copy is split into that many loops; 7 ring dwords are emitted
	 * per loop plus 18 reserved for the sync/fence packets */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for any fences attached to the reservation object first */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* CP_SYNC only on the final chunk of the copy */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3755
3756 /*
3757  * IB stuff
3758  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* vmid 0 is used when the IB is not tied to a VM */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * the 4-dword IB packet emitted at the end */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet below plus
			 * the 4-dword IB packet emitted at the end */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, vm id in bits 24+ */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3810
3811 /**
3812  * cik_ib_test - basic gfx ring IB test
3813  *
3814  * @rdev: radeon_device pointer
3815  * @ring: radeon_ring structure holding ring information
3816  *
3817  * Allocate an IB and execute it on the gfx ring (CIK).
3818  * Provides a basic gfx ring test to verify that IBs are working.
3819  * Returns 0 on success, error on failure.
3820  */
3821 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3822 {
3823         struct radeon_ib ib;
3824         uint32_t scratch;
3825         uint32_t tmp = 0;
3826         unsigned i;
3827         int r;
3828
3829         r = radeon_scratch_get(rdev, &scratch);
3830         if (r) {
3831                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3832                 return r;
3833         }
3834         WREG32(scratch, 0xCAFEDEAD);
3835         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3836         if (r) {
3837                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3838                 radeon_scratch_free(rdev, scratch);
3839                 return r;
3840         }
3841         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3842         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3843         ib.ptr[2] = 0xDEADBEEF;
3844         ib.length_dw = 3;
3845         r = radeon_ib_schedule(rdev, &ib, NULL, false);
3846         if (r) {
3847                 radeon_scratch_free(rdev, scratch);
3848                 radeon_ib_free(rdev, &ib);
3849                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3850                 return r;
3851         }
3852         r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3853                 RADEON_USEC_IB_TEST_TIMEOUT));
3854         if (r < 0) {
3855                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3856                 radeon_scratch_free(rdev, scratch);
3857                 radeon_ib_free(rdev, &ib);
3858                 return r;
3859         } else if (r == 0) {
3860                 DRM_ERROR("radeon: fence wait timed out.\n");
3861                 radeon_scratch_free(rdev, scratch);
3862                 radeon_ib_free(rdev, &ib);
3863                 return -ETIMEDOUT;
3864         }
3865         r = 0;
3866         for (i = 0; i < rdev->usec_timeout; i++) {
3867                 tmp = RREG32(scratch);
3868                 if (tmp == 0xDEADBEEF)
3869                         break;
3870                 DRM_UDELAY(1);
3871         }
3872         if (i < rdev->usec_timeout) {
3873                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3874         } else {
3875                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3876                           scratch, tmp);
3877                 r = -EINVAL;
3878         }
3879         radeon_scratch_free(rdev, scratch);
3880         radeon_ib_free(rdev, &ib);
3881         return r;
3882 }
3883
3884 /*
3885  * CP.
 * On CIK, gfx and compute now have independent command processors.
3887  *
3888  * GFX
3889  * Gfx consists of a single ring and can process both gfx jobs and
3890  * compute jobs.  The gfx CP consists of three microengines (ME):
3891  * PFP - Pre-Fetch Parser
3892  * ME - Micro Engine
3893  * CE - Constant Engine
3894  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3896  * used by the DE so that they can be loaded into cache in parallel
3897  * while the DE is processing state update packets.
3898  *
3899  * Compute
3900  * The compute CP consists of two microengines (ME):
3901  * MEC1 - Compute MicroEngine 1
3902  * MEC2 - Compute MicroEngine 2
3903  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3904  * The queues are exposed to userspace and are programmed directly
3905  * by the compute runtime.
3906  */
3907 /**
3908  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3909  *
3910  * @rdev: radeon_device pointer
3911  * @enable: enable or disable the MEs
3912  *
3913  * Halts or unhalts the gfx MEs.
3914  */
3915 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3916 {
3917         if (enable)
3918                 WREG32(CP_ME_CNTL, 0);
3919         else {
3920                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3921                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3922                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3923                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3924         }
3925         udelay(50);
3926 }
3927
3928 /**
3929  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3930  *
3931  * @rdev: radeon_device pointer
3932  *
3933  * Loads the gfx PFP, ME, and CE ucode.
3934  * Returns 0 for success, -EINVAL if the ucode is not available.
3935  */
3936 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3937 {
3938         int i;
3939
3940         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3941                 return -EINVAL;
3942
3943         cik_cp_gfx_enable(rdev, false);
3944
3945         if (rdev->new_fw) {
3946                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3947                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3948                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3949                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3950                 const struct gfx_firmware_header_v1_0 *me_hdr =
3951                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3952                 const __le32 *fw_data;
3953                 u32 fw_size;
3954
3955                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3956                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3957                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3958
3959                 /* PFP */
3960                 fw_data = (const __le32 *)
3961                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3962                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3963                 WREG32(CP_PFP_UCODE_ADDR, 0);
3964                 for (i = 0; i < fw_size; i++)
3965                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3966                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3967
3968                 /* CE */
3969                 fw_data = (const __le32 *)
3970                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3971                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3972                 WREG32(CP_CE_UCODE_ADDR, 0);
3973                 for (i = 0; i < fw_size; i++)
3974                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3975                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3976
3977                 /* ME */
3978                 fw_data = (const __be32 *)
3979                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3980                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3981                 WREG32(CP_ME_RAM_WADDR, 0);
3982                 for (i = 0; i < fw_size; i++)
3983                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3984                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3985                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3986         } else {
3987                 const __be32 *fw_data;
3988
3989                 /* PFP */
3990                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3991                 WREG32(CP_PFP_UCODE_ADDR, 0);
3992                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3993                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3994                 WREG32(CP_PFP_UCODE_ADDR, 0);
3995
3996                 /* CE */
3997                 fw_data = (const __be32 *)rdev->ce_fw->data;
3998                 WREG32(CP_CE_UCODE_ADDR, 0);
3999                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4000                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4001                 WREG32(CP_CE_UCODE_ADDR, 0);
4002
4003                 /* ME */
4004                 fw_data = (const __be32 *)rdev->me_fw->data;
4005                 WREG32(CP_ME_RAM_WADDR, 0);
4006                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4007                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4008                 WREG32(CP_ME_RAM_WADDR, 0);
4009         }
4010
4011         return 0;
4012 }
4013
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* 17 fixed dwords emitted below plus the clear state payload */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream out the default golden state */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4074
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before tearing down the ring */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4088
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* no writeback buffer, so don't have the CP update it */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* final CNTL write also drops the RB_RPTR_WR_ENA set above */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4164
4165 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4166                      struct radeon_ring *ring)
4167 {
4168         u32 rptr;
4169
4170         if (rdev->wb.enabled)
4171                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4172         else
4173                 rptr = RREG32(CP_RB0_RPTR);
4174
4175         return rptr;
4176 }
4177
/* cik_gfx_get_wptr - read the gfx ring write pointer from the RB0 register */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
                     struct radeon_ring *ring)
{
        return RREG32(CP_RB0_WPTR);
}
4183
/* cik_gfx_set_wptr - program the gfx ring write pointer register */
void cik_gfx_set_wptr(struct radeon_device *rdev,
                      struct radeon_ring *ring)
{
        WREG32(CP_RB0_WPTR, ring->wptr);
        /* discarded read-back, apparently to flush the posted write */
        (void)RREG32(CP_RB0_WPTR);
}
4190
4191 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4192                          struct radeon_ring *ring)
4193 {
4194         u32 rptr;
4195
4196         if (rdev->wb.enabled) {
4197                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4198         } else {
4199                 mutex_lock(&rdev->srbm_mutex);
4200                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4201                 rptr = RREG32(CP_HQD_PQ_RPTR);
4202                 cik_srbm_select(rdev, 0, 0, 0, 0);
4203                 mutex_unlock(&rdev->srbm_mutex);
4204         }
4205
4206         return rptr;
4207 }
4208
4209 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4210                          struct radeon_ring *ring)
4211 {
4212         u32 wptr;
4213
4214         if (rdev->wb.enabled) {
4215                 /* XXX check if swapping is necessary on BE */
4216                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4217         } else {
4218                 mutex_lock(&rdev->srbm_mutex);
4219                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4220                 wptr = RREG32(CP_HQD_PQ_WPTR);
4221                 cik_srbm_select(rdev, 0, 0, 0, 0);
4222                 mutex_unlock(&rdev->srbm_mutex);
4223         }
4224
4225         return wptr;
4226 }
4227
4228 void cik_compute_set_wptr(struct radeon_device *rdev,
4229                           struct radeon_ring *ring)
4230 {
4231         /* XXX check if swapping is necessary on BE */
4232         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4233         WDOORBELL32(ring->doorbell_index, ring->wptr);
4234 }
4235
/*
 * Quiesce one compute hardware queue (HQD): stop wptr polling, request a
 * dequeue, wait for the HQD to go idle, then zero the queue pointers.
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable).
 */
static void cik_compute_stop(struct radeon_device *rdev,
                             struct radeon_ring *ring)
{
        u32 j, tmp;

        /* point SRBM-indexed registers at this ring's me/pipe/queue */
        cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
        /* Disable wptr polling. */
        tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
        tmp &= ~WPTR_POLL_EN;
        WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
        /* Disable HQD. */
        if (RREG32(CP_HQD_ACTIVE) & 1) {
                WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                /* busy-wait (up to usec_timeout) for the HQD to deactivate */
                for (j = 0; j < rdev->usec_timeout; j++) {
                        if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }
                WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
                WREG32(CP_HQD_PQ_RPTR, 0);
                WREG32(CP_HQD_PQ_WPTR, 0);
        }
        /* restore the default SRBM selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
}
4260
4261 /**
4262  * cik_cp_compute_enable - enable/disable the compute CP MEs
4263  *
4264  * @rdev: radeon_device pointer
4265  * @enable: enable or disable the MEs
4266  *
4267  * Halts or unhalts the compute MEs.
4268  */
4269 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4270 {
4271         if (enable)
4272                 WREG32(CP_MEC_CNTL, 0);
4273         else {
4274                 /*
4275                  * To make hibernation reliable we need to clear compute ring
4276                  * configuration before halting the compute ring.
4277                  */
4278                 mutex_lock(&rdev->srbm_mutex);
4279                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4280                 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4281                 mutex_unlock(&rdev->srbm_mutex);
4282
4283                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4284                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4285                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4286         }
4287         udelay(50);
4288 }
4289
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
        int i;

        if (!rdev->mec_fw)
                return -EINVAL;

        /* MECs must be halted while their ucode is replaced */
        cik_cp_compute_enable(rdev, false);

        if (rdev->new_fw) {
                /* new-style firmware: parse the header for offset/size */
                const struct gfx_firmware_header_v1_0 *mec_hdr =
                        (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                const __le32 *fw_data;
                u32 fw_size;

                radeon_ucode_print_gfx_hdr(&mec_hdr->header);

                /* MEC1 */
                fw_data = (const __le32 *)
                        (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
                fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
                /* reset the ucode write address, stream the words, then
                 * leave the version in the ADDR register */
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
                for (i = 0; i < fw_size; i++)
                        WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
                WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

                /* MEC2 (only Kaveri has a second MEC) */
                if (rdev->family == CHIP_KAVERI) {
                        const struct gfx_firmware_header_v1_0 *mec2_hdr =
                                (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

                        fw_data = (const __le32 *)
                                (rdev->mec2_fw->data +
                                 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
                        fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                        for (i = 0; i < fw_size; i++)
                                WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
                        WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
                }
        } else {
                /* legacy firmware: raw big-endian words, fixed size */
                const __be32 *fw_data;

                /* MEC1 */
                fw_data = (const __be32 *)rdev->mec_fw->data;
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
                for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                        WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
                WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

                if (rdev->family == CHIP_KAVERI) {
                        /* MEC2 */
                        /* NOTE(review): legacy path feeds MEC2 from mec_fw
                         * (not mec2_fw) — presumably the legacy image serves
                         * both MECs; confirm against the firmware package */
                        fw_data = (const __be32 *)rdev->mec_fw->data;
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                        for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
                                WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
                        WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
                }
        }

        return 0;
}
4360
4361 /**
4362  * cik_cp_compute_start - start the compute queues
4363  *
4364  * @rdev: radeon_device pointer
4365  *
4366  * Enable the compute queues.
4367  * Returns 0 for success, error for failure.
4368  */
4369 static int cik_cp_compute_start(struct radeon_device *rdev)
4370 {
4371         cik_cp_compute_enable(rdev, true);
4372
4373         return 0;
4374 }
4375
4376 /**
4377  * cik_cp_compute_fini - stop the compute queues
4378  *
4379  * @rdev: radeon_device pointer
4380  *
4381  * Stop the compute queues and tear down the driver queue
4382  * info.
4383  */
4384 static void cik_cp_compute_fini(struct radeon_device *rdev)
4385 {
4386         int i, idx, r;
4387
4388         cik_cp_compute_enable(rdev, false);
4389
4390         for (i = 0; i < 2; i++) {
4391                 if (i == 0)
4392                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4393                 else
4394                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4395
4396                 if (rdev->ring[idx].mqd_obj) {
4397                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4398                         if (unlikely(r != 0))
4399                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4400
4401                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4402                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4403
4404                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4405                         rdev->ring[idx].mqd_obj = NULL;
4406                 }
4407         }
4408 }
4409
4410 static void cik_mec_fini(struct radeon_device *rdev)
4411 {
4412         int r;
4413
4414         if (rdev->mec.hpd_eop_obj) {
4415                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4416                 if (unlikely(r != 0))
4417                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4418                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4419                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4420
4421                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4422                 rdev->mec.hpd_eop_obj = NULL;
4423         }
4424 }
4425
4426 #define MEC_HPD_SIZE 2048
4427
4428 static int cik_mec_init(struct radeon_device *rdev)
4429 {
4430         int r;
4431         u32 *hpd;
4432
4433         /*
4434          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4435          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4436          * Nonetheless, we assign only 1 pipe because all other pipes will
4437          * be handled by KFD
4438          */
4439         rdev->mec.num_mec = 1;
4440         rdev->mec.num_pipe = 1;
4441         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4442
4443         if (rdev->mec.hpd_eop_obj == NULL) {
4444                 r = radeon_bo_create(rdev,
4445                                      rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4446                                      PAGE_SIZE, true,
4447                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4448                                      &rdev->mec.hpd_eop_obj);
4449                 if (r) {
4450                         dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4451                         return r;
4452                 }
4453         }
4454
4455         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4456         if (unlikely(r != 0)) {
4457                 cik_mec_fini(rdev);
4458                 return r;
4459         }
4460         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4461                           &rdev->mec.hpd_eop_gpu_addr);
4462         if (r) {
4463                 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4464                 cik_mec_fini(rdev);
4465                 return r;
4466         }
4467         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4468         if (r) {
4469                 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4470                 cik_mec_fini(rdev);
4471                 return r;
4472         }
4473
4474         /* clear memory.  Not sure if this is required or not */
4475         memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4476
4477         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4478         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4479
4480         return 0;
4481 }
4482
/*
 * Shadow of the per-queue CP_HQD_* / CP_MQD_* register block.  Embedded
 * in struct bonaire_mqd as @queue_state; cik_cp_compute_resume() fills
 * these fields and mirrors them into the corresponding registers.
 * Field order must not change — it matches the hardware MQD layout.
 */
struct hqd_registers
{
        u32 cp_mqd_base_addr;
        u32 cp_mqd_base_addr_hi;
        u32 cp_hqd_active;
        u32 cp_hqd_vmid;
        u32 cp_hqd_persistent_state;
        u32 cp_hqd_pipe_priority;
        u32 cp_hqd_queue_priority;
        u32 cp_hqd_quantum;
        u32 cp_hqd_pq_base;
        u32 cp_hqd_pq_base_hi;
        u32 cp_hqd_pq_rptr;
        u32 cp_hqd_pq_rptr_report_addr;
        u32 cp_hqd_pq_rptr_report_addr_hi;
        u32 cp_hqd_pq_wptr_poll_addr;
        u32 cp_hqd_pq_wptr_poll_addr_hi;
        u32 cp_hqd_pq_doorbell_control;
        u32 cp_hqd_pq_wptr;
        u32 cp_hqd_pq_control;
        u32 cp_hqd_ib_base_addr;
        u32 cp_hqd_ib_base_addr_hi;
        u32 cp_hqd_ib_rptr;
        u32 cp_hqd_ib_control;
        u32 cp_hqd_iq_timer;
        u32 cp_hqd_iq_rptr;
        u32 cp_hqd_dequeue_request;
        u32 cp_hqd_dma_offload;
        u32 cp_hqd_sema_cmd;
        u32 cp_hqd_msg_type;
        u32 cp_hqd_atomic0_preop_lo;
        u32 cp_hqd_atomic0_preop_hi;
        u32 cp_hqd_atomic1_preop_lo;
        u32 cp_hqd_atomic1_preop_hi;
        u32 cp_hqd_hq_scheduler0;
        u32 cp_hqd_hq_scheduler1;
        u32 cp_mqd_control;
};
4521
/*
 * Memory queue descriptor (MQD) for a Bonaire-class compute queue.
 * One of these lives in each compute ring's mqd_obj buffer object
 * (allocated in cik_cp_compute_resume()); @queue_state shadows the
 * queue's HQD registers.  Layout must not change — it is read by the
 * CP.  NOTE(review): field meanings beyond queue_state are presumed
 * from their names; confirm against the CIK MQD spec before relying
 * on them.
 */
struct bonaire_mqd
{
        u32 header;
        u32 dispatch_initiator;
        u32 dimensions[3];
        u32 start_idx[3];
        u32 num_threads[3];
        u32 pipeline_stat_enable;
        u32 perf_counter_enable;
        u32 pgm[2];
        u32 tba[2];
        u32 tma[2];
        u32 pgm_rsrc[2];
        u32 vmid;
        u32 resource_limits;
        u32 static_thread_mgmt01[2];
        u32 tmp_ring_size;
        u32 static_thread_mgmt23[2];
        u32 restart[3];
        u32 thread_trace_enable;
        u32 reserved1;
        u32 user_data[16];
        u32 vgtcs_invoke_count[2];
        struct hqd_registers queue_state;
        u32 dequeue_cntr;
        u32 interrupt_queue[64];
};
4549
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.  Allocates an MQD buffer object per ring on first
 * resume, programs the HQD registers through the SRBM-selected
 * window (under srbm_mutex), then ring-tests each queue.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
        int r, i, j, idx;
        u32 tmp;
        bool use_doorbell = true;
        u64 hqd_gpu_addr;
        u64 mqd_gpu_addr;
        u64 eop_gpu_addr;
        u64 wb_gpu_addr;
        u32 *buf;
        struct bonaire_mqd *mqd;

        r = cik_cp_compute_start(rdev);
        if (r)
                return r;

        /* fix up chicken bits */
        tmp = RREG32(CP_CPF_DEBUG);
        tmp |= (1 << 23);
        WREG32(CP_CPF_DEBUG, tmp);

        /* init the pipes */
        mutex_lock(&rdev->srbm_mutex);

        for (i = 0; i < rdev->mec.num_pipe; ++i) {
                /* select pipe i of MEC 0 for the CP_HPD_* writes below */
                cik_srbm_select(rdev, 0, i, 0, 0);

                /* each pipe owns a MEC_HPD_SIZE*2 slice of the EOP bo */
                eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
                /* write the EOP addr */
                WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
                WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

                /* set the VMID assigned */
                WREG32(CP_HPD_EOP_VMID, 0);

                /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
                tmp = RREG32(CP_HPD_EOP_CONTROL);
                tmp &= ~EOP_SIZE_MASK;
                tmp |= order_base_2(MEC_HPD_SIZE / 8);
                WREG32(CP_HPD_EOP_CONTROL, tmp);

        }
        mutex_unlock(&rdev->srbm_mutex);

        /* init the queues.  Just two for now. */
        for (i = 0; i < 2; i++) {
                if (i == 0)
                        idx = CAYMAN_RING_TYPE_CP1_INDEX;
                else
                        idx = CAYMAN_RING_TYPE_CP2_INDEX;

                /* lazily allocate the ring's MQD bo on first resume */
                if (rdev->ring[idx].mqd_obj == NULL) {
                        r = radeon_bo_create(rdev,
                                             sizeof(struct bonaire_mqd),
                                             PAGE_SIZE, true,
                                             RADEON_GEM_DOMAIN_GTT, 0, NULL,
                                             NULL, &rdev->ring[idx].mqd_obj);
                        if (r) {
                                dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
                                return r;
                        }
                }

                r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
                if (unlikely(r != 0)) {
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
                                  &mqd_gpu_addr);
                if (r) {
                        dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }
                r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
                if (r) {
                        dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
                        cik_cp_compute_fini(rdev);
                        return r;
                }

                /* init the mqd struct */
                memset(buf, 0, sizeof(struct bonaire_mqd));

                mqd = (struct bonaire_mqd *)buf;
                mqd->header = 0xC0310800;
                /* enable all compute units for this queue */
                mqd->static_thread_mgmt01[0] = 0xffffffff;
                mqd->static_thread_mgmt01[1] = 0xffffffff;
                mqd->static_thread_mgmt23[0] = 0xffffffff;
                mqd->static_thread_mgmt23[1] = 0xffffffff;

                /* program the HQD registers for this ring's queue */
                mutex_lock(&rdev->srbm_mutex);
                cik_srbm_select(rdev, rdev->ring[idx].me,
                                rdev->ring[idx].pipe,
                                rdev->ring[idx].queue, 0);

                /* disable wptr polling */
                tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
                tmp &= ~WPTR_POLL_EN;
                WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

                /* enable doorbell? */
                mqd->queue_state.cp_hqd_pq_doorbell_control =
                        RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                if (use_doorbell)
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                else
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* disable the queue if it's active */
                mqd->queue_state.cp_hqd_dequeue_request = 0;
                mqd->queue_state.cp_hqd_pq_rptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr= 0;
                if (RREG32(CP_HQD_ACTIVE) & 1) {
                        WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
                        /* wait (up to usec_timeout) for the HQD to drain */
                        for (j = 0; j < rdev->usec_timeout; j++) {
                                if (!(RREG32(CP_HQD_ACTIVE) & 1))
                                        break;
                                udelay(1);
                        }
                        WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
                        WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
                        WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                }

                /* set the pointer to the MQD */
                mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
                WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
                WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
                /* set MQD vmid to 0 */
                mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
                mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
                WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

                /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
                hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
                mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
                mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
                WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
                WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

                /* set up the HQD, this is similar to CP_RB0_CNTL */
                mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

                mqd->queue_state.cp_hqd_pq_control |=
                        order_base_2(rdev->ring[idx].ring_size / 8);
                mqd->queue_state.cp_hqd_pq_control |=
                        (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
                mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
                mqd->queue_state.cp_hqd_pq_control &=
                        ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
                mqd->queue_state.cp_hqd_pq_control |=
                        PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
                WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

                /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
                WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

                /* set the wb address whether it's enabled or not */
                if (i == 0)
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
                else
                        wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
                mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
                        upper_32_bits(wb_gpu_addr) & 0xffff;
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
                WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
                       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

                /* enable the doorbell if requested */
                if (use_doorbell) {
                        mqd->queue_state.cp_hqd_pq_doorbell_control =
                                RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
                        mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
                        mqd->queue_state.cp_hqd_pq_doorbell_control |=
                                DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
                        mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
                        mqd->queue_state.cp_hqd_pq_doorbell_control &=
                                ~(DOORBELL_SOURCE | DOORBELL_HIT);

                } else {
                        mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
                }
                WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
                       mqd->queue_state.cp_hqd_pq_doorbell_control);

                /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
                rdev->ring[idx].wptr = 0;
                mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
                WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
                mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

                /* set the vmid for the queue */
                mqd->queue_state.cp_hqd_vmid = 0;
                WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

                /* activate the queue */
                mqd->queue_state.cp_hqd_active = 1;
                WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

                cik_srbm_select(rdev, 0, 0, 0, 0);
                mutex_unlock(&rdev->srbm_mutex);

                radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
                radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

                /* a failed ring test leaves the queue marked not ready
                 * but is not treated as a fatal error here */
                rdev->ring[idx].ready = true;
                r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
                if (r)
                        rdev->ring[idx].ready = false;
        }

        return 0;
}
4791
/* Enable/disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
        cik_cp_gfx_enable(rdev, enable);
        cik_cp_compute_enable(rdev, enable);
}
4797
/*
 * Load the gfx and compute CP microcode.
 * Returns 0 on success, the first failing loader's error otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4811
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
        cik_cp_gfx_fini(rdev);
        cik_cp_compute_fini(rdev);
}
4817
4818 static int cik_cp_resume(struct radeon_device *rdev)
4819 {
4820         int r;
4821
4822         cik_enable_gui_idle_interrupt(rdev, false);
4823
4824         r = cik_cp_load_microcode(rdev);
4825         if (r)
4826                 return r;
4827
4828         r = cik_cp_gfx_resume(rdev);
4829         if (r)
4830                 return r;
4831         r = cik_cp_compute_resume(rdev);
4832         if (r)
4833                 return r;
4834
4835         cik_enable_gui_idle_interrupt(rdev, true);
4836
4837         return 0;
4838 }
4839
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used by the soft-reset path for debugging hangs.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
                RREG32(GRBM_STATUS));
        dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
                RREG32(GRBM_STATUS2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
                RREG32(GRBM_STATUS_SE0));
        dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
                RREG32(GRBM_STATUS_SE2));
        dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
                RREG32(GRBM_STATUS_SE3));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
        dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
                RREG32(SRBM_STATUS2));
        dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
                RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
        dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
                 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
        dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
        dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT2));
        dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
                 RREG32(CP_STALLED_STAT3));
        dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
                 RREG32(CP_CPF_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPF_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
        dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
        dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
                 RREG32(CP_CPC_STALLED_STAT1));
        dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4879
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
        u32 reset_mask = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(GRBM_STATUS);
        /* any busy gfx pipeline stage implies a GFX reset */
        if (tmp & (PA_BUSY | SC_BUSY |
                   BCI_BUSY | SX_BUSY |
                   TA_BUSY | VGT_BUSY |
                   DB_BUSY | CB_BUSY |
                   GDS_BUSY | SPI_BUSY |
                   IA_BUSY | IA_BUSY_NO_DMA))
                reset_mask |= RADEON_RESET_GFX;

        if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
                reset_mask |= RADEON_RESET_CP;

        /* GRBM_STATUS2 */
        tmp = RREG32(GRBM_STATUS2);
        if (tmp & RLC_BUSY)
                reset_mask |= RADEON_RESET_RLC;

        /* SDMA0_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA;

        /* SDMA1_STATUS_REG */
        tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
        if (!(tmp & SDMA_IDLE))
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS2 */
        tmp = RREG32(SRBM_STATUS2);
        if (tmp & SDMA_BUSY)
                reset_mask |= RADEON_RESET_DMA;

        if (tmp & SDMA1_BUSY)
                reset_mask |= RADEON_RESET_DMA1;

        /* SRBM_STATUS */
        tmp = RREG32(SRBM_STATUS);

        if (tmp & IH_BUSY)
                reset_mask |= RADEON_RESET_IH;

        if (tmp & SEM_BUSY)
                reset_mask |= RADEON_RESET_SEM;

        if (tmp & GRBM_RQ_PENDING)
                reset_mask |= RADEON_RESET_GRBM;

        if (tmp & VMC_BUSY)
                reset_mask |= RADEON_RESET_VMC;

        if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
                   MCC_BUSY | MCD_BUSY))
                reset_mask |= RADEON_RESET_MC;

        if (evergreen_is_display_hung(rdev))
                reset_mask |= RADEON_RESET_DISPLAY;

        /* Skip MC reset as it's mostly likely not hung, just busy */
        if (reset_mask & RADEON_RESET_MC) {
                DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
                reset_mask &= ~RADEON_RESET_MC;
        }

        return reset_mask;
}
4960
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset (RADEON_RESET_* flags)
 *
 * Soft reset the blocks specified in @reset_mask.  The engines are
 * halted and memory traffic stopped before the GRBM/SRBM soft reset
 * bits are pulsed.  The order of operations here is hardware
 * mandated; do not reorder.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
        struct evergreen_mc_save save;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* nothing hung, nothing to do */
        if (reset_mask == 0)
                return;

        dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

        /* dump state to the log before touching anything */
        cik_print_gpu_status_regs(rdev);
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
        dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
                 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

        /* disable CG/PG */
        cik_fini_pg(rdev);
        cik_fini_cg(rdev);

        /* stop the rlc */
        cik_rlc_stop(rdev);

        /* Disable GFX parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

        /* Disable MEC parsing/prefetching */
        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

        if (reset_mask & RADEON_RESET_DMA) {
                /* sdma0 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        }
        if (reset_mask & RADEON_RESET_DMA1) {
                /* sdma1 */
                tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
                tmp |= SDMA_HALT;
                WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        }

        /* quiesce memory traffic before asserting any reset bits */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }

        /* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
        if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
                grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

        if (reset_mask & RADEON_RESET_CP) {
                /* SOFT_RESET_CP is already set by the branch above; kept for clarity */
                grbm_soft_reset |= SOFT_RESET_CP;

                srbm_soft_reset |= SOFT_RESET_GRBM;
        }

        if (reset_mask & RADEON_RESET_DMA)
                srbm_soft_reset |= SOFT_RESET_SDMA;

        if (reset_mask & RADEON_RESET_DMA1)
                srbm_soft_reset |= SOFT_RESET_SDMA1;

        if (reset_mask & RADEON_RESET_DISPLAY)
                srbm_soft_reset |= SOFT_RESET_DC;

        if (reset_mask & RADEON_RESET_RLC)
                grbm_soft_reset |= SOFT_RESET_RLC;

        if (reset_mask & RADEON_RESET_SEM)
                srbm_soft_reset |= SOFT_RESET_SEM;

        if (reset_mask & RADEON_RESET_IH)
                srbm_soft_reset |= SOFT_RESET_IH;

        if (reset_mask & RADEON_RESET_GRBM)
                srbm_soft_reset |= SOFT_RESET_GRBM;

        if (reset_mask & RADEON_RESET_VMC)
                srbm_soft_reset |= SOFT_RESET_VMC;

        /* NOTE(review): the MC is never soft-reset on IGPs — presumably
         * because vram is carved out of system memory there; confirm */
        if (!(rdev->flags & RADEON_IS_IGP)) {
                if (reset_mask & RADEON_RESET_MC)
                        srbm_soft_reset |= SOFT_RESET_MC;
        }

        if (grbm_soft_reset) {
                tmp = RREG32(GRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(GRBM_SOFT_RESET, tmp);
                /* read back, presumably to post the write — TODO confirm */
                tmp = RREG32(GRBM_SOFT_RESET);

                /* hold the reset asserted briefly */
                udelay(50);

                tmp &= ~grbm_soft_reset;
                WREG32(GRBM_SOFT_RESET, tmp);
                tmp = RREG32(GRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                tmp = RREG32(SRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(SRBM_SOFT_RESET, tmp);
                /* read back, presumably to post the write — TODO confirm */
                tmp = RREG32(SRBM_SOFT_RESET);

                /* hold the reset asserted briefly */
                udelay(50);

                tmp &= ~srbm_soft_reset;
                WREG32(SRBM_SOFT_RESET, tmp);
                tmp = RREG32(SRBM_SOFT_RESET);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        evergreen_mc_resume(rdev, &save);
        udelay(50);

        /* dump state again so before/after can be compared in the log */
        cik_print_gpu_status_regs(rdev);
}
5091
/* GMCON register state saved across a pci config reset on KV/KB IGPs */
struct kv_reset_save_regs {
        u32 gmcon_reng_execute; /* GMCON_RENG_EXECUTE */
        u32 gmcon_misc;         /* GMCON_MISC */
        u32 gmcon_misc3;        /* GMCON_MISC3 */
};
5097
/**
 * kv_save_regs_for_reset - save GMCON registers before reset (KV/KB)
 *
 * @rdev: radeon_device pointer
 * @save: structure to stash the current GMCON register values in
 *
 * Saves GMCON_RENG_EXECUTE/MISC/MISC3, then clears the RENG
 * execute-on-power-up / execute-on-register-update and stutter bits
 * (presumably to keep the MC engine quiescent across the reset —
 * TODO confirm).  Restored by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
                                   struct kv_reset_save_regs *save)
{
        save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
        save->gmcon_misc = RREG32(GMCON_MISC);
        save->gmcon_misc3 = RREG32(GMCON_MISC3);

        WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
        WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
                                                STCTRL_STUTTER_EN));
}
5109
5110 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5111                                       struct kv_reset_save_regs *save)
5112 {
5113         int i;
5114
5115         WREG32(GMCON_PGFSM_WRITE, 0);
5116         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5117
5118         for (i = 0; i < 5; i++)
5119                 WREG32(GMCON_PGFSM_WRITE, 0);
5120
5121         WREG32(GMCON_PGFSM_WRITE, 0);
5122         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5123
5124         for (i = 0; i < 5; i++)
5125                 WREG32(GMCON_PGFSM_WRITE, 0);
5126
5127         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5128         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5129
5130         for (i = 0; i < 5; i++)
5131                 WREG32(GMCON_PGFSM_WRITE, 0);
5132
5133         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5134         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5135
5136         for (i = 0; i < 5; i++)
5137                 WREG32(GMCON_PGFSM_WRITE, 0);
5138
5139         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5140         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5141
5142         for (i = 0; i < 5; i++)
5143                 WREG32(GMCON_PGFSM_WRITE, 0);
5144
5145         WREG32(GMCON_PGFSM_WRITE, 0);
5146         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5147
5148         for (i = 0; i < 5; i++)
5149                 WREG32(GMCON_PGFSM_WRITE, 0);
5150
5151         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5152         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5153
5154         for (i = 0; i < 5; i++)
5155                 WREG32(GMCON_PGFSM_WRITE, 0);
5156
5157         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5158         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5159
5160         for (i = 0; i < 5; i++)
5161                 WREG32(GMCON_PGFSM_WRITE, 0);
5162
5163         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5164         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5165
5166         for (i = 0; i < 5; i++)
5167                 WREG32(GMCON_PGFSM_WRITE, 0);
5168
5169         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5170         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5171
5172         for (i = 0; i < 5; i++)
5173                 WREG32(GMCON_PGFSM_WRITE, 0);
5174
5175         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5176         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5177
5178         WREG32(GMCON_MISC3, save->gmcon_misc3);
5179         WREG32(GMCON_MISC, save->gmcon_misc);
5180         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5181 }
5182
/**
 * cik_gpu_pci_config_reset - asic reset via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the engines and the memory controller, resets the whole
 * asic through the pci config register, and waits for it to come
 * back out of reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
        struct evergreen_mc_save save;
        struct kv_reset_save_regs kv_save = { 0 };
        u32 tmp, i;

        dev_info(rdev->dev, "GPU pci config reset\n");

        /* disable dpm? */

        /* disable cg/pg */
        cik_fini_pg(rdev);
        cik_fini_cg(rdev);

        /* Disable GFX parsing/prefetching */
        WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

        /* Disable MEC parsing/prefetching */
        WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

        /* sdma0 */
        tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
        tmp |= SDMA_HALT;
        WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
        /* sdma1 */
        tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
        tmp |= SDMA_HALT;
        WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
        /* XXX other engines? */

        /* halt the rlc, disable cp internal ints */
        cik_rlc_stop(rdev);

        udelay(50);

        /* disable mem access */
        evergreen_mc_stop(rdev, &save);
        if (evergreen_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
        }

        /* KV/KB IGPs: save GMCON state — presumably not preserved
         * across a pci config reset; restored below */
        if (rdev->flags & RADEON_IS_IGP)
                kv_save_regs_for_reset(rdev, &kv_save);

        /* disable BM */
        pci_clear_master(rdev->pdev);
        /* reset */
        radeon_pci_config_reset(rdev);

        udelay(100);

        /* wait for asic to come out of reset;
         * CONFIG_MEMSIZE reads all ones while the asic is still in reset */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
                        break;
                udelay(1);
        }

        /* does asic init need to be run first??? */
        if (rdev->flags & RADEON_IS_IGP)
                kv_restore_regs_for_reset(rdev, &kv_save);
}
5245
5246 /**
5247  * cik_asic_reset - soft reset GPU
5248  *
5249  * @rdev: radeon_device pointer
5250  * @hard: force hard reset
5251  *
5252  * Look up which blocks are hung and attempt
5253  * to reset them.
5254  * Returns 0 for success.
5255  */
5256 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5257 {
5258         u32 reset_mask;
5259
5260         if (hard) {
5261                 cik_gpu_pci_config_reset(rdev);
5262                 return 0;
5263         }
5264
5265         reset_mask = cik_gpu_check_soft_reset(rdev);
5266
5267         if (reset_mask)
5268                 r600_set_bios_scratch_engine_hung(rdev, true);
5269
5270         /* try soft reset */
5271         cik_gpu_soft_reset(rdev, reset_mask);
5272
5273         reset_mask = cik_gpu_check_soft_reset(rdev);
5274
5275         /* try pci config reset */
5276         if (reset_mask && radeon_hard_reset)
5277                 cik_gpu_pci_config_reset(rdev);
5278
5279         reset_mask = cik_gpu_check_soft_reset(rdev);
5280
5281         if (!reset_mask)
5282                 r600_set_bios_scratch_engine_hung(rdev, false);
5283
5284         return 0;
5285 }
5286
5287 /**
5288  * cik_gfx_is_lockup - check if the 3D engine is locked up
5289  *
5290  * @rdev: radeon_device pointer
5291  * @ring: radeon_ring structure holding ring information
5292  *
5293  * Check if the 3D engine is locked up (CIK).
5294  * Returns true if the engine is locked, false if not.
5295  */
5296 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5297 {
5298         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5299
5300         if (!(reset_mask & (RADEON_RESET_GFX |
5301                             RADEON_RESET_COMPUTE |
5302                             RADEON_RESET_CP))) {
5303                 radeon_ring_lockup_update(rdev, ring);
5304                 return false;
5305         }
5306         return radeon_ring_test_lockup(rdev, ring);
5307 }
5308
5309 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
        struct evergreen_mc_save save;
        u32 tmp;
        int i, j;

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }
        WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

        /* MC must be idle while its apertures are reprogrammed */
        evergreen_mc_stop(rdev, &save);
        if (radeon_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
        /* Lockout access through VGA aperture*/
        WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
        /* Update configuration */
        WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
               rdev->mc.vram_start >> 12);
        WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
               rdev->mc.vram_end >> 12);
        /* unmapped system-aperture accesses land on the scratch page */
        WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
               rdev->vram_scratch.gpu_addr >> 12);
        /* FB location packs start/end as 16-bit fields in 1 << 24 byte units */
        tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
        tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
        WREG32(MC_VM_FB_LOCATION, tmp);
        /* XXX double check these! */
        WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
        WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
        WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
        /* AGP unused: base 0, BOT above TOP disables the aperture —
         * NOTE(review): presumably; confirm against register spec */
        WREG32(MC_VM_AGP_BASE, 0);
        WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
        WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
        if (radeon_mc_wait_for_idle(rdev)) {
                dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
        }
        evergreen_mc_resume(rdev, &save);
        /* we need to own VRAM, so turn off the VGA renderer here
         * to stop it overwriting our objects */
        rv515_vga_render_disable(rdev);
}
5365
5366 /**
5367  * cik_mc_init - initialize the memory controller driver params
5368  *
5369  * @rdev: radeon_device pointer
5370  *
5371  * Look up the amount of vram, vram width, and decide how to place
5372  * vram and gart within the GPU's physical address space (CIK).
5373  * Returns 0 for success.
5374  */
5375 static int cik_mc_init(struct radeon_device *rdev)
5376 {
5377         u32 tmp;
5378         int chansize, numchan;
5379
5380         /* Get VRAM informations */
5381         rdev->mc.vram_is_ddr = true;
5382         tmp = RREG32(MC_ARB_RAMCFG);
5383         if (tmp & CHANSIZE_MASK) {
5384                 chansize = 64;
5385         } else {
5386                 chansize = 32;
5387         }
5388         tmp = RREG32(MC_SHARED_CHMAP);
5389         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5390         case 0:
5391         default:
5392                 numchan = 1;
5393                 break;
5394         case 1:
5395                 numchan = 2;
5396                 break;
5397         case 2:
5398                 numchan = 4;
5399                 break;
5400         case 3:
5401                 numchan = 8;
5402                 break;
5403         case 4:
5404                 numchan = 3;
5405                 break;
5406         case 5:
5407                 numchan = 6;
5408                 break;
5409         case 6:
5410                 numchan = 10;
5411                 break;
5412         case 7:
5413                 numchan = 12;
5414                 break;
5415         case 8:
5416                 numchan = 16;
5417                 break;
5418         }
5419         rdev->mc.vram_width = numchan * chansize;
5420         /* Could aper size report 0 ? */
5421         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5422         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5423         /* size in MB on si */
5424         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5425         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5426         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5427         si_vram_gtt_location(rdev, &rdev->mc);
5428         radeon_update_bandwidth_info(rdev);
5429
5430         return 0;
5431 }
5432
5433 /*
5434  * GART
5435  * VMID 0 is the physical GPU addresses as used by the kernel.
5436  * VMIDs 1-15 are used for userspace clients and are handled
5437  * by the radeon vm/hsa code.
5438  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        /* flush hdp cache */
        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

        /* bits 0-15 are the VM contexts0-15; only context 0 is flushed here */
        WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5454
/* Program the SH_MEM_* apertures for VMIDs 8-15, which are reserved
 * for amdkfd compute clients (see cik_vm_init()). */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
        int i;
        uint32_t sh_mem_bases, sh_mem_config;

        /* same base in both 16-bit halves — presumably private/shared
         * aperture bases; TODO confirm units against the SH_MEM_BASES spec */
        sh_mem_bases = 0x6000 | 0x6000 << 16;
        sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
        sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

        /* srbm_mutex serializes access to the SRBM-banked registers below */
        mutex_lock(&rdev->srbm_mutex);
        for (i = 8; i < 16; i++) {
                cik_srbm_select(rdev, 0, 0, 0, i);
                /* CP and shaders */
                WREG32(SH_MEM_CONFIG, sh_mem_config);
                WREG32(SH_MEM_APE1_BASE, 1);
                WREG32(SH_MEM_APE1_LIMIT, 0);
                WREG32(SH_MEM_BASES, sh_mem_bases);
        }
        /* restore the default VMID 0 selection */
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);
}
5476
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
        int r, i;

        if (rdev->gart.robj == NULL) {
                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
                return -EINVAL;
        }
        r = radeon_gart_table_vram_pin(rdev);
        if (r)
                return r;
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL,
               (0xA << 7) | /* magic field, not named in cikd.h */
               ENABLE_L1_TLB |
               ENABLE_L1_FRAGMENT_PROCESSING |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               ENABLE_ADVANCED_DRIVER_MODEL |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
               ENABLE_L2_FRAGMENT_PROCESSING |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               BANK_SELECT(4) |
               L2_CACHE_BIGK_FRAGMENT_SIZE(4));
        /* setup context0 (kernel GART mapping) */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        /* faults in context0 are redirected to the dummy page */
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT0_CNTL2, 0);
        WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

        /* XXX unnamed registers — purpose not documented here */
        WREG32(0x15D4, 0);
        WREG32(0x15D8, 0);
        WREG32(0x15DC, 0);

        /* restore context1-15 */
        /* set vm size, must be a multiple of 4 */
        WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
        WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
        /* contexts 0-7 and 8-15 live in two separate register banks;
         * table addresses were saved by cik_pcie_gart_disable() */
        for (i = 1; i < 16; i++) {
                if (i < 8)
                        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
                               rdev->vm_manager.saved_table_addr[i]);
                else
                        WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
                               rdev->vm_manager.saved_table_addr[i]);
        }

        /* enable context1-15 */
        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
                                PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

        if (rdev->family == CHIP_KAVERI) {
                u32 tmp = RREG32(CHUB_CONTROL);
                tmp &= ~BYPASS_VM;
                WREG32(CHUB_CONTROL, tmp);
        }

        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
        mutex_lock(&rdev->srbm_mutex);
        for (i = 0; i < 16; i++) {
                cik_srbm_select(rdev, 0, 0, 0, i);
                /* CP and shaders */
                WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
                WREG32(SH_MEM_APE1_BASE, 1);
                WREG32(SH_MEM_APE1_LIMIT, 0);
                WREG32(SH_MEM_BASES, 0);
                /* SDMA GFX */
                WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
                WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
                WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
                WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
                /* XXX SDMA RLC - todo */
        }
        cik_srbm_select(rdev, 0, 0, 0, 0);
        mutex_unlock(&rdev->srbm_mutex);

        cik_pcie_init_compute_vmid(rdev);

        cik_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
                 (unsigned)(rdev->mc.gtt_size >> 20),
                 (unsigned long long)rdev->gart.table_addr);
        rdev->gart.ready = true;
        return 0;
}
5599
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
        unsigned i;

        /* save the per-context page table base addresses so
         * cik_pcie_gart_enable() can restore them later;
         * contexts 0-7 and 8-15 live in two separate register banks */
        for (i = 1; i < 16; ++i) {
                uint32_t reg;
                if (i < 8)
                        reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
                else
                        reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
                rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
        }

        /* Disable all tables */
        WREG32(VM_CONTEXT0_CNTL, 0);
        WREG32(VM_CONTEXT1_CNTL, 0);
        /* Setup TLB control (L1 TLB left disabled) */
        WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache (ENABLE_L2_CACHE deliberately not set) */
        WREG32(VM_L2_CNTL,
               ENABLE_L2_FRAGMENT_PROCESSING |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, 0);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        radeon_gart_table_vram_unpin(rdev);
}
5638
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
        /* disable the tables first, then free the table memory
         * and the common gart state */
        cik_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
5652
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
        return 0;
}
5666
5667 /*
5668  * vm
5669  * VMID 0 is the physical GPU addresses as used by the kernel.
5670  * VMIDs 1-15 are used for userspace clients and are handled
5671  * by the radeon vm/hsa code.
5672  */
5673 /**
5674  * cik_vm_init - cik vm init callback
5675  *
5676  * @rdev: radeon_device pointer
5677  *
5678  * Inits cik specific vm parameters (number of VMs, base of vram for
5679  * VMIDs 1-15) (CIK).
5680  * Returns 0 for success.
5681  */
5682 int cik_vm_init(struct radeon_device *rdev)
5683 {
5684         /*
5685          * number of VMs
5686          * VMID 0 is reserved for System
5687          * radeon graphics/compute will use VMIDs 1-7
5688          * amdkfd will use VMIDs 8-15
5689          */
5690         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5691         /* base offset of vram pages */
5692         if (rdev->flags & RADEON_IS_IGP) {
5693                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5694                 tmp <<= 22;
5695                 rdev->vm_manager.vram_base_offset = tmp;
5696         } else
5697                 rdev->vm_manager.vram_base_offset = 0;
5698
5699         return 0;
5700 }
5701
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
        /* nothing to tear down on CIK */
}
5712
5713 /**
5714  * cik_vm_decode_fault - print human readable fault info
5715  *
5716  * @rdev: radeon_device pointer
5717  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5718  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5719  *
5720  * Print human readable fault information (CIK).
5721  */
5722 static void cik_vm_decode_fault(struct radeon_device *rdev,
5723                                 u32 status, u32 addr, u32 mc_client)
5724 {
5725         u32 mc_id;
5726         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5727         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5728         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5729                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5730
5731         if (rdev->family == CHIP_HAWAII)
5732                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5733         else
5734                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5735
5736         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5737                protections, vmid, addr,
5738                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5739                block, mc_client, mc_id);
5740 }
5741
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is being updated (0-15)
 * @pd_addr: new page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
                  unsigned vm_id, uint64_t pd_addr)
{
        /* only the GFX ring has a PFP engine */
        int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

        /* write the new page directory base; contexts 0-7 and 8-15
         * live in two separate register banks */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)));
        if (vm_id < 8) {
                radeon_ring_write(ring,
                                  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
        } else {
                radeon_ring_write(ring,
                                  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
        }
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, pd_addr >> 12);

        /* update SH_MEM_* regs */
        /* select the target VMID via SRBM banking — presumably required
         * so the SH_MEM writes below hit that VMID's registers */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, VMID(vm_id));

        /* four consecutive SH_MEM registers starting at SH_MEM_BASES */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, SH_MEM_BASES >> 2);
        radeon_ring_write(ring, 0);

        radeon_ring_write(ring, 0); /* SH_MEM_BASES */
        radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
        radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
        radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

        /* switch SRBM banking back to VMID 0 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, VMID(0));

        /* HDP flush */
        cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

        /* bits 0-15 are the VM contexts0-15 */
        radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
                                 WRITE_DATA_DST_SEL(0)));
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 1 << vm_id);

        /* wait for the invalidate to complete */
        radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0); /* ref */
        radeon_ring_write(ring, 0); /* mask */
        radeon_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                radeon_ring_write(ring, 0x0);
        }
}
5823
5824 /*
5825  * RLC
5826  * The RLC is a multi-purpose microengine that handles a
5827  * variety of functions, the most important of which is
5828  * the interrupt controller.
5829  */
5830 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5831                                           bool enable)
5832 {
5833         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5834
5835         if (enable)
5836                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5837         else
5838                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5839         WREG32(CP_INT_CNTL_RING0, tmp);
5840 }
5841
5842 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5843 {
5844         u32 tmp;
5845
5846         tmp = RREG32(RLC_LB_CNTL);
5847         if (enable)
5848                 tmp |= LOAD_BALANCE_ENABLE;
5849         else
5850                 tmp &= ~LOAD_BALANCE_ENABLE;
5851         WREG32(RLC_LB_CNTL, tmp);
5852 }
5853
5854 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5855 {
5856         u32 i, j, k;
5857         u32 mask;
5858
5859         mutex_lock(&rdev->grbm_idx_mutex);
5860         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5861                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5862                         cik_select_se_sh(rdev, i, j);
5863                         for (k = 0; k < rdev->usec_timeout; k++) {
5864                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5865                                         break;
5866                                 udelay(1);
5867                         }
5868                 }
5869         }
5870         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5871         mutex_unlock(&rdev->grbm_idx_mutex);
5872
5873         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5874         for (k = 0; k < rdev->usec_timeout; k++) {
5875                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5876                         break;
5877                 udelay(1);
5878         }
5879 }
5880
5881 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5882 {
5883         u32 tmp;
5884
5885         tmp = RREG32(RLC_CNTL);
5886         if (tmp != rlc)
5887                 WREG32(RLC_CNTL, rlc);
5888 }
5889
5890 static u32 cik_halt_rlc(struct radeon_device *rdev)
5891 {
5892         u32 data, orig;
5893
5894         orig = data = RREG32(RLC_CNTL);
5895
5896         if (data & RLC_ENABLE) {
5897                 u32 i;
5898
5899                 data &= ~RLC_ENABLE;
5900                 WREG32(RLC_CNTL, data);
5901
5902                 for (i = 0; i < rdev->usec_timeout; i++) {
5903                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5904                                 break;
5905                         udelay(1);
5906                 }
5907
5908                 cik_wait_for_rlc_serdes(rdev);
5909         }
5910
5911         return orig;
5912 }
5913
5914 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5915 {
5916         u32 tmp, i, mask;
5917
5918         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5919         WREG32(RLC_GPR_REG2, tmp);
5920
5921         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5922         for (i = 0; i < rdev->usec_timeout; i++) {
5923                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5924                         break;
5925                 udelay(1);
5926         }
5927
5928         for (i = 0; i < rdev->usec_timeout; i++) {
5929                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5930                         break;
5931                 udelay(1);
5932         }
5933 }
5934
5935 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5936 {
5937         u32 tmp;
5938
5939         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5940         WREG32(RLC_GPR_REG2, tmp);
5941 }
5942
5943 /**
5944  * cik_rlc_stop - stop the RLC ME
5945  *
5946  * @rdev: radeon_device pointer
5947  *
5948  * Halt the RLC ME (MicroEngine) (CIK).
5949  */
5950 static void cik_rlc_stop(struct radeon_device *rdev)
5951 {
5952         WREG32(RLC_CNTL, 0);
5953
5954         cik_enable_gui_idle_interrupt(rdev, false);
5955
5956         cik_wait_for_rlc_serdes(rdev);
5957 }
5958
5959 /**
5960  * cik_rlc_start - start the RLC ME
5961  *
5962  * @rdev: radeon_device pointer
5963  *
5964  * Unhalt the RLC ME (MicroEngine) (CIK).
5965  */
5966 static void cik_rlc_start(struct radeon_device *rdev)
5967 {
5968         WREG32(RLC_CNTL, RLC_ENABLE);
5969
5970         cik_enable_gui_idle_interrupt(rdev, true);
5971
5972         udelay(50);
5973 }
5974
5975 /**
5976  * cik_rlc_resume - setup the RLC hw
5977  *
5978  * @rdev: radeon_device pointer
5979  *
5980  * Initialize the RLC registers, load the ucode,
5981  * and start the RLC (CIK).
5982  * Returns 0 for success, -EINVAL if the ucode is not available.
5983  */
5984 static int cik_rlc_resume(struct radeon_device *rdev)
5985 {
5986         u32 i, size, tmp;
5987
5988         if (!rdev->rlc_fw)
5989                 return -EINVAL;
5990
5991         cik_rlc_stop(rdev);
5992
5993         /* disable CG */
5994         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5995         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5996
5997         si_rlc_reset(rdev);
5998
5999         cik_init_pg(rdev);
6000
6001         cik_init_cg(rdev);
6002
6003         WREG32(RLC_LB_CNTR_INIT, 0);
6004         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6005
6006         mutex_lock(&rdev->grbm_idx_mutex);
6007         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6008         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6009         WREG32(RLC_LB_PARAMS, 0x00600408);
6010         WREG32(RLC_LB_CNTL, 0x80000004);
6011         mutex_unlock(&rdev->grbm_idx_mutex);
6012
6013         WREG32(RLC_MC_CNTL, 0);
6014         WREG32(RLC_UCODE_CNTL, 0);
6015
6016         if (rdev->new_fw) {
6017                 const struct rlc_firmware_header_v1_0 *hdr =
6018                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6019                 const __le32 *fw_data = (const __le32 *)
6020                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6021
6022                 radeon_ucode_print_rlc_hdr(&hdr->header);
6023
6024                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6025                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6026                 for (i = 0; i < size; i++)
6027                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6028                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6029         } else {
6030                 const __be32 *fw_data;
6031
6032                 switch (rdev->family) {
6033                 case CHIP_BONAIRE:
6034                 case CHIP_HAWAII:
6035                 default:
6036                         size = BONAIRE_RLC_UCODE_SIZE;
6037                         break;
6038                 case CHIP_KAVERI:
6039                         size = KV_RLC_UCODE_SIZE;
6040                         break;
6041                 case CHIP_KABINI:
6042                         size = KB_RLC_UCODE_SIZE;
6043                         break;
6044                 case CHIP_MULLINS:
6045                         size = ML_RLC_UCODE_SIZE;
6046                         break;
6047                 }
6048
6049                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6050                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6051                 for (i = 0; i < size; i++)
6052                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6053                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6054         }
6055
6056         /* XXX - find out what chips support lbpw */
6057         cik_enable_lbpw(rdev, false);
6058
6059         if (rdev->family == CHIP_BONAIRE)
6060                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6061
6062         cik_rlc_start(rdev);
6063
6064         return 0;
6065 }
6066
/*
 * GFX coarse grain clock gating (CGCG) and clock gating light sleep
 * (CGLS).  On enable the RLC is halted while the serdes write masks
 * are programmed on the broadcast index, then restored.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp, tmp2;

        orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
                cik_enable_gui_idle_interrupt(rdev, true);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes write to every SE/SH */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
                WREG32(RLC_SERDES_WR_CTRL, tmp2);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC_CNTL value saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                data |= CGCG_EN | CGLS_EN;
        } else {
                cik_enable_gui_idle_interrupt(rdev, false);

                /* NOTE(review): four dummy reads with the results
                 * discarded — presumably a settling/posting sequence;
                 * confirm before removing. */
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);
                RREG32(CB_CGTT_SCLK_CTRL);

                data &= ~(CGCG_EN | CGLS_EN);
        }

        /* only touch the register if something changed */
        if (orig != data)
                WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6104
/*
 * GFX medium grain clock gating (MGCG) plus the related memory light
 * sleep (MGLS/CP_LS) and shader-complex (CGTS) gating.  Both branches
 * halt the RLC around the serdes mask programming and then restore it.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
        u32 data, orig, tmp = 0;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
                        if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
                                /* CP memory light sleep on */
                                orig = data = RREG32(CP_MEM_SLP_CNTL);
                                data |= CP_MEM_LS_EN;
                                if (orig != data)
                                        WREG32(CP_MEM_SLP_CNTL, data);
                        }
                }

                /* set bit 0, clear bit 1 of the MGCG override */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000001;
                data &= 0xfffffffd;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                tmp = cik_halt_rlc(rdev);

                /* broadcast the serdes write to every SE/SH */
                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                /* restore the RLC_CNTL value saved by cik_halt_rlc() */
                cik_update_rlc(rdev, tmp);

                if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
                        /* shader-complex clock gating setup */
                        orig = data = RREG32(CGTS_SM_CTRL_REG);
                        data &= ~SM_MODE_MASK;
                        data |= SM_MODE(0x2);
                        data |= SM_MODE_ENABLE;
                        data &= ~CGTS_OVERRIDE;
                        if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
                            (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
                                data &= ~CGTS_LS_OVERRIDE;
                        data &= ~ON_MONITOR_ADD_MASK;
                        data |= ON_MONITOR_ADD_EN;
                        data |= ON_MONITOR_ADD(0x96);
                        if (orig != data)
                                WREG32(CGTS_SM_CTRL_REG, data);
                }
        } else {
                /* force both MGCG override bits back on */
                orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
                data |= 0x00000003;
                if (orig != data)
                        WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

                /* RLC memory light sleep off */
                data = RREG32(RLC_MEM_SLP_CNTL);
                if (data & RLC_MEM_LS_EN) {
                        data &= ~RLC_MEM_LS_EN;
                        WREG32(RLC_MEM_SLP_CNTL, data);
                }

                /* CP memory light sleep off */
                data = RREG32(CP_MEM_SLP_CNTL);
                if (data & CP_MEM_LS_EN) {
                        data &= ~CP_MEM_LS_EN;
                        WREG32(CP_MEM_SLP_CNTL, data);
                }

                /* force shader-complex overrides on */
                orig = data = RREG32(CGTS_SM_CTRL_REG);
                data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
                if (orig != data)
                        WREG32(CGTS_SM_CTRL_REG, data);

                tmp = cik_halt_rlc(rdev);

                mutex_lock(&rdev->grbm_idx_mutex);
                cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
                WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
                WREG32(RLC_SERDES_WR_CTRL, data);
                mutex_unlock(&rdev->grbm_idx_mutex);

                cik_update_rlc(rdev, tmp);
        }
}
6188
/* MC/ATC/VM clock-gating control registers toggled as a group by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
6201
6202 static void cik_enable_mc_ls(struct radeon_device *rdev,
6203                              bool enable)
6204 {
6205         int i;
6206         u32 orig, data;
6207
6208         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6209                 orig = data = RREG32(mc_cg_registers[i]);
6210                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6211                         data |= MC_LS_ENABLE;
6212                 else
6213                         data &= ~MC_LS_ENABLE;
6214                 if (data != orig)
6215                         WREG32(mc_cg_registers[i], data);
6216         }
6217 }
6218
6219 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6220                                bool enable)
6221 {
6222         int i;
6223         u32 orig, data;
6224
6225         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6226                 orig = data = RREG32(mc_cg_registers[i]);
6227                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6228                         data |= MC_CG_ENABLE;
6229                 else
6230                         data &= ~MC_CG_ENABLE;
6231                 if (data != orig)
6232                         WREG32(mc_cg_registers[i], data);
6233         }
6234 }
6235
6236 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6237                                  bool enable)
6238 {
6239         u32 orig, data;
6240
6241         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6242                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6243                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6244         } else {
6245                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6246                 data |= 0xff000000;
6247                 if (data != orig)
6248                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6249
6250                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6251                 data |= 0xff000000;
6252                 if (data != orig)
6253                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6254         }
6255 }
6256
6257 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6258                                  bool enable)
6259 {
6260         u32 orig, data;
6261
6262         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6263                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6264                 data |= 0x100;
6265                 if (orig != data)
6266                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6267
6268                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6269                 data |= 0x100;
6270                 if (orig != data)
6271                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6272         } else {
6273                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6274                 data &= ~0x100;
6275                 if (orig != data)
6276                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6277
6278                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6279                 data &= ~0x100;
6280                 if (orig != data)
6281                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6282         }
6283 }
6284
/*
 * UVD medium grain clock gating: memory gating bits via the indirect
 * UVD context register UVD_CGC_MEM_CTRL, dynamic clock mode (DCM) via
 * UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
                                bool enable)
{
        u32 orig, data;

        if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                /* NOTE(review): the value read above is discarded and the
                 * register is set to 0xfff outright — possibly a posting
                 * read, possibly a leftover "|=".  Confirm before changing. */
                data = 0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                /* dynamic clock mode on */
                orig = data = RREG32(UVD_CGC_CTRL);
                data |= DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        } else {
                /* clear the memory gating bits */
                data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
                data &= ~0xfff;
                WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

                /* dynamic clock mode off */
                orig = data = RREG32(UVD_CGC_CTRL);
                data &= ~DCM;
                if (orig != data)
                        WREG32(UVD_CGC_CTRL, data);
        }
}
6310
6311 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6312                                bool enable)
6313 {
6314         u32 orig, data;
6315
6316         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6317
6318         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6319                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6320                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6321         else
6322                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6323                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6324
6325         if (orig != data)
6326                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6327 }
6328
6329 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6330                                 bool enable)
6331 {
6332         u32 orig, data;
6333
6334         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6335
6336         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6337                 data &= ~CLOCK_GATING_DIS;
6338         else
6339                 data |= CLOCK_GATING_DIS;
6340
6341         if (orig != data)
6342                 WREG32(HDP_HOST_PATH_CNTL, data);
6343 }
6344
6345 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6346                               bool enable)
6347 {
6348         u32 orig, data;
6349
6350         orig = data = RREG32(HDP_MEM_POWER_LS);
6351
6352         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6353                 data |= HDP_LS_ENABLE;
6354         else
6355                 data &= ~HDP_LS_ENABLE;
6356
6357         if (orig != data)
6358                 WREG32(HDP_MEM_POWER_LS, data);
6359 }
6360
6361 void cik_update_cg(struct radeon_device *rdev,
6362                    u32 block, bool enable)
6363 {
6364
6365         if (block & RADEON_CG_BLOCK_GFX) {
6366                 cik_enable_gui_idle_interrupt(rdev, false);
6367                 /* order matters! */
6368                 if (enable) {
6369                         cik_enable_mgcg(rdev, true);
6370                         cik_enable_cgcg(rdev, true);
6371                 } else {
6372                         cik_enable_cgcg(rdev, false);
6373                         cik_enable_mgcg(rdev, false);
6374                 }
6375                 cik_enable_gui_idle_interrupt(rdev, true);
6376         }
6377
6378         if (block & RADEON_CG_BLOCK_MC) {
6379                 if (!(rdev->flags & RADEON_IS_IGP)) {
6380                         cik_enable_mc_mgcg(rdev, enable);
6381                         cik_enable_mc_ls(rdev, enable);
6382                 }
6383         }
6384
6385         if (block & RADEON_CG_BLOCK_SDMA) {
6386                 cik_enable_sdma_mgcg(rdev, enable);
6387                 cik_enable_sdma_mgls(rdev, enable);
6388         }
6389
6390         if (block & RADEON_CG_BLOCK_BIF) {
6391                 cik_enable_bif_mgls(rdev, enable);
6392         }
6393
6394         if (block & RADEON_CG_BLOCK_UVD) {
6395                 if (rdev->has_uvd)
6396                         cik_enable_uvd_mgcg(rdev, enable);
6397         }
6398
6399         if (block & RADEON_CG_BLOCK_HDP) {
6400                 cik_enable_hdp_mgcg(rdev, enable);
6401                 cik_enable_hdp_ls(rdev, enable);
6402         }
6403
6404         if (block & RADEON_CG_BLOCK_VCE) {
6405                 vce_v2_0_enable_mgcg(rdev, enable);
6406         }
6407 }
6408
/* Enable clock gating at init time: GFX first, then the other blocks. */
static void cik_init_cg(struct radeon_device *rdev)
{

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

        /* UVD internal CG setup shared with SI */
        if (rdev->has_uvd)
                si_init_uvd_internal_cg(rdev);

        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), true);
}
6423
/* Disable clock gating: non-GFX blocks first, then GFX (reverse of init). */
static void cik_fini_cg(struct radeon_device *rdev)
{
        cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
                             RADEON_CG_BLOCK_SDMA |
                             RADEON_CG_BLOCK_BIF |
                             RADEON_CG_BLOCK_UVD |
                             RADEON_CG_BLOCK_HDP), false);

        cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6434
6435 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6436                                           bool enable)
6437 {
6438         u32 data, orig;
6439
6440         orig = data = RREG32(RLC_PG_CNTL);
6441         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6442                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6443         else
6444                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6445         if (orig != data)
6446                 WREG32(RLC_PG_CNTL, data);
6447 }
6448
6449 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6450                                           bool enable)
6451 {
6452         u32 data, orig;
6453
6454         orig = data = RREG32(RLC_PG_CNTL);
6455         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6456                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6457         else
6458                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6459         if (orig != data)
6460                 WREG32(RLC_PG_CNTL, data);
6461 }
6462
6463 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6464 {
6465         u32 data, orig;
6466
6467         orig = data = RREG32(RLC_PG_CNTL);
6468         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6469                 data &= ~DISABLE_CP_PG;
6470         else
6471                 data |= DISABLE_CP_PG;
6472         if (orig != data)
6473                 WREG32(RLC_PG_CNTL, data);
6474 }
6475
6476 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6477 {
6478         u32 data, orig;
6479
6480         orig = data = RREG32(RLC_PG_CNTL);
6481         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6482                 data &= ~DISABLE_GDS_PG;
6483         else
6484                 data |= DISABLE_GDS_PG;
6485         if (orig != data)
6486                 WREG32(RLC_PG_CNTL, data);
6487 }
6488
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/*
 * Copy the per-ME RLC jump tables out of the CP firmware images into
 * the RLC cp_table buffer.  me 0..4 select CE, PFP, ME, MEC (and MEC2
 * on Kaveri).  New-style firmware carries the table offset/size in its
 * header; legacy firmware uses the fixed offsets/size defined above.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
        volatile u32 *dst_ptr;
        int me, i, max_me = 4;
        u32 bo_offset = 0;
        u32 table_offset, table_size;

        /* Kaveri has a second MEC */
        if (rdev->family == CHIP_KAVERI)
                max_me = 5;

        if (rdev->rlc.cp_table_ptr == NULL)
                return;

        /* write the cp table buffer */
        dst_ptr = rdev->rlc.cp_table_ptr;
        for (me = 0; me < max_me; me++) {
                if (rdev->new_fw) {
                        const __le32 *fw_data;
                        const struct gfx_firmware_header_v1_0 *hdr;

                        /* pick the firmware image for this ME and read the
                         * jump-table location from its header */
                        if (me == 0) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 1) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 2) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else if (me == 3) {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        } else {
                                hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
                                fw_data = (const __le32 *)
                                        (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
                                table_offset = le32_to_cpu(hdr->jt_offset);
                                table_size = le32_to_cpu(hdr->jt_size);
                        }

                        /* tables are packed back-to-back in the buffer */
                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                } else {
                        /* legacy firmware: fixed table size and offsets,
                         * big-endian words */
                        const __be32 *fw_data;
                        table_size = CP_ME_TABLE_SIZE;

                        if (me == 0) {
                                fw_data = (const __be32 *)rdev->ce_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 1) {
                                fw_data = (const __be32 *)rdev->pfp_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else if (me == 2) {
                                fw_data = (const __be32 *)rdev->me_fw->data;
                                table_offset = CP_ME_TABLE_OFFSET;
                        } else {
                                fw_data = (const __be32 *)rdev->mec_fw->data;
                                table_offset = CP_MEC_TABLE_OFFSET;
                        }

                        for (i = 0; i < table_size; i ++) {
                                dst_ptr[bo_offset + i] =
                                        cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
                        }
                        bo_offset += table_size;
                }
        }
}
6576
/*
 * GFX coarse grain power gating: toggles GFX_PG_ENABLE in RLC_PG_CNTL
 * and automatic power gating (AUTO_PG_EN) in RLC_AUTO_PG_CTRL.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
                                bool enable)
{
        u32 data, orig;

        if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
                orig = data = RREG32(RLC_PG_CNTL);
                data |= GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data |= AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);
        } else {
                orig = data = RREG32(RLC_PG_CNTL);
                data &= ~GFX_PG_ENABLE;
                if (orig != data)
                        WREG32(RLC_PG_CNTL, data);

                orig = data = RREG32(RLC_AUTO_PG_CTRL);
                data &= ~AUTO_PG_EN;
                if (orig != data)
                        WREG32(RLC_AUTO_PG_CTRL, data);

                /* NOTE(review): dummy read, result unused — presumably to
                 * make sure PG is really off before the DB is touched
                 * again; confirm before removing. */
                data = RREG32(DB_RENDER_CONTROL);
        }
}
6606
6607 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6608 {
6609         u32 mask = 0, tmp, tmp1;
6610         int i;
6611
6612         mutex_lock(&rdev->grbm_idx_mutex);
6613         cik_select_se_sh(rdev, se, sh);
6614         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6615         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6616         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6617         mutex_unlock(&rdev->grbm_idx_mutex);
6618
6619         tmp &= 0xffff0000;
6620
6621         tmp |= tmp1;
6622         tmp >>= 16;
6623
6624         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6625                 mask <<= 1;
6626                 mask |= 1;
6627         }
6628
6629         return (~tmp) & mask;
6630 }
6631
6632 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6633 {
6634         u32 i, j, k, active_cu_number = 0;
6635         u32 mask, counter, cu_bitmap;
6636         u32 tmp = 0;
6637
6638         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6639                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6640                         mask = 1;
6641                         cu_bitmap = 0;
6642                         counter = 0;
6643                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6644                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6645                                         if (counter < 2)
6646                                                 cu_bitmap |= mask;
6647                                         counter ++;
6648                                 }
6649                                 mask <<= 1;
6650                         }
6651
6652                         active_cu_number += counter;
6653                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6654                 }
6655         }
6656
6657         WREG32(RLC_PG_AO_CU_MASK, tmp);
6658
6659         tmp = RREG32(RLC_MAX_PG_CU);
6660         tmp &= ~MAX_PU_CU_MASK;
6661         tmp |= MAX_PU_CU(active_cu_number);
6662         WREG32(RLC_MAX_PG_CU, tmp);
6663 }
6664
6665 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6666                                        bool enable)
6667 {
6668         u32 data, orig;
6669
6670         orig = data = RREG32(RLC_PG_CNTL);
6671         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6672                 data |= STATIC_PER_CU_PG_ENABLE;
6673         else
6674                 data &= ~STATIC_PER_CU_PG_ENABLE;
6675         if (orig != data)
6676                 WREG32(RLC_PG_CNTL, data);
6677 }
6678
6679 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6680                                         bool enable)
6681 {
6682         u32 data, orig;
6683
6684         orig = data = RREG32(RLC_PG_CNTL);
6685         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6686                 data |= DYN_PER_CU_PG_ENABLE;
6687         else
6688                 data &= ~DYN_PER_CU_PG_ENABLE;
6689         if (orig != data)
6690                 WREG32(RLC_PG_CNTL, data);
6691 }
6692
/* RLC scratch-memory offsets (in dwords) used below */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/**
 * cik_init_gfx_cgpg - set up RLC state for gfx coarse-grain powergating
 *
 * @rdev: radeon_device pointer
 *
 * Writes the clear-state descriptor and save/restore register list into
 * RLC scratch memory, points the RLC at the save/restore and CP table
 * buffers, and programs the powergating delay/idle-poll parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	/* write the clear-state descriptor (addr hi, addr lo, size) into
	 * RLC scratch; zero all three dwords when no clear-state data exists
	 */
	if (rdev->rlc.cs_data) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	/* upload the save/restore register list, if present */
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* both base addresses are 256-byte aligned, hence the >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* replace the low byte of the second delay register */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* save-and-restore idle interval for auto powergating */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6744
/*
 * cik_update_gfx_pg - enable/disable all gfx powergating flavors at once
 *
 * Applies coarse-grain, static medium-grain and dynamic medium-grain
 * powergating in that order; each helper checks its own pg_flags bit,
 * so unsupported features are left disabled.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6751
6752 u32 cik_get_csb_size(struct radeon_device *rdev)
6753 {
6754         u32 count = 0;
6755         const struct cs_section_def *sect = NULL;
6756         const struct cs_extent_def *ext = NULL;
6757
6758         if (rdev->rlc.cs_data == NULL)
6759                 return 0;
6760
6761         /* begin clear state */
6762         count += 2;
6763         /* context control state */
6764         count += 3;
6765
6766         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6767                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6768                         if (sect->id == SECT_CONTEXT)
6769                                 count += 2 + ext->reg_count;
6770                         else
6771                                 return 0;
6772                 }
6773         }
6774         /* pa_sc_raster_config/pa_sc_raster_config1 */
6775         count += 4;
6776         /* end clear state */
6777         count += 2;
6778         /* clear state */
6779         count += 2;
6780
6781         return count;
6782 }
6783
/*
 * cik_get_csb_buffer - fill a buffer with the clear-state packet stream
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords); must be at least
 *          cik_get_csb_size() dwords — not checked here
 *
 * Emits PREAMBLE begin, context control, all SECT_CONTEXT register
 * extents, the family-specific pa_sc_raster_config pair, PREAMBLE end
 * and a CLEAR_STATE packet.  Silently returns early on a
 * non-SECT_CONTEXT section, leaving the buffer partially filled.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context reg offsets are relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config / pa_sc_raster_config1, per asic family */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6848
/*
 * cik_init_pg - initialize and enable powergating
 *
 * No-op when pg_flags is empty.  Order matters: the RLC state and
 * CP/GDS powergating are set up before the always-on CU mask and the
 * final gfx powergating enable.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		/* slow the serial clock down on power-up/down transitions */
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6863
/*
 * cik_fini_pg - disable powergating
 *
 * Reverse of cik_init_pg(): gfx powergating is turned off first, then
 * CP and GDS powergating where supported.  No-op when pg_flags is empty.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6874
6875 /*
6876  * Interrupts
6877  * Starting with r6xx, interrupts are handled via a ring buffer.
6878  * Ring buffers are areas of GPU accessible memory that the GPU
6879  * writes interrupt vectors into and the host reads vectors out of.
6880  * There is a rptr (read pointer) that determines where the
6881  * host is currently reading, and a wptr (write pointer)
6882  * which determines where the GPU has written.  When the
6883  * pointers are equal, the ring is idle.  When the GPU
6884  * writes vectors to the ring buffer, it increments the
 * wptr.  When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
 * equal again at which point it updates the rptr.
6888  */
6889
6890 /**
6891  * cik_enable_interrupts - Enable the interrupt ring buffer
6892  *
6893  * @rdev: radeon_device pointer
6894  *
6895  * Enable the interrupt ring buffer (CIK).
6896  */
6897 static void cik_enable_interrupts(struct radeon_device *rdev)
6898 {
6899         u32 ih_cntl = RREG32(IH_CNTL);
6900         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6901
6902         ih_cntl |= ENABLE_INTR;
6903         ih_rb_cntl |= IH_RB_ENABLE;
6904         WREG32(IH_CNTL, ih_cntl);
6905         WREG32(IH_RB_CNTL, ih_rb_cntl);
6906         rdev->ih.enabled = true;
6907 }
6908
6909 /**
6910  * cik_disable_interrupts - Disable the interrupt ring buffer
6911  *
6912  * @rdev: radeon_device pointer
6913  *
6914  * Disable the interrupt ring buffer (CIK).
6915  */
6916 static void cik_disable_interrupts(struct radeon_device *rdev)
6917 {
6918         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6919         u32 ih_cntl = RREG32(IH_CNTL);
6920
6921         ih_rb_cntl &= ~IH_RB_ENABLE;
6922         ih_cntl &= ~ENABLE_INTR;
6923         WREG32(IH_RB_CNTL, ih_rb_cntl);
6924         WREG32(IH_CNTL, ih_cntl);
6925         /* set rptr, wptr to 0 */
6926         WREG32(IH_RB_RPTR, 0);
6927         WREG32(IH_RB_WPTR, 0);
6928         rdev->ih.enabled = false;
6929         rdev->ih.rptr = 0;
6930 }
6931
6932 /**
6933  * cik_disable_interrupt_state - Disable all interrupt sources
6934  *
6935  * @rdev: radeon_device pointer
6936  *
6937  * Clear all interrupt enable bits used by the driver (CIK).
6938  */
6939 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6940 {
6941         u32 tmp;
6942
6943         /* gfx ring */
6944         tmp = RREG32(CP_INT_CNTL_RING0) &
6945                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6946         WREG32(CP_INT_CNTL_RING0, tmp);
6947         /* sdma */
6948         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6949         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6950         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6951         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6952         /* compute queues */
6953         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6954         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6955         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6956         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6957         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6958         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6959         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6960         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6961         /* grbm */
6962         WREG32(GRBM_INT_CNTL, 0);
6963         /* SRBM */
6964         WREG32(SRBM_INT_CNTL, 0);
6965         /* vline/vblank, etc. */
6966         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6967         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6968         if (rdev->num_crtc >= 4) {
6969                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6970                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6971         }
6972         if (rdev->num_crtc >= 6) {
6973                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6974                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6975         }
6976         /* pflip */
6977         if (rdev->num_crtc >= 2) {
6978                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6979                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6980         }
6981         if (rdev->num_crtc >= 4) {
6982                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6983                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6984         }
6985         if (rdev->num_crtc >= 6) {
6986                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6987                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6988         }
6989
6990         /* dac hotplug */
6991         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6992
6993         /* digital hotplug */
6994         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6995         WREG32(DC_HPD1_INT_CONTROL, tmp);
6996         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6997         WREG32(DC_HPD2_INT_CONTROL, tmp);
6998         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6999         WREG32(DC_HPD3_INT_CONTROL, tmp);
7000         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7001         WREG32(DC_HPD4_INT_CONTROL, tmp);
7002         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7003         WREG32(DC_HPD5_INT_CONTROL, tmp);
7004         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7005         WREG32(DC_HPD6_INT_CONTROL, tmp);
7006
7007 }
7008
7009 /**
7010  * cik_irq_init - init and enable the interrupt ring
7011  *
7012  * @rdev: radeon_device pointer
7013  *
7014  * Allocate a ring buffer for the interrupt controller,
7015  * enable the RLC, disable interrupts, enable the IH
7016  * ring buffer and enable it (CIK).
7017  * Called at device load and reume.
7018  * Returns 0 for success, errors for failure.
7019  */
7020 static int cik_irq_init(struct radeon_device *rdev)
7021 {
7022         int ret = 0;
7023         int rb_bufsz;
7024         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7025
7026         /* allocate ring */
7027         ret = r600_ih_ring_alloc(rdev);
7028         if (ret)
7029                 return ret;
7030
7031         /* disable irqs */
7032         cik_disable_interrupts(rdev);
7033
7034         /* init rlc */
7035         ret = cik_rlc_resume(rdev);
7036         if (ret) {
7037                 r600_ih_ring_fini(rdev);
7038                 return ret;
7039         }
7040
7041         /* setup interrupt control */
7042         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7043         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7044         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7045         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7046          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7047          */
7048         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7049         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7050         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7051         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7052
7053         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7054         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7055
7056         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7057                       IH_WPTR_OVERFLOW_CLEAR |
7058                       (rb_bufsz << 1));
7059
7060         if (rdev->wb.enabled)
7061                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7062
7063         /* set the writeback address whether it's enabled or not */
7064         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7065         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7066
7067         WREG32(IH_RB_CNTL, ih_rb_cntl);
7068
7069         /* set rptr, wptr to 0 */
7070         WREG32(IH_RB_RPTR, 0);
7071         WREG32(IH_RB_WPTR, 0);
7072
7073         /* Default settings for IH_CNTL (disabled at first) */
7074         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7075         /* RPTR_REARM only works if msi's are enabled */
7076         if (rdev->msi_enabled)
7077                 ih_cntl |= RPTR_REARM;
7078         WREG32(IH_CNTL, ih_cntl);
7079
7080         /* force the active interrupt state to all disabled */
7081         cik_disable_interrupt_state(rdev);
7082
7083         pci_set_master(rdev->pdev);
7084
7085         /* enable irqs */
7086         cik_enable_interrupts(rdev);
7087
7088         return ret;
7089 }
7090
7091 /**
7092  * cik_irq_set - enable/disable interrupt sources
7093  *
7094  * @rdev: radeon_device pointer
7095  *
7096  * Enable interrupt sources on the GPU (vblanks, hpd,
7097  * etc.) (CIK).
7098  * Returns 0 for success, errors for failure.
7099  */
7100 int cik_irq_set(struct radeon_device *rdev)
7101 {
7102         u32 cp_int_cntl;
7103         u32 cp_m1p0;
7104         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7105         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7106         u32 grbm_int_cntl = 0;
7107         u32 dma_cntl, dma_cntl1;
7108
7109         if (!rdev->irq.installed) {
7110                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7111                 return -EINVAL;
7112         }
7113         /* don't enable anything if the ih is disabled */
7114         if (!rdev->ih.enabled) {
7115                 cik_disable_interrupts(rdev);
7116                 /* force the active interrupt state to all disabled */
7117                 cik_disable_interrupt_state(rdev);
7118                 return 0;
7119         }
7120
7121         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7122                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7123         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7124
7125         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7126         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7127         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7128         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7129         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7130         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7131
7132         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7133         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7134
7135         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7136
7137         /* enable CP interrupts on all rings */
7138         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7139                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7140                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7141         }
7142         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7143                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7144                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7145                 if (ring->me == 1) {
7146                         switch (ring->pipe) {
7147                         case 0:
7148                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7149                                 break;
7150                         default:
7151                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7152                                 break;
7153                         }
7154                 } else {
7155                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7156                 }
7157         }
7158         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7159                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7160                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7161                 if (ring->me == 1) {
7162                         switch (ring->pipe) {
7163                         case 0:
7164                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7165                                 break;
7166                         default:
7167                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7168                                 break;
7169                         }
7170                 } else {
7171                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7172                 }
7173         }
7174
7175         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7176                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7177                 dma_cntl |= TRAP_ENABLE;
7178         }
7179
7180         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7181                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7182                 dma_cntl1 |= TRAP_ENABLE;
7183         }
7184
7185         if (rdev->irq.crtc_vblank_int[0] ||
7186             atomic_read(&rdev->irq.pflip[0])) {
7187                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7188                 crtc1 |= VBLANK_INTERRUPT_MASK;
7189         }
7190         if (rdev->irq.crtc_vblank_int[1] ||
7191             atomic_read(&rdev->irq.pflip[1])) {
7192                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7193                 crtc2 |= VBLANK_INTERRUPT_MASK;
7194         }
7195         if (rdev->irq.crtc_vblank_int[2] ||
7196             atomic_read(&rdev->irq.pflip[2])) {
7197                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7198                 crtc3 |= VBLANK_INTERRUPT_MASK;
7199         }
7200         if (rdev->irq.crtc_vblank_int[3] ||
7201             atomic_read(&rdev->irq.pflip[3])) {
7202                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7203                 crtc4 |= VBLANK_INTERRUPT_MASK;
7204         }
7205         if (rdev->irq.crtc_vblank_int[4] ||
7206             atomic_read(&rdev->irq.pflip[4])) {
7207                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7208                 crtc5 |= VBLANK_INTERRUPT_MASK;
7209         }
7210         if (rdev->irq.crtc_vblank_int[5] ||
7211             atomic_read(&rdev->irq.pflip[5])) {
7212                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7213                 crtc6 |= VBLANK_INTERRUPT_MASK;
7214         }
7215         if (rdev->irq.hpd[0]) {
7216                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7217                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218         }
7219         if (rdev->irq.hpd[1]) {
7220                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7221                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222         }
7223         if (rdev->irq.hpd[2]) {
7224                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7225                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226         }
7227         if (rdev->irq.hpd[3]) {
7228                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7229                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230         }
7231         if (rdev->irq.hpd[4]) {
7232                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7233                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7234         }
7235         if (rdev->irq.hpd[5]) {
7236                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7237                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7238         }
7239
7240         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7241
7242         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7243         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7244
7245         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7246
7247         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7248
7249         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7250         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7251         if (rdev->num_crtc >= 4) {
7252                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7253                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7254         }
7255         if (rdev->num_crtc >= 6) {
7256                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7257                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7258         }
7259
7260         if (rdev->num_crtc >= 2) {
7261                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7262                        GRPH_PFLIP_INT_MASK);
7263                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7264                        GRPH_PFLIP_INT_MASK);
7265         }
7266         if (rdev->num_crtc >= 4) {
7267                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7268                        GRPH_PFLIP_INT_MASK);
7269                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7270                        GRPH_PFLIP_INT_MASK);
7271         }
7272         if (rdev->num_crtc >= 6) {
7273                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7274                        GRPH_PFLIP_INT_MASK);
7275                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7276                        GRPH_PFLIP_INT_MASK);
7277         }
7278
7279         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7280         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7281         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7282         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7283         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7284         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7285
7286         /* posting read */
7287         RREG32(SRBM_STATUS);
7288
7289         return 0;
7290 }
7291
7292 /**
7293  * cik_irq_ack - ack interrupt sources
7294  *
7295  * @rdev: radeon_device pointer
7296  *
7297  * Ack interrupt sources on the GPU (vblanks, hpd,
7298  * etc.) (CIK).  Certain interrupts sources are sw
7299  * generated and do not require an explicit ack.
7300  */
7301 static inline void cik_irq_ack(struct radeon_device *rdev)
7302 {
7303         u32 tmp;
7304
7305         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7306         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7307         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7308         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7309         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7310         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7311         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7312
7313         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7314                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7315         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7316                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7317         if (rdev->num_crtc >= 4) {
7318                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7319                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7320                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7321                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7322         }
7323         if (rdev->num_crtc >= 6) {
7324                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7325                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7326                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7327                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7328         }
7329
7330         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7331                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7332                        GRPH_PFLIP_INT_CLEAR);
7333         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7334                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7335                        GRPH_PFLIP_INT_CLEAR);
7336         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7337                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7338         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7339                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7340         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7341                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7342         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7343                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7344
7345         if (rdev->num_crtc >= 4) {
7346                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7347                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7348                                GRPH_PFLIP_INT_CLEAR);
7349                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7350                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7351                                GRPH_PFLIP_INT_CLEAR);
7352                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7353                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7354                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7355                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7356                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7357                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7358                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7359                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7360         }
7361
7362         if (rdev->num_crtc >= 6) {
7363                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7364                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7365                                GRPH_PFLIP_INT_CLEAR);
7366                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7367                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7368                                GRPH_PFLIP_INT_CLEAR);
7369                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7370                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7371                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7372                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7373                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7374                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7375                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7376                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7377         }
7378
7379         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7380                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7381                 tmp |= DC_HPDx_INT_ACK;
7382                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7383         }
7384         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7385                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7386                 tmp |= DC_HPDx_INT_ACK;
7387                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7388         }
7389         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7390                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7391                 tmp |= DC_HPDx_INT_ACK;
7392                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7393         }
7394         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7395                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7396                 tmp |= DC_HPDx_INT_ACK;
7397                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7398         }
7399         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7400                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7401                 tmp |= DC_HPDx_INT_ACK;
7402                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7403         }
7404         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7405                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7406                 tmp |= DC_HPDx_INT_ACK;
7407                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7408         }
7409         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7410                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7411                 tmp |= DC_HPDx_RX_INT_ACK;
7412                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7413         }
7414         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7415                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7416                 tmp |= DC_HPDx_RX_INT_ACK;
7417                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7418         }
7419         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7420                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7421                 tmp |= DC_HPDx_RX_INT_ACK;
7422                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7423         }
7424         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7425                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7426                 tmp |= DC_HPDx_RX_INT_ACK;
7427                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7428         }
7429         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7430                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7431                 tmp |= DC_HPDx_RX_INT_ACK;
7432                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7433         }
7434         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7435                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7436                 tmp |= DC_HPDx_RX_INT_ACK;
7437                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7438         }
7439 }
7440
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK): mask all interrupt sources,
 * then acknowledge anything that fired in the meantime and reset
 * the per-source interrupt enable state.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait for any in-flight interrupts to land, then ack them */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
7456
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7470
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw first so nothing writes into the ring we free */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7485
7486 /**
7487  * cik_get_ih_wptr - get the IH ring buffer wptr
7488  *
7489  * @rdev: radeon_device pointer
7490  *
7491  * Get the IH ring buffer wptr from either the register
7492  * or the writeback memory buffer (CIK).  Also check for
7493  * ring buffer overflow and deal with it.
7494  * Used by cik_irq_process().
7495  * Returns the value of the wptr.
7496  */
7497 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7498 {
7499         u32 wptr, tmp;
7500
7501         if (rdev->wb.enabled)
7502                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7503         else
7504                 wptr = RREG32(IH_RB_WPTR);
7505
7506         if (wptr & RB_OVERFLOW) {
7507                 wptr &= ~RB_OVERFLOW;
7508                 /* When a ring buffer overflow happen start parsing interrupt
7509                  * from the last not overwritten vector (wptr + 16). Hopefully
7510                  * this should allow us to catchup.
7511                  */
7512                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7513                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7514                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7515                 tmp = RREG32(IH_RB_CNTL);
7516                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7517                 WREG32(IH_RB_CNTL, tmp);
7518         }
7519         return (wptr & rdev->ih.ptr_mask);
7520 }
7521
7522 /*        CIK IV Ring
7523  * Each IV ring entry is 128 bits:
7524  * [7:0]    - interrupt source id
7525  * [31:8]   - reserved
7526  * [59:32]  - interrupt source data
7527  * [63:60]  - reserved
7528  * [71:64]  - RINGID
7529  *            CP:
7530  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7531  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7532  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7533  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7534  *            PIPE_ID - ME0 0=3D
7535  *                    - ME1&2 compute dispatcher (4 pipes each)
7536  *            SDMA:
7537  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7538  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7539  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7540  * [79:72]  - VMID
7541  * [95:80]  - PASID
7542  * [127:96] - reserved
7543  */
7544 /**
7545  * cik_irq_process - interrupt handler
7546  *
7547  * @rdev: radeon_device pointer
7548  *
7549  * Interrupt handler (CIK).  Walk the IH ring,
7550  * ack interrupts and schedule work to handle
7551  * interrupt events.
7552  * Returns irq process return code.
7553  */
7554 int cik_irq_process(struct radeon_device *rdev)
7555 {
7556         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7557         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7558         u32 wptr;
7559         u32 rptr;
7560         u32 src_id, src_data, ring_id;
7561         u8 me_id, pipe_id, queue_id;
7562         u32 ring_index;
7563         bool queue_hotplug = false;
7564         bool queue_dp = false;
7565         bool queue_reset = false;
7566         u32 addr, status, mc_client;
7567         bool queue_thermal = false;
7568
7569         if (!rdev->ih.enabled || rdev->shutdown)
7570                 return IRQ_NONE;
7571
7572         wptr = cik_get_ih_wptr(rdev);
7573
7574 restart_ih:
7575         /* is somebody else already processing irqs? */
7576         if (atomic_xchg(&rdev->ih.lock, 1))
7577                 return IRQ_NONE;
7578
7579         rptr = rdev->ih.rptr;
7580         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7581
7582         /* Order reading of wptr vs. reading of IH ring data */
7583         rmb();
7584
7585         /* display interrupts */
7586         cik_irq_ack(rdev);
7587
7588         while (rptr != wptr) {
7589                 /* wptr/rptr are in bytes! */
7590                 ring_index = rptr / 4;
7591
7592                 radeon_kfd_interrupt(rdev,
7593                                 (const void *) &rdev->ih.ring[ring_index]);
7594
7595                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7596                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7597                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7598
7599                 switch (src_id) {
7600                 case 1: /* D1 vblank/vline */
7601                         switch (src_data) {
7602                         case 0: /* D1 vblank */
7603                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7604                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7605
7606                                 if (rdev->irq.crtc_vblank_int[0]) {
7607                                         drm_handle_vblank(rdev->ddev, 0);
7608                                         rdev->pm.vblank_sync = true;
7609                                         wake_up(&rdev->irq.vblank_queue);
7610                                 }
7611                                 if (atomic_read(&rdev->irq.pflip[0]))
7612                                         radeon_crtc_handle_vblank(rdev, 0);
7613                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7614                                 DRM_DEBUG("IH: D1 vblank\n");
7615
7616                                 break;
7617                         case 1: /* D1 vline */
7618                                 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7619                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7620
7621                                 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7622                                 DRM_DEBUG("IH: D1 vline\n");
7623
7624                                 break;
7625                         default:
7626                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7627                                 break;
7628                         }
7629                         break;
7630                 case 2: /* D2 vblank/vline */
7631                         switch (src_data) {
7632                         case 0: /* D2 vblank */
7633                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7634                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7635
7636                                 if (rdev->irq.crtc_vblank_int[1]) {
7637                                         drm_handle_vblank(rdev->ddev, 1);
7638                                         rdev->pm.vblank_sync = true;
7639                                         wake_up(&rdev->irq.vblank_queue);
7640                                 }
7641                                 if (atomic_read(&rdev->irq.pflip[1]))
7642                                         radeon_crtc_handle_vblank(rdev, 1);
7643                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7644                                 DRM_DEBUG("IH: D2 vblank\n");
7645
7646                                 break;
7647                         case 1: /* D2 vline */
7648                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7649                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7650
7651                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7652                                 DRM_DEBUG("IH: D2 vline\n");
7653
7654                                 break;
7655                         default:
7656                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7657                                 break;
7658                         }
7659                         break;
7660                 case 3: /* D3 vblank/vline */
7661                         switch (src_data) {
7662                         case 0: /* D3 vblank */
7663                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7664                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7665
7666                                 if (rdev->irq.crtc_vblank_int[2]) {
7667                                         drm_handle_vblank(rdev->ddev, 2);
7668                                         rdev->pm.vblank_sync = true;
7669                                         wake_up(&rdev->irq.vblank_queue);
7670                                 }
7671                                 if (atomic_read(&rdev->irq.pflip[2]))
7672                                         radeon_crtc_handle_vblank(rdev, 2);
7673                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7674                                 DRM_DEBUG("IH: D3 vblank\n");
7675
7676                                 break;
7677                         case 1: /* D3 vline */
7678                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7679                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7680
7681                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7682                                 DRM_DEBUG("IH: D3 vline\n");
7683
7684                                 break;
7685                         default:
7686                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7687                                 break;
7688                         }
7689                         break;
7690                 case 4: /* D4 vblank/vline */
7691                         switch (src_data) {
7692                         case 0: /* D4 vblank */
7693                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7694                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7695
7696                                 if (rdev->irq.crtc_vblank_int[3]) {
7697                                         drm_handle_vblank(rdev->ddev, 3);
7698                                         rdev->pm.vblank_sync = true;
7699                                         wake_up(&rdev->irq.vblank_queue);
7700                                 }
7701                                 if (atomic_read(&rdev->irq.pflip[3]))
7702                                         radeon_crtc_handle_vblank(rdev, 3);
7703                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7704                                 DRM_DEBUG("IH: D4 vblank\n");
7705
7706                                 break;
7707                         case 1: /* D4 vline */
7708                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7709                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7710
7711                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7712                                 DRM_DEBUG("IH: D4 vline\n");
7713
7714                                 break;
7715                         default:
7716                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7717                                 break;
7718                         }
7719                         break;
7720                 case 5: /* D5 vblank/vline */
7721                         switch (src_data) {
7722                         case 0: /* D5 vblank */
7723                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7724                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7725
7726                                 if (rdev->irq.crtc_vblank_int[4]) {
7727                                         drm_handle_vblank(rdev->ddev, 4);
7728                                         rdev->pm.vblank_sync = true;
7729                                         wake_up(&rdev->irq.vblank_queue);
7730                                 }
7731                                 if (atomic_read(&rdev->irq.pflip[4]))
7732                                         radeon_crtc_handle_vblank(rdev, 4);
7733                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7734                                 DRM_DEBUG("IH: D5 vblank\n");
7735
7736                                 break;
7737                         case 1: /* D5 vline */
7738                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7739                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7740
7741                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7742                                 DRM_DEBUG("IH: D5 vline\n");
7743
7744                                 break;
7745                         default:
7746                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7747                                 break;
7748                         }
7749                         break;
7750                 case 6: /* D6 vblank/vline */
7751                         switch (src_data) {
7752                         case 0: /* D6 vblank */
7753                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7754                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7755
7756                                 if (rdev->irq.crtc_vblank_int[5]) {
7757                                         drm_handle_vblank(rdev->ddev, 5);
7758                                         rdev->pm.vblank_sync = true;
7759                                         wake_up(&rdev->irq.vblank_queue);
7760                                 }
7761                                 if (atomic_read(&rdev->irq.pflip[5]))
7762                                         radeon_crtc_handle_vblank(rdev, 5);
7763                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7764                                 DRM_DEBUG("IH: D6 vblank\n");
7765
7766                                 break;
7767                         case 1: /* D6 vline */
7768                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7769                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7770
7771                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7772                                 DRM_DEBUG("IH: D6 vline\n");
7773
7774                                 break;
7775                         default:
7776                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7777                                 break;
7778                         }
7779                         break;
7780                 case 8: /* D1 page flip */
7781                 case 10: /* D2 page flip */
7782                 case 12: /* D3 page flip */
7783                 case 14: /* D4 page flip */
7784                 case 16: /* D5 page flip */
7785                 case 18: /* D6 page flip */
7786                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7787                         if (radeon_use_pflipirq > 0)
7788                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7789                         break;
7790                 case 42: /* HPD hotplug */
7791                         switch (src_data) {
7792                         case 0:
7793                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7794                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7795
7796                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7797                                 queue_hotplug = true;
7798                                 DRM_DEBUG("IH: HPD1\n");
7799
7800                                 break;
7801                         case 1:
7802                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7803                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7804
7805                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7806                                 queue_hotplug = true;
7807                                 DRM_DEBUG("IH: HPD2\n");
7808
7809                                 break;
7810                         case 2:
7811                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7812                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7813
7814                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7815                                 queue_hotplug = true;
7816                                 DRM_DEBUG("IH: HPD3\n");
7817
7818                                 break;
7819                         case 3:
7820                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7821                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7822
7823                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7824                                 queue_hotplug = true;
7825                                 DRM_DEBUG("IH: HPD4\n");
7826
7827                                 break;
7828                         case 4:
7829                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7830                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7831
7832                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7833                                 queue_hotplug = true;
7834                                 DRM_DEBUG("IH: HPD5\n");
7835
7836                                 break;
7837                         case 5:
7838                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7839                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7840
7841                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7842                                 queue_hotplug = true;
7843                                 DRM_DEBUG("IH: HPD6\n");
7844
7845                                 break;
7846                         case 6:
7847                                 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7848                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7849
7850                                 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7851                                 queue_dp = true;
7852                                 DRM_DEBUG("IH: HPD_RX 1\n");
7853
7854                                 break;
7855                         case 7:
7856                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7857                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7858
7859                                 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7860                                 queue_dp = true;
7861                                 DRM_DEBUG("IH: HPD_RX 2\n");
7862
7863                                 break;
7864                         case 8:
7865                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7866                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7867
7868                                 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7869                                 queue_dp = true;
7870                                 DRM_DEBUG("IH: HPD_RX 3\n");
7871
7872                                 break;
7873                         case 9:
7874                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7875                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7876
7877                                 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7878                                 queue_dp = true;
7879                                 DRM_DEBUG("IH: HPD_RX 4\n");
7880
7881                                 break;
7882                         case 10:
7883                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7884                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7885
7886                                 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7887                                 queue_dp = true;
7888                                 DRM_DEBUG("IH: HPD_RX 5\n");
7889
7890                                 break;
7891                         case 11:
7892                                 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7893                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7894
7895                                 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7896                                 queue_dp = true;
7897                                 DRM_DEBUG("IH: HPD_RX 6\n");
7898
7899                                 break;
7900                         default:
7901                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7902                                 break;
7903                         }
7904                         break;
7905                 case 96:
7906                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7907                         WREG32(SRBM_INT_ACK, 0x1);
7908                         break;
7909                 case 124: /* UVD */
7910                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7911                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7912                         break;
7913                 case 146:
7914                 case 147:
7915                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7916                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7917                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7918                         /* reset addr and status */
7919                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7920                         if (addr == 0x0 && status == 0x0)
7921                                 break;
7922                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7923                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7924                                 addr);
7925                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7926                                 status);
7927                         cik_vm_decode_fault(rdev, status, addr, mc_client);
7928                         break;
7929                 case 167: /* VCE */
7930                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7931                         switch (src_data) {
7932                         case 0:
7933                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7934                                 break;
7935                         case 1:
7936                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7937                                 break;
7938                         default:
7939                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7940                                 break;
7941                         }
7942                         break;
7943                 case 176: /* GFX RB CP_INT */
7944                 case 177: /* GFX IB CP_INT */
7945                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7946                         break;
7947                 case 181: /* CP EOP event */
7948                         DRM_DEBUG("IH: CP EOP\n");
7949                         /* XXX check the bitfield order! */
7950                         me_id = (ring_id & 0x60) >> 5;
7951                         pipe_id = (ring_id & 0x18) >> 3;
7952                         queue_id = (ring_id & 0x7) >> 0;
7953                         switch (me_id) {
7954                         case 0:
7955                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7956                                 break;
7957                         case 1:
7958                         case 2:
7959                                 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7960                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7961                                 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7962                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7963                                 break;
7964                         }
7965                         break;
7966                 case 184: /* CP Privileged reg access */
7967                         DRM_ERROR("Illegal register access in command stream\n");
7968                         /* XXX check the bitfield order! */
7969                         me_id = (ring_id & 0x60) >> 5;
7970                         pipe_id = (ring_id & 0x18) >> 3;
7971                         queue_id = (ring_id & 0x7) >> 0;
7972                         switch (me_id) {
7973                         case 0:
7974                                 /* This results in a full GPU reset, but all we need to do is soft
7975                                  * reset the CP for gfx
7976                                  */
7977                                 queue_reset = true;
7978                                 break;
7979                         case 1:
7980                                 /* XXX compute */
7981                                 queue_reset = true;
7982                                 break;
7983                         case 2:
7984                                 /* XXX compute */
7985                                 queue_reset = true;
7986                                 break;
7987                         }
7988                         break;
7989                 case 185: /* CP Privileged inst */
7990                         DRM_ERROR("Illegal instruction in command stream\n");
7991                         /* XXX check the bitfield order! */
7992                         me_id = (ring_id & 0x60) >> 5;
7993                         pipe_id = (ring_id & 0x18) >> 3;
7994                         queue_id = (ring_id & 0x7) >> 0;
7995                         switch (me_id) {
7996                         case 0:
7997                                 /* This results in a full GPU reset, but all we need to do is soft
7998                                  * reset the CP for gfx
7999                                  */
8000                                 queue_reset = true;
8001                                 break;
8002                         case 1:
8003                                 /* XXX compute */
8004                                 queue_reset = true;
8005                                 break;
8006                         case 2:
8007                                 /* XXX compute */
8008                                 queue_reset = true;
8009                                 break;
8010                         }
8011                         break;
8012                 case 224: /* SDMA trap event */
8013                         /* XXX check the bitfield order! */
8014                         me_id = (ring_id & 0x3) >> 0;
8015                         queue_id = (ring_id & 0xc) >> 2;
8016                         DRM_DEBUG("IH: SDMA trap\n");
8017                         switch (me_id) {
8018                         case 0:
8019                                 switch (queue_id) {
8020                                 case 0:
8021                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8022                                         break;
8023                                 case 1:
8024                                         /* XXX compute */
8025                                         break;
8026                                 case 2:
8027                                         /* XXX compute */
8028                                         break;
8029                                 }
8030                                 break;
8031                         case 1:
8032                                 switch (queue_id) {
8033                                 case 0:
8034                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8035                                         break;
8036                                 case 1:
8037                                         /* XXX compute */
8038                                         break;
8039                                 case 2:
8040                                         /* XXX compute */
8041                                         break;
8042                                 }
8043                                 break;
8044                         }
8045                         break;
8046                 case 230: /* thermal low to high */
8047                         DRM_DEBUG("IH: thermal low to high\n");
8048                         rdev->pm.dpm.thermal.high_to_low = false;
8049                         queue_thermal = true;
8050                         break;
8051                 case 231: /* thermal high to low */
8052                         DRM_DEBUG("IH: thermal high to low\n");
8053                         rdev->pm.dpm.thermal.high_to_low = true;
8054                         queue_thermal = true;
8055                         break;
8056                 case 233: /* GUI IDLE */
8057                         DRM_DEBUG("IH: GUI idle\n");
8058                         break;
8059                 case 241: /* SDMA Privileged inst */
8060                 case 247: /* SDMA Privileged inst */
8061                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8062                         /* XXX check the bitfield order! */
8063                         me_id = (ring_id & 0x3) >> 0;
8064                         queue_id = (ring_id & 0xc) >> 2;
8065                         switch (me_id) {
8066                         case 0:
8067                                 switch (queue_id) {
8068                                 case 0:
8069                                         queue_reset = true;
8070                                         break;
8071                                 case 1:
8072                                         /* XXX compute */
8073                                         queue_reset = true;
8074                                         break;
8075                                 case 2:
8076                                         /* XXX compute */
8077                                         queue_reset = true;
8078                                         break;
8079                                 }
8080                                 break;
8081                         case 1:
8082                                 switch (queue_id) {
8083                                 case 0:
8084                                         queue_reset = true;
8085                                         break;
8086                                 case 1:
8087                                         /* XXX compute */
8088                                         queue_reset = true;
8089                                         break;
8090                                 case 2:
8091                                         /* XXX compute */
8092                                         queue_reset = true;
8093                                         break;
8094                                 }
8095                                 break;
8096                         }
8097                         break;
8098                 default:
8099                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8100                         break;
8101                 }
8102
8103                 /* wptr/rptr are in bytes! */
8104                 rptr += 16;
8105                 rptr &= rdev->ih.ptr_mask;
8106                 WREG32(IH_RB_RPTR, rptr);
8107         }
8108         if (queue_dp)
8109                 schedule_work(&rdev->dp_work);
8110         if (queue_hotplug)
8111                 schedule_delayed_work(&rdev->hotplug_work, 0);
8112         if (queue_reset) {
8113                 rdev->needs_reset = true;
8114                 wake_up_all(&rdev->fence_queue);
8115         }
8116         if (queue_thermal)
8117                 schedule_work(&rdev->pm.dpm.thermal.work);
8118         rdev->ih.rptr = rptr;
8119         atomic_set(&rdev->ih.lock, 0);
8120
8121         /* make sure wptr hasn't changed while processing */
8122         wptr = cik_get_ih_wptr(rdev);
8123         if (wptr != rptr)
8124                 goto restart_ih;
8125
8126         return IRQ_HANDLED;
8127 }
8128
8129 /*
8130  * startup/shutdown callbacks
8131  */
8132 static void cik_uvd_init(struct radeon_device *rdev)
8133 {
8134         int r;
8135
8136         if (!rdev->has_uvd)
8137                 return;
8138
8139         r = radeon_uvd_init(rdev);
8140         if (r) {
8141                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8142                 /*
8143                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8144                  * to early fails cik_uvd_start() and thus nothing happens
8145                  * there. So it is pointless to try to go through that code
8146                  * hence why we disable uvd here.
8147                  */
8148                 rdev->has_uvd = 0;
8149                 return;
8150         }
8151         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8152         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8153 }
8154
8155 static void cik_uvd_start(struct radeon_device *rdev)
8156 {
8157         int r;
8158
8159         if (!rdev->has_uvd)
8160                 return;
8161
8162         r = radeon_uvd_resume(rdev);
8163         if (r) {
8164                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8165                 goto error;
8166         }
8167         r = uvd_v4_2_resume(rdev);
8168         if (r) {
8169                 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8170                 goto error;
8171         }
8172         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8173         if (r) {
8174                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8175                 goto error;
8176         }
8177         return;
8178
8179 error:
8180         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8181 }
8182
8183 static void cik_uvd_resume(struct radeon_device *rdev)
8184 {
8185         struct radeon_ring *ring;
8186         int r;
8187
8188         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8189                 return;
8190
8191         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8192         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8193         if (r) {
8194                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8195                 return;
8196         }
8197         r = uvd_v1_0_init(rdev);
8198         if (r) {
8199                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8200                 return;
8201         }
8202 }
8203
8204 static void cik_vce_init(struct radeon_device *rdev)
8205 {
8206         int r;
8207
8208         if (!rdev->has_vce)
8209                 return;
8210
8211         r = radeon_vce_init(rdev);
8212         if (r) {
8213                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8214                 /*
8215                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
8216                  * to early fails cik_vce_start() and thus nothing happens
8217                  * there. So it is pointless to try to go through that code
8218                  * hence why we disable vce here.
8219                  */
8220                 rdev->has_vce = 0;
8221                 return;
8222         }
8223         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8224         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8225         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8226         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8227 }
8228
8229 static void cik_vce_start(struct radeon_device *rdev)
8230 {
8231         int r;
8232
8233         if (!rdev->has_vce)
8234                 return;
8235
8236         r = radeon_vce_resume(rdev);
8237         if (r) {
8238                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8239                 goto error;
8240         }
8241         r = vce_v2_0_resume(rdev);
8242         if (r) {
8243                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8244                 goto error;
8245         }
8246         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8247         if (r) {
8248                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8249                 goto error;
8250         }
8251         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8252         if (r) {
8253                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8254                 goto error;
8255         }
8256         return;
8257
8258 error:
8259         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8260         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8261 }
8262
8263 static void cik_vce_resume(struct radeon_device *rdev)
8264 {
8265         struct radeon_ring *ring;
8266         int r;
8267
8268         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8269                 return;
8270
8271         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8272         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8273         if (r) {
8274                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8275                 return;
8276         }
8277         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8278         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8279         if (r) {
8280                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8281                 return;
8282         }
8283         r = vce_v1_0_init(rdev);
8284         if (r) {
8285                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8286                 return;
8287         }
8288 }
8289
8290 /**
8291  * cik_startup - program the asic to a functional state
8292  *
8293  * @rdev: radeon_device pointer
8294  *
8295  * Programs the asic to a functional state (CIK).
8296  * Called by cik_init() and cik_resume().
8297  * Returns 0 for success, error for failure.
8298  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC microcode, unless DPM is enabled
	 * (NOTE(review): presumably DPM has already loaded it in that
	 * case -- confirm against the ci_dpm init path)
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* KAVERI uses the spectre save/restore list; all other
		 * IGP families use the kalindi one
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence processing on every ring used below: gfx, both
	 * compute rings, and both SDMA rings
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE start failures are non-fatal; they disable their rings */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* HAWAII with the original (non-new_fw) firmware uses type-2 nop
	 * packets; everything else uses type-3 nops
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* non-fatal; these bail out silently if start failed above */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8495
8496 /**
8497  * cik_resume - resume the asic to a functional state
8498  *
8499  * @rdev: radeon_device pointer
8500  *
8501  * Programs the asic to a functional state (CIK).
8502  * Called at resume.
8503  * Returns 0 for success, error for failure.
8504  */
8505 int cik_resume(struct radeon_device *rdev)
8506 {
8507         int r;
8508
8509         /* post card */
8510         atom_asic_init(rdev->mode_info.atom_context);
8511
8512         /* init golden registers */
8513         cik_init_golden_registers(rdev);
8514
8515         if (rdev->pm.pm_method == PM_METHOD_DPM)
8516                 radeon_pm_resume(rdev);
8517
8518         rdev->accel_working = true;
8519         r = cik_startup(rdev);
8520         if (r) {
8521                 DRM_ERROR("cik startup failed on resume\n");
8522                 rdev->accel_working = false;
8523                 return r;
8524         }
8525
8526         return r;
8527
8528 }
8529
8530 /**
8531  * cik_suspend - suspend the asic
8532  *
8533  * @rdev: radeon_device pointer
8534  *
8535  * Bring the chip into a state suitable for suspend (CIK).
8536  * Called at suspend.
8537  * Returns 0 for success.
8538  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop the consumers (KFD, PM, audio, VM manager) first */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processors and the SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* UVD/VCE teardown only applies when the asic has the blocks */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable power and clock gating, then interrupts, writeback
	 * and finally the GART
	 */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8560
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call asic-specific functions. This should also allow the
 * removal of a number of callback functions, such as vram_info.
 */
8567 /**
8568  * cik_init - asic specific driver and hw init
8569  *
8570  * @rdev: radeon_device pointer
8571  *
8572  * Setup asic specific driver variables and program the hw
8573  * to a functional state (CIK).
8574  * Called at driver startup.
8575  * Returns 0 for success, errors for failure.
8576  */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message says "cayman" but this is the CIK
		 * code path -- looks like a copy/paste leftover from ni.c
		 */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load microcode if any piece is missing; IGPs do not need the
	 * MC firmware, dGPUs additionally require mc_fw
	 */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* each compute ring is assigned a doorbell */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	/* UVD/VCE init failures are non-fatal; they disable the blocks */
	cik_uvd_init(rdev);
	cik_vce_init(rdev);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		/* hw bring-up failed: tear down everything cik_startup()
		 * may have set up and continue without acceleration
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 * (NOTE(review): "NI+" in the message below is a leftover from
	 * the cayman code this was derived from)
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
8716
8717 /**
8718  * cik_fini - asic specific driver and hw fini
8719  *
8720  * @rdev: radeon_device pointer
8721  *
8722  * Tear down the asic specific driver variables and program the hw
8723  * to an idle state (CIK).
8724  * Called at driver unload.
8725  */
void cik_fini(struct radeon_device *rdev)
{
	/* teardown is roughly the reverse of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	/* disable power and clock gating before tearing down the IH */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* release the BIOS copy and clear the pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8752
8753 void dce8_program_fmt(struct drm_encoder *encoder)
8754 {
8755         struct drm_device *dev = encoder->dev;
8756         struct radeon_device *rdev = dev->dev_private;
8757         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8758         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8759         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8760         int bpc = 0;
8761         u32 tmp = 0;
8762         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8763
8764         if (connector) {
8765                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8766                 bpc = radeon_get_monitor_bpc(connector);
8767                 dither = radeon_connector->dither;
8768         }
8769
8770         /* LVDS/eDP FMT is set up by atom */
8771         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8772                 return;
8773
8774         /* not needed for analog */
8775         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8776             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8777                 return;
8778
8779         if (bpc == 0)
8780                 return;
8781
8782         switch (bpc) {
8783         case 6:
8784                 if (dither == RADEON_FMT_DITHER_ENABLE)
8785                         /* XXX sort out optimal dither settings */
8786                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8787                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8788                 else
8789                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8790                 break;
8791         case 8:
8792                 if (dither == RADEON_FMT_DITHER_ENABLE)
8793                         /* XXX sort out optimal dither settings */
8794                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795                                 FMT_RGB_RANDOM_ENABLE |
8796                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8797                 else
8798                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8799                 break;
8800         case 10:
8801                 if (dither == RADEON_FMT_DITHER_ENABLE)
8802                         /* XXX sort out optimal dither settings */
8803                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8804                                 FMT_RGB_RANDOM_ENABLE |
8805                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8806                 else
8807                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8808                 break;
8809         default:
8810                 /* not needed */
8811                 break;
8812         }
8813
8814         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8815 }
8816
8817 /* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK), then wait for the
 * hardware to acknowledge the new DMIF buffer allocation.
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
                                   struct radeon_crtc *radeon_crtc,
                                   struct drm_display_mode *mode)
{
        u32 tmp, buffer_alloc, i;
        u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
        /*
         * Line Buffer Setup
         * There are 6 line buffers, one for each display controllers.
         * There are 3 partitions per LB. Select the number of partitions
         * to enable based on the display width.  For display widths larger
         * than 4096, you need use to use 2 display controllers and combine
         * them using the stereo blender.
         */
        if (radeon_crtc->base.enabled && mode) {
                if (mode->crtc_hdisplay < 1920) {
                        tmp = 1;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 2560) {
                        tmp = 2;
                        buffer_alloc = 2;
                } else if (mode->crtc_hdisplay < 4096) {
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                } else {
                        DRM_DEBUG_KMS("Mode too big for LB!\n");
                        tmp = 0;
                        buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
                }
        } else {
                /* crtc disabled: minimal LB config, release all DMIF buffers */
                tmp = 1;
                buffer_alloc = 0;
        }

        /* tmp selects the LB partition config; 0x6B0 is the LB size */
        WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
               LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

        WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
               DMIF_BUFFERS_ALLOCATED(buffer_alloc));
        /* poll until the hw reports the reallocation completed (usec granularity) */
        for (i = 0; i < rdev->usec_timeout; i++) {
                if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
                    DMIF_BUFFERS_ALLOCATED_COMPLETED)
                        break;
                udelay(1);
        }

        /* report the LB width (in pixels, x2 for the two internal partitions)
         * that matches the partition config chosen above */
        if (radeon_crtc->base.enabled && mode) {
                switch (tmp) {
                case 0:
                default:
                        return 4096 * 2;
                case 1:
                        return 1920 * 2;
                case 2:
                        return 2560 * 2;
                }
        }

        /* controller not enabled, so no lb used */
        return 0;
}
8891
8892 /**
8893  * cik_get_number_of_dram_channels - get the number of dram channels
8894  *
8895  * @rdev: radeon_device pointer
8896  *
8897  * Look up the number of video ram channels (CIK).
8898  * Used for display watermark bandwidth calculations
8899  * Returns the number of dram channels
8900  */
8901 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8902 {
8903         u32 tmp = RREG32(MC_SHARED_CHMAP);
8904
8905         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8906         case 0:
8907         default:
8908                 return 1;
8909         case 1:
8910                 return 2;
8911         case 2:
8912                 return 4;
8913         case 3:
8914                 return 8;
8915         case 4:
8916                 return 3;
8917         case 5:
8918                 return 6;
8919         case 6:
8920                 return 10;
8921         case 7:
8922                 return 12;
8923         case 8:
8924                 return 16;
8925         }
8926 }
8927
/* watermark calculation parameters for one display head (CIK/DCE8) */
struct dce8_wm_params {
        u32 dram_channels; /* number of dram channels */
        u32 yclk;          /* bandwidth per dram data pin in kHz */
        u32 sclk;          /* engine clock in kHz */
        u32 disp_clk;      /* display clock in kHz */
        u32 src_width;     /* viewport width */
        u32 active_time;   /* active display time in ns */
        u32 blank_time;    /* blank time in ns */
        bool interlaced;    /* mode is interlaced */
        fixed20_12 vsc;    /* vertical scale ratio */
        u32 num_heads;     /* number of active crtcs */
        u32 bytes_per_pixel; /* bytes per pixel display + overlay */
        u32 lb_size;       /* line buffer allocated to pipe */
        u32 vtaps;         /* vertical scaler taps */
};
8943
8944 /**
8945  * dce8_dram_bandwidth - get the dram bandwidth
8946  *
8947  * @wm: watermark calculation data
8948  *
8949  * Calculate the raw dram bandwidth (CIK).
8950  * Used for display watermark bandwidth calculations
8951  * Returns the dram bandwidth in MBytes/s
8952  */
8953 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8954 {
8955         /* Calculate raw DRAM Bandwidth */
8956         fixed20_12 dram_efficiency; /* 0.7 */
8957         fixed20_12 yclk, dram_channels, bandwidth;
8958         fixed20_12 a;
8959
8960         a.full = dfixed_const(1000);
8961         yclk.full = dfixed_const(wm->yclk);
8962         yclk.full = dfixed_div(yclk, a);
8963         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8964         a.full = dfixed_const(10);
8965         dram_efficiency.full = dfixed_const(7);
8966         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8967         bandwidth.full = dfixed_mul(dram_channels, yclk);
8968         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8969
8970         return dfixed_trunc(bandwidth);
8971 }
8972
8973 /**
8974  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8975  *
8976  * @wm: watermark calculation data
8977  *
8978  * Calculate the dram bandwidth used for display (CIK).
8979  * Used for display watermark bandwidth calculations
8980  * Returns the dram bandwidth for display in MBytes/s
8981  */
8982 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8983 {
8984         /* Calculate DRAM Bandwidth and the part allocated to display. */
8985         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8986         fixed20_12 yclk, dram_channels, bandwidth;
8987         fixed20_12 a;
8988
8989         a.full = dfixed_const(1000);
8990         yclk.full = dfixed_const(wm->yclk);
8991         yclk.full = dfixed_div(yclk, a);
8992         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8993         a.full = dfixed_const(10);
8994         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8995         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8996         bandwidth.full = dfixed_mul(dram_channels, yclk);
8997         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8998
8999         return dfixed_trunc(bandwidth);
9000 }
9001
9002 /**
9003  * dce8_data_return_bandwidth - get the data return bandwidth
9004  *
9005  * @wm: watermark calculation data
9006  *
9007  * Calculate the data return bandwidth used for display (CIK).
9008  * Used for display watermark bandwidth calculations
9009  * Returns the data return bandwidth in MBytes/s
9010  */
9011 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9012 {
9013         /* Calculate the display Data return Bandwidth */
9014         fixed20_12 return_efficiency; /* 0.8 */
9015         fixed20_12 sclk, bandwidth;
9016         fixed20_12 a;
9017
9018         a.full = dfixed_const(1000);
9019         sclk.full = dfixed_const(wm->sclk);
9020         sclk.full = dfixed_div(sclk, a);
9021         a.full = dfixed_const(10);
9022         return_efficiency.full = dfixed_const(8);
9023         return_efficiency.full = dfixed_div(return_efficiency, a);
9024         a.full = dfixed_const(32);
9025         bandwidth.full = dfixed_mul(a, sclk);
9026         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9027
9028         return dfixed_trunc(bandwidth);
9029 }
9030
9031 /**
9032  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9033  *
9034  * @wm: watermark calculation data
9035  *
9036  * Calculate the dmif bandwidth used for display (CIK).
9037  * Used for display watermark bandwidth calculations
9038  * Returns the dmif bandwidth in MBytes/s
9039  */
9040 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9041 {
9042         /* Calculate the DMIF Request Bandwidth */
9043         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9044         fixed20_12 disp_clk, bandwidth;
9045         fixed20_12 a, b;
9046
9047         a.full = dfixed_const(1000);
9048         disp_clk.full = dfixed_const(wm->disp_clk);
9049         disp_clk.full = dfixed_div(disp_clk, a);
9050         a.full = dfixed_const(32);
9051         b.full = dfixed_mul(a, disp_clk);
9052
9053         a.full = dfixed_const(10);
9054         disp_clk_request_efficiency.full = dfixed_const(8);
9055         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9056
9057         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9058
9059         return dfixed_trunc(bandwidth);
9060 }
9061
9062 /**
9063  * dce8_available_bandwidth - get the min available bandwidth
9064  *
9065  * @wm: watermark calculation data
9066  *
9067  * Calculate the min available bandwidth used for display (CIK).
9068  * Used for display watermark bandwidth calculations
9069  * Returns the min available bandwidth in MBytes/s
9070  */
9071 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9072 {
9073         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9074         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9075         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9076         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9077
9078         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9079 }
9080
9081 /**
9082  * dce8_average_bandwidth - get the average available bandwidth
9083  *
9084  * @wm: watermark calculation data
9085  *
9086  * Calculate the average available bandwidth used for display (CIK).
9087  * Used for display watermark bandwidth calculations
9088  * Returns the average available bandwidth in MBytes/s
9089  */
9090 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9091 {
9092         /* Calculate the display mode Average Bandwidth
9093          * DisplayMode should contain the source and destination dimensions,
9094          * timing, etc.
9095          */
9096         fixed20_12 bpp;
9097         fixed20_12 line_time;
9098         fixed20_12 src_width;
9099         fixed20_12 bandwidth;
9100         fixed20_12 a;
9101
9102         a.full = dfixed_const(1000);
9103         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9104         line_time.full = dfixed_div(line_time, a);
9105         bpp.full = dfixed_const(wm->bytes_per_pixel);
9106         src_width.full = dfixed_const(wm->src_width);
9107         bandwidth.full = dfixed_mul(src_width, bpp);
9108         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9109         bandwidth.full = dfixed_div(bandwidth, line_time);
9110
9111         return dfixed_trunc(bandwidth);
9112 }
9113
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK): worst-case memory latency
 * plus the time needed to refill the line buffer if it cannot keep
 * up with the active scanout time.
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
        /* First calculate the latency in ns */
        u32 mc_latency = 2000; /* 2000 ns. */
        /* NOTE(review): the divisions below run before the num_heads == 0
         * guard; they assume available_bandwidth and disp_clk are non-zero
         * for any enabled head -- confirm against the callers. */
        u32 available_bandwidth = dce8_available_bandwidth(wm);
        u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
        u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
        u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
        u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
                (wm->num_heads * cursor_line_pair_return_time);
        u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
        u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
        u32 tmp, dmif_size = 12288;
        fixed20_12 a, b, c;

        if (wm->num_heads == 0)
                return 0;

        /* downscaling (or interlace at full scale, or many vtaps) needs up to
         * 4 source lines per destination line; otherwise 2 suffice */
        a.full = dfixed_const(2);
        b.full = dfixed_const(1);
        if ((wm->vsc.full > a.full) ||
            ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
            (wm->vtaps >= 5) ||
            ((wm->vsc.full >= a.full) && wm->interlaced))
                max_src_lines_per_dst_line = 4;
        else
                max_src_lines_per_dst_line = 2;

        /* line buffer fill bandwidth: min of the per-head share of the
         * available bandwidth, the dmif-size-limited rate, and the rate
         * the display clock can consume pixels at */
        a.full = dfixed_const(available_bandwidth);
        b.full = dfixed_const(wm->num_heads);
        a.full = dfixed_div(a, b);
        tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
        tmp = min(dfixed_trunc(a), tmp);

        lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

        /* time (ns) to fill one line's worth of source data at lb_fill_bw */
        a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
        b.full = dfixed_const(1000);
        c.full = dfixed_const(lb_fill_bw);
        b.full = dfixed_div(c, b);
        a.full = dfixed_div(a, b);
        line_fill_time = dfixed_trunc(a);

        /* if the line fills faster than the active time, latency alone
         * dominates; otherwise add the shortfall */
        if (line_fill_time < wm->active_time)
                return latency;
        else
                return latency + (line_fill_time - wm->active_time);

}
9172
9173 /**
9174  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9175  * average and available dram bandwidth
9176  *
9177  * @wm: watermark calculation data
9178  *
9179  * Check if the display average bandwidth fits in the display
9180  * dram bandwidth (CIK).
9181  * Used for display watermark bandwidth calculations
9182  * Returns true if the display fits, false if not.
9183  */
9184 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9185 {
9186         if (dce8_average_bandwidth(wm) <=
9187             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9188                 return true;
9189         else
9190                 return false;
9191 }
9192
9193 /**
9194  * dce8_average_bandwidth_vs_available_bandwidth - check
9195  * average and available bandwidth
9196  *
9197  * @wm: watermark calculation data
9198  *
9199  * Check if the display average bandwidth fits in the display
9200  * available bandwidth (CIK).
9201  * Used for display watermark bandwidth calculations
9202  * Returns true if the display fits, false if not.
9203  */
9204 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9205 {
9206         if (dce8_average_bandwidth(wm) <=
9207             (dce8_available_bandwidth(wm) / wm->num_heads))
9208                 return true;
9209         else
9210                 return false;
9211 }
9212
9213 /**
9214  * dce8_check_latency_hiding - check latency hiding
9215  *
9216  * @wm: watermark calculation data
9217  *
9218  * Check latency hiding (CIK).
9219  * Used for display watermark bandwidth calculations
9220  * Returns true if the display fits, false if not.
9221  */
9222 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9223 {
9224         u32 lb_partitions = wm->lb_size / wm->src_width;
9225         u32 line_time = wm->active_time + wm->blank_time;
9226         u32 latency_tolerant_lines;
9227         u32 latency_hiding;
9228         fixed20_12 a;
9229
9230         a.full = dfixed_const(1);
9231         if (wm->vsc.full > a.full)
9232                 latency_tolerant_lines = 1;
9233         else {
9234                 if (lb_partitions <= (wm->vtaps + 1))
9235                         latency_tolerant_lines = 1;
9236                 else
9237                         latency_tolerant_lines = 2;
9238         }
9239
9240         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9241
9242         if (dce8_latency_watermark(wm) <= latency_hiding)
9243                 return true;
9244         else
9245                 return false;
9246 }
9247
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).  Watermark A is computed for
 * the high (performance) clocks, watermark B for the low (power
 * saving) clocks; both are written to the DPG latency registers.
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
                                    struct radeon_crtc *radeon_crtc,
                                    u32 lb_size, u32 num_heads)
{
        struct drm_display_mode *mode = &radeon_crtc->base.mode;
        struct dce8_wm_params wm_low, wm_high;
        u32 active_time;
        u32 line_time = 0;
        u32 latency_watermark_a = 0, latency_watermark_b = 0;
        u32 tmp, wm_mask;

        if (radeon_crtc->base.enabled && num_heads && mode) {
                /* active/total scanline times in ns; line_time is clamped to
                 * the 16-bit LATENCY_HIGH_WATERMARK field */
                active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
                                            (u32)mode->clock);
                line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
                                          (u32)mode->clock);
                line_time = min(line_time, (u32)65535);

                /* watermark for high clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
                    rdev->pm.dpm_enabled) {
                        /* dpm clocks are in 10 kHz units, hence the * 10 */
                        wm_high.yclk =
                                radeon_dpm_get_mclk(rdev, false) * 10;
                        wm_high.sclk =
                                radeon_dpm_get_sclk(rdev, false) * 10;
                } else {
                        wm_high.yclk = rdev->pm.current_mclk * 10;
                        wm_high.sclk = rdev->pm.current_sclk * 10;
                }

                wm_high.disp_clk = mode->clock;
                wm_high.src_width = mode->crtc_hdisplay;
                wm_high.active_time = active_time;
                wm_high.blank_time = line_time - wm_high.active_time;
                wm_high.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_high.interlaced = true;
                wm_high.vsc = radeon_crtc->vsc;
                wm_high.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_high.vtaps = 2;
                wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_high.lb_size = lb_size;
                wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
                wm_high.num_heads = num_heads;

                /* set for high clocks */
                latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
                    !dce8_check_latency_hiding(&wm_high) ||
                    (rdev->disp_priority == 2)) {
                        /* NOTE(review): only logs -- no register is written to
                         * actually raise the priority here */
                        DRM_DEBUG_KMS("force priority to high\n");
                }

                /* watermark for low clocks */
                if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
                    rdev->pm.dpm_enabled) {
                        wm_low.yclk =
                                radeon_dpm_get_mclk(rdev, true) * 10;
                        wm_low.sclk =
                                radeon_dpm_get_sclk(rdev, true) * 10;
                } else {
                        wm_low.yclk = rdev->pm.current_mclk * 10;
                        wm_low.sclk = rdev->pm.current_sclk * 10;
                }

                wm_low.disp_clk = mode->clock;
                wm_low.src_width = mode->crtc_hdisplay;
                wm_low.active_time = active_time;
                wm_low.blank_time = line_time - wm_low.active_time;
                wm_low.interlaced = false;
                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        wm_low.interlaced = true;
                wm_low.vsc = radeon_crtc->vsc;
                wm_low.vtaps = 1;
                if (radeon_crtc->rmx_type != RMX_OFF)
                        wm_low.vtaps = 2;
                wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
                wm_low.lb_size = lb_size;
                wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
                wm_low.num_heads = num_heads;

                /* set for low clocks */
                latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

                /* possibly force display priority to high */
                /* should really do this at mode validation time... */
                if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
                    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
                    !dce8_check_latency_hiding(&wm_low) ||
                    (rdev->disp_priority == 2)) {
                        DRM_DEBUG_KMS("force priority to high\n");
                }

                /* Save number of lines the linebuffer leads before the scanout */
                radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
        }

        /* select wm A */
        wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
        tmp = wm_mask;
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(1);
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_a) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* select wm B */
        tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
        tmp &= ~LATENCY_WATERMARK_MASK(3);
        tmp |= LATENCY_WATERMARK_MASK(2);
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
        WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
               (LATENCY_LOW_WATERMARK(latency_watermark_b) |
                LATENCY_HIGH_WATERMARK(line_time)));
        /* restore original selection */
        WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

        /* save values for DPM */
        radeon_crtc->line_time = line_time;
        radeon_crtc->wm_high = latency_watermark_a;
        radeon_crtc->wm_low = latency_watermark_b;
}
9386
9387 /**
9388  * dce8_bandwidth_update - program display watermarks
9389  *
9390  * @rdev: radeon_device pointer
9391  *
9392  * Calculate and program the display watermarks and line
9393  * buffer allocation (CIK).
9394  */
9395 void dce8_bandwidth_update(struct radeon_device *rdev)
9396 {
9397         struct drm_display_mode *mode = NULL;
9398         u32 num_heads = 0, lb_size;
9399         int i;
9400
9401         if (!rdev->mode_info.mode_config_initialized)
9402                 return;
9403
9404         radeon_update_display_priority(rdev);
9405
9406         for (i = 0; i < rdev->num_crtc; i++) {
9407                 if (rdev->mode_info.crtcs[i]->base.enabled)
9408                         num_heads++;
9409         }
9410         for (i = 0; i < rdev->num_crtc; i++) {
9411                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9412                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9413                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9414         }
9415 }
9416
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
        uint64_t clock;

        /* serialize the capture handshake against concurrent readers */
        mutex_lock(&rdev->gpu_clock_mutex);
        /* writing 1 latches the counter into the LSB/MSB registers */
        WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
        clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
                ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
        mutex_unlock(&rdev->gpu_clock_mutex);
        return clock;
}
9436
/**
 * cik_set_uvd_clock - program one UVD clock via its post divider
 *
 * @rdev: radeon_device pointer
 * @clock: target clock frequency (NOTE(review): presumably 10 kHz units,
 * matching the atom divider interface -- confirm against callers)
 * @cntl_reg: SMC control register holding the clock's divider
 * @status_reg: SMC status register with the clock's DCLK_STATUS bit
 *
 * Asks atom for clock dividers, writes the post divider into
 * @cntl_reg, then polls @status_reg (up to ~1s) until the clock
 * reports stable.
 * Returns 0 on success, -ETIMEDOUT if the clock never stabilizes,
 * or the atom error code.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
                             u32 cntl_reg, u32 status_reg)
{
        int r, i;
        struct atom_clock_dividers dividers;
        uint32_t tmp;

        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
                                           clock, false, &dividers);
        if (r)
                return r;

        /* replace the divider, keep the other control bits */
        tmp = RREG32_SMC(cntl_reg);
        tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
        tmp |= dividers.post_divider;
        WREG32_SMC(cntl_reg, tmp);

        /* wait for the clock to report stable: 100 x 10ms */
        for (i = 0; i < 100; i++) {
                if (RREG32_SMC(status_reg) & DCLK_STATUS)
                        break;
                mdelay(10);
        }
        if (i == 100)
                return -ETIMEDOUT;

        return 0;
}
9464
9465 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9466 {
9467         int r = 0;
9468
9469         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9470         if (r)
9471                 return r;
9472
9473         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9474         return r;
9475 }
9476
/**
 * cik_set_vce_clocks - program the VCE eclk
 *
 * @rdev: radeon_device pointer
 * @evclk: target VCE video clock (unused here; the divider is computed
 * from @ecclk only)
 * @ecclk: target VCE core clock
 *
 * Asks atom for dividers, waits for the eclk to be stable before and
 * after writing the new post divider.
 * Returns 0 on success, -ETIMEDOUT if the clock never stabilizes,
 * or the atom error code.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
        int r, i;
        struct atom_clock_dividers dividers;
        u32 tmp;

        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
                                           ecclk, false, &dividers);
        if (r)
                return r;

        /* wait for the current clock to be stable before touching the divider */
        for (i = 0; i < 100; i++) {
                if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
                        break;
                mdelay(10);
        }
        if (i == 100)
                return -ETIMEDOUT;

        /* replace the divider, keep the other control bits */
        tmp = RREG32_SMC(CG_ECLK_CNTL);
        tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
        tmp |= dividers.post_divider;
        WREG32_SMC(CG_ECLK_CNTL, tmp);

        /* wait for the new clock to report stable: 100 x 10ms */
        for (i = 0; i < 100; i++) {
                if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
                        break;
                mdelay(10);
        }
        if (i == 100)
                return -ETIMEDOUT;

        return 0;
}
9511
9512 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9513 {
9514         struct pci_dev *root = rdev->pdev->bus->self;
9515         int bridge_pos, gpu_pos;
9516         u32 speed_cntl, mask, current_data_rate;
9517         int ret, i;
9518         u16 tmp16;
9519
9520         if (pci_is_root_bus(rdev->pdev->bus))
9521                 return;
9522
9523         if (radeon_pcie_gen2 == 0)
9524                 return;
9525
9526         if (rdev->flags & RADEON_IS_IGP)
9527                 return;
9528
9529         if (!(rdev->flags & RADEON_IS_PCIE))
9530                 return;
9531
9532         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9533         if (ret != 0)
9534                 return;
9535
9536         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9537                 return;
9538
9539         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9540         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9541                 LC_CURRENT_DATA_RATE_SHIFT;
9542         if (mask & DRM_PCIE_SPEED_80) {
9543                 if (current_data_rate == 2) {
9544                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9545                         return;
9546                 }
9547                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9548         } else if (mask & DRM_PCIE_SPEED_50) {
9549                 if (current_data_rate == 1) {
9550                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9551                         return;
9552                 }
9553                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9554         }
9555
9556         bridge_pos = pci_pcie_cap(root);
9557         if (!bridge_pos)
9558                 return;
9559
9560         gpu_pos = pci_pcie_cap(rdev->pdev);
9561         if (!gpu_pos)
9562                 return;
9563
9564         if (mask & DRM_PCIE_SPEED_80) {
9565                 /* re-try equalization if gen3 is not already enabled */
9566                 if (current_data_rate != 2) {
9567                         u16 bridge_cfg, gpu_cfg;
9568                         u16 bridge_cfg2, gpu_cfg2;
9569                         u32 max_lw, current_lw, tmp;
9570
9571                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9572                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9573
9574                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9575                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9576
9577                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9578                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9579
9580                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9581                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9582                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9583
9584                         if (current_lw < max_lw) {
9585                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9586                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9587                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9588                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9589                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9590                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9591                                 }
9592                         }
9593
9594                         for (i = 0; i < 10; i++) {
9595                                 /* check status */
9596                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9597                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9598                                         break;
9599
9600                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9601                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9602
9603                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9604                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9605
9606                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9607                                 tmp |= LC_SET_QUIESCE;
9608                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9609
9610                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9611                                 tmp |= LC_REDO_EQ;
9612                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9613
9614                                 mdelay(100);
9615
9616                                 /* linkctl */
9617                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9618                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9619                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9620                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9621
9622                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9623                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9624                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9625                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9626
9627                                 /* linkctl2 */
9628                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9629                                 tmp16 &= ~((1 << 4) | (7 << 9));
9630                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9631                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9632
9633                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9634                                 tmp16 &= ~((1 << 4) | (7 << 9));
9635                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9636                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9637
9638                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9639                                 tmp &= ~LC_SET_QUIESCE;
9640                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9641                         }
9642                 }
9643         }
9644
9645         /* set the link speed */
9646         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9647         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9648         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9649
9650         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9651         tmp16 &= ~0xf;
9652         if (mask & DRM_PCIE_SPEED_80)
9653                 tmp16 |= 3; /* gen3 */
9654         else if (mask & DRM_PCIE_SPEED_50)
9655                 tmp16 |= 2; /* gen2 */
9656         else
9657                 tmp16 |= 1; /* gen1 */
9658         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9659
9660         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9661         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9662         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9663
9664         for (i = 0; i < rdev->usec_timeout; i++) {
9665                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9666                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9667                         break;
9668                 udelay(1);
9669         }
9670 }
9671
/**
 * cik_program_aspm - program PCIe ASPM-related state (CIK parts)
 * @rdev: radeon_device pointer
 *
 * Tunes Active State Power Management behaviour on the PCIe link:
 * L0s/L1 inactivity timers, PLL power-down in L1, dynamic lane power
 * states, and (when the upstream bridge advertises clock power
 * management) switching several internal clocks so CLKREQ#-based
 * power-down can be used.  Does nothing for IGPs, non-PCIE parts, or
 * when ASPM is disabled via the radeon.aspm module parameter.
 *
 * Every register update below is a read-modify-write that only writes
 * the register back when the value actually changed.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
        u32 data, orig;
        /* Tuning knobs; currently hard-wired so every feature is enabled. */
        bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
        bool disable_clkreq = false;

        /* radeon.aspm=0 disables all of this. */
        if (radeon_aspm == 0)
                return;

        /* XXX double check IGPs */
        if (rdev->flags & RADEON_IS_IGP)
                return;

        if (!(rdev->flags & RADEON_IS_PCIE))
                return;

        /* Override the number of fast training sequences (N_FTS) sent
         * by the transmitter to 0x24.
         */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
        data &= ~LC_XMIT_N_FTS_MASK;
        data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
        data |= LC_GO_TO_RECOVERY;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

        /* Ignore EDB (end bad) errors on the port. */
        orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
        data |= P_IGNORE_EDB_ERR;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_P_CNTL, data);

        /* Program the L0s/L1 inactivity timers.  PMI-to-L1 is disabled
         * by default and re-enabled below when L1 is in use.
         */
        orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
        data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
        data |= LC_PMI_TO_L1_DIS;
        if (!disable_l0s)
                data |= LC_L0S_INACTIVITY(7);

        if (!disable_l1) {
                /* L1 enabled: set its inactivity timer and allow PMI to
                 * move the link into L1.
                 */
                data |= LC_L1_INACTIVITY(7);
                data &= ~LC_PMI_TO_L1_DIS;
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

                if (!disable_plloff_in_l1) {
                        bool clk_req_support;

                        /* Let both PIF pads (PB0/PB1) power their PLLs
                         * down in the OFF and TXS2 power states.
                         */
                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
                        data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

                        orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
                        data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
                        data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
                        if (orig != data)
                                WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

                        /* Dynamic lane power state 3. */
                        orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
                        data &= ~LC_DYN_LANES_PWR_STATE_MASK;
                        data |= LC_DYN_LANES_PWR_STATE(3);
                        if (orig != data)
                                WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

                        /* CLKREQ# can only be used when the upstream
                         * bridge advertises clock power management in
                         * its link capabilities (and we actually have
                         * an upstream bridge, i.e. not a root bus).
                         */
                        if (!disable_clkreq &&
                            !pci_is_root_bus(rdev->pdev->bus)) {
                                struct pci_dev *root = rdev->pdev->bus->self;
                                u32 lnkcap;

                                clk_req_support = false;
                                pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
                                if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
                                        clk_req_support = true;
                        } else {
                                clk_req_support = false;
                        }

                        if (clk_req_support) {
                                /* Allow power-down in L1 and L2/L3. */
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
                                data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

                                /* Reselect thermal/deep-sleep/Z/MPLL
                                 * bypass clock sources below -
                                 * NOTE(review): presumably to move them
                                 * off the PCIE refclk so it can be
                                 * gated; confirm against SMC docs.
                                 */
                                orig = data = RREG32_SMC(THM_CLK_CNTL);
                                data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
                                data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(THM_CLK_CNTL, data);

                                orig = data = RREG32_SMC(MISC_CLK_CTRL);
                                data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
                                data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
                                if (orig != data)
                                        WREG32_SMC(MISC_CLK_CTRL, data);

                                /* Don't source XCLK from BCLK. */
                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
                                data &= ~BCLK_AS_XCLK;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL, data);

                                /* Stop forcing the BIF reference clock on. */
                                orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
                                data &= ~FORCE_BIF_REFCLK_EN;
                                if (orig != data)
                                        WREG32_SMC(CG_CLKPIN_CNTL_2, data);

                                orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
                                data &= ~MPLL_CLKOUT_SEL_MASK;
                                data |= MPLL_CLKOUT_SEL(4);
                                if (orig != data)
                                        WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
                        }
                }
        } else {
                /* L1 disabled: flush the timer/PMI settings computed above. */
                if (orig != data)
                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
        }

        /* Enable LS (light sleep) for the slave/master/replay memories. */
        orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
        data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
        if (orig != data)
                WREG32_PCIE_PORT(PCIE_CNTL2, data);

        if (!disable_l0s) {
                /* If the received N_FTS field is saturated and the link
                 * is reversed on both sides, clear the L0s inactivity
                 * timer - NOTE(review): zeroing the timer appears to
                 * prevent L0s entry in that configuration; confirm.
                 */
                data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
                if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
                        data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
                        if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
                                orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
                                data &= ~LC_L0S_INACTIVITY_MASK;
                                if (orig != data)
                                        WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
                        }
                }
        }
}
This page took 0.614354 seconds and 4 git commands to generate.