drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 #include "amdgpu.h"
30 #include "amdgpu_gfx.h"
31 #include "amdgpu_psp.h"
32 #include "amdgpu_smu.h"
33 #include "nv.h"
34 #include "nvd.h"
35
36 #include "gc/gc_10_1_0_offset.h"
37 #include "gc/gc_10_1_0_sh_mask.h"
38 #include "navi10_enum.h"
39 #include "hdp/hdp_5_0_0_offset.h"
40 #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
41
42 #include "soc15.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx10.h"
45 #include "v10_structs.h"
46 #include "gfx_v10_0.h"
47 #include "nbio_v2_3.h"
48
49 /**
50  * Navi10 has two gfx rings sharing each graphics pipe:
51  * 1. Primary ring
52  * 2. Async ring
53  *
54  * During bring-up only the primary ring was used, so the gfx ring count was
55  * initially set to 1.
56  */
57 #define GFX10_NUM_GFX_RINGS     2
58 #define GFX10_MEC_HPD_SIZE      2048
59
60 #define F32_CE_PROGRAM_RAM_SIZE         65536
61 #define RLCG_UCODE_LOADING_START_ADDRESS        0x00002000L
62
63 #define mmCGTT_GS_NGG_CLK_CTRL  0x5087
64 #define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1
65
66 MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/navi10_me.bin");
69 MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
72
73 MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin");
74 MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin");
75 MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin");
76 MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin");
77 MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin");
78 MODULE_FIRMWARE("amdgpu/navi14_ce.bin");
79 MODULE_FIRMWARE("amdgpu/navi14_pfp.bin");
80 MODULE_FIRMWARE("amdgpu/navi14_me.bin");
81 MODULE_FIRMWARE("amdgpu/navi14_mec.bin");
82 MODULE_FIRMWARE("amdgpu/navi14_mec2.bin");
83 MODULE_FIRMWARE("amdgpu/navi14_rlc.bin");
84
85 MODULE_FIRMWARE("amdgpu/navi12_ce.bin");
86 MODULE_FIRMWARE("amdgpu/navi12_pfp.bin");
87 MODULE_FIRMWARE("amdgpu/navi12_me.bin");
88 MODULE_FIRMWARE("amdgpu/navi12_mec.bin");
89 MODULE_FIRMWARE("amdgpu/navi12_mec2.bin");
90 MODULE_FIRMWARE("amdgpu/navi12_rlc.bin");
91
92 static const struct soc15_reg_golden golden_settings_gc_10_1[] =
93 {
94         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
95         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
96         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
97         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
98         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
99         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
100         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
101         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
102         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
103         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
104         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
105         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
106         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
107         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
108         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
109         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
110         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
111         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
112         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
113         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
114         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
115         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
116         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
117         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
118         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
119         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
120         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
121         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
122         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
123         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
124         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
125         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
126         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
127         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
128         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
129         SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
130         SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
131 };
132
133 static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
134 {
135         /* Pending on emulation bring up */
136 };
137
138 static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
139 {
140         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
141         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
142         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
143         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
144         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
145         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
146         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
147         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
148         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
149         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
150         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
151         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
152         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
153         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
154         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
155         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
156         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
157         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
158         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
159         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
160         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
161         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7),
162         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
163         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
164         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
165         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
166         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
167         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
168         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
169         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
170         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
171         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
172         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
173         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
174         SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000),
175 };
176
177 static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
178 {
179         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
180         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
181         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
182         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100),
183         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
184         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
185         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
186         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100),
187         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
188         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000),
189         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff),
190         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
191         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
192         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
193         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
194         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
195         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
196         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
197         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
198         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
199         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
200         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
201         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
202         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
203         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
204         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
205         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
206         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02),
207         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
208         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
209         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000),
210         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
211         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
212         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
213         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
214         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
215         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
216         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
217         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
218         SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
219 };
220
221 static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
222 {
223         /* Pending on emulation bring up */
224 };
225
226 static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
227 {
228         /* Pending on emulation bring up */
229 };
230
231 #define DEFAULT_SH_MEM_CONFIG \
232         ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
233          (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
234          (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
235          (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
236
237
238 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
239 static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
240 static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
241 static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
242 static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
243                                  struct amdgpu_cu_info *cu_info);
244 static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
245 static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
246                                    u32 sh_num, u32 instance);
247 static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
248
249 static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
250 static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
251 static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
252 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
253 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
254 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
255 static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
256
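/*
 * Tell the CP which compute queues the KIQ may schedule by writing the queue
 * mask with a PACKET3_SET_RESOURCES; the GWS, OAC and GDS fields are left 0.
 */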
257 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
258 {
259         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
260         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
261                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
262         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
263         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
264         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
265         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
266         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
267         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
268 }
269
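/*
 * Emit a PACKET3_MAP_QUEUES on the KIQ ring for @ring: the doorbell offset,
 * MQD address and wptr address are taken from the target ring, and engine_sel
 * is 4 for gfx rings and 0 for compute rings.
 */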
270 static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
271                                  struct amdgpu_ring *ring)
272 {
273         struct amdgpu_device *adev = kiq_ring->adev;
274         uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
275         uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
276         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
277
278         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
279         /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
280         amdgpu_ring_write(kiq_ring,
281                           PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
282                           PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
283                           PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
284                           PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
285                           PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
286                           PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
287                           PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
288                           PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
289                           PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
290         amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
291         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
292         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
293         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
294         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
295 }
296
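/*
 * Emit a PACKET3_UNMAP_QUEUES for @ring. For PREEMPT_QUEUES_NO_UNMAP the
 * trailing dwords carry the fence address (@gpu_addr) and value (@seq) used
 * to signal completion; otherwise they are written as zero.
 */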
297 static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
298                                    struct amdgpu_ring *ring,
299                                    enum amdgpu_unmap_queues_action action,
300                                    u64 gpu_addr, u64 seq)
301 {
302         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
303
304         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
305         amdgpu_ring_write(kiq_ring, /* action, queue_sel, eng_sel, num_queues */
306                           PACKET3_UNMAP_QUEUES_ACTION(action) |
307                           PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
308                           PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
309                           PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
310         amdgpu_ring_write(kiq_ring,
311                   PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
312
313         if (action == PREEMPT_QUEUES_NO_UNMAP) {
314                 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
315                 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
316                 amdgpu_ring_write(kiq_ring, seq);
317         } else {
318                 amdgpu_ring_write(kiq_ring, 0);
319                 amdgpu_ring_write(kiq_ring, 0);
320                 amdgpu_ring_write(kiq_ring, 0);
321         }
322 }
323
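/*
 * Emit a PACKET3_QUERY_STATUS for the queue identified by @ring's doorbell
 * offset; @addr and @seq give the fence location and value the CP writes back
 * when the query completes.
 */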
324 static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
325                                    struct amdgpu_ring *ring,
326                                    u64 addr,
327                                    u64 seq)
328 {
329         uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
330
331         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
332         amdgpu_ring_write(kiq_ring,
333                           PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
334                           PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
335                           PACKET3_QUERY_STATUS_COMMAND(2));
336         amdgpu_ring_write(kiq_ring, /* doorbell_offset, eng_sel */
337                           PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
338                           PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
339         amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
340         amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
341         amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
342         amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
343 }
344
345 static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
346         .kiq_set_resources = gfx10_kiq_set_resources,
347         .kiq_map_queues = gfx10_kiq_map_queues,
348         .kiq_unmap_queues = gfx10_kiq_unmap_queues,
349         .kiq_query_status = gfx10_kiq_query_status,
350         .set_resources_size = 8,
351         .map_queues_size = 7,
352         .unmap_queues_size = 6,
353         .query_status_size = 7,
354 };
355
356 static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
357 {
358         adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
359 }
360
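/* Program the per-ASIC golden register settings defined in the tables above. */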
361 static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
362 {
363         switch (adev->asic_type) {
364         case CHIP_NAVI10:
365                 soc15_program_register_sequence(adev,
366                                                 golden_settings_gc_10_1,
367                                                 (const u32)ARRAY_SIZE(golden_settings_gc_10_1));
368                 soc15_program_register_sequence(adev,
369                                                 golden_settings_gc_10_0_nv10,
370                                                 (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
371                 break;
372         case CHIP_NAVI14:
373                 soc15_program_register_sequence(adev,
374                                                 golden_settings_gc_10_1_1,
375                                                 (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1));
376                 soc15_program_register_sequence(adev,
377                                                 golden_settings_gc_10_1_nv14,
378                                                 (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14));
379                 break;
380         case CHIP_NAVI12:
381                 soc15_program_register_sequence(adev,
382                                                 golden_settings_gc_10_1_2,
383                                                 (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2));
384                 soc15_program_register_sequence(adev,
385                                                 golden_settings_gc_10_1_2_nv12,
386                                                 (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12));
387                 break;
388         default:
389                 break;
390         }
391 }
392
393 static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
394 {
395         adev->gfx.scratch.num_reg = 8;
396         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
397         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
398 }
399
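/*
 * Emit a PACKET3_WRITE_DATA that writes @val to register @reg, optionally
 * requesting write confirmation when @wc is set.
 */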
400 static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
401                                        bool wc, uint32_t reg, uint32_t val)
402 {
403         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
404         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
405                           WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
406         amdgpu_ring_write(ring, reg);
407         amdgpu_ring_write(ring, 0);
408         amdgpu_ring_write(ring, val);
409 }
410
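/*
 * Emit a PACKET3_WAIT_REG_MEM that polls a register (@mem_space == 0) or a
 * memory location (@mem_space == 1) until (value & @mask) == @ref, with @inv
 * as the poll interval.
 */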
411 static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
412                                   int mem_space, int opt, uint32_t addr0,
413                                   uint32_t addr1, uint32_t ref, uint32_t mask,
414                                   uint32_t inv)
415 {
416         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
417         amdgpu_ring_write(ring,
418                           /* memory (1) or register (0) */
419                           (WAIT_REG_MEM_MEM_SPACE(mem_space) |
420                            WAIT_REG_MEM_OPERATION(opt) | /* wait */
421                            WAIT_REG_MEM_FUNCTION(3) |  /* equal */
422                            WAIT_REG_MEM_ENGINE(eng_sel)));
423
424         if (mem_space)
425                 BUG_ON(addr0 & 0x3); /* Dword align */
426         amdgpu_ring_write(ring, addr0);
427         amdgpu_ring_write(ring, addr1);
428         amdgpu_ring_write(ring, ref);
429         amdgpu_ring_write(ring, mask);
430         amdgpu_ring_write(ring, inv); /* poll interval */
431 }
432
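/*
 * Basic ring test: write 0xDEADBEEF to a scratch register through the ring
 * and poll until the value appears or the timeout expires.
 */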
433 static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
434 {
435         struct amdgpu_device *adev = ring->adev;
436         uint32_t scratch;
437         uint32_t tmp = 0;
438         unsigned i;
439         int r;
440
441         r = amdgpu_gfx_scratch_get(adev, &scratch);
442         if (r) {
443                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
444                 return r;
445         }
446
447         WREG32(scratch, 0xCAFEDEAD);
448
449         r = amdgpu_ring_alloc(ring, 3);
450         if (r) {
451                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
452                           ring->idx, r);
453                 amdgpu_gfx_scratch_free(adev, scratch);
454                 return r;
455         }
456
457         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
458         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
459         amdgpu_ring_write(ring, 0xDEADBEEF);
460         amdgpu_ring_commit(ring);
461
462         for (i = 0; i < adev->usec_timeout; i++) {
463                 tmp = RREG32(scratch);
464                 if (tmp == 0xDEADBEEF)
465                         break;
466                 if (amdgpu_emu_mode == 1)
467                         msleep(1);
468                 else
469                         udelay(1);
470         }
471         if (i < adev->usec_timeout) {
472                 if (amdgpu_emu_mode == 1)
473                         DRM_INFO("ring test on %d succeeded in %d msecs\n",
474                                  ring->idx, i);
475                 else
476                         DRM_INFO("ring test on %d succeeded in %d usecs\n",
477                                  ring->idx, i);
478         } else {
479                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
480                           ring->idx, scratch, tmp);
481                 r = -EINVAL;
482         }
483         amdgpu_gfx_scratch_free(adev, scratch);
484
485         return r;
486 }
487
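/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a scratch
 * register, then wait on its fence and check the register to verify that IB
 * submission works.
 */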
488 static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
489 {
490         struct amdgpu_device *adev = ring->adev;
491         struct amdgpu_ib ib;
492         struct dma_fence *f = NULL;
493         uint32_t scratch;
494         uint32_t tmp = 0;
495         long r;
496
497         r = amdgpu_gfx_scratch_get(adev, &scratch);
498         if (r) {
499                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
500                 return r;
501         }
502
503         WREG32(scratch, 0xCAFEDEAD);
504
505         memset(&ib, 0, sizeof(ib));
506         r = amdgpu_ib_get(adev, NULL, 256, &ib);
507         if (r) {
508                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
509                 goto err1;
510         }
511
512         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
513         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
514         ib.ptr[2] = 0xDEADBEEF;
515         ib.length_dw = 3;
516
517         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
518         if (r)
519                 goto err2;
520
521         r = dma_fence_wait_timeout(f, false, timeout);
522         if (r == 0) {
523                 DRM_ERROR("amdgpu: IB test timed out.\n");
524                 r = -ETIMEDOUT;
525                 goto err2;
526         } else if (r < 0) {
527                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
528                 goto err2;
529         }
530
531         tmp = RREG32(scratch);
532         if (tmp == 0xDEADBEEF) {
533                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
534                 r = 0;
535         } else {
536                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
537                           scratch, tmp);
538                 r = -EINVAL;
539         }
540 err2:
541         amdgpu_ib_free(adev, &ib, NULL);
542         dma_fence_put(f);
543 err1:
544         amdgpu_gfx_scratch_free(adev, scratch);
545
546         return r;
547 }
548
549 static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
550 {
551         release_firmware(adev->gfx.pfp_fw);
552         adev->gfx.pfp_fw = NULL;
553         release_firmware(adev->gfx.me_fw);
554         adev->gfx.me_fw = NULL;
555         release_firmware(adev->gfx.ce_fw);
556         adev->gfx.ce_fw = NULL;
557         release_firmware(adev->gfx.rlc_fw);
558         adev->gfx.rlc_fw = NULL;
559         release_firmware(adev->gfx.mec_fw);
560         adev->gfx.mec_fw = NULL;
561         release_firmware(adev->gfx.mec2_fw);
562         adev->gfx.mec2_fw = NULL;
563
564         kfree(adev->gfx.rlc.register_list_format);
565 }
566
567 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
568 {
569         const struct rlc_firmware_header_v2_1 *rlc_hdr;
570
571         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
572         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
573         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
574         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
575         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
576         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
577         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
578         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
579         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
580         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
581         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
582         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
583         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
584         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
585                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
586 }
587
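/* Clear the GFXOFF feature flag on ASICs where it should not be enabled. */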
588 static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
589 {
590         switch (adev->asic_type) {
591         case CHIP_NAVI10:
592                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
593                 break;
594         default:
595                 break;
596         }
597 }
598
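/*
 * Request and validate the PFP, ME, CE, RLC, MEC and (optional) MEC2 firmware
 * images for the current ASIC, record their versions, and register them with
 * the firmware framework when PSP front-door loading is used.
 */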
599 static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
600 {
601         const char *chip_name;
602         char fw_name[40];
603         char wks[10];
604         int err;
605         struct amdgpu_firmware_info *info = NULL;
606         const struct common_firmware_header *header = NULL;
607         const struct gfx_firmware_header_v1_0 *cp_hdr;
608         const struct rlc_firmware_header_v2_0 *rlc_hdr;
609         unsigned int *tmp = NULL;
610         unsigned int i = 0;
611         uint16_t version_major;
612         uint16_t version_minor;
613
614         DRM_DEBUG("\n");
615
616         memset(wks, 0, sizeof(wks));
617         switch (adev->asic_type) {
618         case CHIP_NAVI10:
619                 chip_name = "navi10";
620                 break;
621         case CHIP_NAVI14:
622                 chip_name = "navi14";
623                 if (!(adev->pdev->device == 0x7340 &&
624                       adev->pdev->revision != 0x00))
625                         snprintf(wks, sizeof(wks), "_wks");
626                 break;
627         case CHIP_NAVI12:
628                 chip_name = "navi12";
629                 break;
630         default:
631                 BUG();
632         }
633
634         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);
635         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
636         if (err)
637                 goto out;
638         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
639         if (err)
640                 goto out;
641         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
642         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
643         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
644
645         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);
646         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
647         if (err)
648                 goto out;
649         err = amdgpu_ucode_validate(adev->gfx.me_fw);
650         if (err)
651                 goto out;
652         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
653         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
654         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
655
656         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);
657         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
658         if (err)
659                 goto out;
660         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
661         if (err)
662                 goto out;
663         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
664         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
665         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
666
667         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
668         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
669         if (err)
670                 goto out;
671         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
672         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
673         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
674         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
675         if (version_major == 2 && version_minor == 1)
676                 adev->gfx.rlc.is_rlc_v2_1 = true;
677
678         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
679         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
680         adev->gfx.rlc.save_and_restore_offset =
681                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
682         adev->gfx.rlc.clear_state_descriptor_offset =
683                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
684         adev->gfx.rlc.avail_scratch_ram_locations =
685                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
686         adev->gfx.rlc.reg_restore_list_size =
687                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
688         adev->gfx.rlc.reg_list_format_start =
689                         le32_to_cpu(rlc_hdr->reg_list_format_start);
690         adev->gfx.rlc.reg_list_format_separate_start =
691                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
692         adev->gfx.rlc.starting_offsets_start =
693                         le32_to_cpu(rlc_hdr->starting_offsets_start);
694         adev->gfx.rlc.reg_list_format_size_bytes =
695                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
696         adev->gfx.rlc.reg_list_size_bytes =
697                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
698         adev->gfx.rlc.register_list_format =
699                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
700                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
701         if (!adev->gfx.rlc.register_list_format) {
702                 err = -ENOMEM;
703                 goto out;
704         }
705
706         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
707                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
708         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
709                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
710
711         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
712
713         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
714                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
715         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
716                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
717
718         if (adev->gfx.rlc.is_rlc_v2_1)
719                 gfx_v10_0_init_rlc_ext_microcode(adev);
720
721         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
722         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
723         if (err)
724                 goto out;
725         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
726         if (err)
727                 goto out;
728         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
729         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
730         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
731
732         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);
733         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
734         if (!err) {
735                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
736                 if (err)
737                         goto out;
738                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
739                         adev->gfx.mec2_fw->data;
740                 adev->gfx.mec2_fw_version =
741                         le32_to_cpu(cp_hdr->header.ucode_version);
742                 adev->gfx.mec2_feature_version =
743                         le32_to_cpu(cp_hdr->ucode_feature_version);
744         } else {
745                 err = 0;
746                 adev->gfx.mec2_fw = NULL;
747         }
748
749         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
750                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
751                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
752                 info->fw = adev->gfx.pfp_fw;
753                 header = (const struct common_firmware_header *)info->fw->data;
754                 adev->firmware.fw_size +=
755                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
756
757                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
758                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
759                 info->fw = adev->gfx.me_fw;
760                 header = (const struct common_firmware_header *)info->fw->data;
761                 adev->firmware.fw_size +=
762                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
763
764                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
765                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
766                 info->fw = adev->gfx.ce_fw;
767                 header = (const struct common_firmware_header *)info->fw->data;
768                 adev->firmware.fw_size +=
769                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
770
771                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
772                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
773                 info->fw = adev->gfx.rlc_fw;
774                 header = (const struct common_firmware_header *)info->fw->data;
775                 adev->firmware.fw_size +=
776                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
777
778                 if (adev->gfx.rlc.is_rlc_v2_1 &&
779                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
780                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
781                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
782                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
783                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
784                         info->fw = adev->gfx.rlc_fw;
785                         adev->firmware.fw_size +=
786                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
787
788                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
789                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
790                         info->fw = adev->gfx.rlc_fw;
791                         adev->firmware.fw_size +=
792                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
793
794                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
795                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
796                         info->fw = adev->gfx.rlc_fw;
797                         adev->firmware.fw_size +=
798                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
799                 }
800
801                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
802                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
803                 info->fw = adev->gfx.mec_fw;
804                 header = (const struct common_firmware_header *)info->fw->data;
805                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
806                 adev->firmware.fw_size +=
807                         ALIGN(le32_to_cpu(header->ucode_size_bytes) -
808                               le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
809
810                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
811                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
812                 info->fw = adev->gfx.mec_fw;
813                 adev->firmware.fw_size +=
814                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
815
816                 if (adev->gfx.mec2_fw) {
817                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
818                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
819                         info->fw = adev->gfx.mec2_fw;
820                         header = (const struct common_firmware_header *)info->fw->data;
821                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
822                         adev->firmware.fw_size +=
823                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) -
824                                       le32_to_cpu(cp_hdr->jt_size) * 4,
825                                       PAGE_SIZE);
826                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
827                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
828                         info->fw = adev->gfx.mec2_fw;
829                         adev->firmware.fw_size +=
830                                 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
831                                       PAGE_SIZE);
832                 }
833         }
834
835 out:
836         if (err) {
837                 dev_err(adev->dev,
838                         "gfx10: Failed to load firmware \"%s\"\n",
839                         fw_name);
840                 release_firmware(adev->gfx.pfp_fw);
841                 adev->gfx.pfp_fw = NULL;
842                 release_firmware(adev->gfx.me_fw);
843                 adev->gfx.me_fw = NULL;
844                 release_firmware(adev->gfx.ce_fw);
845                 adev->gfx.ce_fw = NULL;
846                 release_firmware(adev->gfx.rlc_fw);
847                 adev->gfx.rlc_fw = NULL;
848                 release_firmware(adev->gfx.mec_fw);
849                 adev->gfx.mec_fw = NULL;
850                 release_firmware(adev->gfx.mec2_fw);
851                 adev->gfx.mec2_fw = NULL;
852         }
853
854         gfx_v10_0_check_gfxoff_flag(adev);
855
856         return err;
857 }
858
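/*
 * Return the size in dwords of the clear-state buffer that
 * gfx_v10_0_get_csb_buffer() below will fill in.
 */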
859 static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
860 {
861         u32 count = 0;
862         const struct cs_section_def *sect = NULL;
863         const struct cs_extent_def *ext = NULL;
864
865         /* begin clear state */
866         count += 2;
867         /* context control state */
868         count += 3;
869
870         for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
871                 for (ext = sect->section; ext->extent != NULL; ++ext) {
872                         if (sect->id == SECT_CONTEXT)
873                                 count += 2 + ext->reg_count;
874                         else
875                                 return 0;
876                 }
877         }
878
879         /* set PA_SC_TILE_STEERING_OVERRIDE */
880         count += 3;
881         /* end clear state */
882         count += 2;
883         /* clear state */
884         count += 2;
885
886         return count;
887 }
888
889 static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
890                                     volatile u32 *buffer)
891 {
892         u32 count = 0, i;
893         const struct cs_section_def *sect = NULL;
894         const struct cs_extent_def *ext = NULL;
895         int ctx_reg_offset;
896
897         if (adev->gfx.rlc.cs_data == NULL)
898                 return;
899         if (buffer == NULL)
900                 return;
901
902         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
903         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
904
905         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
906         buffer[count++] = cpu_to_le32(0x80000000);
907         buffer[count++] = cpu_to_le32(0x80000000);
908
909         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
910                 for (ext = sect->section; ext->extent != NULL; ++ext) {
911                         if (sect->id == SECT_CONTEXT) {
912                                 buffer[count++] =
913                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
914                                 buffer[count++] = cpu_to_le32(ext->reg_index -
915                                                 PACKET3_SET_CONTEXT_REG_START);
916                                 for (i = 0; i < ext->reg_count; i++)
917                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
918                         } else {
919                                 return;
920                         }
921                 }
922         }
923
924         ctx_reg_offset =
925                 SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
926         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
927         buffer[count++] = cpu_to_le32(ctx_reg_offset);
928         buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
929
930         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
931         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
932
933         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
934         buffer[count++] = cpu_to_le32(0);
935 }
936
937 static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
938 {
939         /* clear state block */
940         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
941                         &adev->gfx.rlc.clear_state_gpu_addr,
942                         (void **)&adev->gfx.rlc.cs_ptr);
943
944         /* jump table block */
945         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
946                         &adev->gfx.rlc.cp_table_gpu_addr,
947                         (void **)&adev->gfx.rlc.cp_table_ptr);
948 }
949
950 static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
951 {
952         const struct cs_section_def *cs_data;
953         int r;
954
955         adev->gfx.rlc.cs_data = gfx10_cs_data;
956
957         cs_data = adev->gfx.rlc.cs_data;
958
959         if (cs_data) {
960                 /* init clear state block */
961                 r = amdgpu_gfx_rlc_init_csb(adev);
962                 if (r)
963                         return r;
964         }
965
966         return 0;
967 }
968
969 static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
970 {
971         int r;
972
973         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
974         if (unlikely(r != 0))
975                 return r;
976
977         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
978                         AMDGPU_GEM_DOMAIN_VRAM);
979         if (!r)
980                 adev->gfx.rlc.clear_state_gpu_addr =
981                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
982
983         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
984
985         return r;
986 }
987
988 static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
989 {
990         int r;
991
992         if (!adev->gfx.rlc.clear_state_obj)
993                 return;
994
995         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
996         if (likely(r == 0)) {
997                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
998                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
999         }
1000 }
1001
1002 static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
1003 {
1004         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1005         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1006 }
1007
1008 static int gfx_v10_0_me_init(struct amdgpu_device *adev)
1009 {
1010         int r;
1011
1012         bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
1013
1014         amdgpu_gfx_graphics_queue_acquire(adev);
1015
1016         r = gfx_v10_0_init_microcode(adev);
1017         if (r)
1018                 DRM_ERROR("Failed to load gfx firmware!\n");
1019
1020         return r;
1021 }
1022
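/*
 * Allocate the HPD EOP buffer for all enabled compute rings and, when direct
 * firmware loading is used, copy the MEC ucode into a GTT buffer.
 */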
1023 static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
1024 {
1025         int r;
1026         u32 *hpd;
1027         const __le32 *fw_data = NULL;
1028         unsigned fw_size;
1029         u32 *fw = NULL;
1030         size_t mec_hpd_size;
1031
1032         const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
1033
1034         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1035
1036         /* take ownership of the relevant compute queues */
1037         amdgpu_gfx_compute_queue_acquire(adev);
1038         mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
1039
1040         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1041                                       AMDGPU_GEM_DOMAIN_GTT,
1042                                       &adev->gfx.mec.hpd_eop_obj,
1043                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1044                                       (void **)&hpd);
1045         if (r) {
1046                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1047                 gfx_v10_0_mec_fini(adev);
1048                 return r;
1049         }
1050
1051         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1052
1053         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1054         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1055
1056         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1057                 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1058
1059                 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1060                          le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1061                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1062
1063                 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1064                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1065                                               &adev->gfx.mec.mec_fw_obj,
1066                                               &adev->gfx.mec.mec_fw_gpu_addr,
1067                                               (void **)&fw);
1068                 if (r) {
1069                         dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
1070                         gfx_v10_0_mec_fini(adev);
1071                         return r;
1072                 }
1073
1074                 memcpy(fw, fw_data, fw_size);
1075
1076                 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1077                 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1078         }
1079
1080         return 0;
1081 }
1082
1083 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
1084 {
1085         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1086                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1087                 (address << SQ_IND_INDEX__INDEX__SHIFT));
1088         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1089 }
1090
1091 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
1092                            uint32_t thread, uint32_t regno,
1093                            uint32_t num, uint32_t *out)
1094 {
1095         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1096                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1097                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1098                 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
1099                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1100         while (num--)
1101                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1102 }
1103
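/*
 * Read back the status registers of a single wave through the SQ indirect
 * register interface (SQ_IND_INDEX/SQ_IND_DATA).
 */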
1104 static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1105 {
1106         /* in gfx10 the SIMD_ID is specified as part of the INSTANCE
1107          * field when performing a select_se_sh so it should be
1108          * zero here */
1109         WARN_ON(simd != 0);
1110
1111         /* type 2 wave data */
1112         dst[(*no_fields)++] = 2;
1113         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
1114         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
1115         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
1116         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
1117         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
1118         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
1119         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
1120         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0);
1121         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
1122         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
1123         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
1124         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
1125         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
1126         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
1127         dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
1128 }
1129
1130 static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1131                                      uint32_t wave, uint32_t start,
1132                                      uint32_t size, uint32_t *dst)
1133 {
1134         WARN_ON(simd != 0);
1135
1136         wave_read_regs(
1137                 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
1138                 dst);
1139 }
1140
1141 static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1142                                       uint32_t wave, uint32_t thread,
1143                                       uint32_t start, uint32_t size,
1144                                       uint32_t *dst)
1145 {
1146         wave_read_regs(
1147                 adev, wave, thread,
1148                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1149 }
1150
1151 static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev,
1152                                        u32 me, u32 pipe, u32 q, u32 vm)
1153 {
1154         nv_grbm_select(adev, me, pipe, q, vm);
1155 }
1156
1157
1158 static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
1159         .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter,
1160         .select_se_sh = &gfx_v10_0_select_se_sh,
1161         .read_wave_data = &gfx_v10_0_read_wave_data,
1162         .read_wave_sgprs = &gfx_v10_0_read_wave_sgprs,
1163         .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,
1164         .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q,
1165 };
1166
1167 static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
1168 {
1169         u32 gb_addr_config;
1170
1171         adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
1172
1173         switch (adev->asic_type) {
1174         case CHIP_NAVI10:
1175         case CHIP_NAVI14:
1176         case CHIP_NAVI12:
1177                 adev->gfx.config.max_hw_contexts = 8;
1178                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1179                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1180                 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1181                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1182                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1183                 break;
1184         default:
1185                 BUG();
1186                 break;
1187         }
1188
1189         adev->gfx.config.gb_addr_config = gb_addr_config;
1190
1191         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1192                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
1193                                       GB_ADDR_CONFIG, NUM_PIPES);
1194
1195         adev->gfx.config.max_tile_pipes =
1196                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1197
1198         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1199                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
1200                                       GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
1201         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1202                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
1203                                       GB_ADDR_CONFIG, NUM_RB_PER_SE);
1204         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1205                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
1206                                       GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
1207         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1208                         REG_GET_FIELD(adev->gfx.config.gb_addr_config,
1209                                       GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
1210 }
1211
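/*
 * Set up one gfx ring: record its me/pipe/queue, assign its doorbell
 * (gfx_ring0 for ring 0, gfx_ring1 otherwise) and attach it to the per-pipe
 * EOP interrupt before handing it to amdgpu_ring_init().
 */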
1212 static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
1213                                    int me, int pipe, int queue)
1214 {
1215         int r;
1216         struct amdgpu_ring *ring;
1217         unsigned int irq_type;
1218
1219         ring = &adev->gfx.gfx_ring[ring_id];
1220
1221         ring->me = me;
1222         ring->pipe = pipe;
1223         ring->queue = queue;
1224
1225         ring->ring_obj = NULL;
1226         ring->use_doorbell = true;
1227
1228         if (!ring_id)
1229                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1230         else
1231                 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
1232         sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1233
1234         irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
1235         r = amdgpu_ring_init(adev, ring, 1024,
1236                              &adev->gfx.eop_irq, irq_type);
1237         if (r)
1238                 return r;
1239         return 0;
1240 }
1241
1242 static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1243                                        int mec, int pipe, int queue)
1244 {
1245         int r;
1246         unsigned irq_type;
1247         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1248
1250
1251         /* mec0 is me1 */
1252         ring->me = mec + 1;
1253         ring->pipe = pipe;
1254         ring->queue = queue;
1255
1256         ring->ring_obj = NULL;
1257         ring->use_doorbell = true;
1258         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1259         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1260                                 + (ring_id * GFX10_MEC_HPD_SIZE);
1261         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1262
1263         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1264                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1265                 + ring->pipe;
1266
1267         /* type-2 packets are deprecated on MEC, use type-3 instead */
1268         r = amdgpu_ring_init(adev, ring, 1024,
1269                              &adev->gfx.eop_irq, irq_type);
1270         if (r)
1271                 return r;
1272
1273         return 0;
1274 }
1275
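/*
 * sw_init: register the KIQ/EOP/priv-reg/priv-inst interrupt sources, set up
 * the ME, RLC and MEC objects, instantiate the gfx and compute rings, then
 * create the KIQ ring, the MQD backing store and, for backdoor autoload, the
 * RLC autoload buffer.
 */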
1276 static int gfx_v10_0_sw_init(void *handle)
1277 {
1278         int i, j, k, r, ring_id = 0;
1279         struct amdgpu_kiq *kiq;
1280         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1281
1282         switch (adev->asic_type) {
1283         case CHIP_NAVI10:
1284         case CHIP_NAVI14:
1285         case CHIP_NAVI12:
1286                 adev->gfx.me.num_me = 1;
1287                 adev->gfx.me.num_pipe_per_me = 2;
1288                 adev->gfx.me.num_queue_per_pipe = 1;
1289                 adev->gfx.mec.num_mec = 2;
1290                 adev->gfx.mec.num_pipe_per_mec = 4;
1291                 adev->gfx.mec.num_queue_per_pipe = 8;
1292                 break;
1293         default:
1294                 adev->gfx.me.num_me = 1;
1295                 adev->gfx.me.num_pipe_per_me = 1;
1296                 adev->gfx.me.num_queue_per_pipe = 1;
1297                 adev->gfx.mec.num_mec = 1;
1298                 adev->gfx.mec.num_pipe_per_mec = 4;
1299                 adev->gfx.mec.num_queue_per_pipe = 8;
1300                 break;
1301         }
1302
1303         /* KIQ event */
1304         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
1305                               GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
1306                               &adev->gfx.kiq.irq);
1307         if (r)
1308                 return r;
1309
1310         /* EOP Event */
1311         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
1312                               GFX_10_1__SRCID__CP_EOP_INTERRUPT,
1313                               &adev->gfx.eop_irq);
1314         if (r)
1315                 return r;
1316
1317         /* Privileged reg */
1318         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
1319                               &adev->gfx.priv_reg_irq);
1320         if (r)
1321                 return r;
1322
1323         /* Privileged inst */
1324         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
1325                               &adev->gfx.priv_inst_irq);
1326         if (r)
1327                 return r;
1328
1329         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1330
1331         gfx_v10_0_scratch_init(adev);
1332
1333         r = gfx_v10_0_me_init(adev);
1334         if (r)
1335                 return r;
1336
1337         r = gfx_v10_0_rlc_init(adev);
1338         if (r) {
1339                 DRM_ERROR("Failed to init rlc BOs!\n");
1340                 return r;
1341         }
1342
1343         r = gfx_v10_0_mec_init(adev);
1344         if (r) {
1345                 DRM_ERROR("Failed to init MEC BOs!\n");
1346                 return r;
1347         }
1348
1349         /* set up the gfx ring */
1350         for (i = 0; i < adev->gfx.me.num_me; i++) {
1351                 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1352                         for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1353                                 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1354                                         continue;
1355
1356                                 r = gfx_v10_0_gfx_ring_init(adev, ring_id,
1357                                                             i, k, j);
1358                                 if (r)
1359                                         return r;
1360                                 ring_id++;
1361                         }
1362                 }
1363         }
1364
1365         ring_id = 0;
1366         /* set up the compute queues - allocate horizontally across pipes */
1367         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1368                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1369                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1370                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
1371                                                                      j))
1372                                         continue;
1373
1374                                 r = gfx_v10_0_compute_ring_init(adev, ring_id,
1375                                                                 i, k, j);
1376                                 if (r)
1377                                         return r;
1378
1379                                 ring_id++;
1380                         }
1381                 }
1382         }
1383
1384         r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
1385         if (r) {
1386                 DRM_ERROR("Failed to init KIQ BOs!\n");
1387                 return r;
1388         }
1389
1390         kiq = &adev->gfx.kiq;
1391         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1392         if (r)
1393                 return r;
1394
1395         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
1396         if (r)
1397                 return r;
1398
1399         /* allocate visible FB for rlc auto-loading fw */
1400         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1401                 r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
1402                 if (r)
1403                         return r;
1404         }
1405
1406         adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
1407
1408         gfx_v10_0_gpu_early_init(adev);
1409
1410         return 0;
1411 }
1412
1413 static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev)
1414 {
1415         amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1416                               &adev->gfx.pfp.pfp_fw_gpu_addr,
1417                               (void **)&adev->gfx.pfp.pfp_fw_ptr);
1418 }
1419
1420 static void gfx_v10_0_ce_fini(struct amdgpu_device *adev)
1421 {
1422         amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj,
1423                               &adev->gfx.ce.ce_fw_gpu_addr,
1424                               (void **)&adev->gfx.ce.ce_fw_ptr);
1425 }
1426
1427 static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
1428 {
1429         amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1430                               &adev->gfx.me.me_fw_gpu_addr,
1431                               (void **)&adev->gfx.me.me_fw_ptr);
1432 }
1433
1434 static int gfx_v10_0_sw_fini(void *handle)
1435 {
1436         int i;
1437         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1438
1439         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1440                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1441         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1442                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1443
1444         amdgpu_gfx_mqd_sw_fini(adev);
1445         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1446         amdgpu_gfx_kiq_fini(adev);
1447
1448         gfx_v10_0_pfp_fini(adev);
1449         gfx_v10_0_ce_fini(adev);
1450         gfx_v10_0_me_fini(adev);
1451         gfx_v10_0_rlc_fini(adev);
1452         gfx_v10_0_mec_fini(adev);
1453
1454         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1455                 gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
1456
1457         gfx_v10_0_free_microcode(adev);
1458
1459         return 0;
1460 }
1461
1462
1463 static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev)
1464 {
1465         /* TODO */
1466 }
1467
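/*
 * Steer subsequent register accesses to a specific shader engine / shader
 * array / instance via GRBM_GFX_INDEX; 0xffffffff for any argument selects
 * broadcast to all of them.
 */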
1468 static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1469                                    u32 sh_num, u32 instance)
1470 {
1471         u32 data;
1472
1473         if (instance == 0xffffffff)
1474                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1475                                      INSTANCE_BROADCAST_WRITES, 1);
1476         else
1477                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1478                                      instance);
1479
1480         if (se_num == 0xffffffff)
1481                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1482                                      1);
1483         else
1484                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1485
1486         if (sh_num == 0xffffffff)
1487                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1488                                      1);
1489         else
1490                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1491
1492         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
1493 }
1494
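/*
 * Return the bitmap of active render backends for the currently selected
 * SE/SA: RBs disabled by harvesting (CC_RB_BACKEND_DISABLE) or by the user
 * fuse register (GC_USER_RB_BACKEND_DISABLE) are masked out.
 */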
1495 static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1496 {
1497         u32 data, mask;
1498
1499         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1500         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1501
1502         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1503         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1504
1505         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1506                                          adev->gfx.config.max_sh_per_se);
1507
1508         return (~data) & mask;
1509 }
1510
1511 static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
1512 {
1513         int i, j;
1514         u32 data;
1515         u32 active_rbs = 0;
1516         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1517                                         adev->gfx.config.max_sh_per_se;
1518
1519         mutex_lock(&adev->grbm_idx_mutex);
1520         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1521                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1522                         gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
1523                         data = gfx_v10_0_get_rb_active_bitmap(adev);
1524                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1525                                                rb_bitmap_width_per_sh);
1526                 }
1527         }
1528         gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1529         mutex_unlock(&adev->grbm_idx_mutex);
1530
1531         adev->gfx.config.backend_enable_mask = active_rbs;
1532         adev->gfx.config.num_rbs = hweight32(active_rbs);
1533 }
1534
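/*
 * Build the PA_SC_TILE_STEERING_OVERRIDE value: the number of scan
 * converters, RBs per SC and packers per SC are programmed as log2 values in
 * their respective fields, derived from the harvested RB configuration.
 */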
1535 static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev)
1536 {
1537         uint32_t num_sc;
1538         uint32_t enabled_rb_per_sh;
1539         uint32_t active_rb_bitmap;
1540         uint32_t num_rb_per_sc;
1541         uint32_t num_packer_per_sc;
1542         uint32_t pa_sc_tile_steering_override;
1543
1544         /* init num_sc */
1545         num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
1546                         adev->gfx.config.num_sc_per_sh;
1547         /* init num_rb_per_sc */
1548         active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev);
1549         enabled_rb_per_sh = hweight32(active_rb_bitmap);
1550         num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh;
1551         /* init num_packer_per_sc */
1552         num_packer_per_sc = adev->gfx.config.num_packer_per_sc;
1553
1554         pa_sc_tile_steering_override = 0;
1555         pa_sc_tile_steering_override |=
1556                 (order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) &
1557                 PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK;
1558         pa_sc_tile_steering_override |=
1559                 (order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) &
1560                 PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK;
1561         pa_sc_tile_steering_override |=
1562                 (order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) &
1563                 PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK;
1564
1565         return pa_sc_tile_steering_override;
1566 }
1567
1568 #define DEFAULT_SH_MEM_BASES    (0x6000)
1569 #define FIRST_COMPUTE_VMID      (8)
1570 #define LAST_COMPUTE_VMID       (16)
1571
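/*
 * Give compute VMIDs 8..15 a fixed aperture layout and clear their
 * GDS/GWS/OA allocations.  DEFAULT_SH_MEM_BASES (0x6000) is written to both
 * the PRIVATE_BASE and SHARED_BASE fields of SH_MEM_BASES; each field holds
 * bits [63:48] of the aperture address, so 0x6000 places the apertures at
 * 0x6000'0000'0000'0000 as described in the comment below.
 */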
1572 static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
1573 {
1574         int i;
1575         uint32_t sh_mem_bases;
1576
1577         /*
1578          * Configure apertures:
1579          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1580          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1581          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1582          */
1583         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1584
1585         mutex_lock(&adev->srbm_mutex);
1586         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1587                 nv_grbm_select(adev, 0, 0, 0, i);
1588                 /* CP and shaders */
1589                 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1590                 WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1591         }
1592         nv_grbm_select(adev, 0, 0, 0, 0);
1593         mutex_unlock(&adev->srbm_mutex);
1594
1595         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
1596            access. These should be enabled by FW for target VMIDs. */
1597         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1598                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
1599                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
1600                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
1601                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
1602         }
1603 }
1604
1605 static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
1606 {
1607         int vmid;
1608
1609         /*
1610          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1611          * access. Compute VMIDs should be enabled by FW for target VMIDs,
1612          * the driver can enable them for graphics. VMID0 should maintain
1613          * access so that HWS firmware can save/restore entries.
1614          */
1615         for (vmid = 1; vmid < 16; vmid++) {
1616                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
1617                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
1618                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
1619                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
1620         }
1621 }
1622
1623
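/*
 * For every shader array, look up which WGPs are inactive and disable the
 * matching TCP/SQC targets in UTCL1_UTCL0_INVREQ_DISABLE and the TCP targets
 * in GCRD_SA_TARGETS_DISABLE, so cache and invalidation requests are not
 * routed to harvested WGPs.
 */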
1624 static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
1625 {
1626         int i, j, k;
1627         int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
1628         u32 tmp, wgp_active_bitmap = 0;
1629         u32 gcrd_targets_disable_tcp = 0;
1630         u32 utcl_invreq_disable = 0;
1631         /*
1632          * GCRD_TARGETS_DISABLE field contains
1633          * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
1634          * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
1635          */
1636         u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
1637                 2 * max_wgp_per_sh + /* TCP */
1638                 max_wgp_per_sh + /* SQC */
1639                 4); /* GL1C */
1640         /*
1641          * UTCL1_UTCL0_INVREQ_DISABLE field contains
1642          * for Navi10/Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
1643          * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
1644          */
1645         u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
1646                 2 * max_wgp_per_sh + /* TCP */
1647                 2 * max_wgp_per_sh + /* SQC */
1648                 4 + /* RMI */
1649                 1); /* SQG */
1650
1651         if (adev->asic_type == CHIP_NAVI10 ||
1652             adev->asic_type == CHIP_NAVI14 ||
1653             adev->asic_type == CHIP_NAVI12) {
1654                 mutex_lock(&adev->grbm_idx_mutex);
1655                 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1656                         for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1657                                 gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
1658                                 wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
1659                                 /*
1660                                  * Set corresponding TCP bits for the inactive WGPs in
1661                                  * GCRD_SA_TARGETS_DISABLE
1662                                  */
1663                                 gcrd_targets_disable_tcp = 0;
1664                                 /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
1665                                 utcl_invreq_disable = 0;
1666
1667                                 for (k = 0; k < max_wgp_per_sh; k++) {
1668                                         if (!(wgp_active_bitmap & (1 << k))) {
1669                                                 gcrd_targets_disable_tcp |= 3 << (2 * k);
1670                                                 utcl_invreq_disable |= (3 << (2 * k)) |
1671                                                         (3 << (2 * (max_wgp_per_sh + k)));
1672                                         }
1673                                 }
1674
1675                                 tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
1676                                 /* only override TCP & SQC bits */
1677                                 tmp &= 0xffffffff << (4 * max_wgp_per_sh);
1678                                 tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
1679                                 WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
1680
1681                                 tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
1682                                 /* only override TCP bits */
1683                                 tmp &= 0xffffffff << (2 * max_wgp_per_sh);
1684                                 tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
1685                                 WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
1686                         }
1687                 }
1688
1689                 gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1690                 mutex_unlock(&adev->grbm_idx_mutex);
1691         }
1692 }
1693
1694 static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
1695 {
1696         u32 tmp;
1697         int i;
1698
1699         WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1700
1701         gfx_v10_0_tiling_mode_table_init(adev);
1702
1703         gfx_v10_0_setup_rb(adev);
1704         gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
1705         adev->gfx.config.pa_sc_tile_steering_override =
1706                 gfx_v10_0_init_pa_sc_tile_steering_override(adev);
1707
1708         /* XXX SH_MEM regs */
1709         /* where to put LDS, scratch, GPUVM in FSA64 space */
1710         mutex_lock(&adev->srbm_mutex);
1711         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
1712                 nv_grbm_select(adev, 0, 0, 0, i);
1713                 /* CP and shaders */
1714                 WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1715                 if (i != 0) {
1716                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1717                                 (adev->gmc.private_aperture_start >> 48));
1718                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1719                                 (adev->gmc.shared_aperture_start >> 48));
1720                         WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1721                 }
1722         }
1723         nv_grbm_select(adev, 0, 0, 0, 0);
1724
1725         mutex_unlock(&adev->srbm_mutex);
1726
1727         gfx_v10_0_init_compute_vmid(adev);
1728         gfx_v10_0_init_gds_vmid(adev);
1729
1730 }
1731
1732 static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1733                                                bool enable)
1734 {
1735         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
1736
1737         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1738                             enable ? 1 : 0);
1739         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1740                             enable ? 1 : 0);
1741         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1742                             enable ? 1 : 0);
1743         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1744                             enable ? 1 : 0);
1745
1746         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
1747 }
1748
1749 static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
1750 {
1751         /* csib */
1752         WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
1753                      adev->gfx.rlc.clear_state_gpu_addr >> 32);
1754         WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
1755                      adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1756         WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1757 }
1758
1759 static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
1760 {
1761         int i;
1762
1763         gfx_v10_0_init_csb(adev);
1764
1765         for (i = 0; i < adev->num_vmhubs; i++)
1766                 amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
1767
1768         /* TODO: init power gating */
1769         return;
1770 }
1771
1772 void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
1773 {
1774         u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
1775
1776         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1777         WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
1778 }
1779
1780 static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
1781 {
1782         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1783         udelay(50);
1784         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1785         udelay(50);
1786 }
1787
1788 static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1789                                              bool enable)
1790 {
1791         uint32_t rlc_pg_cntl;
1792
1793         rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);
1794
1795         if (!enable) {
1796                 /* RLC_PG_CNTL[23] = 0 (default)
1797                  * RLC will wait for handshake acks with SMU
1798                  * GFXOFF will be enabled
1799                  * RLC_PG_CNTL[23] = 1
1800                  * RLC will not issue any message to SMU
1801                  * hence no handshake between SMU & RLC
1802                  * GFXOFF will be disabled
1803                  */
1804                 rlc_pg_cntl |= 0x800000;
1805         } else
1806                 rlc_pg_cntl &= ~0x800000;
1807         WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
1808 }
1809
1810 static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
1811 {
1812         /* TODO: re-enable the rlc & smu handshake once smu
1813          * and the gfxoff feature work as expected */
1814         if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1815                 gfx_v10_0_rlc_smu_handshake_cntl(adev, false);
1816
1817         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
1818         udelay(50);
1819 }
1820
1821 static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
1822 {
1823         uint32_t tmp;
1824
1825         /* enable Save Restore Machine */
1826         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
1827         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1828         tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1829         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
1830 }
1831
1832 static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
1833 {
1834         const struct rlc_firmware_header_v2_0 *hdr;
1835         const __le32 *fw_data;
1836         unsigned i, fw_size;
1837
1838         if (!adev->gfx.rlc_fw)
1839                 return -EINVAL;
1840
1841         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1842         amdgpu_ucode_print_rlc_hdr(&hdr->header);
1843
1844         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1845                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1846         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1847
1848         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
1849                      RLCG_UCODE_LOADING_START_ADDRESS);
1850
1851         for (i = 0; i < fw_size; i++)
1852                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA,
1853                              le32_to_cpup(fw_data++));
1854
1855         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
1856
1857         return 0;
1858 }
1859
1860 static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
1861 {
1862         int r;
1863
1864         if (amdgpu_sriov_vf(adev))
1865                 return 0;
1866
1867         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1868                 r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
1869                 if (r)
1870                         return r;
1871                 gfx_v10_0_init_pg(adev);
1872
1873                 /* enable RLC SRM */
1874                 gfx_v10_0_rlc_enable_srm(adev);
1875
1876         } else {
1877                 adev->gfx.rlc.funcs->stop(adev);
1878
1879                 /* disable CG */
1880                 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
1881
1882                 /* disable PG */
1883                 WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
1884
1885                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1886                         /* legacy rlc firmware loading */
1887                         r = gfx_v10_0_rlc_load_microcode(adev);
1888                         if (r)
1889                                 return r;
1890                 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1891                         /* rlc backdoor autoload firmware */
1892                         r = gfx_v10_0_rlc_backdoor_autoload_enable(adev);
1893                         if (r)
1894                                 return r;
1895                 }
1896
1897                 gfx_v10_0_init_pg(adev);
1898                 adev->gfx.rlc.funcs->start(adev);
1899
1900                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1901                         r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
1902                         if (r)
1903                                 return r;
1904                 }
1905         }
1906         return 0;
1907 }
1908
1909 static struct {
1910         FIRMWARE_ID     id;
1911         unsigned int    offset;
1912         unsigned int    size;
1913 } rlc_autoload_info[FIRMWARE_ID_MAX];
1914
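/*
 * Copy the firmware table of contents out of the PSP SOS image into a GTT
 * buffer and cache each entry's offset and size (in bytes) in
 * rlc_autoload_info; offsets of the CP firmwares are rounded up to 4KB
 * alignment.
 */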
1915 static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
1916 {
1917         int ret;
1918         RLC_TABLE_OF_CONTENT *rlc_toc;
1919
1920         ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE,
1921                                         AMDGPU_GEM_DOMAIN_GTT,
1922                                         &adev->gfx.rlc.rlc_toc_bo,
1923                                         &adev->gfx.rlc.rlc_toc_gpu_addr,
1924                                         (void **)&adev->gfx.rlc.rlc_toc_buf);
1925         if (ret) {
1926                 dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret);
1927                 return ret;
1928         }
1929
1930         /* Copy toc from psp sos fw to rlc toc buffer */
1931         memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size);
1932
1933         rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
1934         while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) &&
1935                 (rlc_toc->id < FIRMWARE_ID_MAX)) {
1936                 if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) &&
1937                     (rlc_toc->id <= FIRMWARE_ID_CP_MES)) {
1938                         /* Offset needs 4KB alignment */
1939                         rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE);
1940                 }
1941
1942                 rlc_autoload_info[rlc_toc->id].id = rlc_toc->id;
1943                 rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4;
1944                 rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
1945
1946                 rlc_toc++;
1947         }
1948
1949         return 0;
1950 }
1951
1952 static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev)
1953 {
1954         uint32_t total_size = 0;
1955         FIRMWARE_ID id;
1956         int ret;
1957
1958         ret = gfx_v10_0_parse_rlc_toc(adev);
1959         if (ret) {
1960                 dev_err(adev->dev, "failed to parse rlc toc\n");
1961                 return 0;
1962         }
1963
1964         for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++)
1965                 total_size += rlc_autoload_info[id].size;
1966
1967         /* In case offsets in the rlc toc were page-aligned, include the padding */
1968         if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset)
1969                 total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset +
1970                                 rlc_autoload_info[FIRMWARE_ID_MAX-1].size;
1971
1972         return total_size;
1973 }
1974
1975 static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev)
1976 {
1977         int r;
1978         uint32_t total_size;
1979
1980         total_size = gfx_v10_0_calc_toc_total_size(adev);
1981
1982         r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE,
1983                                       AMDGPU_GEM_DOMAIN_GTT,
1984                                       &adev->gfx.rlc.rlc_autoload_bo,
1985                                       &adev->gfx.rlc.rlc_autoload_gpu_addr,
1986                                       (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1987         if (r) {
1988                 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1989                 return r;
1990         }
1991
1992         return 0;
1993 }
1994
1995 static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev)
1996 {
1997         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo,
1998                               &adev->gfx.rlc.rlc_toc_gpu_addr,
1999                               (void **)&adev->gfx.rlc.rlc_toc_buf);
2000         amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
2001                               &adev->gfx.rlc.rlc_autoload_gpu_addr,
2002                               (void **)&adev->gfx.rlc.rlc_autoload_ptr);
2003 }
2004
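/*
 * Copy one firmware image into its slot in the RLC autoload buffer at the
 * offset recorded in the TOC, clamping to the TOC entry size and zero-filling
 * any remainder.
 */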
2005 static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
2006                                                        FIRMWARE_ID id,
2007                                                        const void *fw_data,
2008                                                        uint32_t fw_size)
2009 {
2010         uint32_t toc_offset;
2011         uint32_t toc_fw_size;
2012         char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
2013
2014         if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX)
2015                 return;
2016
2017         toc_offset = rlc_autoload_info[id].offset;
2018         toc_fw_size = rlc_autoload_info[id].size;
2019
2020         if (fw_size == 0)
2021                 fw_size = toc_fw_size;
2022
2023         if (fw_size > toc_fw_size)
2024                 fw_size = toc_fw_size;
2025
2026         memcpy(ptr + toc_offset, fw_data, fw_size);
2027
2028         if (fw_size < toc_fw_size)
2029                 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
2030 }
2031
2032 static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
2033 {
2034         void *data;
2035         uint32_t size;
2036
2037         data = adev->gfx.rlc.rlc_toc_buf;
2038         size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size;
2039
2040         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2041                                                    FIRMWARE_ID_RLC_TOC,
2042                                                    data, size);
2043 }
2044
2045 static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
2046 {
2047         const __le32 *fw_data;
2048         uint32_t fw_size;
2049         const struct gfx_firmware_header_v1_0 *cp_hdr;
2050         const struct rlc_firmware_header_v2_0 *rlc_hdr;
2051
2052         /* pfp ucode */
2053         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
2054                 adev->gfx.pfp_fw->data;
2055         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2056                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
2057         fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
2058         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2059                                                    FIRMWARE_ID_CP_PFP,
2060                                                    fw_data, fw_size);
2061
2062         /* ce ucode */
2063         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
2064                 adev->gfx.ce_fw->data;
2065         fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
2066                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
2067         fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
2068         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2069                                                    FIRMWARE_ID_CP_CE,
2070                                                    fw_data, fw_size);
2071
2072         /* me ucode */
2073         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
2074                 adev->gfx.me_fw->data;
2075         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2076                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
2077         fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
2078         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2079                                                    FIRMWARE_ID_CP_ME,
2080                                                    fw_data, fw_size);
2081
2082         /* rlc ucode */
2083         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
2084                 adev->gfx.rlc_fw->data;
2085         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2086                 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
2087         fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
2088         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2089                                                    FIRMWARE_ID_RLC_G_UCODE,
2090                                                    fw_data, fw_size);
2091
2092         /* mec1 ucode */
2093         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
2094                 adev->gfx.mec_fw->data;
2095         fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
2096                 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
2097         fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
2098                 cp_hdr->jt_size * 4;
2099         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2100                                                    FIRMWARE_ID_CP_MEC,
2101                                                    fw_data, fw_size);
2102         /* loading mec2 ucode is unnecessary when it is identical to mec1 */
2103 }
2104
2105 /* Temporarily put sdma part here */
2106 static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
2107 {
2108         const __le32 *fw_data;
2109         uint32_t fw_size;
2110         const struct sdma_firmware_header_v1_0 *sdma_hdr;
2111         int i;
2112
2113         for (i = 0; i < adev->sdma.num_instances; i++) {
2114                 sdma_hdr = (const struct sdma_firmware_header_v1_0 *)
2115                         adev->sdma.instance[i].fw->data;
2116                 fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data +
2117                         le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
2118                 fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes);
2119
2120                 if (i == 0) {
2121                         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2122                                 FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size);
2123                         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2124                                 FIRMWARE_ID_SDMA0_JT,
2125                                 (uint32_t *)fw_data +
2126                                 sdma_hdr->jt_offset,
2127                                 sdma_hdr->jt_size * 4);
2128                 } else if (i == 1) {
2129                         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2130                                 FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size);
2131                         gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
2132                                 FIRMWARE_ID_SDMA1_JT,
2133                                 (uint32_t *)fw_data +
2134                                 sdma_hdr->jt_offset,
2135                                 sdma_hdr->jt_size * 4);
2136                 }
2137         }
2138 }
2139
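/*
 * Stage the TOC, SDMA and GFX firmware images into the autoload buffer, point
 * the RLC bootloader at the RLC_G image, and sanity-check that a
 * cold-boot/VDDGFX exit is flagged and that the RLC ROM has halted itself.
 */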
2140 static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
2141 {
2142         uint32_t rlc_g_offset, rlc_g_size, tmp;
2143         uint64_t gpu_addr;
2144
2145         gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
2146         gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
2147         gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
2148
2149         rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset;
2150         rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size;
2151         gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
2152
2153         WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr));
2154         WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr));
2155         WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size);
2156
2157         tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR);
2158         if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK |
2159                    RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) {
2160                 DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n");
2161                 return -EINVAL;
2162         }
2163
2164         tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
2165         if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) {
2166                 DRM_ERROR("RLC ROM should halt itself\n");
2167                 return -EINVAL;
2168         }
2169
2170         return 0;
2171 }
2172
2173 static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev)
2174 {
2175         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2176         uint32_t tmp;
2177         int i;
2178         uint64_t addr;
2179
2180         /* Trigger an invalidation of the L1 instruction caches */
2181         tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
2182         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2183         WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
2184
2185         /* Wait for invalidation complete */
2186         for (i = 0; i < usec_timeout; i++) {
2187                 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
2188                 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2189                         INVALIDATE_CACHE_COMPLETE))
2190                         break;
2191                 udelay(1);
2192         }
2193
2194         if (i >= usec_timeout) {
2195                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2196                 return -EINVAL;
2197         }
2198
2199         /* Program me ucode address into instruction cache address register */
2200         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2201                 rlc_autoload_info[FIRMWARE_ID_CP_ME].offset;
2202         WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
2203                         lower_32_bits(addr) & 0xFFFFF000);
2204         WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
2205                         upper_32_bits(addr));
2206
2207         return 0;
2208 }
2209
2210 static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev)
2211 {
2212         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2213         uint32_t tmp;
2214         int i;
2215         uint64_t addr;
2216
2217         /* Trigger an invalidation of the L1 instruction caches */
2218         tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
2219         tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2220         WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
2221
2222         /* Wait for invalidation complete */
2223         for (i = 0; i < usec_timeout; i++) {
2224                 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
2225                 if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
2226                         INVALIDATE_CACHE_COMPLETE))
2227                         break;
2228                 udelay(1);
2229         }
2230
2231         if (i >= usec_timeout) {
2232                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2233                 return -EINVAL;
2234         }
2235
2236         /* Program ce ucode address into instruction cache address register */
2237         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2238                 rlc_autoload_info[FIRMWARE_ID_CP_CE].offset;
2239         WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
2240                         lower_32_bits(addr) & 0xFFFFF000);
2241         WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
2242                         upper_32_bits(addr));
2243
2244         return 0;
2245 }
2246
2247 static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev)
2248 {
2249         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2250         uint32_t tmp;
2251         int i;
2252         uint64_t addr;
2253
2254         /* Trigger an invalidation of the L1 instruction caches */
2255         tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
2256         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2257         WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
2258
2259         /* Wait for invalidation complete */
2260         for (i = 0; i < usec_timeout; i++) {
2261                 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
2262                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2263                         INVALIDATE_CACHE_COMPLETE))
2264                         break;
2265                 udelay(1);
2266         }
2267
2268         if (i >= usec_timeout) {
2269                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2270                 return -EINVAL;
2271         }
2272
2273         /* Program pfp ucode address into instruction cache address register */
2274         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2275                 rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset;
2276         WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
2277                         lower_32_bits(addr) & 0xFFFFF000);
2278         WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
2279                         upper_32_bits(addr));
2280
2281         return 0;
2282 }
2283
2284 static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev)
2285 {
2286         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2287         uint32_t tmp;
2288         int i;
2289         uint64_t addr;
2290
2291         /* Trigger an invalidation of the L1 instruction caches */
2292         tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
2293         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2294         WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
2295
2296         /* Wait for invalidation complete */
2297         for (i = 0; i < usec_timeout; i++) {
2298                 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
2299                 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2300                         INVALIDATE_CACHE_COMPLETE))
2301                         break;
2302                 udelay(1);
2303         }
2304
2305         if (i >= usec_timeout) {
2306                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2307                 return -EINVAL;
2308         }
2309
2310         /* Program mec1 ucode address into instruction cache address register */
2311         addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2312                 rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset;
2313         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2314                         lower_32_bits(addr) & 0xFFFFF000);
2315         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2316                         upper_32_bits(addr));
2317
2318         return 0;
2319 }
2320
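/*
 * Poll CP_STAT and RLC_RLCS_BOOTLOAD_STATUS until the RLC reports that the GC
 * firmware bootload has completed, then (for backdoor autoload) point the
 * ME/CE/PFP/MEC instruction caches at their images in the autoload buffer.
 */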
2321 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2322 {
2323         uint32_t cp_status;
2324         uint32_t bootload_status;
2325         int i, r;
2326
2327         for (i = 0; i < adev->usec_timeout; i++) {
2328                 cp_status = RREG32_SOC15(GC, 0, mmCP_STAT);
2329                 bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS);
2330                 if ((cp_status == 0) &&
2331                     (REG_GET_FIELD(bootload_status,
2332                         RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2333                         break;
2334                 }
2335                 udelay(1);
2336         }
2337
2338         if (i >= adev->usec_timeout) {
2339                 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2340                 return -ETIMEDOUT;
2341         }
2342
2343         if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2344                 r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev);
2345                 if (r)
2346                         return r;
2347
2348                 r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev);
2349                 if (r)
2350                         return r;
2351
2352                 r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev);
2353                 if (r)
2354                         return r;
2355
2356                 r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev);
2357                 if (r)
2358                         return r;
2359         }
2360
2361         return 0;
2362 }
2363
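/*
 * Halt or release the gfx micro-engines (ME/PFP/CE) through CP_ME_CNTL; when
 * halting, also mark the gfx rings as not ready so nothing new is submitted.
 */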
2364 static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2365 {
2366         int i;
2367         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2368
2369         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2370         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2371         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2372         if (!enable) {
2373                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2374                         adev->gfx.gfx_ring[i].sched.ready = false;
2375         }
2376         WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
2377         udelay(50);
2378 }
2379
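/*
 * Copy the PFP ucode into a GTT buffer object, invalidate the CP_PFP L1
 * instruction cache and program the cache base registers to point at that
 * buffer.  The CE and ME loaders below follow the same pattern.
 */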
2380 static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2381 {
2382         int r;
2383         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2384         const __le32 *fw_data;
2385         unsigned i, fw_size;
2386         uint32_t tmp;
2387         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2388
2389         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2390                 adev->gfx.pfp_fw->data;
2391
2392         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2393
2394         fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2395                 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2396         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2397
2398         r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2399                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2400                                       &adev->gfx.pfp.pfp_fw_obj,
2401                                       &adev->gfx.pfp.pfp_fw_gpu_addr,
2402                                       (void **)&adev->gfx.pfp.pfp_fw_ptr);
2403         if (r) {
2404                 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2405                 gfx_v10_0_pfp_fini(adev);
2406                 return r;
2407         }
2408
2409         memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2410
2411         amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2412         amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2413
2414         /* Trigger an invalidation of the L1 instruction caches */
2415         tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
2416         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2417         WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
2418
2419         /* Wait for invalidation complete */
2420         for (i = 0; i < usec_timeout; i++) {
2421                 tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
2422                 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2423                         INVALIDATE_CACHE_COMPLETE))
2424                         break;
2425                 udelay(1);
2426         }
2427
2428         if (i >= usec_timeout) {
2429                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2430                 return -EINVAL;
2431         }
2432
2433         if (amdgpu_emu_mode == 1)
2434                 adev->nbio_funcs->hdp_flush(adev, NULL);
2435
2436         tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
2437         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2438         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2439         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2440         tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2441         WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp);
2442         WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
2443                 adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000);
2444         WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
2445                 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2446
2447         return 0;
2448 }
2449
2450 static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
2451 {
2452         int r;
2453         const struct gfx_firmware_header_v1_0 *ce_hdr;
2454         const __le32 *fw_data;
2455         unsigned i, fw_size;
2456         uint32_t tmp;
2457         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2458
2459         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2460                 adev->gfx.ce_fw->data;
2461
2462         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2463
2464         fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
2465                 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2466         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes);
2467
2468         r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes,
2469                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2470                                       &adev->gfx.ce.ce_fw_obj,
2471                                       &adev->gfx.ce.ce_fw_gpu_addr,
2472                                       (void **)&adev->gfx.ce.ce_fw_ptr);
2473         if (r) {
2474                 dev_err(adev->dev, "(%d) failed to create ce fw bo\n", r);
2475                 gfx_v10_0_ce_fini(adev);
2476                 return r;
2477         }
2478
2479         memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size);
2480
2481         amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj);
2482         amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj);
2483
2484         /* Trigger an invalidation of the L1 instruction caches */
2485         tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
2486         tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2487         WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
2488
2489         /* Wait for invalidation complete */
2490         for (i = 0; i < usec_timeout; i++) {
2491                 tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
2492                 if (REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
2493                         INVALIDATE_CACHE_COMPLETE) == 1)
2494                         break;
2495                 udelay(1);
2496         }
2497
2498         if (i >= usec_timeout) {
2499                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2500                 return -EINVAL;
2501         }
2502
2503         if (amdgpu_emu_mode == 1)
2504                 adev->nbio_funcs->hdp_flush(adev, NULL);
2505
2506         tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
2507         tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
2508         tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0);
2509         tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0);
2510         tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2511         WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
2512                 adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000);
2513         WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
2514                 upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr));
2515
2516         return 0;
2517 }
2518
2519 static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
2520 {
2521         int r;
2522         const struct gfx_firmware_header_v1_0 *me_hdr;
2523         const __le32 *fw_data;
2524         unsigned i, fw_size;
2525         uint32_t tmp;
2526         uint32_t usec_timeout = 50000;  /* wait for 50ms */
2527
2528         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2529                 adev->gfx.me_fw->data;
2530
2531         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2532
2533         fw_data = (const __le32 *)(adev->gfx.me_fw->data +
2534                 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2535         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
2536
2537         r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
2538                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2539                                       &adev->gfx.me.me_fw_obj,
2540                                       &adev->gfx.me.me_fw_gpu_addr,
2541                                       (void **)&adev->gfx.me.me_fw_ptr);
2542         if (r) {
2543                 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
2544                 gfx_v10_0_me_fini(adev);
2545                 return r;
2546         }
2547
2548         memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
2549
2550         amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
2551         amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
2552
2553         /* Trigger an invalidation of the L1 instruction caches */
2554         tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
2555         tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2556         WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
2557
2558         /* Wait for invalidation complete */
2559         for (i = 0; i < usec_timeout; i++) {
2560                 tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
2561                 if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2562                         INVALIDATE_CACHE_COMPLETE) == 1)
2563                         break;
2564                 udelay(1);
2565         }
2566
2567         if (i >= usec_timeout) {
2568                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2569                 return -EINVAL;
2570         }
2571
2572         if (amdgpu_emu_mode == 1)
2573                 adev->nbio_funcs->hdp_flush(adev, NULL);
2574
2575         tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
2576         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2577         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2578         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2579         tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2580         WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
2581                 adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000);
2582         WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
2583                 upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
2584
2585         return 0;
2586 }
2587
2588 static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2589 {
2590         int r;
2591
2592         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2593                 return -EINVAL;
2594
2595         gfx_v10_0_cp_gfx_enable(adev, false);
2596
2597         r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev);
2598         if (r) {
2599                 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
2600                 return r;
2601         }
2602
2603         r = gfx_v10_0_cp_gfx_load_ce_microcode(adev);
2604         if (r) {
2605                 dev_err(adev->dev, "(%d) failed to load ce fw\n", r);
2606                 return r;
2607         }
2608
2609         r = gfx_v10_0_cp_gfx_load_me_microcode(adev);
2610         if (r) {
2611                 dev_err(adev->dev, "(%d) failed to load me fw\n", r);
2612                 return r;
2613         }
2614
2615         return 0;
2616 }
2617
2618 static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
2619 {
2620         struct amdgpu_ring *ring;
2621         const struct cs_section_def *sect = NULL;
2622         const struct cs_extent_def *ext = NULL;
2623         int r, i;
2624         int ctx_reg_offset;
2625
2626         /* init the CP */
2627         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
2628                      adev->gfx.config.max_hw_contexts - 1);
2629         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2630
2631         gfx_v10_0_cp_gfx_enable(adev, true);
2632
2633         ring = &adev->gfx.gfx_ring[0];
2634         r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
2635         if (r) {
2636                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2637                 return r;
2638         }
2639
2640         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2641         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2642
2643         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2644         amdgpu_ring_write(ring, 0x80000000);
2645         amdgpu_ring_write(ring, 0x80000000);
2646
2647         for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
2648                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2649                         if (sect->id == SECT_CONTEXT) {
2650                                 amdgpu_ring_write(ring,
2651                                                   PACKET3(PACKET3_SET_CONTEXT_REG,
2652                                                           ext->reg_count));
2653                                 amdgpu_ring_write(ring, ext->reg_index -
2654                                                   PACKET3_SET_CONTEXT_REG_START);
2655                                 for (i = 0; i < ext->reg_count; i++)
2656                                         amdgpu_ring_write(ring, ext->extent[i]);
2657                         }
2658                 }
2659         }
2660
2661         ctx_reg_offset =
2662                 SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
2663         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
2664         amdgpu_ring_write(ring, ctx_reg_offset);
2665         amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
2666
2667         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2668         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2669
2670         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2671         amdgpu_ring_write(ring, 0);
2672
2673         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2674         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2675         amdgpu_ring_write(ring, 0x8000);
2676         amdgpu_ring_write(ring, 0x8000);
2677
2678         amdgpu_ring_commit(ring);
2679
2680         /* submit cs packet to copy state 0 to next available state */
2681         ring = &adev->gfx.gfx_ring[1];
2682         r = amdgpu_ring_alloc(ring, 2);
2683         if (r) {
2684                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2685                 return r;
2686         }
2687
2688         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2689         amdgpu_ring_write(ring, 0);
2690
2691         amdgpu_ring_commit(ring);
2692
2693         return 0;
2694 }
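/*
 * Editor's note (illustrative, not part of the original file): context
 * registers written from the ring are addressed relative to
 * PACKET3_SET_CONTEXT_REG_START, which is why both the clear-state loop
 * and the PA_SC_TILE_STEERING_OVERRIDE write above subtract that base.
 * A single-register write from the ring therefore takes the form:
 *
 *	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
 *	amdgpu_ring_write(ring, reg_offset - PACKET3_SET_CONTEXT_REG_START);
 *	amdgpu_ring_write(ring, reg_value);
 *
 * with reg_offset/reg_value standing in for the caller's register and data.
 */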
2695
2696 static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
2697                                          CP_PIPE_ID pipe)
2698 {
2699         u32 tmp;
2700
2701         tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL);
2702         tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
2703
2704         WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp);
2705 }
2706
2707 static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
2708                                           struct amdgpu_ring *ring)
2709 {
2710         u32 tmp;
2711
2712         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2713         if (ring->use_doorbell) {
2714                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2715                                     DOORBELL_OFFSET, ring->doorbell_index);
2716                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2717                                     DOORBELL_EN, 1);
2718         } else {
2719                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2720                                     DOORBELL_EN, 0);
2721         }
2722         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2723         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2724                             DOORBELL_RANGE_LOWER, ring->doorbell_index);
2725         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2726
2727         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2728                      CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2729 }
2730
2731 static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
2732 {
2733         struct amdgpu_ring *ring;
2734         u32 tmp;
2735         u32 rb_bufsz;
2736         u64 rb_addr, rptr_addr, wptr_gpu_addr;
2737         u32 i;
2738
2739         /* Set the write pointer delay */
2740         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2741
2742         /* set the RB to use vmid 0 */
2743         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2744
2745         /* Init gfx ring 0 for pipe 0 */
2746         mutex_lock(&adev->srbm_mutex);
2747         gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2748         mutex_unlock(&adev->srbm_mutex);
2749         /* Set ring buffer size */
2750         ring = &adev->gfx.gfx_ring[0];
2751         rb_bufsz = order_base_2(ring->ring_size / 8);
2752         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2753         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
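        /*
         * Illustrative math (editor's note): for a hypothetical 64 KiB
         * ring, ring_size / 8 = 8192, so order_base_2() gives RB_BUFSZ = 13
         * and RB_BLKSZ = 13 - 2 = 11.
         */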
2754 #ifdef __BIG_ENDIAN
2755         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2756 #endif
2757         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2758
2759         /* Initialize the ring buffer's write pointers */
2760         ring->wptr = 0;
2761         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2762         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2763
2764         /* set the wb address whether it's enabled or not */
2765         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2766         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2767         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
2768                      CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2769
2770         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2771         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
2772                      lower_32_bits(wptr_gpu_addr));
2773         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
2774                      upper_32_bits(wptr_gpu_addr));
2775
2776         mdelay(1);
2777         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2778
2779         rb_addr = ring->gpu_addr >> 8;
2780         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2781         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2782
2783         WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
2784
2785         gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
2786
2787         /* Init gfx ring 1 for pipe 1 */
2788         mutex_lock(&adev->srbm_mutex);
2789         gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
2790         mutex_unlock(&adev->srbm_mutex);
2791         ring = &adev->gfx.gfx_ring[1];
2792         rb_bufsz = order_base_2(ring->ring_size / 8);
2793         tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
2794         tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
2795         WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
2796         /* Initialize the ring buffer's write pointers */
2797         ring->wptr = 0;
2798         WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
2799         WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
2800         /* Set the wb address whether it's enabled or not */
2801         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2802         WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
2803         WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
2804                 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2805         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2806         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
2807                 lower_32_bits(wptr_gpu_addr));
2808         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
2809                 upper_32_bits(wptr_gpu_addr));
2810
2811         mdelay(1);
2812         WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
2813
2814         rb_addr = ring->gpu_addr >> 8;
2815         WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
2816         WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
2817         WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
2818
2819         gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
2820
2821         /* Switch to pipe 0 */
2822         mutex_lock(&adev->srbm_mutex);
2823         gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
2824         mutex_unlock(&adev->srbm_mutex);
2825
2826         /* start the ring */
2827         gfx_v10_0_cp_gfx_start(adev);
2828
2829         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2830                 ring = &adev->gfx.gfx_ring[i];
2831                 ring->sched.ready = true;
2832         }
2833
2834         return 0;
2835 }
2836
2837 static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2838 {
2839         int i;
2840
2841         if (enable) {
2842                 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
2843         } else {
2844                 WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
2845                              (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
2846                               CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2847                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2848                         adev->gfx.compute_ring[i].sched.ready = false;
2849                 adev->gfx.kiq.ring.sched.ready = false;
2850         }
2851         udelay(50);
2852 }
2853
2854 static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2855 {
2856         const struct gfx_firmware_header_v1_0 *mec_hdr;
2857         const __le32 *fw_data;
2858         unsigned i;
2859         u32 tmp;
2860         u32 usec_timeout = 50000; /* Wait for 50 ms */
2861
2862         if (!adev->gfx.mec_fw)
2863                 return -EINVAL;
2864
2865         gfx_v10_0_cp_compute_enable(adev, false);
2866
2867         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2868         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2869
2870         fw_data = (const __le32 *)
2871                 (adev->gfx.mec_fw->data +
2872                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2873
2874         /* Trigger an invalidation of the L1 instruction caches */
2875         tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
2876         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2877         WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
2878
2879         /* Wait for invalidation complete */
2880         for (i = 0; i < usec_timeout; i++) {
2881                 tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
2882                 if (REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2883                                   INVALIDATE_CACHE_COMPLETE) == 1)
2884                         break;
2885                 udelay(1);
2886         }
2887
2888         if (i >= usec_timeout) {
2889                 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2890                 return -EINVAL;
2891         }
2892
2893         if (amdgpu_emu_mode == 1)
2894                 adev->nbio_funcs->hdp_flush(adev, NULL);
2895
2896         tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
2897         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2898         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2899         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2900         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2901
2902         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr &
2903                      0xFFFFF000);
2904         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2905                      upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2906
2907         /* MEC1 */
2908         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0);
2909
2910         for (i = 0; i < mec_hdr->jt_size; i++)
2911                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2912                              le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2913
2914         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
2915
2916         /*
2917          * TODO: Loading MEC2 firmware is only necessary if MEC2 should run
2918          * different microcode than MEC1.
2919          */
2920
2921         return 0;
2922 }
2923
2924 static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
2925 {
2926         uint32_t tmp;
2927         struct amdgpu_device *adev = ring->adev;
2928
2929         /* tell RLC which is KIQ queue */
2930         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2931         tmp &= 0xffffff00;
2932         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2933         WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2934         tmp |= 0x80;
2935         WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2936 }
2937
2938 static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
2939 {
2940         struct amdgpu_device *adev = ring->adev;
2941         struct v10_gfx_mqd *mqd = ring->mqd_ptr;
2942         uint64_t hqd_gpu_addr, wb_gpu_addr;
2943         uint32_t tmp;
2944         uint32_t rb_bufsz;
2945
2946         /* set up gfx hqd wptr */
2947         mqd->cp_gfx_hqd_wptr = 0;
2948         mqd->cp_gfx_hqd_wptr_hi = 0;
2949
2950         /* set the pointer to the MQD */
2951         mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
2952         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2953
2954         /* set up mqd control */
2955         tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
2956         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
2957         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
2958         tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
2959         mqd->cp_gfx_mqd_control = tmp;
2960
2961         /* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
2962         tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID);
2963         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
2964         mqd->cp_gfx_hqd_vmid = 0;
2965
2966         /* set up default queue priority level
2967          * 0x0 = low priority, 0x1 = high priority */
2968         tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
2969         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
2970         mqd->cp_gfx_hqd_queue_priority = tmp;
2971
2972         /* set up time quantum */
2973         tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
2974         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
2975         mqd->cp_gfx_hqd_quantum = tmp;
2976
2977         /* set up gfx hqd base. this is similar to CP_RB_BASE */
2978         hqd_gpu_addr = ring->gpu_addr >> 8;
2979         mqd->cp_gfx_hqd_base = hqd_gpu_addr;
2980         mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
2981
2982         /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
2983         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2984         mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
2985         mqd->cp_gfx_hqd_rptr_addr_hi =
2986                 upper_32_bits(wb_gpu_addr) & 0xffff;
2987
2988         /* set up rb_wptr_poll addr */
2989         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2990         mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2991         mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2992
2993         /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
2994         rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
2995         tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
2996         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
2997         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
2998 #ifdef __BIG_ENDIAN
2999         tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3000 #endif
3001         mqd->cp_gfx_hqd_cntl = tmp;
3002
3003         /* set up cp_doorbell_control */
3004         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3005         if (ring->use_doorbell) {
3006                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3007                                     DOORBELL_OFFSET, ring->doorbell_index);
3008                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3009                                     DOORBELL_EN, 1);
3010         } else
3011                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3012                                     DOORBELL_EN, 0);
3013         mqd->cp_rb_doorbell_control = tmp;
3014
3015         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3016         ring->wptr = 0;
3017         mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
3018
3019         /* activate the queue */
3020         mqd->cp_gfx_hqd_active = 1;
3021
3022         return 0;
3023 }
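/*
 * Editor's note (illustrative, not part of the original file): the v10 gfx
 * MQD mirrors the CP_GFX_HQD_* registers so the queue can be (re)programmed
 * from memory rather than by direct MMIO; the BRING_UP_DEBUG helper below
 * simply replays the same fields through WREG32.  The *_addr_hi fields
 * above keep only bits 32..47 of the GPU address
 * (upper_32_bits() & 0xffff), i.e. a 48-bit address encoding.
 */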
3024
3025 #ifdef BRING_UP_DEBUG
3026 static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring)
3027 {
3028         struct amdgpu_device *adev = ring->adev;
3029         struct v10_gfx_mqd *mqd = ring->mqd_ptr;
3030
3031         /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
3032         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
3033         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
3034
3035         /* set GFX_MQD_BASE */
3036         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
3037         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3038
3039         /* set GFX_MQD_CONTROL */
3040         WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
3041
3042         /* set GFX_HQD_VMID to 0 */
3043         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
3044
3045         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY,
3046                         mqd->cp_gfx_hqd_queue_priority);
3047         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
3048
3049         /* set GFX_HQD_BASE, similar as CP_RB_BASE */
3050         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
3051         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
3052
3053         /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
3054         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
3055         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
3056
3057         /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
3058         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
3059
3060         /* set RB_WPTR_POLL_ADDR */
3061         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
3062         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
3063
3064         /* set RB_DOORBELL_CONTROL */
3065         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
3066
3067         /* activate the queue */
3068         WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
3069
3070         return 0;
3071 }
3072 #endif
3073
3074 static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
3075 {
3076         struct amdgpu_device *adev = ring->adev;
3077         struct v10_gfx_mqd *mqd = ring->mqd_ptr;
3078
3079         if (!adev->in_gpu_reset && !adev->in_suspend) {
3080                 memset((void *)mqd, 0, sizeof(*mqd));
3081                 mutex_lock(&adev->srbm_mutex);
3082                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3083                 gfx_v10_0_gfx_mqd_init(ring);
3084 #ifdef BRING_UP_DEBUG
3085                 gfx_v10_0_gfx_queue_init_register(ring);
3086 #endif
3087                 nv_grbm_select(adev, 0, 0, 0, 0);
3088                 mutex_unlock(&adev->srbm_mutex);
3089                 if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
3090                         memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd));
3091         } else if (adev->in_gpu_reset) {
3092                 /* reset mqd with the backup copy */
3093                 if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
3094                         memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
3095                 /* reset the ring */
3096                 ring->wptr = 0;
3097                 amdgpu_ring_clear_ring(ring);
3098 #ifdef BRING_UP_DEBUG
3099                 mutex_lock(&adev->srbm_mutex);
3100                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3101                 gfx_v10_0_gfx_queue_init_register(ring);
3102                 nv_grbm_select(adev, 0, 0, 0, 0);
3103                 mutex_unlock(&adev->srbm_mutex);
3104 #endif
3105         } else {
3106                 amdgpu_ring_clear_ring(ring);
3107         }
3108
3109         return 0;
3110 }
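/*
 * Editor's note (illustrative, not part of the original file): three cases
 * are handled above: first init builds the MQD and saves a backup copy,
 * a GPU reset restores the MQD from that backup and clears the ring, and
 * resume from suspend only clears the ring, reusing the existing MQD
 * contents.
 */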
3111
3112 #ifndef BRING_UP_DEBUG
3113 static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
3114 {
3115         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3116         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3117         int r, i;
3118
3119         if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
3120                 return -EINVAL;
3121
3122         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
3123                                         adev->gfx.num_gfx_rings);
3124         if (r) {
3125                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3126                 return r;
3127         }
3128
3129         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3130                 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
3131
3132         r = amdgpu_ring_test_ring(kiq_ring);
3133         if (r) {
3134                 DRM_ERROR("KGQ enable failed\n");
3135                 kiq_ring->sched.ready = false;
3136         }
3137         return r;
3138 }
3139 #endif
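/*
 * Editor's note (illustrative, not part of the original file): mapping the
 * kernel gfx queues goes through the KIQ: ring space for one map_queues
 * packet per gfx ring is reserved up front, the per-ASIC pmf callback
 * emits the packets, and amdgpu_ring_test_ring() on the KIQ ring confirms
 * they were consumed before the gfx rings are used.
 */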
3140
3141 static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3142 {
3143         int r, i;
3144         struct amdgpu_ring *ring;
3145
3146         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3147                 ring = &adev->gfx.gfx_ring[i];
3148
3149                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3150                 if (unlikely(r != 0))
3151                         goto done;
3152
3153                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3154                 if (!r) {
3155                         r = gfx_v10_0_gfx_init_queue(ring);
3156                         amdgpu_bo_kunmap(ring->mqd_obj);
3157                         ring->mqd_ptr = NULL;
3158                 }
3159                 amdgpu_bo_unreserve(ring->mqd_obj);
3160                 if (r)
3161                         goto done;
3162         }
3163 #ifndef BRING_UP_DEBUG
3164         r = gfx_v10_0_kiq_enable_kgq(adev);
3165         if (r)
3166                 goto done;
3167 #endif
3168         r = gfx_v10_0_cp_gfx_start(adev);
3169         if (r)
3170                 goto done;
3171
3172         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3173                 ring = &adev->gfx.gfx_ring[i];
3174                 ring->sched.ready = true;
3175         }
3176 done:
3177         return r;
3178 }
3179
3180 static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
3181 {
3182         struct amdgpu_device *adev = ring->adev;
3183         struct v10_compute_mqd *mqd = ring->mqd_ptr;
3184         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3185         uint32_t tmp;
3186
3187         mqd->header = 0xC0310800;
3188         mqd->compute_pipelinestat_enable = 0x00000001;
3189         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3190         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3191         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3192         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3193         mqd->compute_misc_reserved = 0x00000003;
3194
3195         eop_base_addr = ring->eop_gpu_addr >> 8;
3196         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3197         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3198
3199         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3200         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3201         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3202                         (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1));
3203
3204         mqd->cp_hqd_eop_control = tmp;
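        /*
         * Illustrative math (editor's note): assuming a 2048-byte EOP
         * buffer, GFX10_MEC_HPD_SIZE / 4 = 512 dwords and
         * order_base_2(512) - 1 = 8, which per the comment above encodes
         * 2^(8+1) = 512 dwords.
         */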
3205
3206         /* enable doorbell? */
3207         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3208
3209         if (ring->use_doorbell) {
3210                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3211                                     DOORBELL_OFFSET, ring->doorbell_index);
3212                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3213                                     DOORBELL_EN, 1);
3214                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3215                                     DOORBELL_SOURCE, 0);
3216                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3217                                     DOORBELL_HIT, 0);
3218         } else {
3219                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3220                                     DOORBELL_EN, 0);
3221         }
3222
3223         mqd->cp_hqd_pq_doorbell_control = tmp;
3224
3225         /* disable the queue if it's active */
3226         ring->wptr = 0;
3227         mqd->cp_hqd_dequeue_request = 0;
3228         mqd->cp_hqd_pq_rptr = 0;
3229         mqd->cp_hqd_pq_wptr_lo = 0;
3230         mqd->cp_hqd_pq_wptr_hi = 0;
3231
3232         /* set the pointer to the MQD */
3233         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3234         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3235
3236         /* set MQD vmid to 0 */
3237         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3238         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3239         mqd->cp_mqd_control = tmp;
3240
3241         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3242         hqd_gpu_addr = ring->gpu_addr >> 8;
3243         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3244         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3245
3246         /* set up the HQD, this is similar to CP_RB0_CNTL */
3247         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3248         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3249                             (order_base_2(ring->ring_size / 4) - 1));
3250         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3251                             ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3252 #ifdef __BIG_ENDIAN
3253         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3254 #endif
3255         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3256         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
3257         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3258         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3259         mqd->cp_hqd_pq_control = tmp;
3260
3261         /* set the wb address whether it's enabled or not */
3262         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3263         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3264         mqd->cp_hqd_pq_rptr_report_addr_hi =
3265                 upper_32_bits(wb_gpu_addr) & 0xffff;
3266
3267         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3268         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3269         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3270         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3271
3272         tmp = 0;
3273         /* enable the doorbell if requested */
3274         if (ring->use_doorbell) {
3275                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3276                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3277                                 DOORBELL_OFFSET, ring->doorbell_index);
3278
3279                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3280                                     DOORBELL_EN, 1);
3281                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3282                                     DOORBELL_SOURCE, 0);
3283                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3284                                     DOORBELL_HIT, 0);
3285         }
3286
3287         mqd->cp_hqd_pq_doorbell_control = tmp;
3288
3289         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3290         ring->wptr = 0;
3291         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3292
3293         /* set the vmid for the queue */
3294         mqd->cp_hqd_vmid = 0;
3295
3296         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3297         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3298         mqd->cp_hqd_persistent_state = tmp;
3299
3300         /* set MIN_IB_AVAIL_SIZE */
3301         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3302         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3303         mqd->cp_hqd_ib_control = tmp;
3304
3305         /* activate the queue */
3306         mqd->cp_hqd_active = 1;
3307
3308         return 0;
3309 }
3310
3311 static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
3312 {
3313         struct amdgpu_device *adev = ring->adev;
3314         struct v10_compute_mqd *mqd = ring->mqd_ptr;
3315         int j;
3316
3317         /* disable wptr polling */
3318         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3319
3320         /* write the EOP addr */
3321         WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3322                mqd->cp_hqd_eop_base_addr_lo);
3323         WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3324                mqd->cp_hqd_eop_base_addr_hi);
3325
3326         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3327         WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
3328                mqd->cp_hqd_eop_control);
3329
3330         /* enable doorbell? */
3331         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3332                mqd->cp_hqd_pq_doorbell_control);
3333
3334         /* disable the queue if it's active */
3335         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3336                 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3337                 for (j = 0; j < adev->usec_timeout; j++) {
3338                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3339                                 break;
3340                         udelay(1);
3341                 }
3342                 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3343                        mqd->cp_hqd_dequeue_request);
3344                 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
3345                        mqd->cp_hqd_pq_rptr);
3346                 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3347                        mqd->cp_hqd_pq_wptr_lo);
3348                 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3349                        mqd->cp_hqd_pq_wptr_hi);
3350         }
3351
3352         /* set the pointer to the MQD */
3353         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
3354                mqd->cp_mqd_base_addr_lo);
3355         WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3356                mqd->cp_mqd_base_addr_hi);
3357
3358         /* set MQD vmid to 0 */
3359         WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
3360                mqd->cp_mqd_control);
3361
3362         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3363         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
3364                mqd->cp_hqd_pq_base_lo);
3365         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
3366                mqd->cp_hqd_pq_base_hi);
3367
3368         /* set up the HQD, this is similar to CP_RB0_CNTL */
3369         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
3370                mqd->cp_hqd_pq_control);
3371
3372         /* set the wb address whether it's enabled or not */
3373         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3374                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3375         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3376                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3377
3378         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3379         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3380                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3381         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3382                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3383
3384         /* enable the doorbell if requested */
3385         if (ring->use_doorbell) {
3386                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3387                         (adev->doorbell_index.kiq * 2) << 2);
3388                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3389                         (adev->doorbell_index.userqueue_end * 2) << 2);
3390         }
3391
3392         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3393                mqd->cp_hqd_pq_doorbell_control);
3394
3395         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3396         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3397                mqd->cp_hqd_pq_wptr_lo);
3398         WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3399                mqd->cp_hqd_pq_wptr_hi);
3400
3401         /* set the vmid for the queue */
3402         WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3403
3404         WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3405                mqd->cp_hqd_persistent_state);
3406
3407         /* activate the queue */
3408         WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
3409                mqd->cp_hqd_active);
3410
3411         if (ring->use_doorbell)
3412                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3413
3414         return 0;
3415 }
3416
3417 static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
3418 {
3419         struct amdgpu_device *adev = ring->adev;
3420         struct v10_compute_mqd *mqd = ring->mqd_ptr;
3421         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3422
3423         gfx_v10_0_kiq_setting(ring);
3424
3425         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3426                 /* reset MQD to a clean status */
3427                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3428                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3429
3430                 /* reset ring buffer */
3431                 ring->wptr = 0;
3432                 amdgpu_ring_clear_ring(ring);
3433
3434                 mutex_lock(&adev->srbm_mutex);
3435                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3436                 gfx_v10_0_kiq_init_register(ring);
3437                 nv_grbm_select(adev, 0, 0, 0, 0);
3438                 mutex_unlock(&adev->srbm_mutex);
3439         } else {
3440                 memset((void *)mqd, 0, sizeof(*mqd));
3441                 mutex_lock(&adev->srbm_mutex);
3442                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3443                 gfx_v10_0_compute_mqd_init(ring);
3444                 gfx_v10_0_kiq_init_register(ring);
3445                 nv_grbm_select(adev, 0, 0, 0, 0);
3446                 mutex_unlock(&adev->srbm_mutex);
3447
3448                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3449                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3450         }
3451
3452         return 0;
3453 }
3454
3455 static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
3456 {
3457         struct amdgpu_device *adev = ring->adev;
3458         struct v10_compute_mqd *mqd = ring->mqd_ptr;
3459         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3460
3461         if (!adev->in_gpu_reset && !adev->in_suspend) {
3462                 memset((void *)mqd, 0, sizeof(*mqd));
3463                 mutex_lock(&adev->srbm_mutex);
3464                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3465                 gfx_v10_0_compute_mqd_init(ring);
3466                 nv_grbm_select(adev, 0, 0, 0, 0);
3467                 mutex_unlock(&adev->srbm_mutex);
3468
3469                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3470                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3471         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3472                 /* reset MQD to a clean status */
3473                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3474                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
3475
3476                 /* reset ring buffer */
3477                 ring->wptr = 0;
3478                 amdgpu_ring_clear_ring(ring);
3479         } else {
3480                 amdgpu_ring_clear_ring(ring);
3481         }
3482
3483         return 0;
3484 }
3485
3486 static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
3487 {
3488         struct amdgpu_ring *ring;
3489         int r;
3490
3491         ring = &adev->gfx.kiq.ring;
3492
3493         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3494         if (unlikely(r != 0))
3495                 return r;
3496
3497         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3498         if (unlikely(r != 0))
3499                 return r;
3500
3501         gfx_v10_0_kiq_init_queue(ring);
3502         amdgpu_bo_kunmap(ring->mqd_obj);
3503         ring->mqd_ptr = NULL;
3504         amdgpu_bo_unreserve(ring->mqd_obj);
3505         ring->sched.ready = true;
3506         return 0;
3507 }
3508
3509 static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
3510 {
3511         struct amdgpu_ring *ring = NULL;
3512         int r = 0, i;
3513
3514         gfx_v10_0_cp_compute_enable(adev, true);
3515
3516         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3517                 ring = &adev->gfx.compute_ring[i];
3518
3519                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3520                 if (unlikely(r != 0))
3521                         goto done;
3522                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3523                 if (!r) {
3524                         r = gfx_v10_0_kcq_init_queue(ring);
3525                         amdgpu_bo_kunmap(ring->mqd_obj);
3526                         ring->mqd_ptr = NULL;
3527                 }
3528                 amdgpu_bo_unreserve(ring->mqd_obj);
3529                 if (r)
3530                         goto done;
3531         }
3532
3533         r = amdgpu_gfx_enable_kcq(adev);
3534 done:
3535         return r;
3536 }
3537
3538 static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
3539 {
3540         int r, i;
3541         struct amdgpu_ring *ring;
3542
3543         if (!(adev->flags & AMD_IS_APU))
3544                 gfx_v10_0_enable_gui_idle_interrupt(adev, false);
3545
3546         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3547                 /* legacy firmware loading */
3548                 r = gfx_v10_0_cp_gfx_load_microcode(adev);
3549                 if (r)
3550                         return r;
3551
3552                 r = gfx_v10_0_cp_compute_load_microcode(adev);
3553                 if (r)
3554                         return r;
3555         }
3556
3557         r = gfx_v10_0_kiq_resume(adev);
3558         if (r)
3559                 return r;
3560
3561         r = gfx_v10_0_kcq_resume(adev);
3562         if (r)
3563                 return r;
3564
3565         if (!amdgpu_async_gfx_ring) {
3566                 r = gfx_v10_0_cp_gfx_resume(adev);
3567                 if (r)
3568                         return r;
3569         } else {
3570                 r = gfx_v10_0_cp_async_gfx_ring_resume(adev);
3571                 if (r)
3572                         return r;
3573         }
3574
3575         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3576                 ring = &adev->gfx.gfx_ring[i];
3577                 DRM_INFO("gfx %d ring me %d pipe %d q %d\n",
3578                          i, ring->me, ring->pipe, ring->queue);
3579                 r = amdgpu_ring_test_ring(ring);
3580                 if (r) {
3581                         ring->sched.ready = false;
3582                         return r;
3583                 }
3584         }
3585
3586         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3587                 ring = &adev->gfx.compute_ring[i];
3588                 ring->sched.ready = true;
3589                 DRM_INFO("compute ring %d mec %d pipe %d q %d\n",
3590                          i, ring->me, ring->pipe, ring->queue);
3591                 r = amdgpu_ring_test_ring(ring);
3592                 if (r)
3593                         ring->sched.ready = false;
3594         }
3595
3596         return 0;
3597 }
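/*
 * Editor's note (illustrative, not part of the original file): the resume
 * sequence above is microcode load (direct loading only), then the KIQ,
 * then the compute queues, then the gfx rings (legacy MMIO path or the
 * KIQ-mapped async path), and finally a ring test on every queue before it
 * is left marked ready.
 */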
3598
3599 static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable)
3600 {
3601         gfx_v10_0_cp_gfx_enable(adev, enable);
3602         gfx_v10_0_cp_compute_enable(adev, enable);
3603 }
3604
3605 static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
3606 {
3607         uint32_t data, pattern = 0xDEADBEEF;
3608
3609         /* check if mmVGT_ESGS_RING_SIZE_UMD
3610          * has been remapped to mmVGT_ESGS_RING_SIZE */
3611         data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
3612
3613         WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
3614
3615         WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
3616
3617         if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
3618                 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
3619                 return true;
3620         } else {
3621                 WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
3622                 return false;
3623         }
3624 }
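/*
 * Editor's note (illustrative, not part of the original file): the probe
 * above writes a test pattern through the UMD alias and reads it back
 * through the privileged register.  If the pattern shows up, the GRBM CAM
 * remapping is already in place and the saved value is restored through
 * the alias; otherwise the privileged register itself is restored and the
 * caller sets up the CAM below.
 */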
3625
3626 static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
3627 {
3628         uint32_t data;
3629
3630         /* initialize cam_index to 0
3631          * index will auto-inc after each data write */
3632         WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
3633
3634         /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
3635         data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
3636                 GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
3637                (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
3638                 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
3639         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
3640         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
3641
3642         /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
3643         data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
3644                 GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
3645                (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
3646                 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
3647         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
3648         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
3649
3650         /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
3651         data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
3652                 GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
3653                (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
3654                 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
3655         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
3656         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
3657
3658         /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
3659         data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
3660                 GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
3661                (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
3662                 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
3663         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
3664         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
3665
3666         /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
3667         data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
3668                 GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
3669                (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
3670                 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
3671         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
3672         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
3673
3674         /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
3675         data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
3676                 GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
3677                (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
3678                 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
3679         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
3680         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
3681
3682         /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
3683         data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
3684                 GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
3685                (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
3686                 GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
3687         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
3688         WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
3689 }
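/*
 * Editor's note (illustrative, not part of the original file): each CAM
 * entry packs the UMD alias offset into CAM_ADDR and the privileged
 * register offset into CAM_REMAPADDR, and GRBM_CAM_INDEX auto-increments
 * after every GRBM_CAM_DATA write.  Another mapping would only need one
 * more pair of the form:
 *
 *	data = (SOC15_REG_OFFSET(GC, 0, mmSOME_REG_UMD) <<
 *		GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
 *	       (SOC15_REG_OFFSET(GC, 0, mmSOME_REG) <<
 *		GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
 *	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
 *	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
 *
 * where mmSOME_REG_UMD / mmSOME_REG stand for a hypothetical alias pair.
 */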
3690
3691 static int gfx_v10_0_hw_init(void *handle)
3692 {
3693         int r;
3694         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3695
3696         r = gfx_v10_0_csb_vram_pin(adev);
3697         if (r)
3698                 return r;
3699
3700         if (!amdgpu_emu_mode)
3701                 gfx_v10_0_init_golden_registers(adev);
3702
3703         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
3704                 /*
3705                  * For gfx 10, rlc firmware loading relies on smu firmware
3706                  * being loaded first, so in direct type, smc ucode has to
3707                  * be loaded here before rlc.
3708                  */
3709                 r = smu_load_microcode(&adev->smu);
3710                 if (r)
3711                         return r;
3712
3713                 r = smu_check_fw_status(&adev->smu);
3714                 if (r) {
3715                         pr_err("SMC firmware status is not correct\n");
3716                         return r;
3717                 }
3718         }
3719
3720         /* if GRBM CAM not remapped, set up the remapping */
3721         if (!gfx_v10_0_check_grbm_cam_remapping(adev))
3722                 gfx_v10_0_setup_grbm_cam_remapping(adev);
3723
3724         gfx_v10_0_constants_init(adev);
3725
3726         r = gfx_v10_0_rlc_resume(adev);
3727         if (r)
3728                 return r;
3729
3730         /*
3731          * init golden registers and rlc resume may override some registers,
3732          * reconfig them here
3733          */
3734         gfx_v10_0_tcp_harvest(adev);
3735
3736         r = gfx_v10_0_cp_resume(adev);
3737         if (r)
3738                 return r;
3739
3740         return r;
3741 }
3742
3743 #ifndef BRING_UP_DEBUG
3744 static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
3745 {
3746         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
3747         struct amdgpu_ring *kiq_ring = &kiq->ring;
3748         int i;
3749
3750         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
3751                 return -EINVAL;
3752
3753         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
3754                                         adev->gfx.num_gfx_rings))
3755                 return -ENOMEM;
3756
3757         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3758                 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
3759                                            PREEMPT_QUEUES, 0, 0);
3760
3761         return amdgpu_ring_test_ring(kiq_ring);
3762 }
3763 #endif
3764
3765 static int gfx_v10_0_hw_fini(void *handle)
3766 {
3767         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3768         int r;
3769
3770         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3771         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3772 #ifndef BRING_UP_DEBUG
3773         if (amdgpu_async_gfx_ring) {
3774                 r = gfx_v10_0_kiq_disable_kgq(adev);
3775                 if (r)
3776                         DRM_ERROR("KGQ disable failed\n");
3777         }
3778 #endif
3779         if (amdgpu_gfx_disable_kcq(adev))
3780                 DRM_ERROR("KCQ disable failed\n");
3781         if (amdgpu_sriov_vf(adev)) {
3782                 pr_debug("For SRIOV client, shouldn't do anything.\n");
3783                 return 0;
3784         }
3785         gfx_v10_0_cp_enable(adev, false);
3786         gfx_v10_0_enable_gui_idle_interrupt(adev, false);
3787         gfx_v10_0_csb_vram_unpin(adev);
3788
3789         return 0;
3790 }
3791
3792 static int gfx_v10_0_suspend(void *handle)
3793 {
3794         return gfx_v10_0_hw_fini(handle);
3795 }
3796
3797 static int gfx_v10_0_resume(void *handle)
3798 {
3799         return gfx_v10_0_hw_init(handle);
3800 }
3801
3802 static bool gfx_v10_0_is_idle(void *handle)
3803 {
3804         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3805
3806         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3807                                 GRBM_STATUS, GUI_ACTIVE))
3808                 return false;
3809         else
3810                 return true;
3811 }
3812
3813 static int gfx_v10_0_wait_for_idle(void *handle)
3814 {
3815         unsigned i;
3816         u32 tmp;
3817         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3818
3819         for (i = 0; i < adev->usec_timeout; i++) {
3820                 /* read GRBM_STATUS */
3821                 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
3822                         GRBM_STATUS__GUI_ACTIVE_MASK;
3823
3824                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3825                         return 0;
3826                 udelay(1);
3827         }
3828         return -ETIMEDOUT;
3829 }
3830
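/*
 * Check GRBM_STATUS/GRBM_STATUS2 for busy blocks and, if anything is stuck,
 * stop the RLC, disable CP gfx/compute parsing and pulse the matching
 * CP/GFX/RLC bits in GRBM_SOFT_RESET, with a short delay before the bits are
 * released again.
 */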
3831 static int gfx_v10_0_soft_reset(void *handle)
3832 {
3833         u32 grbm_soft_reset = 0;
3834         u32 tmp;
3835         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3836
3837         /* GRBM_STATUS */
3838         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3839         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3840                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3841                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
3842                    GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
3843                    GRBM_STATUS__SPI_BUSY_MASK |
3844                    GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) {
3845                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3846                                                 GRBM_SOFT_RESET, SOFT_RESET_CP,
3847                                                 1);
3848                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3849                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX,
3850                                                 1);
3851         }
3852
3853         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3854                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3855                                                 GRBM_SOFT_RESET, SOFT_RESET_CP,
3856                                                 1);
3857         }
3858
3859         /* GRBM_STATUS2 */
3860         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3861         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3862                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3863                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC,
3864                                                 1);
3865
3866         if (grbm_soft_reset) {
3867                 /* stop the rlc */
3868                 gfx_v10_0_rlc_stop(adev);
3869
3870                 /* Disable GFX parsing/prefetching */
3871                 gfx_v10_0_cp_gfx_enable(adev, false);
3872
3873                 /* Disable MEC parsing/prefetching */
3874                 gfx_v10_0_cp_compute_enable(adev, false);
3875
3877                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3878                 tmp |= grbm_soft_reset;
3879                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3880                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3881                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3882
3883                 udelay(50);
3884
3885                 tmp &= ~grbm_soft_reset;
3886                 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3887                 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3889
3890                 /* Wait a little for things to settle down */
3891                 udelay(50);
3892         }
3893         return 0;
3894 }
3895
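/*
 * Read the 64-bit GPU clock counter: latch the current value through
 * RLC_CAPTURE_GPU_CLOCK_COUNT, then read the LSB/MSB halves back while
 * holding the gpu_clock mutex.
 */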
3896 static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3897 {
3898         uint64_t clock;
3899
3900         mutex_lock(&adev->gfx.gpu_clock_mutex);
3901         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3902         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3903                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3904         mutex_unlock(&adev->gfx.gpu_clock_mutex);
3905         return clock;
3906 }
3907
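/*
 * Program the per-VMID GDS partition for a job: write the GDS base/size,
 * GWS base/size and OA mask registers for @vmid through plain ring register
 * writes.
 */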
3908 static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3909                                            uint32_t vmid,
3910                                            uint32_t gds_base, uint32_t gds_size,
3911                                            uint32_t gws_base, uint32_t gws_size,
3912                                            uint32_t oa_base, uint32_t oa_size)
3913 {
3914         struct amdgpu_device *adev = ring->adev;
3915
3916         /* GDS Base */
3917         gfx_v10_0_write_data_to_reg(ring, 0, false,
3918                                     SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3919                                     gds_base);
3920
3921         /* GDS Size */
3922         gfx_v10_0_write_data_to_reg(ring, 0, false,
3923                                     SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3924                                     gds_size);
3925
3926         /* GWS */
3927         gfx_v10_0_write_data_to_reg(ring, 0, false,
3928                                     SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3929                                     gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3930
3931         /* OA */
3932         gfx_v10_0_write_data_to_reg(ring, 0, false,
3933                                     SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3934                                     (1 << (oa_size + oa_base)) - (1 << oa_base));
3935 }
3936
3937 static int gfx_v10_0_early_init(void *handle)
3938 {
3939         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3940
3941         adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS;
3942         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3943
3944         gfx_v10_0_set_kiq_pm4_funcs(adev);
3945         gfx_v10_0_set_ring_funcs(adev);
3946         gfx_v10_0_set_irq_funcs(adev);
3947         gfx_v10_0_set_gds_init(adev);
3948         gfx_v10_0_set_rlc_funcs(adev);
3949
3950         return 0;
3951 }
3952
3953 static int gfx_v10_0_late_init(void *handle)
3954 {
3955         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3956         int r;
3957
3958         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3959         if (r)
3960                 return r;
3961
3962         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3963         if (r)
3964                 return r;
3965
3966         return 0;
3967 }
3968
3969 static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
3970 {
3971         uint32_t rlc_cntl;
3972
3973         /* check whether the RLC F32 core is enabled */
3974         rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3975         return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
3976 }
3977
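/*
 * Request RLC safe mode: write the CMD bit plus a message to RLC_SAFE_MODE
 * and poll until the RLC clears the CMD bit, acknowledging that safe mode
 * has been entered.  gfx_v10_0_unset_safe_mode() below writes only the CMD
 * bit to request leaving safe mode again.
 */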
3978 static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
3979 {
3980         uint32_t data;
3981         unsigned i;
3982
3983         data = RLC_SAFE_MODE__CMD_MASK;
3984         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3985         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3986
3987         /* wait for RLC_SAFE_MODE */
3988         for (i = 0; i < adev->usec_timeout; i++) {
3989                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3990                         break;
3991                 udelay(1);
3992         }
3993 }
3994
3995 static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
3996 {
3997         uint32_t data;
3998
3999         data = RLC_SAFE_MODE__CMD_MASK;
4000         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4001 }
4002
4003 static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4004                                                       bool enable)
4005 {
4006         uint32_t data, def;
4007
4008         /* It is disabled by HW by default */
4009         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4010                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4011                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4012                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4013                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4014                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4015
4016                 /* keep the RLC CGTT sclk override bit set */
4017                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4018
4019                 if (def != data)
4020                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4021
4022                 /* MGLS is a global flag to control all MGLS in GFX */
4023                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4024                         /* 2 - RLC memory Light sleep */
4025                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4026                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4027                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4028                                 if (def != data)
4029                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4030                         }
4031                         /* 3 - CP memory Light sleep */
4032                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4033                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4034                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4035                                 if (def != data)
4036                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4037                         }
4038                 }
4039         } else {
4040                 /* 1 - MGCG_OVERRIDE */
4041                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4042                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4043                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4044                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4045                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4046                 if (def != data)
4047                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4048
4049                 /* 2 - disable MGLS in RLC */
4050                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4051                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4052                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4053                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4054                 }
4055
4056                 /* 3 - disable MGLS in CP */
4057                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4058                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4059                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4060                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4061                 }
4062         }
4063 }
4064
4065 static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
4066                                            bool enable)
4067 {
4068         uint32_t data, def;
4069
4070         /* Enable 3D CGCG/CGLS */
4071         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4072                 /* write cmd to clear cgcg/cgls ov */
4073                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4074                 /* unset CGCG override */
4075                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4076                 /* update CGCG and CGLS override bits */
4077                 if (def != data)
4078                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4079                 /* enable 3Dcgcg FSM(0x0000363f) */
4080                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4081                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4082                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4083                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4084                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4085                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4086                 if (def != data)
4087                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4088
4089                 /* set IDLE_POLL_COUNT(0x00900100) */
4090                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4091                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4092                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4093                 if (def != data)
4094                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4095         } else {
4096                 /* Disable CGCG/CGLS */
4097                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4098                 /* disable cgcg, cgls should be disabled */
4099                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4100                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4101                 /* disable cgcg and cgls in FSM */
4102                 if (def != data)
4103                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4104         }
4105 }
4106
4107 static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4108                                                       bool enable)
4109 {
4110         uint32_t def, data;
4111
4112         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4113                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4114                 /* unset CGCG override */
4115                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4116                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4117                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4118                 else
4119                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4120                 /* update CGCG and CGLS override bits */
4121                 if (def != data)
4122                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4123
4124                 /* enable cgcg FSM(0x0000363F) */
4125                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4126                 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4127                         RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4128                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4129                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4130                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4131                 if (def != data)
4132                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4133
4134                 /* set IDLE_POLL_COUNT(0x00900100) */
4135                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4136                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4137                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4138                 if (def != data)
4139                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4140         } else {
4141                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4142                 /* reset CGCG/CGLS bits */
4143                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4144                 /* disable cgcg and cgls in FSM */
4145                 if (def != data)
4146                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4147         }
4148 }
4149
4150 static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4151                                             bool enable)
4152 {
4153         amdgpu_gfx_rlc_enter_safe_mode(adev);
4154
4155         if (enable) {
4156                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4157                  * ===  MGCG + MGLS ===
4158                  */
4159                 gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
4160                 /* ===  CGCG /CGLS for GFX 3D Only === */
4161                 gfx_v10_0_update_3d_clock_gating(adev, enable);
4162                 /* ===  CGCG + CGLS === */
4163                 gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
4164         } else {
4165                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4166                  * ===  CGCG + CGLS ===
4167                  */
4168                 gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
4169                 /* ===  CGCG /CGLS for GFX 3D Only === */
4170                 gfx_v10_0_update_3d_clock_gating(adev, enable);
4171                 /* ===  MGCG + MGLS === */
4172                 gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
4173         }
4174
4175         if (adev->cg_flags &
4176             (AMD_CG_SUPPORT_GFX_MGCG |
4177              AMD_CG_SUPPORT_GFX_CGCG |
4178              AMD_CG_SUPPORT_GFX_CGLS |
4179              AMD_CG_SUPPORT_GFX_3D_CGCG |
4180              AMD_CG_SUPPORT_GFX_3D_CGLS))
4182                 gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
4183
4184         amdgpu_gfx_rlc_exit_safe_mode(adev);
4185
4186         return 0;
4187 }
4188
4189 static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
4190         .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
4191         .set_safe_mode = gfx_v10_0_set_safe_mode,
4192         .unset_safe_mode = gfx_v10_0_unset_safe_mode,
4193         .init = gfx_v10_0_rlc_init,
4194         .get_csb_size = gfx_v10_0_get_csb_size,
4195         .get_csb_buffer = gfx_v10_0_get_csb_buffer,
4196         .resume = gfx_v10_0_rlc_resume,
4197         .stop = gfx_v10_0_rlc_stop,
4198         .reset = gfx_v10_0_rlc_reset,
4199         .start = gfx_v10_0_rlc_start
4200 };
4201
4202 static int gfx_v10_0_set_powergating_state(void *handle,
4203                                           enum amd_powergating_state state)
4204 {
4205         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4206         bool enable = (state == AMD_PG_STATE_GATE);
4207         switch (adev->asic_type) {
4208         case CHIP_NAVI10:
4209         case CHIP_NAVI14:
4210                 if (!enable) {
4211                         amdgpu_gfx_off_ctrl(adev, false);
4212                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4213                 } else
4214                         amdgpu_gfx_off_ctrl(adev, true);
4215                 break;
4216         default:
4217                 break;
4218         }
4219         return 0;
4220 }
4221
4222 static int gfx_v10_0_set_clockgating_state(void *handle,
4223                                           enum amd_clockgating_state state)
4224 {
4225         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4226
4227         switch (adev->asic_type) {
4228         case CHIP_NAVI10:
4229         case CHIP_NAVI14:
4230         case CHIP_NAVI12:
4231                 gfx_v10_0_update_gfx_clock_gating(adev,
4232                                                  state == AMD_CG_STATE_GATE);
4233                 break;
4234         default:
4235                 break;
4236         }
4237         return 0;
4238 }
4239
4240 static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
4241 {
4242         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4243         int data;
4244
4245         /* AMD_CG_SUPPORT_GFX_MGCG */
4246         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4247         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4248                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4249
4250         /* AMD_CG_SUPPORT_GFX_CGCG */
4251         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4252         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4253                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4254
4255         /* AMD_CG_SUPPORT_GFX_CGLS */
4256         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4257                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4258
4259         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4260         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4261         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4262                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4263
4264         /* AMD_CG_SUPPORT_GFX_CP_LS */
4265         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4266         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4267                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4268
4269         /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4270         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4271         if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4272                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4273
4274         /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4275         if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4276                 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4277 }
4278
4279 static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4280 {
4281         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr */
4282 }
4283
4284 static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4285 {
4286         struct amdgpu_device *adev = ring->adev;
4287         u64 wptr;
4288
4289         /* XXX check if swapping is necessary on BE */
4290         if (ring->use_doorbell) {
4291                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4292         } else {
4293                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4294                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4295         }
4296
4297         return wptr;
4298 }
4299
4300 static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4301 {
4302         struct amdgpu_device *adev = ring->adev;
4303
4304         if (ring->use_doorbell) {
4305                 /* XXX check if swapping is necessary on BE */
4306                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4307                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4308         } else {
4309                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4310                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4311         }
4312 }
4313
4314 static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4315 {
4316         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
4317 }
4318
4319 static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4320 {
4321         u64 wptr;
4322
4323         /* XXX check if swapping is necessary on BE */
4324         if (ring->use_doorbell)
4325                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4326         else
4327                 BUG();
4328         return wptr;
4329 }
4330
4331 static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4332 {
4333         struct amdgpu_device *adev = ring->adev;
4334
4335         /* XXX check if swapping is necessary on BE */
4336         if (ring->use_doorbell) {
4337                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4338                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4339         } else {
4340                 BUG(); /* only DOORBELL method supported on gfx10 now */
4341         }
4342 }
4343
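/*
 * Emit an HDP flush on the ring: pick the per-engine ref/mask bit from the
 * NBIO HDP flush registers based on the ring's ME/pipe and point
 * gfx_v10_0_wait_reg_mem() at the NBIO HDP flush request/done offsets with
 * that ref/mask pair.
 */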
4344 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4345 {
4346         struct amdgpu_device *adev = ring->adev;
4347         u32 ref_and_mask, reg_mem_engine;
4348         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4349
4350         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4351                 switch (ring->me) {
4352                 case 1:
4353                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4354                         break;
4355                 case 2:
4356                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4357                         break;
4358                 default:
4359                         return;
4360                 }
4361                 reg_mem_engine = 0;
4362         } else {
4363                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4364                 reg_mem_engine = 1; /* pfp */
4365         }
4366
4367         gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4368                                adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4369                                adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4370                                ref_and_mask, ref_and_mask, 0x20);
4371 }
4372
4373 static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4374                                        struct amdgpu_job *job,
4375                                        struct amdgpu_ib *ib,
4376                                        uint32_t flags)
4377 {
4378         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4379         u32 header, control = 0;
4380
4381         if (ib->flags & AMDGPU_IB_FLAG_CE)
4382                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
4383         else
4384                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4385
4386         control |= ib->length_dw | (vmid << 24);
4387
4388         if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4389                 control |= INDIRECT_BUFFER_PRE_ENB(1);
4390
4391                 if (flags & AMDGPU_IB_PREEMPTED)
4392                         control |= INDIRECT_BUFFER_PRE_RESUME(1);
4393
4394                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4395                         gfx_v10_0_ring_emit_de_meta(ring,
4396                                     flags & AMDGPU_IB_PREEMPTED);
4397         }
4398
4399         amdgpu_ring_write(ring, header);
4400         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4401         amdgpu_ring_write(ring,
4402 #ifdef __BIG_ENDIAN
4403                 (2 << 0) |
4404 #endif
4405                 lower_32_bits(ib->gpu_addr));
4406         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4407         amdgpu_ring_write(ring, control);
4408 }
4409
4410 static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4411                                            struct amdgpu_job *job,
4412                                            struct amdgpu_ib *ib,
4413                                            uint32_t flags)
4414 {
4415         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4416         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4417
4418         /* Currently there is a high likelihood of a wave ID mismatch
4419          * between ME and GDS, leading to a hw deadlock, because ME generates
4420          * different wave IDs than the GDS expects. This situation happens
4421          * randomly when at least 5 compute pipes use GDS ordered append.
4422          * The wave IDs generated by ME are also wrong after suspend/resume.
4423          * Those are probably bugs somewhere else in the kernel driver.
4424          *
4425          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4426          * GDS to 0 for this ring (me/pipe).
4427          */
4428         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4429                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4430                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4431                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4432         }
4433
4434         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4435         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4436         amdgpu_ring_write(ring,
4437 #ifdef __BIG_ENDIAN
4438                                 (2 << 0) |
4439 #endif
4440                                 lower_32_bits(ib->gpu_addr));
4441         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4442         amdgpu_ring_write(ring, control);
4443 }
4444
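/*
 * Emit a gfx fence: a RELEASE_MEM packet that flushes/invalidates the GL2
 * and GLM caches on a cache-flush-and-invalidate timestamp event, writes
 * the 32-bit or 64-bit sequence number to @addr and optionally raises an
 * interrupt.
 */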
4445 static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4446                                      u64 seq, unsigned flags)
4447 {
4448         struct amdgpu_device *adev = ring->adev;
4449         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4450         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4451
4452         /* Interrupts do not work correctly on the GFX10.1 model yet; use the fallback instead */
4453         if (adev->pdev->device == 0x50)
4454                 int_sel = false;
4455
4456         /* RELEASE_MEM - flush caches, send int */
4457         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4458         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
4459                                  PACKET3_RELEASE_MEM_GCR_GL2_WB |
4460                                  PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
4461                                  PACKET3_RELEASE_MEM_GCR_GLM_WB |
4462                                  PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
4463                                  PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4464                                  PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
4465         amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
4466                                  PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
4467
4468         /*
4469          * The address must be QWord aligned for a 64-bit write, and DWord
4470          * aligned when only the low 32 bits are sent (data high is discarded).
4471          */
4472         if (write64bit)
4473                 BUG_ON(addr & 0x7);
4474         else
4475                 BUG_ON(addr & 0x3);
4476         amdgpu_ring_write(ring, lower_32_bits(addr));
4477         amdgpu_ring_write(ring, upper_32_bits(addr));
4478         amdgpu_ring_write(ring, lower_32_bits(seq));
4479         amdgpu_ring_write(ring, upper_32_bits(seq));
4480         amdgpu_ring_write(ring, 0);
4481 }
4482
4483 static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4484 {
4485         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4486         uint32_t seq = ring->fence_drv.sync_seq;
4487         uint64_t addr = ring->fence_drv.gpu_addr;
4488
4489         gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
4490                                upper_32_bits(addr), seq, 0xffffffff, 4);
4491 }
4492
4493 static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4494                                          unsigned vmid, uint64_t pd_addr)
4495 {
4496         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4497
4498         /* compute doesn't have PFP */
4499         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4500                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4501                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4502                 amdgpu_ring_write(ring, 0x0);
4503         }
4504 }
4505
4506 static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4507                                           u64 seq, unsigned int flags)
4508 {
4509         struct amdgpu_device *adev = ring->adev;
4510
4511         /* we only allocate 32bit for each seq wb address */
4512         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4513
4514         /* write fence seq to the "addr" */
4515         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4516         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4517                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4518         amdgpu_ring_write(ring, lower_32_bits(addr));
4519         amdgpu_ring_write(ring, upper_32_bits(addr));
4520         amdgpu_ring_write(ring, lower_32_bits(seq));
4521
4522         if (flags & AMDGPU_FENCE_FLAG_INT) {
4523                 /* set register to trigger INT */
4524                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4525                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4526                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4527                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4528                 amdgpu_ring_write(ring, 0);
4529                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4530         }
4531 }
4532
4533 static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
4534 {
4535         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4536         amdgpu_ring_write(ring, 0);
4537 }
4538
4539 static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4540 {
4541         uint32_t dw2 = 0;
4542
4543         if (amdgpu_mcbp)
4544                 gfx_v10_0_ring_emit_ce_meta(ring,
4545                                     flags & AMDGPU_IB_PREEMPTED);
4546
4547         gfx_v10_0_ring_emit_tmz(ring, true);
4548
4549         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
4550         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4551                 /* set load_global_config & load_global_uconfig */
4552                 dw2 |= 0x8001;
4553                 /* set load_cs_sh_regs */
4554                 dw2 |= 0x01000000;
4555                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
4556                 dw2 |= 0x10002;
4557
4558                 /* set load_ce_ram if preamble presented */
4559                 /* set load_ce_ram if a preamble is present */
4560                         dw2 |= 0x10000000;
4561         } else {
4562                 /* still set load_ce_ram if a preamble is presented for the
4563                  * first time, even though no context switch happens.
4564                  */
4565                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4566                         dw2 |= 0x10000000;
4567         }
4568
4569         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4570         amdgpu_ring_write(ring, dw2);
4571         amdgpu_ring_write(ring, 0);
4572 }
4573
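/*
 * COND_EXEC patching: gfx_v10_0_ring_emit_init_cond_exec() emits a COND_EXEC
 * packet with a dummy dword count (0x55aa55aa) and returns the ring offset
 * of that dword; gfx_v10_0_ring_emit_patch_cond_exec() later overwrites it
 * with the real number of dwords between the packet and the current write
 * pointer, taking ring wrap-around into account.
 */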
4574 static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4575 {
4576         unsigned ret;
4577
4578         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4579         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4580         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4581         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4582         ret = ring->wptr & ring->buf_mask;
4583         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4584
4585         return ret;
4586 }
4587
4588 static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4589 {
4590         unsigned cur;
4591         BUG_ON(offset > ring->buf_mask);
4592         BUG_ON(ring->ring[offset] != 0x55aa55aa);
4593
4594         cur = (ring->wptr - 1) & ring->buf_mask;
4595         if (likely(cur > offset))
4596                 ring->ring[offset] = cur - offset;
4597         else
4598                 ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
4599 }
4600
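/*
 * Preempt the work currently queued on @ring: clear the preemption cond_exec
 * flag, ask the KIQ to preempt the queue without unmapping it
 * (PREEMPT_QUEUES_NO_UNMAP) while emitting a trailing fence, then poll that
 * trailing fence up to the usec timeout to see whether the CP actually
 * preempted, and finally set the cond_exec flag again.
 */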
4601 static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
4602 {
4603         int i, r = 0;
4604         struct amdgpu_device *adev = ring->adev;
4605         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4606         struct amdgpu_ring *kiq_ring = &kiq->ring;
4607
4608         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
4609                 return -EINVAL;
4610
4611         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size))
4612                 return -ENOMEM;
4613
4614         /* assert preemption condition */
4615         amdgpu_ring_set_preempt_cond_exec(ring, false);
4616
4617         /* assert IB preemption, emit the trailing fence */
4618         kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
4619                                    ring->trail_fence_gpu_addr,
4620                                    ++ring->trail_seq);
4621         amdgpu_ring_commit(kiq_ring);
4622
4623         /* poll the trailing fence */
4624         for (i = 0; i < adev->usec_timeout; i++) {
4625                 if (ring->trail_seq ==
4626                     le32_to_cpu(*(ring->trail_fence_cpu_addr)))
4627                         break;
4628                 udelay(1);
4629         }
4630
4631         if (i >= adev->usec_timeout) {
4632                 r = -EINVAL;
4633                 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
4634         }
4635
4636         /* deassert preemption condition */
4637         amdgpu_ring_set_preempt_cond_exec(ring, true);
4638         return r;
4639 }
4640
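/*
 * gfx_v10_0_ring_emit_ce_meta() and gfx_v10_0_ring_emit_de_meta() below write
 * the CE/DE metadata used for mid-command-buffer preemption into the gfx CSA
 * via WRITE_DATA packets.  On resume the previously saved payload is copied
 * back from the CSA CPU mapping, otherwise a zero-initialized payload is
 * written.
 */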
4641 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
4642 {
4643         struct amdgpu_device *adev = ring->adev;
4644         struct v10_ce_ib_state ce_payload = {0};
4645         uint64_t csa_addr;
4646         int cnt;
4647
4648         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4649         csa_addr = amdgpu_csa_vaddr(ring->adev);
4650
4651         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4652         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4653                                  WRITE_DATA_DST_SEL(8) |
4654                                  WR_CONFIRM) |
4655                                  WRITE_DATA_CACHE_POLICY(0));
4656         amdgpu_ring_write(ring, lower_32_bits(csa_addr +
4657                               offsetof(struct v10_gfx_meta_data, ce_payload)));
4658         amdgpu_ring_write(ring, upper_32_bits(csa_addr +
4659                               offsetof(struct v10_gfx_meta_data, ce_payload)));
4660
4661         if (resume)
4662                 amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
4663                                            offsetof(struct v10_gfx_meta_data,
4664                                                     ce_payload),
4665                                            sizeof(ce_payload) >> 2);
4666         else
4667                 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
4668                                            sizeof(ce_payload) >> 2);
4669 }
4670
4671 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
4672 {
4673         struct amdgpu_device *adev = ring->adev;
4674         struct v10_de_ib_state de_payload = {0};
4675         uint64_t csa_addr, gds_addr;
4676         int cnt;
4677
4678         csa_addr = amdgpu_csa_vaddr(ring->adev);
4679         gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
4680                          PAGE_SIZE);
4681         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4682         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4683
4684         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4685         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4686         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4687                                  WRITE_DATA_DST_SEL(8) |
4688                                  WR_CONFIRM) |
4689                                  WRITE_DATA_CACHE_POLICY(0));
4690         amdgpu_ring_write(ring, lower_32_bits(csa_addr +
4691                               offsetof(struct v10_gfx_meta_data, de_payload)));
4692         amdgpu_ring_write(ring, upper_32_bits(csa_addr +
4693                               offsetof(struct v10_gfx_meta_data, de_payload)));
4694
4695         if (resume)
4696                 amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
4697                                            offsetof(struct v10_gfx_meta_data,
4698                                                     de_payload),
4699                                            sizeof(de_payload) >> 2);
4700         else
4701                 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
4702                                            sizeof(de_payload) >> 2);
4703 }
4704
4705 static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4706 {
4707         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4708         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0: frame_begin, 1: frame_end */
4709 }
4710
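/*
 * Emit a register read on the ring: a COPY_DATA packet that copies the
 * register value into the writeback slot at adev->virt.reg_val_offs, from
 * which the caller fetches the result.
 */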
4711 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4712 {
4713         struct amdgpu_device *adev = ring->adev;
4714
4715         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4716         amdgpu_ring_write(ring, 0 |     /* src: register*/
4717                                 (5 << 8) |      /* dst: memory */
4718                                 (1 << 20));     /* write confirm */
4719         amdgpu_ring_write(ring, reg);
4720         amdgpu_ring_write(ring, 0);
4721         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4722                                 adev->virt.reg_val_offs * 4));
4723         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4724                                 adev->virt.reg_val_offs * 4));
4725 }
4726
4727 static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4728                                    uint32_t val)
4729 {
4730         uint32_t cmd = 0;
4731
4732         switch (ring->funcs->type) {
4733         case AMDGPU_RING_TYPE_GFX:
4734                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4735                 break;
4736         case AMDGPU_RING_TYPE_KIQ:
4737                 cmd = (1 << 16); /* no inc addr */
4738                 break;
4739         default:
4740                 cmd = WR_CONFIRM;
4741                 break;
4742         }
4743         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4744         amdgpu_ring_write(ring, cmd);
4745         amdgpu_ring_write(ring, reg);
4746         amdgpu_ring_write(ring, 0);
4747         amdgpu_ring_write(ring, val);
4748 }
4749
4750 static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4751                                         uint32_t val, uint32_t mask)
4752 {
4753         gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4754 }
4755
4756 static void
4757 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4758                                       uint32_t me, uint32_t pipe,
4759                                       enum amdgpu_interrupt_state state)
4760 {
4761         uint32_t cp_int_cntl, cp_int_cntl_reg;
4762
4763         if (!me) {
4764                 switch (pipe) {
4765                 case 0:
4766                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
4767                         break;
4768                 case 1:
4769                         cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
4770                         break;
4771                 default:
4772                         DRM_DEBUG("invalid pipe %d\n", pipe);
4773                         return;
4774                 }
4775         } else {
4776                 DRM_DEBUG("invalid me %d\n", me);
4777                 return;
4778         }
4779
4780         switch (state) {
4781         case AMDGPU_IRQ_STATE_DISABLE:
4782                 cp_int_cntl = RREG32(cp_int_cntl_reg);
4783                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4784                                             TIME_STAMP_INT_ENABLE, 0);
4785                 WREG32(cp_int_cntl_reg, cp_int_cntl);
4786                 break;
4787         case AMDGPU_IRQ_STATE_ENABLE:
4788                 cp_int_cntl = RREG32(cp_int_cntl_reg);
4789                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4790                                             TIME_STAMP_INT_ENABLE, 1);
4791                 WREG32(cp_int_cntl_reg, cp_int_cntl);
4792                 break;
4793         default:
4794                 break;
4795         }
4796 }
4797
4798 static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4799                                                      int me, int pipe,
4800                                                      enum amdgpu_interrupt_state state)
4801 {
4802         u32 mec_int_cntl, mec_int_cntl_reg;
4803
4804         /*
4805          * amdgpu controls only the first MEC. That's why this function only
4806          * handles the setting of interrupts for this specific MEC. All other
4807          * pipes' interrupts are set by amdkfd.
4808          */
4809
4810         if (me == 1) {
4811                 switch (pipe) {
4812                 case 0:
4813                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4814                         break;
4815                 case 1:
4816                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4817                         break;
4818                 case 2:
4819                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4820                         break;
4821                 case 3:
4822                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4823                         break;
4824                 default:
4825                         DRM_DEBUG("invalid pipe %d\n", pipe);
4826                         return;
4827                 }
4828         } else {
4829                 DRM_DEBUG("invalid me %d\n", me);
4830                 return;
4831         }
4832
4833         switch (state) {
4834         case AMDGPU_IRQ_STATE_DISABLE:
4835                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4836                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4837                                              TIME_STAMP_INT_ENABLE, 0);
4838                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4839                 break;
4840         case AMDGPU_IRQ_STATE_ENABLE:
4841                 mec_int_cntl = RREG32(mec_int_cntl_reg);
4842                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4843                                              TIME_STAMP_INT_ENABLE, 1);
4844                 WREG32(mec_int_cntl_reg, mec_int_cntl);
4845                 break;
4846         default:
4847                 break;
4848         }
4849 }
4850
4851 static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4852                                             struct amdgpu_irq_src *src,
4853                                             unsigned type,
4854                                             enum amdgpu_interrupt_state state)
4855 {
4856         switch (type) {
4857         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
4858                 gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
4859                 break;
4860         case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
4861                 gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
4862                 break;
4863         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4864                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4865                 break;
4866         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4867                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4868                 break;
4869         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4870                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4871                 break;
4872         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4873                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4874                 break;
4875         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4876                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4877                 break;
4878         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4879                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4880                 break;
4881         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4882                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4883                 break;
4884         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4885                 gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4886                 break;
4887         default:
4888                 break;
4889         }
4890         return 0;
4891 }
4892
4893 static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
4894                              struct amdgpu_irq_src *source,
4895                              struct amdgpu_iv_entry *entry)
4896 {
4897         int i;
4898         u8 me_id, pipe_id, queue_id;
4899         struct amdgpu_ring *ring;
4900
4901         DRM_DEBUG("IH: CP EOP\n");
4902         me_id = (entry->ring_id & 0x0c) >> 2;
4903         pipe_id = (entry->ring_id & 0x03) >> 0;
4904         queue_id = (entry->ring_id & 0x70) >> 4;
4905
4906         switch (me_id) {
4907         case 0:
4908                 if (pipe_id == 0)
4909                         amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4910                 else
4911                         amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
4912                 break;
4913         case 1:
4914         case 2:
4915                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4916                         ring = &adev->gfx.compute_ring[i];
4917                         /* Per-queue interrupts are supported for the MEC starting with VI.
4918                          * They can only be enabled/disabled per pipe, not per queue.
4919                          */
4920                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4921                                 amdgpu_fence_process(ring);
4922                 }
4923                 break;
4924         }
4925         return 0;
4926 }
4927
4928 static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4929                                               struct amdgpu_irq_src *source,
4930                                               unsigned type,
4931                                               enum amdgpu_interrupt_state state)
4932 {
4933         switch (state) {
4934         case AMDGPU_IRQ_STATE_DISABLE:
4935         case AMDGPU_IRQ_STATE_ENABLE:
4936                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4937                                PRIV_REG_INT_ENABLE,
4938                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4939                 break;
4940         default:
4941                 break;
4942         }
4943
4944         return 0;
4945 }
4946
4947 static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4948                                                struct amdgpu_irq_src *source,
4949                                                unsigned type,
4950                                                enum amdgpu_interrupt_state state)
4951 {
4952         switch (state) {
4953         case AMDGPU_IRQ_STATE_DISABLE:
4954         case AMDGPU_IRQ_STATE_ENABLE:
4955                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4956                                PRIV_INSTR_INT_ENABLE,
4957                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
4958         default:
4959                 break;
4960         }
4961
4962         return 0;
4963 }
4964
4965 static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
4966                                         struct amdgpu_iv_entry *entry)
4967 {
4968         u8 me_id, pipe_id, queue_id;
4969         struct amdgpu_ring *ring;
4970         int i;
4971
4972         me_id = (entry->ring_id & 0x0c) >> 2;
4973         pipe_id = (entry->ring_id & 0x03) >> 0;
4974         queue_id = (entry->ring_id & 0x70) >> 4;
4975
4976         switch (me_id) {
4977         case 0:
4978                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4979                         ring = &adev->gfx.gfx_ring[i];
4980                         /* we only enabled 1 gfx queue per pipe for now */
4981                         if (ring->me == me_id && ring->pipe == pipe_id)
4982                                 drm_sched_fault(&ring->sched);
4983                 }
4984                 break;
4985         case 1:
4986         case 2:
4987                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4988                         ring = &adev->gfx.compute_ring[i];
4989                         if (ring->me == me_id && ring->pipe == pipe_id &&
4990                             ring->queue == queue_id)
4991                                 drm_sched_fault(&ring->sched);
4992                 }
4993                 break;
4994         default:
4995                 BUG();
4996         }
4997 }
4998
4999 static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
5000                                   struct amdgpu_irq_src *source,
5001                                   struct amdgpu_iv_entry *entry)
5002 {
5003         DRM_ERROR("Illegal register access in command stream\n");
5004         gfx_v10_0_handle_priv_fault(adev, entry);
5005         return 0;
5006 }
5007
5008 static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
5009                                    struct amdgpu_irq_src *source,
5010                                    struct amdgpu_iv_entry *entry)
5011 {
5012         DRM_ERROR("Illegal instruction in command stream\n");
5013         gfx_v10_0_handle_priv_fault(adev, entry);
5014         return 0;
5015 }
5016
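/*
 * Enable or disable the KIQ GENERIC2 interrupt.  It has to be switched both
 * globally in CPC_INT_CNTL and in the per-pipe CP_MEx_PIPEx_INT_CNTL register
 * that matches the KIQ ring's me/pipe.
 */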
5017 static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
5018                                              struct amdgpu_irq_src *src,
5019                                              unsigned int type,
5020                                              enum amdgpu_interrupt_state state)
5021 {
5022         uint32_t tmp, target;
5023         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
5024
5025         if (ring->me == 1)
5026                 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5027         else
5028                 target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
5029         target += ring->pipe;
5030
5031         switch (type) {
5032         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
5033                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
5034                         tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
5035                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
5036                                             GENERIC2_INT_ENABLE, 0);
5037                         WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
5038
5039                         tmp = RREG32(target);
5040                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
5041                                             GENERIC2_INT_ENABLE, 0);
5042                         WREG32(target, tmp);
5043                 } else {
5044                         tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
5045                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
5046                                             GENERIC2_INT_ENABLE, 1);
5047                         WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
5048
5049                         tmp = RREG32(target);
5050                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
5051                                             GENERIC2_INT_ENABLE, 1);
5052                         WREG32(target, tmp);
5053                 }
5054                 break;
5055         default:
5056                 BUG(); /* KIQ only supports GENERIC2_INT now */
5057                 break;
5058         }
5059         return 0;
5060 }
5061
5062 static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
5063                              struct amdgpu_irq_src *source,
5064                              struct amdgpu_iv_entry *entry)
5065 {
5066         u8 me_id, pipe_id, queue_id;
5067         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
5068
5069         me_id = (entry->ring_id & 0x0c) >> 2;
5070         pipe_id = (entry->ring_id & 0x03) >> 0;
5071         queue_id = (entry->ring_id & 0x70) >> 4;
5072         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
5073                    me_id, pipe_id, queue_id);
5074
5075         amdgpu_fence_process(ring);
5076         return 0;
5077 }
5078
5079 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
5080         .name = "gfx_v10_0",
5081         .early_init = gfx_v10_0_early_init,
5082         .late_init = gfx_v10_0_late_init,
5083         .sw_init = gfx_v10_0_sw_init,
5084         .sw_fini = gfx_v10_0_sw_fini,
5085         .hw_init = gfx_v10_0_hw_init,
5086         .hw_fini = gfx_v10_0_hw_fini,
5087         .suspend = gfx_v10_0_suspend,
5088         .resume = gfx_v10_0_resume,
5089         .is_idle = gfx_v10_0_is_idle,
5090         .wait_for_idle = gfx_v10_0_wait_for_idle,
5091         .soft_reset = gfx_v10_0_soft_reset,
5092         .set_clockgating_state = gfx_v10_0_set_clockgating_state,
5093         .set_powergating_state = gfx_v10_0_set_powergating_state,
5094         .get_clockgating_state = gfx_v10_0_get_clockgating_state,
5095 };
5096
5097 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5098         .type = AMDGPU_RING_TYPE_GFX,
5099         .align_mask = 0xff,
5100         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5101         .support_64bit_ptrs = true,
5102         .vmhub = AMDGPU_GFXHUB_0,
5103         .get_rptr = gfx_v10_0_ring_get_rptr_gfx,
5104         .get_wptr = gfx_v10_0_ring_get_wptr_gfx,
5105         .set_wptr = gfx_v10_0_ring_set_wptr_gfx,
5106         .emit_frame_size = /* 242 dwords maximum with 16 IBs */
5107                 5 + /* COND_EXEC */
5108                 7 + /* PIPELINE_SYNC */
5109                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5110                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5111                 2 + /* VM_FLUSH */
5112                 8 + /* FENCE for VM_FLUSH */
5113                 20 + /* GDS switch */
5114                 4 + /* double SWITCH_BUFFER,
5115                      * the first COND_EXEC jumps to the place
5116                      * just prior to this double SWITCH_BUFFER
5117                      */
5118                 5 + /* COND_EXEC */
5119                 7 + /* HDP_flush */
5120                 4 + /* VGT_flush */
5121                 14 + /* CE_META */
5122                 31 + /* DE_META */
5123                 3 + /* CNTX_CTRL */
5124                 5 + /* HDP_INVL */
5125                 8 + 8 + /* FENCE x2 */
5126                 2, /* SWITCH_BUFFER */
5127         .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
5128         .emit_ib = gfx_v10_0_ring_emit_ib_gfx,
5129         .emit_fence = gfx_v10_0_ring_emit_fence,
5130         .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
5131         .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
5132         .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
5133         .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
5134         .test_ring = gfx_v10_0_ring_test_ring,
5135         .test_ib = gfx_v10_0_ring_test_ib,
5136         .insert_nop = amdgpu_ring_insert_nop,
5137         .pad_ib = amdgpu_ring_generic_pad_ib,
5138         .emit_switch_buffer = gfx_v10_0_ring_emit_sb,
5139         .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
5140         .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
5141         .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
5142         .preempt_ib = gfx_v10_0_ring_preempt_ib,
5143         .emit_tmz = gfx_v10_0_ring_emit_tmz,
5144         .emit_wreg = gfx_v10_0_ring_emit_wreg,
5145         .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
5146 };
5147
5148 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
5149         .type = AMDGPU_RING_TYPE_COMPUTE,
5150         .align_mask = 0xff,
5151         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5152         .support_64bit_ptrs = true,
5153         .vmhub = AMDGPU_GFXHUB_0,
5154         .get_rptr = gfx_v10_0_ring_get_rptr_compute,
5155         .get_wptr = gfx_v10_0_ring_get_wptr_compute,
5156         .set_wptr = gfx_v10_0_ring_set_wptr_compute,
5157         .emit_frame_size =
5158                 20 + /* gfx_v10_0_ring_emit_gds_switch */
5159                 7 + /* gfx_v10_0_ring_emit_hdp_flush */
5160                 5 + /* hdp invalidate */
5161                 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
5162                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5163                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5164                 2 + /* gfx_v10_0_ring_emit_vm_flush */
5165                 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
5166         .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
5167         .emit_ib = gfx_v10_0_ring_emit_ib_compute,
5168         .emit_fence = gfx_v10_0_ring_emit_fence,
5169         .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
5170         .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
5171         .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
5172         .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
5173         .test_ring = gfx_v10_0_ring_test_ring,
5174         .test_ib = gfx_v10_0_ring_test_ib,
5175         .insert_nop = amdgpu_ring_insert_nop,
5176         .pad_ib = amdgpu_ring_generic_pad_ib,
5177         .emit_wreg = gfx_v10_0_ring_emit_wreg,
5178         .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
5179 };
5180
5181 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
5182         .type = AMDGPU_RING_TYPE_KIQ,
5183         .align_mask = 0xff,
5184         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
5185         .support_64bit_ptrs = true,
5186         .vmhub = AMDGPU_GFXHUB_0,
5187         .get_rptr = gfx_v10_0_ring_get_rptr_compute,
5188         .get_wptr = gfx_v10_0_ring_get_wptr_compute,
5189         .set_wptr = gfx_v10_0_ring_set_wptr_compute,
5190         .emit_frame_size =
5191                 20 + /* gfx_v10_0_ring_emit_gds_switch */
5192                 7 + /* gfx_v10_0_ring_emit_hdp_flush */
5193                 5 + /* hdp invalidate */
5194                 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
5195                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5196                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5197                 2 + /* gfx_v10_0_ring_emit_vm_flush */
5198                 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5199         .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
5200         .emit_ib = gfx_v10_0_ring_emit_ib_compute,
5201         .emit_fence = gfx_v10_0_ring_emit_fence_kiq,
5202         .test_ring = gfx_v10_0_ring_test_ring,
5203         .test_ib = gfx_v10_0_ring_test_ib,
5204         .insert_nop = amdgpu_ring_insert_nop,
5205         .pad_ib = amdgpu_ring_generic_pad_ib,
5206         .emit_rreg = gfx_v10_0_ring_emit_rreg,
5207         .emit_wreg = gfx_v10_0_ring_emit_wreg,
5208         .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
5209 };
5210
5211 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
5212 {
5213         int i;
5214
5215         adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq;
5216
5217         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5218                 adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
5219
5220         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5221                 adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute;
5222 }
5223
5224 static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = {
5225         .set = gfx_v10_0_set_eop_interrupt_state,
5226         .process = gfx_v10_0_eop_irq,
5227 };
5228
5229 static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
5230         .set = gfx_v10_0_set_priv_reg_fault_state,
5231         .process = gfx_v10_0_priv_reg_irq,
5232 };
5233
5234 static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
5235         .set = gfx_v10_0_set_priv_inst_fault_state,
5236         .process = gfx_v10_0_priv_inst_irq,
5237 };
5238
5239 static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {
5240         .set = gfx_v10_0_kiq_set_interrupt_state,
5241         .process = gfx_v10_0_kiq_irq,
5242 };
5243
5244 static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
5245 {
5246         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5247         adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
5248
5249         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
5250         adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;
5251
5252         adev->gfx.priv_reg_irq.num_types = 1;
5253         adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
5254
5255         adev->gfx.priv_inst_irq.num_types = 1;
5256         adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
5257 }
5258
5259 static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
5260 {
5261         switch (adev->asic_type) {
5262         case CHIP_NAVI10:
5263         case CHIP_NAVI14:
5264         case CHIP_NAVI12:
5265                 adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
5266                 break;
5267         default:
5268                 break;
5269         }
5270 }
5271
5272 static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
5273 {
5274         /* init asic gds info */
5275         switch (adev->asic_type) {
5276         case CHIP_NAVI10:
5277         default:
5278                 adev->gds.gds_size = 0x10000;
5279                 adev->gds.gds_compute_max_wave_id = 0x4ff;
5280                 break;
5281         }
5282
5283         adev->gds.gws_size = 64;
5284         adev->gds.oa_size = 16;
5285 }
5286
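     /*
      * Program the user-requested WGP disable bitmap for the currently
      * selected shader array into GC_USER_SHADER_ARRAY_CONFIG.
      */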
5287 static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
5288                                                           u32 bitmap)
5289 {
5290         u32 data;
5291
5292         if (!bitmap)
5293                 return;
5294
5295         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5296         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5297
5298         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5299 }
5300
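     /*
      * Return the bitmap of active WGPs for the currently selected shader
      * array: the complement of the harvested and user-disabled WGPs.
      * max_cu_per_sh is halved because each WGP pairs two CUs on gfx10.
      */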
5301 static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
5302 {
5303         u32 data, wgp_bitmask;
5304         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5305         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5306
5307         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
5308         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
5309
5310         wgp_bitmask =
5311                 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
5312
5313         return (~data) & wgp_bitmask;
5314 }
5315
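     /* Expand the active WGP bitmap of a shader array into a CU bitmap. */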
5316 static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
5317 {
5318         u32 wgp_idx, wgp_active_bitmap;
5319         u32 cu_bitmap_per_wgp, cu_active_bitmap;
5320
5321         wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
5322         cu_active_bitmap = 0;
5323
5324         for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
5325                 /* each enabled WGP exposes two enabled CUs */
5326                 cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
5327                 if (wgp_active_bitmap & (1 << wgp_idx))
5328                         cu_active_bitmap |= cu_bitmap_per_wgp;
5329         }
5330
5331         return cu_active_bitmap;
5332 }
5333
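     /*
      * Walk every shader engine and shader array, apply any user-requested
      * CU disable masks, and fill in the per-SH CU bitmaps, the always-on
      * CU mask and the total number of active CUs.
      */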
5334 static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
5335                                  struct amdgpu_cu_info *cu_info)
5336 {
5337         int i, j, k, counter, active_cu_number = 0;
5338         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5339         unsigned disable_masks[4 * 2];
5340
5341         if (!adev || !cu_info)
5342                 return -EINVAL;
5343
5344         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5345
5346         mutex_lock(&adev->grbm_idx_mutex);
5347         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5348                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5349                         mask = 1;
5350                         ao_bitmap = 0;
5351                         counter = 0;
5352                         gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
5353                         if (i < 4 && j < 2)
5354                                 gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
5355                                         adev, disable_masks[i * 2 + j]);
5356                         bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
5357                         cu_info->bitmap[i][j] = bitmap;
5358
5359                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
5360                                 if (bitmap & mask) {
5361                                         if (counter < adev->gfx.config.max_cu_per_sh)
5362                                                 ao_bitmap |= mask;
5363                                         counter++;
5364                                 }
5365                                 mask <<= 1;
5366                         }
5367                         active_cu_number += counter;
5368                         if (i < 2 && j < 2)
5369                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5370                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5371                 }
5372         }
5373         gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5374         mutex_unlock(&adev->grbm_idx_mutex);
5375
5376         cu_info->number = active_cu_number;
5377         cu_info->ao_cu_mask = ao_cu_mask;
5378         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5379
5380         return 0;
5381 }
5382
5383 const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
5384 {
5385         .type = AMD_IP_BLOCK_TYPE_GFX,
5386         .major = 10,
5387         .minor = 0,
5388         .rev = 0,
5389         .funcs = &gfx_v10_0_ip_funcs,
5390 };