linux.git: drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "umc_v6_7.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"

#include "umc/umc_6_7_0_offset.h"
#include "umc/umc_6_7_0_sh_mask.h"

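/*
 * Channel index tables: map a (umc_inst, ch_inst) pair, flattened as
 * umc_inst * UMC_V6_7_CHANNEL_INSTANCE_NUM + ch_inst, to the physical
 * channel index used during address translation. Two layouts are
 * provided; which one applies is presumably board/revision dependent
 * and is selected elsewhere via adev->umc.channel_idx_tbl.
 */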
const uint32_t
	umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13}
};
const uint32_t
	umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM] = {
		{19, 11, 15, 7, 3, 27, 31, 23},
		{9, 1, 5, 29, 25, 17, 21, 13},
		{28, 20, 24, 16, 12, 4, 8, 0},
		{6, 30, 2, 26, 22, 14, 18, 10},
};

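/*
 * Compute the register offset for a given UMC/channel pair. The register
 * aperture groups channels four to a UMC die, so the flattened index is
 * re-split by 4 before scaling. Worked example (assuming
 * adev->umc.channel_inst_num == UMC_V6_7_CHANNEL_INSTANCE_NUM == 8):
 * umc_inst=1, ch_inst=2 gives index = 1 * 8 + 2 = 10, i.e. die 10 / 4 = 2,
 * channel 10 % 4 = 2, so the returned offset is
 * channel_offs * 2 + UMC_V6_7_INST_DIST * 2.
 */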
static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev,
					      uint32_t umc_inst,
					      uint32_t ch_inst)
{
	uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst;

	/* adjust umc and channel index offset,
	 * the register address is not linear on each umc instance */
	umc_inst = index / 4;
	ch_inst = index % 4;

	return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst;
}

static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev,
					      uint32_t umc_inst,
					      uint32_t ch_inst)
{
	return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
}

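/*
 * Dump the raw MCA registers (STATUS, IPID, SYND, MISC0) for one channel
 * to the kernel log so the error can be decoded offline. Only non-zero
 * register values are printed.
 */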
static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
						  uint64_t mc_umc_status, uint32_t umc_reg_offset)
{
	uint32_t mc_umc_addr;
	uint64_t reg_value;

	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
		dev_info(adev->dev, "Deferred error, no user action is needed.\n");

	if (mc_umc_status)
		dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);

	/* print IPID register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print SYND register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);

	/* print MISC0 register value */
	mc_umc_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
	reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
	if (reg_value)
		dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
}

static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_inst, uint32_t ch_inst,
						   unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	/* check for SRAM correctable error,
	 * MCUMC_STATUS is a 64-bit register */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

static void umc_v6_7_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev,
							  uint32_t umc_inst, uint32_t ch_inst,
							  unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t eccinfo_table_idx;
	uint32_t umc_reg_offset;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	umc_reg_offset = get_umc_v6_7_reg_offset(adev,
						umc_inst, ch_inst);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	/* check the MCUMC_STATUS */
	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_v6_7_ecc_info_query_correctable_error_count(adev,
						      umc_inst, ch_inst,
						      &(err_data->ce_count));
		umc_v6_7_ecc_info_query_uncorrectable_error_count(adev,
						      umc_inst, ch_inst,
						      &(err_data->ue_count));
	}
}

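/*
 * Translate a UMC normalized address into retirable soc physical
 * addresses. The translation below has three parts (8KB block, hashed
 * 256B channel slot, offset within the 256B block); since column bits
 * [C4 C3 C2] and row bit R14 of the physical address are not recoverable
 * from the normalized address, every combination of them is recorded as
 * a potentially bad page. This variant takes the MCA data from the
 * pre-populated ecc info table (ras->umc_ecc) rather than reading the
 * registers directly.
 */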
static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
	uint32_t channel_index;
	uint32_t eccinfo_table_idx;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
	channel_index =
		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;

	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr)
		return;

	/* calculate error address if ue/ce error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {

		err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		/* translate umc channel address to soc pa, 3 parts are included */
		soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
				ADDR_OF_256B_BLOCK(channel_index) |
				OFFSET_IN_256B_BLOCK(err_addr);

		/* The umc channel bits are not original values, they are hashed */
		SET_CHANNEL_HASH(channel_index, soc_pa);

		/* clear [C4 C3 C2] in soc physical address */
		soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);

		/* we only save ue error information currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			/* loop for all possibilities of [C4 C3 C2] */
			for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
				retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);

				/* flip R14 bit */
				retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
			}
		}
	}
}

static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection
	 * when firmware interface is ready */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_v6_7_ecc_info_query_error_address(adev,
					     err_data,
					     ch_inst,
					     umc_inst);
	}
}

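/*
 * Count correctable errors on one channel: read the EccErrCnt counter
 * for both chip selects (EccErrCntCsSel 0 and 1), subtracting the
 * UMC_V6_7_CE_CNT_INIT baseline the counter is reset to, then check
 * MCA_UMC_UMC0_MCUMC_STATUST0 for an SRAM correctable error.
 */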
static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset,
						   unsigned long *error_count)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	/* UMC 6_7_0 registers */
	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccErrCnt);
	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip and check the error count */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_7_CE_CNT_INIT);

	/* check for SRAM correctable error,
	 * MCUMC_STATUS is a 64-bit register */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

static void umc_v6_7_query_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_reg_offset,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);

	/* check the MCUMC_STATUS */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
		*error_count += 1;

		umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
	}
}

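/*
 * Re-arm the error counters on one channel by writing UMC_V6_7_CE_CNT_INIT
 * back into EccErrCnt for both chip selects, so the next query sees only
 * newly accumulated errors.
 */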
static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_addr;
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

	ecc_err_cnt_sel_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCntSel);
	ecc_err_cnt_addr =
		SOC15_REG_OFFSET(UMC, 0,
				regUMCCH0_0_EccErrCnt);

	/* select the lower chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
				       umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear lower chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);

	/* select the higher chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear higher chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_7_CE_CNT_INIT);
}

static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
{
	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);

		umc_v6_7_reset_error_count_per_channel(adev,
						       umc_reg_offset);
	}
}

static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);
		umc_v6_7_query_correctable_error_count(adev,
						       umc_reg_offset,
						       &(err_data->ce_count));
		umc_v6_7_query_uncorrectable_error_count(adev,
							  umc_reg_offset,
							  &(err_data->ue_count));
	}

	umc_v6_7_reset_error_count(adev);
}

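/*
 * Register-based variant of the error address query: same normalized
 * address to soc physical address translation as the ecc_info path above,
 * but the MCA STATUS/ADDR registers are read through the PCIE aperture
 * and the status register is cleared once the channel has been handled.
 */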
static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
					 struct ras_err_data *err_data,
					 uint32_t umc_reg_offset,
					 uint32_t ch_inst,
					 uint32_t umc_inst)
{
	uint32_t mc_umc_status_addr;
	uint32_t channel_index;
	uint64_t mc_umc_status, mc_umc_addrt0;
	uint64_t err_addr, soc_pa, retired_page, column;

	mc_umc_status_addr =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
	mc_umc_addrt0 =
		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);

	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);

	if (mc_umc_status == 0)
		return;

	if (!err_data->err_addr) {
		/* clear umc status */
		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
		return;
	}

	channel_index =
		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

	/* calculate error address if ue/ce error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {

		err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);

		/* translate umc channel address to soc pa, 3 parts are included */
		soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
				ADDR_OF_256B_BLOCK(channel_index) |
				OFFSET_IN_256B_BLOCK(err_addr);

		/* The umc channel bits are not original values, they are hashed */
		SET_CHANNEL_HASH(channel_index, soc_pa);

		/* clear [C4 C3 C2] in soc physical address */
		soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);

		/* we only save ue error information currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			/* loop for all possibilities of [C4 C3 C2] */
			for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
				retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);

				/* flip R14 bit */
				retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
				dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
				amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
			}
		}
	}

	/* clear umc status */
	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}

static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

	uint32_t umc_inst        = 0;
	uint32_t ch_inst         = 0;
	uint32_t umc_reg_offset  = 0;

	/* TODO: driver needs to toggle DF Cstate to ensure
	 * safe access of UMC registers. Will add the protection
	 * when firmware interface is ready */
	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
							 umc_inst,
							 ch_inst);
		umc_v6_7_query_error_address(adev,
					     err_data,
					     umc_reg_offset,
					     ch_inst,
					     umc_inst);
	}
}

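/*
 * Poison mode query: UCFatalEn in UMCCH0_0_EccCtrl selects fatal-error
 * behaviour, so the device is considered to be in poison mode when the
 * bit is clear on umc instance0 channel0.
 */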
static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
						struct amdgpu_device *adev,
						uint32_t umc_reg_offset)
{
	uint32_t ecc_ctrl_addr, ecc_ctrl;

	ecc_ctrl_addr =
		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_EccCtrl);
	ecc_ctrl = RREG32_PCIE((ecc_ctrl_addr +
					umc_reg_offset) * 4);

	return REG_GET_FIELD(ecc_ctrl, UMCCH0_0_EccCtrl, UCFatalEn);
}

static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev)
{
	uint32_t umc_reg_offset  = 0;

	/* Enabling fatal error in umc instance0 channel0 will be
	 * considered as fatal error mode
	 */
	umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0);
	return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset);
}

const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = {
	.query_ras_error_count = umc_v6_7_query_ras_error_count,
	.query_ras_error_address = umc_v6_7_query_ras_error_address,
};

struct amdgpu_umc_ras umc_v6_7_ras = {
	.ras_block = {
		.hw_ops = &umc_v6_7_ras_hw_ops,
	},
	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
	.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
	.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
};