2 * Copyright 2018 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
26 #include "df/df_3_6_default.h"
27 #include "df/df_3_6_offset.h"
28 #include "df/df_3_6_sh_mask.h"
#define DF_3_6_SMN_REG_INST_DIST	0x8
#define DF_3_6_INST_CNT			8

/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
#define DF_V3_6_MAX_COUNTERS		4

/*
 * Field extractors for a DF perfmon config value: event select in bits 7:0,
 * instance in bits 15:8, unit mask in bits 23:16.
 *
 * The argument is parenthesized so that a compound expression passed as x
 * (for example "a | b") is shifted and masked as a whole instead of being
 * re-grouped by operator precedence.
 */
#define DF_V3_6_GET_EVENT(x)		((x) & 0xFFUL)
#define DF_V3_6_GET_INSTANCE(x)		(((x) >> 8) & 0xFFUL)
#define DF_V3_6_GET_UNITMASK(x)		(((x) >> 16) & 0xFFUL)

/* 48-bit all-ones threshold; pmc_get_count compares counter readings to it */
#define DF_V3_6_PERFMON_OVERFLOW	0xFFFFFFFFFFFFULL
42 static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
43 16, 32, 0, 0, 0, 2, 4, 8};
/*
 * df_v3_6_get_fica - read a 64-bit value through the DF fabric indirect
 * config access registers.
 *
 * While holding pcie_idx_lock with interrupts saved/disabled, ficaa_val is
 * written to FabricIndirectConfigAccessAddress3 through the NBIO PCIE
 * index/data pair, then the DataLo3 and DataHi3 registers are read back
 * through the same pair. The return value is the high word placed in bits
 * 63:32 OR-ed with the low word.
 *
 * NOTE(review): the line declaring ficaa_val is absent from this listing
 * (embedded numbering skips 46-47); df_v3_6_set_fica declares a parameter of
 * the same name as uint32_t - confirm against the full file.
 */
45 static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
48 unsigned long flags, address, data;
49 uint32_t ficadl_val, ficadh_val;
/* register offsets of the PCIE index and data ports used for the indirect access */
51 address = adev->nbio.funcs->get_pcie_index_offset(adev);
52 data = adev->nbio.funcs->get_pcie_data_offset(adev);
/* the whole select/read sequence stays under one lock hold */
54 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
55 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
56 WREG32(data, ficaa_val);
58 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
59 ficadl_val = RREG32(data);
61 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
62 ficadh_val = RREG32(data);
64 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
/* the AND keeps every bit; the 64-bit constant widens ficadh_val before the shift by 32 */
66 return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
69 static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
70 uint32_t ficadl_val, uint32_t ficadh_val)
72 unsigned long flags, address, data;
74 address = adev->nbio.funcs->get_pcie_index_offset(adev);
75 data = adev->nbio.funcs->get_pcie_data_offset(adev);
77 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
78 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
79 WREG32(data, ficaa_val);
81 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
82 WREG32(data, ficadl_val);
84 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
85 WREG32(data, ficadh_val);
87 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
91 * df_v3_6_perfmon_rreg - read perfmon lo and hi
93 * required to be atomic. no mmio method provided so subsequent reads for lo
94 * and hi require to preserve df finite state machine
96 static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
97 uint32_t lo_addr, uint32_t *lo_val,
98 uint32_t hi_addr, uint32_t *hi_val)
100 unsigned long flags, address, data;
102 address = adev->nbio.funcs->get_pcie_index_offset(adev);
103 data = adev->nbio.funcs->get_pcie_data_offset(adev);
105 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
106 WREG32(address, lo_addr);
107 *lo_val = RREG32(data);
108 WREG32(address, hi_addr);
109 *hi_val = RREG32(data);
110 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
114 * df_v3_6_perfmon_wreg - write to perfmon lo and hi
116 * required to be atomic. no mmio method provided so subsequent reads after
117 * data writes cannot occur to preserve data fabrics finite state machine.
119 static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
120 uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
122 unsigned long flags, address, data;
124 address = adev->nbio.funcs->get_pcie_index_offset(adev);
125 data = adev->nbio.funcs->get_pcie_data_offset(adev);
127 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
128 WREG32(address, lo_addr);
129 WREG32(data, lo_val);
130 WREG32(address, hi_addr);
131 WREG32(data, hi_val);
132 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
/*
 * Writes lo_val/hi_val to lo_addr/hi_addr through the PCIE index/data pair,
 * reads both registers back within the same pcie_idx_lock hold, and compares
 * the read-back values with what was written.
 *
 * NOTE(review): the statements following the final comparison are absent
 * from this listing (embedded numbering jumps from 158 to 166), so the
 * actual return values are not visible here. The caller at embedded line
 * 179 treats a zero result as success.
 */
135 /* same as perfmon_wreg but return status on write value check */
136 static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
137 uint32_t lo_addr, uint32_t lo_val,
138 uint32_t hi_addr, uint32_t hi_val)
140 unsigned long flags, address, data;
/* values read back from the two registers after the write */
141 uint32_t lo_val_rb, hi_val_rb;
143 address = adev->nbio.funcs->get_pcie_index_offset(adev);
144 data = adev->nbio.funcs->get_pcie_data_offset(adev);
146 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
/* write phase: low word, then high word */
147 WREG32(address, lo_addr);
148 WREG32(data, lo_val);
149 WREG32(address, hi_addr);
150 WREG32(data, hi_val);
/* read-back phase, still under the same lock hold */
152 WREG32(address, lo_addr);
153 lo_val_rb = RREG32(data);
154 WREG32(address, hi_addr);
155 hi_val_rb = RREG32(data);
156 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
/* taken when either half read back differently from what was written */
158 if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
/*
 * df_v3_6_perfmon_arm_with_retry - arm a counter, retrying on failure.
 *
 * Calls df_v3_6_perfmon_arm_with_status() and, between attempts, subtracts
 * ARM_RETRY_USEC_INTERVAL from a countdown that starts at
 * ARM_RETRY_USEC_TIMEOUT and delays for the same interval. Returns 0 while
 * the countdown is still positive at exit, otherwise -ETIME.
 *
 * NOTE(review): the loop header and the statement executed on a successful
 * arm are absent from this listing (numbering skips 176-178 and 180-182).
 */
166 * retry arming counters every 100 usecs within 1 millisecond interval.
167 * if retry fails after time out, return error.
/* total retry budget and per-attempt delay; both are passed to/compared with udelay() units */
169 #define ARM_RETRY_USEC_TIMEOUT 1000
170 #define ARM_RETRY_USEC_INTERVAL 100
171 static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
172 uint32_t lo_addr, uint32_t lo_val,
173 uint32_t hi_addr, uint32_t hi_val)
175 int countdown = ARM_RETRY_USEC_TIMEOUT;
/* a zero status from arm_with_status means the write was read back intact */
179 if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
/* failed attempt: burn one interval of the budget, then wait that long */
183 countdown -= ARM_RETRY_USEC_INTERVAL;
184 udelay(ARM_RETRY_USEC_INTERVAL);
/* budget exhausted (countdown reached zero) is reported as -ETIME */
187 return countdown > 0 ? 0 : -ETIME;
/*
 * sysfs show callback (wired up by the DEVICE_ATTR that follows): resolves
 * the amdgpu device from dev's driver data, walks the first
 * DF_V3_6_MAX_COUNTERS entries of df_perfmon_config_assign_mask testing each
 * for zero, and emits count as a decimal number followed by a newline.
 *
 * NOTE(review): the buffer parameter line, the local declarations and the
 * statement under the if are absent from this listing; presumably count is
 * initialised to zero and incremented for each zero (unassigned) slot -
 * confirm against the full file.
 */
190 /* get the number of df counters available */
191 static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
192 struct device_attribute *attr,
195 struct amdgpu_device *adev;
196 struct drm_device *ddev;
/* dev -> drm_device (driver data) -> amdgpu_device */
199 ddev = dev_get_drvdata(dev);
200 adev = drm_to_adev(ddev);
203 for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
/* a zero entry is what df_v3_6_pmc_add_cntr treats as a free slot */
204 if (adev->df_perfmon_config_assign_mask[i] == 0)
208 return sysfs_emit(buf, "%i\n", count);
211 /* device attr for available perfmon counters */
212 static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
/*
 * df_v3_6_query_hashes - populate adev->df.hash_status.
 *
 * All three flags (hash_64k, hash_2m, hash_1g) are first cleared. The
 * function then checks for CHIP_ARCTURUS and, when get_fb_channel_number()
 * returns 0xe, fills the flags from fields of DF_CS_UMC_AON0_DfGlobalCtrl.
 *
 * NOTE(review): the declaration of tmp, the statement under the asic_type
 * check and the field-name arguments of each REG_GET_FIELD are absent from
 * this listing; presumably non-Arcturus parts return early with all flags
 * false - confirm.
 */
214 static void df_v3_6_query_hashes(struct amdgpu_device *adev)
/* default: no hashing reported */
218 adev->df.hash_status.hash_64k = false;
219 adev->df.hash_status.hash_2m = false;
220 adev->df.hash_status.hash_1g = false;
222 if (adev->asic_type != CHIP_ARCTURUS)
225 /* encoding for hash-enabled on Arcturus */
226 if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
/* one register read feeds all three field extractions */
227 tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
228 adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
229 DF_CS_UMC_AON0_DfGlobalCtrl,
231 adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
232 DF_CS_UMC_AON0_DfGlobalCtrl,
234 adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
235 DF_CS_UMC_AON0_DfGlobalCtrl,
/*
 * df_v3_6_sw_init - software-side setup, installed as the .sw_init hook.
 *
 * Creates the df_cntr_avail device attribute on adev->dev, zeroes all
 * AMDGPU_MAX_DF_PERFMONS entries of df_perfmon_config_assign_mask, then
 * queries the hash status.
 *
 * NOTE(review): the local declarations and the condition guarding the
 * DRM_ERROR are absent from this listing; presumably the message is only
 * logged when device_create_file() returns non-zero - confirm. The visible
 * lines continue with initialisation after the error message.
 */
241 static void df_v3_6_sw_init(struct amdgpu_device *adev)
245 ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
247 DRM_ERROR("failed to create file for available df counters\n");
/* this loop is bounded by AMDGPU_MAX_DF_PERFMONS, whereas the allocation loops use DF_V3_6_MAX_COUNTERS */
249 for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
250 adev->df_perfmon_config_assign_mask[i] = 0;
252 df_v3_6_query_hashes(adev);
/*
 * df_v3_6_sw_fini - software-side teardown, installed as the .sw_fini hook.
 * Removes the df_cntr_avail device attribute that df_v3_6_sw_init created
 * on adev->dev.
 */
255 static void df_v3_6_sw_fini(struct amdgpu_device *adev)
258 device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
/*
 * df_v3_6_enable_broadcast_mode - switch DF config access between broadcast
 * and per-instance mode, installed as the .enable_broadcast_mode hook.
 *
 * Two register sequences are visible: a read-modify-write that clears
 * CfgRegInstAccEn in FabricConfigAccessControl, and a plain write of the
 * register's _DEFAULT value.
 *
 * NOTE(review): the second parameter, the declaration of tmp and the branch
 * selecting between the two sequences are absent from this listing. Callers
 * pass true before and false after their broadcast accesses; presumably
 * true selects the clear-bit path - confirm.
 */
262 static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
/* sequence 1: clear the per-instance access enable bit, keep the other bits */
268 tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
269 tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
270 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
/* sequence 2: restore the whole register to its default value */
272 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
273 mmFabricConfigAccessControl_DEFAULT);
/*
 * df_v3_6_get_fb_channel_number - extract the IntLvNumChan field of
 * DF_CS_UMC_AON0_DramBaseAddress0, installed as .get_fb_channel_number.
 *
 * The register is read, masked with the field mask and shifted down by the
 * field shift.
 *
 * NOTE(review): the declaration of tmp and the return statement are absent
 * from this listing; presumably the shifted field value is what is returned
 * - confirm.
 */
276 static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
280 tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
281 tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
282 tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
287 static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
289 int fb_channel_number;
291 fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
292 if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
293 fb_channel_number = 0;
295 return df_v3_6_channel_number[fb_channel_number];
/*
 * df_v3_6_update_medium_grain_clock_gating - program the MGCG mode field of
 * DF_PIE_AON0_DfGlobalClkGater, installed as
 * .update_medium_grain_clock_gating.
 *
 * Only acts when cg_flags has AMD_CG_SUPPORT_DF_MGCG. Broadcast mode is
 * entered, the MGCGMode field is cleared and replaced with either
 * DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY or DF_V3_6_MGCG_DISABLE, and broadcast
 * mode is exited again.
 *
 * NOTE(review): the second parameter, the declaration of tmp, the branch
 * choosing between the two sequences and the opening line of each register
 * write are absent from this listing; presumably a boolean enable argument
 * selects the first sequence - confirm.
 */
298 static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
303 if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
304 /* Put DF on broadcast mode */
305 adev->df.funcs->enable_broadcast_mode(adev, true);
/* sequence 1: replace the MGCG mode field with the 15-cycle-delay enable value */
308 tmp = RREG32_SOC15(DF, 0,
309 mmDF_PIE_AON0_DfGlobalClkGater);
310 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
311 tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
313 mmDF_PIE_AON0_DfGlobalClkGater, tmp);
/* sequence 2: replace the MGCG mode field with the disable value */
315 tmp = RREG32_SOC15(DF, 0,
316 mmDF_PIE_AON0_DfGlobalClkGater);
317 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
318 tmp |= DF_V3_6_MGCG_DISABLE;
320 mmDF_PIE_AON0_DfGlobalClkGater, tmp);
323 /* Exit broadcast mode */
324 adev->df.funcs->enable_broadcast_mode(adev, false);
/*
 * df_v3_6_get_clockgating_state - report DF clock-gating state, installed
 * as .get_clockgating_state.
 *
 * Reads DF_PIE_AON0_DfGlobalClkGater and ORs AMD_CG_SUPPORT_DF_MGCG into
 * *flags when any bit of DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY is set in it.
 * Existing bits in *flags are left as they are.
 *
 * NOTE(review): the line declaring the flags parameter and the declaration
 * of tmp are absent from this listing.
 */
328 static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
333 /* AMD_CG_SUPPORT_DF_MGCG */
334 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
335 if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
336 *flags |= AMD_CG_SUPPORT_DF_MGCG;
/*
 * Returns true when the low 24 bits of config equal the whole value stored
 * in df_perfmon_config_assign_mask[counter_idx], i.e. when that slot was
 * assigned to this config by df_v3_6_pmc_add_cntr.
 *
 * NOTE(review): the parameter lines for config and counter_idx are absent
 * from this listing. The comparison is against the full stored value, so
 * any bit stored above bit 23 (for example DEFERRED_ARM_MASK) makes it
 * fail - confirm that this is intended.
 */
339 /* get assigned df perfmon ctr as int */
340 static bool df_v3_6_pmc_has_counter(struct amdgpu_device *adev,
345 return ((config & 0x0FFFFFFUL) ==
346 adev->df_perfmon_config_assign_mask[counter_idx]);
/*
 * Resolves the lo/hi register addresses for a counter slot. When is_ctrl is
 * non-zero the PerfMonCtl (control) pair is selected, otherwise the
 * PerfMonCtr (count) pair. Four register sets are visible, numbered 4
 * through 7.
 *
 * NOTE(review): the parameter lines for config, counter_idx and is_ctrl, the
 * statement under the has_counter check and the case labels of the switch
 * are absent from this listing; presumably the function returns without
 * touching the outputs when the slot is not assigned to config, and
 * counter_idx 0..3 maps to sets 4..7 - confirm. Callers pre-zero the
 * outputs and test them for 0 afterwards.
 */
350 /* get address based on counter assignment */
351 static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
355 uint32_t *lo_base_addr,
356 uint32_t *hi_base_addr)
/* slot must currently be assigned to this config */
358 if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
361 switch (counter_idx) {
364 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
365 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
368 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
369 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
372 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
373 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
376 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
377 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
/*
 * Thin wrapper around df_v3_6_pmc_get_addr with is_ctrl = 0, i.e. it asks
 * for the count (PerfMonCtr) register pair of the slot.
 *
 * NOTE(review): the parameter lines for config and counter_idx and the tail
 * of the call are absent from this listing.
 */
384 /* get read counter address */
385 static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
388 uint32_t *lo_base_addr,
389 uint32_t *hi_base_addr)
391 df_v3_6_pmc_get_addr(adev, config, counter_idx, 0, lo_base_addr,
/*
 * Resolves the control (PerfMonCtl) register pair for a slot and builds the
 * lo/hi values to program into it from config:
 *   lo_val bits 5:0   = event select (config bits 5:0)
 *   lo_val bits 7:6   = instance bits 1:0
 *   lo_val bits 11:8  = unit mask (config bits 19:16)
 *   lo_val bit 22     = set when is_enable is non-zero, cleared otherwise
 *   hi_val bits 3:0   = instance bits 5:2
 *   hi_val bits 30:29 = instance bits 7:6
 * where instance is config bits 15:8.
 *
 * NOTE(review): several parameter lines (config, counter_idx, is_enable,
 * lo_val, hi_val), the tail of the get_addr call and the return statements
 * are absent from this listing; presumably an error is returned after the
 * DRM_ERROR and 0 on success - confirm.
 */
395 /* get control counter settings i.e. address and values to set */
396 static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
399 uint32_t *lo_base_addr,
400 uint32_t *hi_base_addr,
406 uint32_t eventsel, instance, unitmask;
407 uint32_t instance_10, instance_5432, instance_76;
/* is_ctrl = 1: ask for the control register pair */
409 df_v3_6_pmc_get_addr(adev, config, counter_idx, 1, lo_base_addr,
/* a zero address is treated as "slot not resolved" */
412 if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
413 DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
414 *lo_base_addr, *hi_base_addr);
/* narrow the 8-bit extracted fields to the widths the register uses */
418 eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
419 unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
420 instance = DF_V3_6_GET_INSTANCE(config);
/* the 8-bit instance is scattered over three register fields */
422 instance_10 = instance & 0x3;
423 instance_5432 = (instance >> 2) & 0xf;
424 instance_76 = (instance >> 6) & 0x3;
426 *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
427 *lo_val = is_enable ? *lo_val | (1 << 22) : *lo_val & ~(1 << 22);
428 *hi_val = (instance_76 << 29) | instance_5432;
430 DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
431 config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);
/*
 * Scans the first DF_V3_6_MAX_COUNTERS slots of
 * df_perfmon_config_assign_mask for a zero (free) entry and stores the low
 * 24 bits of config in it.
 *
 * NOTE(review): the config parameter line, the declaration of i and the
 * return statements are absent from this listing; the header comment of
 * df_v3_6_pmc_start says the available counter is returned, so presumably
 * the slot index is returned on success and an error when every slot is
 * taken - confirm.
 */
436 /* add df performance counters for read */
437 static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
442 for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
443 if (adev->df_perfmon_config_assign_mask[i] == 0U) {
444 adev->df_perfmon_config_assign_mask[i] =
445 config & 0x0FFFFFFUL;
/*
 * Flag bit (bit 31) kept in a df_perfmon_config_assign_mask entry to mark a
 * counter whose arming was deferred. Defined as an unsigned 64-bit constant:
 * "1 << 31" shifts into the sign bit of int and sign-extends when combined
 * with a 64-bit value, which would set or test bits 63:32 as well.
 */
#define DEFERRED_ARM_MASK	(1ULL << 31)
/*
 * df_v3_6_pmc_set_deferred - set or clear a flag in the slot's
 * df_perfmon_config_assign_mask entry. One visible path ORs a value into
 * the entry and the other ANDs a value into it; the slot must pass
 * df_v3_6_pmc_has_counter first.
 *
 * NOTE(review): the last parameter, the statement under the has_counter
 * check, the branch condition, both right-hand operands and the return
 * statements are absent from this listing; presumably the operands are
 * DEFERRED_ARM_MASK and its complement - confirm.
 *
 * NOTE(review): the declared parameter order is (adev, counter_idx, config,
 * ...), but the visible call at embedded line 613 passes
 * (adev, config, counter_idx, ...), and the call at embedded line 539 also
 * passes config second. That looks like swapped arguments - verify against
 * the full file.
 */
454 static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
455 int counter_idx, uint64_t config,
459 if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
463 adev->df_perfmon_config_assign_mask[counter_idx] |=
466 adev->df_perfmon_config_assign_mask[counter_idx] &=
/*
 * Returns true when the slot passes df_v3_6_pmc_has_counter for config AND
 * its stored value has a DEFERRED_ARM_MASK bit set.
 *
 * NOTE(review): the parameter lines for config and counter_idx are absent
 * from this listing.
 *
 * NOTE(review): df_v3_6_pmc_has_counter requires the stored value to equal
 * config & 0x0FFFFFF (bits 23:0 only), while the second condition requires
 * a bit at position 31 or above in the same stored value. As written the
 * two conditions look mutually exclusive, so this would always be false -
 * confirm.
 */
472 static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
476 return (df_v3_6_pmc_has_counter(adev, config, counter_idx) &&
477 (adev->df_perfmon_config_assign_mask[counter_idx]
478 & DEFERRED_ARM_MASK));
/*
 * Frees a counter slot: when the slot passes df_v3_6_pmc_has_counter for
 * config, its df_perfmon_config_assign_mask entry is reset to zero, which
 * is the value df_v3_6_pmc_add_cntr treats as free. Otherwise nothing is
 * changed.
 *
 * NOTE(review): the parameter lines for config and counter_idx are absent
 * from this listing.
 */
482 /* release performance counter */
483 static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
487 if (df_v3_6_pmc_has_counter(adev, config, counter_idx))
488 adev->df_perfmon_config_assign_mask[counter_idx] = 0ULL;
/*
 * df_v3_6_reset_perfmon_cntr - zero a slot's count registers.
 *
 * Looks up the count (PerfMonCtr) register pair for the slot and writes 0
 * to both halves via df_v3_6_perfmon_wreg.
 *
 * NOTE(review): the parameter lines for config and counter_idx, the tail of
 * the get_read_settings call and the statement under the zero-address check
 * are absent from this listing; presumably the function returns without
 * writing when either address is still 0 - confirm.
 */
492 static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
/* pre-zeroed so an unresolved slot is detectable below */
496 uint32_t lo_base_addr = 0, hi_base_addr = 0;
498 df_v3_6_pmc_get_read_settings(adev, config, counter_idx, &lo_base_addr,
501 if ((lo_base_addr == 0) || (hi_base_addr == 0))
504 df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
/*
 * df_v3_6_pmc_start - installed as the .pmc_start hook.
 *
 * Visible steps, selected by asic_type: one path returns the result of
 * df_v3_6_pmc_add_cntr; another builds the control settings, arms the
 * counter with df_v3_6_perfmon_arm_with_retry (result kept in err) and
 * calls df_v3_6_pmc_set_deferred (result kept in ret).
 *
 * NOTE(review): the case labels, the is_add test, all call argument tails,
 * the error checks and the final return are absent from this listing, so
 * the exact control flow between these steps cannot be stated from here.
 *
 * NOTE(review): the set_deferred call passes config as the second argument,
 * while its definition declares counter_idx second - verify.
 */
507 /* return available counter if is_add == 1 otherwise return error status. */
508 static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
509 int counter_idx, int is_add)
511 uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
512 int err = 0, ret = 0;
514 switch (adev->asic_type) {
518 return df_v3_6_pmc_add_cntr(adev, config);
520 ret = df_v3_6_pmc_get_ctrl_settings(adev,
532 err = df_v3_6_perfmon_arm_with_retry(adev,
539 ret = df_v3_6_pmc_set_deferred(adev, config,
/*
 * df_v3_6_pmc_stop - installed as the .pmc_stop hook.
 *
 * Visible steps, selected by asic_type: build the control settings for the
 * slot, write them to the control register pair with df_v3_6_perfmon_wreg,
 * then zero the count registers (df_v3_6_reset_perfmon_cntr) and free the
 * slot (df_v3_6_pmc_release_cntr).
 *
 * NOTE(review): the declaration of ret, the case labels, the argument tail
 * of get_ctrl_settings, the error check, any is_remove condition around the
 * reset/release pair and the return statement are absent from this listing;
 * presumably reset/release only run when is_remove is set - confirm.
 */
550 static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
551 int counter_idx, int is_remove)
553 uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
556 switch (adev->asic_type) {
559 ret = df_v3_6_pmc_get_ctrl_settings(adev,
571 df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
572 hi_base_addr, hi_val);
575 df_v3_6_reset_perfmon_cntr(adev, config, counter_idx);
576 df_v3_6_pmc_release_cntr(adev, config, counter_idx);
/*
 * df_v3_6_pmc_get_count - installed as the .pmc_get_count hook.
 *
 * Visible steps, selected by asic_type: resolve the count register pair for
 * the slot; if the slot is flagged as deferred, try to arm it with
 * df_v3_6_perfmon_arm_with_status and update the deferred flag; read the
 * lo/hi count registers; combine them into *count as (hi << 32) | lo;
 * compare *count with DF_V3_6_PERFMON_OVERFLOW; emit a debug trace.
 *
 * NOTE(review): the parameter lines for config, counter_idx and count, the
 * case labels, the statements under each visible if and the handling of
 * rearm_err are absent from this listing; presumably the function returns
 * early on an unresolved address or failed re-arm and zeroes *count on
 * overflow - confirm.
 */
587 static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
/* addresses are pre-zeroed so an unresolved slot is detectable below */
592 uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
595 switch (adev->asic_type) {
598 df_v3_6_pmc_get_read_settings(adev, config, counter_idx,
599 &lo_base_addr, &hi_base_addr);
601 if ((lo_base_addr == 0) || (hi_base_addr == 0))
604 /* rearm the counter or throw away count value on failure */
605 if (df_v3_6_pmc_is_deferred(adev, config, counter_idx)) {
/* NOTE(review): as far as the visible lines show, lo_val/hi_val still hold their 0 initialisers here and the addresses are the count (not control) pair - confirm intent */
606 int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
607 lo_base_addr, lo_val,
608 hi_base_addr, hi_val);
/* NOTE(review): arguments are (config, counter_idx) but df_v3_6_pmc_set_deferred declares (counter_idx, config) - verify */
613 df_v3_6_pmc_set_deferred(adev, config, counter_idx,
617 df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
618 hi_base_addr, &hi_val);
/* "| 0ULL" widens each 32-bit half to 64 bits before the shift and the OR */
620 *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
622 if (*count >= DF_V3_6_PERFMON_OVERFLOW)
625 DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
626 config, lo_base_addr, hi_base_addr, lo_val, hi_val);
/*
 * DF v3.6 implementation of the amdgpu_df_funcs hook table. It is the only
 * non-static symbol in this listing; every hook points at a static function
 * defined above. Code in this file also reaches some of these hooks through
 * adev->df.funcs (get_fb_channel_number, enable_broadcast_mode).
 *
 * NOTE(review): the closing line of the initializer is absent from this
 * listing.
 */
634 const struct amdgpu_df_funcs df_v3_6_funcs = {
635 .sw_init = df_v3_6_sw_init,
636 .sw_fini = df_v3_6_sw_fini,
637 .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
638 .get_fb_channel_number = df_v3_6_get_fb_channel_number,
639 .get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
640 .update_medium_grain_clock_gating =
641 df_v3_6_update_medium_grain_clock_gating,
642 .get_clockgating_state = df_v3_6_get_clockgating_state,
/* perfmon counter hooks */
643 .pmc_start = df_v3_6_pmc_start,
644 .pmc_stop = df_v3_6_pmc_stop,
645 .pmc_get_count = df_v3_6_pmc_get_count,
/* fabric indirect config access hooks */
646 .get_fica = df_v3_6_get_fica,
647 .set_fica = df_v3_6_set_fica,