// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/pm_runtime.h>
#include <linux/string_helpers.h>

#include "gem/i915_gem_region.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_vgpu.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_pcode.h"
#include "intel_rc6.h"

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, drawing as little as 0V while in this stage.
 * This stage is entered automatically when the GPU is idle and RC6 support
 * is enabled, and the GPU wakes up automatically as soon as a new workload
 * arrives.
 *
 * There are different RC6 modes available in Intel GPUs, which differ in
 * the latency required to enter and leave RC6, and in the voltage consumed
 * by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to
 * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
 * the one which brings the most power savings; deeper states save more
 * power, but require higher latency to switch to and wake up.
 */
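
/*
 * Illustrative sketch only (not driver logic): the allowed states are
 * requested by OR-ing enable flags into the mask later written to
 * GEN6_RC_CONTROL. For example, a hypothetical platform supporting deep
 * RC6 might build:
 *
 *      u32 ctl = GEN6_RC_CTL_HW_ENABLE |
 *                GEN6_RC_CTL_RC6_ENABLE |
 *                GEN6_RC_CTL_RC6p_ENABLE;
 *
 * The real per-platform selection happens in the *_rc6_enable() helpers
 * below, e.g. gen6_rc6_enable() checks HAS_RC6p()/HAS_RC6pp().
 */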

static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
{
        return container_of(rc6, struct intel_gt, rc6);
}

static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->uncore;
}

static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
{
        return rc6_to_gt(rc)->i915;
}

static void gen11_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_gt *gt = rc6_to_gt(rc6);
        struct intel_uncore *uncore = gt->uncore;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 pg_enable;
        int i;

        /*
         * With GuCRC, these parameters are set by GuC
         */
        if (!intel_uc_uses_guc_rc(&gt->uc)) {
                /* 2b: Program RC6 thresholds. */
                intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
                intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);

                intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
                intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
                for_each_engine(engine, rc6_to_gt(rc6), id)
                        intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

                intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);

                intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);

                intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
        }

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup
         * latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it under 10us for Icelake, similar to
         * Broadwell+. To be conservative, we want to factor in a context
         * switch on top (due to ksoftirqd).
         */
        intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
        intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
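
        /*
         * Worked example (editorial note): assuming these hysteresis
         * registers count in 1280ns units as per the Bspec guidance above,
         * 60 units is roughly 76.8us, comfortably above the ~10us CS
         * interrupt service latency plus a context switch.
         */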

        /*
         * 3a: Enable RC6
         *
         * With GuCRC, we do not enable bit 31 of RC_CTL,
         * thus allowing GuC to control RC6 entry/exit fully instead.
         * We will not set the HW ENABLE and EI bits
         */
        if (!intel_guc_rc_enable(&gt->uc.guc))
                rc6->ctl_enable = GEN6_RC_CTL_RC6_ENABLE;
        else
                rc6->ctl_enable =
                        GEN6_RC_CTL_HW_ENABLE |
                        GEN6_RC_CTL_RC6_ENABLE |
                        GEN6_RC_CTL_EI_MODE(1);

        /*
         * BSpec 52698 - Render powergating must be off.
         * FIXME BSpec is outdated, disabling powergating for MTL is just
         * temporary wa and should be removed after fixing real cause
         * of forcewake timeouts.
         */
        if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
                pg_enable =
                        GEN9_MEDIA_PG_ENABLE |
                        GEN11_MEDIA_SAMPLER_PG_ENABLE;
        else
                pg_enable =
                        GEN9_RENDER_PG_ENABLE |
                        GEN9_MEDIA_PG_ENABLE |
                        GEN11_MEDIA_SAMPLER_PG_ENABLE;

        if (GRAPHICS_VER(gt->i915) >= 12) {
                for (i = 0; i < I915_MAX_VCS; i++)
                        if (HAS_ENGINE(gt, _VCS(i)))
                                pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
                                              VDN_MFX_POWERGATE_ENABLE(i));
        }

        intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
}

static void gen9_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds. */
        if (GRAPHICS_VER(rc6_to_i915(rc6)) >= 11) {
                intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
                intel_uncore_write_fw(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
        } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
                /*
                 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
                 * when CPG is enabled
                 */
                intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
        } else {
                intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
        }

        intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        intel_uncore_write_fw(uncore, GUC_MAX_IDLE_COUNT, 0xA);

        intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);

        /*
         * 2c: Program Coarse Power Gating Policies.
         *
         * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
         * use instead is a more conservative estimate for the maximum time
         * it takes us to service a CS interrupt and submit a new ELSP - that
         * is the time which the GPU is idle waiting for the CPU to select the
         * next request to execute. If the idle hysteresis is less than that
         * interrupt service latency, the hardware will automatically gate
         * the power well and we will then incur the wake up cost on top of
         * the service latency. A similar guide from plane_state is that we
         * do not want the enable hysteresis to be less than the wakeup
         * latency.
         *
         * igt/gem_exec_nop/sequential provides a rough estimate for the
         * service latency, and puts it around 10us for Broadwell (and other
         * big core) and around 40us for Broxton (and other low power cores).
         * [Note that for legacy ringbuffer submission, this is less than 1us!]
         * However, the wakeup latency on Broxton is closer to 100us. To be
         * conservative, we have to factor in a context switch on top (due
         * to ksoftirqd).
         */
        intel_uncore_write_fw(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
        intel_uncore_write_fw(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
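
        /*
         * Worked example (editorial note): in the same assumed 1280ns units,
         * 250 units is ~320us, sized above Broxton's ~100us wakeup latency
         * plus its ~40us service latency and a context switch.
         */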

        /* 3a: Enable RC6 */
        intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */

        rc6->ctl_enable =
                GEN6_RC_CTL_HW_ENABLE |
                GEN6_RC_CTL_RC6_ENABLE |
                GEN6_RC_CTL_EI_MODE(1);

        /*
         * WaRsDisableCoarsePowerGating:skl,cnl
         *   - Render/Media PG need to be disabled with RC6.
         */
        if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
                intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
                                      GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}

static void gen8_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2b: Program RC6 thresholds. */
        intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
        for_each_engine(engine, rc6_to_gt(rc6), id)
                intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
        intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */

        /* 3: Enable RC6 */
        rc6->ctl_enable =
            GEN6_RC_CTL_HW_ENABLE |
            GEN7_RC_CTL_TO_MODE |
            GEN6_RC_CTL_RC6_ENABLE;
}

static void gen6_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
        u32 rc6vids, rc6_mask;
        int ret;

        intel_uncore_write_fw(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
        intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
        intel_uncore_write_fw(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
        intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);
        intel_uncore_write_fw(uncore, GEN6_RC1e_THRESHOLD, 1000);
        intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 50000);
        intel_uncore_write_fw(uncore, GEN6_RC6p_THRESHOLD, 150000);
        intel_uncore_write_fw(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */

        /* We don't use those on Haswell */
        rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
        if (HAS_RC6p(i915))
                rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
        if (HAS_RC6pp(i915))
                rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
        rc6->ctl_enable =
            rc6_mask |
            GEN6_RC_CTL_EI_MODE(1) |
            GEN6_RC_CTL_HW_ENABLE;

        rc6vids = 0;
        ret = snb_pcode_read(rc6_to_gt(rc6)->uncore, GEN6_PCODE_READ_RC6VIDS, &rc6vids, NULL);
        if (GRAPHICS_VER(i915) == 6 && ret) {
                drm_dbg(&i915->drm, "Couldn't check for BIOS workaround\n");
        } else if (GRAPHICS_VER(i915) == 6 &&
                   (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
                drm_dbg(&i915->drm,
                        "You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
                        GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
                rc6vids &= 0xffff00;
                rc6vids |= GEN6_ENCODE_RC6_VID(450);
                ret = snb_pcode_write(rc6_to_gt(rc6)->uncore, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
                if (ret)
                        drm_err(&i915->drm,
                                "Couldn't fix incorrect rc6 voltage\n");
        }
}
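
/*
 * Editorial sketch of the RC6 VID fixup above, assuming the usual encoding
 * where GEN6_DECODE_RC6_VID(vid) is vid * 5 + 245 (in mV) and
 * GEN6_ENCODE_RC6_VID(mv) is its inverse:
 *
 *      decode(0x1f) = 31 * 5 + 245 = 400mV -> below 450mV, gets corrected
 *      encode(450)  = (450 - 245) / 5 = 41 -> written back to pcode
 *
 * The exact macro definitions live in the register headers; the arithmetic
 * here is illustrative only.
 */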

/* Check that the pcbr address is not empty. */
static int chv_rc6_init(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        resource_size_t pctx_paddr, paddr;
        resource_size_t pctx_size = 32 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
                drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");
                paddr = i915->dsm.stolen.end + 1 - pctx_size;
                GEM_BUG_ON(paddr > U32_MAX);

                pctx_paddr = (paddr & ~4095);
                intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
        }

        return 0;
}

static int vlv_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_gem_object *pctx;
        resource_size_t pctx_paddr;
        resource_size_t pctx_size = 24 * SZ_1K;
        u32 pcbr;

        pcbr = intel_uncore_read(uncore, VLV_PCBR);
        if (pcbr) {
                /* BIOS set it up already, grab the pre-alloc'd space */
                resource_size_t pcbr_offset;

                pcbr_offset = (pcbr & ~4095) - i915->dsm.stolen.start;
                pctx = i915_gem_object_create_region_at(i915->mm.stolen_region,
                                                        pcbr_offset,
                                                        pctx_size,
                                                        0);
                if (IS_ERR(pctx))
                        return PTR_ERR(pctx);

                goto out;
        }

        drm_dbg(&i915->drm, "BIOS didn't set up PCBR, fixing up\n");

        /*
         * From the Gunit register HAS:
         * The Gfx driver is expected to program this register and ensure
         * proper allocation within Gfx stolen memory.  For example, this
         * register should be programmed such that the PCBR range does not
         * overlap with other ranges, such as the frame buffer, protected
         * memory, or any other relevant ranges.
         */
        pctx = i915_gem_object_create_stolen(i915, pctx_size);
        if (IS_ERR(pctx)) {
                drm_dbg(&i915->drm,
                        "not enough stolen space for PCTX, disabling\n");
                return PTR_ERR(pctx);
        }

        GEM_BUG_ON(range_overflows_end_t(u64,
                                         i915->dsm.stolen.start,
                                         pctx->stolen->start,
                                         U32_MAX));
        pctx_paddr = i915->dsm.stolen.start + pctx->stolen->start;
        intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);

out:
        rc6->pctx = pctx;
        return 0;
}

static void chv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        /* 2a: Program RC6 thresholds. */
        intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
        intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */

        for_each_engine(engine, rc6_to_gt(rc6), id)
                intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
        intel_uncore_write_fw(uncore, GEN6_RC_SLEEP, 0);

        /* TO threshold set to 500 us (0x186 * 1.28 us) */
        intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x186);

        /* Allows RC6 residency counter to work */
        intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
                              _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                                                 VLV_MEDIA_RC6_COUNT_EN |
                                                 VLV_RENDER_RC6_COUNT_EN));
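
        /*
         * Editorial note: VLV_COUNTER_CONTROL is a masked register, so
         * assuming the usual i915 helper semantics, _MASKED_BIT_ENABLE(x)
         * expands to (x) << 16 | (x): the high 16 bits select which bits
         * the write may touch, the low 16 bits carry their new values.
         */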

        /* 3: Enable RC6 */
        rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
}

static void vlv_rc6_enable(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        intel_uncore_write_fw(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
        intel_uncore_write_fw(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
        intel_uncore_write_fw(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);

        for_each_engine(engine, rc6_to_gt(rc6), id)
                intel_uncore_write_fw(uncore, RING_MAX_IDLE(engine->mmio_base), 10);

        intel_uncore_write_fw(uncore, GEN6_RC6_THRESHOLD, 0x557);

        /* Allows RC6 residency counter to work */
        intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
                              _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
                                                 VLV_MEDIA_RC0_COUNT_EN |
                                                 VLV_RENDER_RC0_COUNT_EN |
                                                 VLV_MEDIA_RC6_COUNT_EN |
                                                 VLV_RENDER_RC6_COUNT_EN));

        rc6->ctl_enable =
            GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
}

bool intel_check_bios_c6_setup(struct intel_rc6 *rc6)
{
        if (!rc6->bios_state_captured) {
                struct intel_uncore *uncore = rc6_to_uncore(rc6);
                intel_wakeref_t wakeref;

                with_intel_runtime_pm(uncore->rpm, wakeref)
                        rc6->bios_rc_state = intel_uncore_read(uncore, GEN6_RC_STATE);

                rc6->bios_state_captured = true;
        }

        return rc6->bios_rc_state & RC_SW_TARGET_STATE_MASK;
}

static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        u32 rc6_ctx_base, rc_ctl, rc_sw_target;
        bool enable_rc6 = true;

        rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
        rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
        rc_sw_target &= RC_SW_TARGET_STATE_MASK;
        rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
        drm_dbg(&i915->drm, "BIOS enabled RC states: "
                         "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
                         str_on_off(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
                         str_on_off(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
                         rc_sw_target);

        if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
                drm_dbg(&i915->drm, "RC6 Base location not set properly.\n");
                enable_rc6 = false;
        }

        /*
         * The exact context size is not known for BXT, so assume a page size
         * for this check.
         */
        rc6_ctx_base =
                intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
        if (!(rc6_ctx_base >= i915->dsm.reserved.start &&
              rc6_ctx_base + PAGE_SIZE < i915->dsm.reserved.end)) {
                drm_dbg(&i915->drm, "RC6 Base address not as expected.\n");
                enable_rc6 = false;
        }

        if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT(RENDER_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT(GEN6_BSD_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT(BLT_RING_BASE)) & IDLE_TIME_MASK) > 1 &&
              (intel_uncore_read(uncore, PWRCTX_MAXCNT(VEBOX_RING_BASE)) & IDLE_TIME_MASK) > 1)) {
                drm_dbg(&i915->drm,
                        "Engine Idle wait time not set properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
            !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
                drm_dbg(&i915->drm, "Pushbus not setup properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
                drm_dbg(&i915->drm, "GFX pause not setup properly.\n");
                enable_rc6 = false;
        }

        if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
                drm_dbg(&i915->drm, "GPM control not setup properly.\n");
                enable_rc6 = false;
        }

        return enable_rc6;
}

static bool rc6_supported(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_gt *gt = rc6_to_gt(rc6);

        if (!HAS_RC6(i915))
                return false;

        if (intel_vgpu_active(i915))
                return false;

        if (is_mock_gt(rc6_to_gt(rc6)))
                return false;

        if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
                drm_notice(&i915->drm,
                           "RC6 and powersaving disabled by BIOS\n");
                return false;
        }

        if (IS_METEORLAKE(gt->i915) &&
            !intel_check_bios_c6_setup(rc6)) {
                drm_notice(&i915->drm,
                           "C6 disabled by BIOS\n");
                return false;
        }

        if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) {
                drm_notice(&i915->drm,
                           "Media RC6 disabled on A step\n");
                return false;
        }

        return true;
}

static void rpm_get(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(rc6->wakeref);
        pm_runtime_get_sync(rc6_to_i915(rc6)->drm.dev);
        rc6->wakeref = true;
}

static void rpm_put(struct intel_rc6 *rc6)
{
        GEM_BUG_ON(!rc6->wakeref);
        pm_runtime_put(rc6_to_i915(rc6)->drm.dev);
        rc6->wakeref = false;
}

static bool pctx_corrupted(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);

        if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                return false;

        if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
                return false;

        drm_notice(&i915->drm,
                   "RC6 context corruption, disabling runtime power management\n");
        return true;
}

static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        struct intel_gt *gt = rc6_to_gt(rc6);

        /* Take control of RC6 back from GuC */
        intel_guc_rc_disable(&gt->uc.guc);

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
        if (GRAPHICS_VER(i915) >= 9)
                intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
        intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
        intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
}

static void rc6_res_reg_init(struct intel_rc6 *rc6)
{
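        /*
         * Editorial note: the [0 ... N-1] form below is a GNU C range
         * designator, initialising every slot to INVALID_MMIO_REG before
         * the per-GT registers are filled in.
         */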
        i915_reg_t res_reg[INTEL_RC6_RES_MAX] = {
                [0 ... INTEL_RC6_RES_MAX - 1] = INVALID_MMIO_REG,
        };

        switch (rc6_to_gt(rc6)->type) {
        case GT_MEDIA:
                res_reg[INTEL_RC6_RES_RC6] = MTL_MEDIA_MC6;
                break;
        default:
                res_reg[INTEL_RC6_RES_RC6_LOCKED] = GEN6_GT_GFX_RC6_LOCKED;
                res_reg[INTEL_RC6_RES_RC6] = GEN6_GT_GFX_RC6;
                res_reg[INTEL_RC6_RES_RC6p] = GEN6_GT_GFX_RC6p;
                res_reg[INTEL_RC6_RES_RC6pp] = GEN6_GT_GFX_RC6pp;
                break;
        }

        memcpy(rc6->res_reg, res_reg, sizeof(res_reg));
}

void intel_rc6_init(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        int err;

        /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
        rpm_get(rc6);

        if (!rc6_supported(rc6))
                return;

        rc6_res_reg_init(rc6);

        if (IS_CHERRYVIEW(i915))
                err = chv_rc6_init(rc6);
        else if (IS_VALLEYVIEW(i915))
                err = vlv_rc6_init(rc6);
        else
                err = 0;

        /* Sanitize rc6, ensure it is disabled before we are ready. */
        __intel_rc6_disable(rc6);

        rc6->supported = err == 0;
}

void intel_rc6_sanitize(struct intel_rc6 *rc6)
{
        memset(rc6->prev_hw_residency, 0, sizeof(rc6->prev_hw_residency));

        if (rc6->enabled) { /* unbalanced suspend/resume */
                rpm_get(rc6);
                rc6->enabled = false;
        }

        if (rc6->supported)
                __intel_rc6_disable(rc6);
}

void intel_rc6_enable(struct intel_rc6 *rc6)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        if (!rc6->supported)
                return;

        GEM_BUG_ON(rc6->enabled);

        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

        if (IS_CHERRYVIEW(i915))
                chv_rc6_enable(rc6);
        else if (IS_VALLEYVIEW(i915))
                vlv_rc6_enable(rc6);
        else if (GRAPHICS_VER(i915) >= 11)
                gen11_rc6_enable(rc6);
        else if (GRAPHICS_VER(i915) >= 9)
                gen9_rc6_enable(rc6);
        else if (IS_BROADWELL(i915))
                gen8_rc6_enable(rc6);
        else if (GRAPHICS_VER(i915) >= 6)
                gen6_rc6_enable(rc6);

        rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
        if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
                rc6->ctl_enable = 0;

        intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);

        if (unlikely(pctx_corrupted(rc6)))
                return;

        /* rc6 is ready, runtime-pm is go! */
        rpm_put(rc6);
        rc6->enabled = true;
}

void intel_rc6_unpark(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        if (!rc6->enabled)
                return;

        /* Restore HW timers for automatic RC6 entry while busy */
        intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
}

void intel_rc6_park(struct intel_rc6 *rc6)
{
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        unsigned int target;

        if (!rc6->enabled)
                return;

        if (unlikely(pctx_corrupted(rc6))) {
                intel_rc6_disable(rc6);
                return;
        }

        if (!rc6->manual)
                return;

        /* Turn off the HW timers and go directly to rc6 */
        intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);

        if (HAS_RC6pp(rc6_to_i915(rc6)))
                target = 0x6; /* deepest rc6 */
        else if (HAS_RC6p(rc6_to_i915(rc6)))
                target = 0x5; /* deep rc6 */
        else
                target = 0x4; /* normal rc6 */
        intel_uncore_write_fw(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
}

void intel_rc6_disable(struct intel_rc6 *rc6)
{
        if (!rc6->enabled)
                return;

        rpm_get(rc6);
        rc6->enabled = false;

        __intel_rc6_disable(rc6);
}

void intel_rc6_fini(struct intel_rc6 *rc6)
{
        struct drm_i915_gem_object *pctx;
        struct intel_uncore *uncore = rc6_to_uncore(rc6);

        intel_rc6_disable(rc6);

        /* We want the BIOS C6 state preserved across loads for MTL */
        if (IS_METEORLAKE(rc6_to_i915(rc6)) && rc6->bios_state_captured)
                intel_uncore_write_fw(uncore, GEN6_RC_STATE, rc6->bios_rc_state);

        pctx = fetch_and_zero(&rc6->pctx);
        if (pctx)
                i915_gem_object_put(pctx);

        if (rc6->wakeref)
                rpm_put(rc6);
}

static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
{
        u32 lower, upper, tmp;
        int loop = 2;

        /*
         * The registers accessed do not need forcewake. We borrow the
         * uncore lock to prevent concurrent access to the range register.
         */
        lockdep_assert_held(&uncore->lock);

        /*
         * vlv and chv residency counters are 40 bits in width.
         * With a control bit, we can choose between upper or lower
         * 32bit window into this counter.
         *
         * Although we always use the counter in high-range mode elsewhere,
         * userspace may attempt to read the value before rc6 is initialised,
         * before we have set the default VLV_COUNTER_CONTROL value. So always
         * set the high bit to be safe.
         */
        intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
                              _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
        upper = intel_uncore_read_fw(uncore, reg);
        do {
                tmp = upper;

                intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
                                      _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
                lower = intel_uncore_read_fw(uncore, reg);

                intel_uncore_write_fw(uncore, VLV_COUNTER_CONTROL,
                                      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
                upper = intel_uncore_read_fw(uncore, reg);
        } while (upper != tmp && --loop);

        /*
         * Everywhere else we always use VLV_COUNTER_CONTROL with the
         * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
         * now.
         */

        return lower | (u64)upper << 8;
}
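
/*
 * Editorial sketch of the 40-bit reconstruction above, assuming the high
 * window exposes counter bits [39:8] and the low window bits [31:0] (as
 * the << 8 implies): if upper reads 0x12345678 and lower reads 0x345678ab,
 * then
 *
 *      lower | (u64)upper << 8 = 0x12345678ab
 *
 * The 24 overlapping bits only agree when the counter did not move between
 * the reads, which is why the loop re-reads the upper window and retries
 * if it changed.
 */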

u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, enum intel_rc6_res_type id)
{
        struct drm_i915_private *i915 = rc6_to_i915(rc6);
        struct intel_uncore *uncore = rc6_to_uncore(rc6);
        u64 time_hw, prev_hw, overflow_hw;
        i915_reg_t reg = rc6->res_reg[id];
        unsigned int fw_domains;
        unsigned long flags;
        u32 mul, div;

        if (!rc6->supported)
                return 0;

        fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw_domains);

        /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
        if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
                mul = 1000000;
                div = i915->czclk_freq;
                overflow_hw = BIT_ULL(40);
                time_hw = vlv_residency_raw(uncore, reg);
        } else {
                /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
                if (IS_GEN9_LP(i915)) {
                        mul = 10000;
                        div = 12;
                } else {
                        mul = 1280;
                        div = 1;
                }

                overflow_hw = BIT_ULL(32);
                time_hw = intel_uncore_read_fw(uncore, reg);
        }

        /*
         * Counter wrap handling.
         *
         * Store the previous hw counter values for wrap-around handling. This
         * relies on a sufficient frequency of queries, otherwise the counters
         * can still wrap.
         */
        prev_hw = rc6->prev_hw_residency[id];
        rc6->prev_hw_residency[id] = time_hw;

        /* RC6 delta from last sample. */
        if (time_hw >= prev_hw)
                time_hw -= prev_hw;
        else
                time_hw += overflow_hw - prev_hw;
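
        /*
         * Worked example (editorial note): with a 32-bit counter, a previous
         * sample of 0xffffff00 followed by a reading of 0x10 yields a delta
         * of 0x10 + 2^32 - 0xffffff00 = 0x110 ticks, i.e. the wrap is
         * absorbed as long as less than one full counter period elapses
         * between queries.
         */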

        /* Add delta to RC6 extended raw driver copy. */
        time_hw += rc6->cur_residency[id];
        rc6->cur_residency[id] = time_hw;

        intel_uncore_forcewake_put__locked(uncore, fw_domains);
        spin_unlock_irqrestore(&uncore->lock, flags);

        return mul_u64_u32_div(time_hw, mul, div);
}

u64 intel_rc6_residency_us(struct intel_rc6 *rc6, enum intel_rc6_res_type id)
{
        return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, id), 1000);
}

void intel_rc6_print_residency(struct seq_file *m, const char *title,
                               enum intel_rc6_res_type id)
{
        struct intel_gt *gt = m->private;
        i915_reg_t reg = gt->rc6.res_reg[id];
        intel_wakeref_t wakeref;

        with_intel_runtime_pm(gt->uncore->rpm, wakeref)
                seq_printf(m, "%s %u (%llu us)\n", title,
                           intel_uncore_read(gt->uncore, reg),
                           intel_rc6_residency_us(&gt->rc6, id));
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_rc6.c"
#endif