2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include <linux/cpufreq.h>
29 #include <linux/pm_runtime.h>
30 #include <drm/drm_plane_helper.h>
32 #include "intel_drv.h"
33 #include "../../../platform/x86/intel_ips.h"
34 #include <linux/module.h>
35 #include <drm/drm_atomic_helper.h>
40 * RC6 is a special power stage which allows the GPU to enter a very
41 * low-voltage mode when idle, using down to 0V while at this stage. This
42 * stage is entered automatically when the GPU is idle and RC6 support is
43 * enabled; as soon as a new workload arrives, the GPU wakes up automatically as well.
45 * There are different RC6 modes available on Intel GPUs, which differ from
46 * each other in the latency required to enter and leave RC6 and in the
47 * voltage consumed by the GPU in the different states.
49 * The combination of the following flags defines which states the GPU is
50 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
51 * RC6pp is the deepest RC6. Their support by hardware varies according to the
52 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
53 * which brings the most power savings; deeper states save more power, but
54 * require higher latency to switch to and wake up from.
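/*
 * As a sketch only (the flag names here are assumptions based on the
 * GEN6_RC_CTL_* defines in i915_reg.h, not part of the original text above):
 * the hardware would typically be programmed with a combination such as
 * GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_RC6p_ENABLE | GEN6_RC_CTL_RC6pp_ENABLE,
 * with the deeper bits only set on platforms that support them.
 */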
57 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
59 if (HAS_LLC(dev_priv)) {
61 * WaCompressedResourceDisplayNewHashMode:skl,kbl
62 * Display WA #0390: skl,kbl
64 * Must match Sampler, Pixel Back End, and Media. See
65 * WaCompressedResourceSamplerPbeMediaNewHashMode.
67 I915_WRITE(CHICKEN_PAR1_1,
68 I915_READ(CHICKEN_PAR1_1) |
69 SKL_DE_COMPRESSED_HASH_MODE);
72 /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
73 I915_WRITE(CHICKEN_PAR1_1,
74 I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
76 /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
77 I915_WRITE(GEN8_CHICKEN_DCPR_1,
78 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
80 /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
81 /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
82 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
84 DISP_FBC_MEMORY_WAKE);
86 /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
87 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
88 ILK_DPFC_DISABLE_DUMMY0);
90 if (IS_SKYLAKE(dev_priv)) {
91 /* WaDisableDopClockGating */
92 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
93 & ~GEN7_DOP_CLOCK_GATE_ENABLE);
97 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
99 gen9_init_clock_gating(dev_priv);
101 /* WaDisableSDEUnitClockGating:bxt */
102 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
103 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
107 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
109 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
110 GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
113 * Wa: Backlight PWM may stop in the asserted state, causing backlight
116 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
117 PWM1_GATING_DIS | PWM2_GATING_DIS);
120 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
122 gen9_init_clock_gating(dev_priv);
125 * WaDisablePWMClockGating:glk
126 * Backlight PWM may stop in the asserted state, causing backlight
129 I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
130 PWM1_GATING_DIS | PWM2_GATING_DIS);
132 /* WaDDIIOTimeout:glk */
133 if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
134 u32 val = I915_READ(CHICKEN_MISC_2);
135 val &= ~(GLK_CL0_PWR_DOWN |
138 I915_WRITE(CHICKEN_MISC_2, val);
143 static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
147 tmp = I915_READ(CLKCFG);
149 switch (tmp & CLKCFG_FSB_MASK) {
151 dev_priv->fsb_freq = 533; /* 133*4 */
154 dev_priv->fsb_freq = 800; /* 200*4 */
157 dev_priv->fsb_freq = 667; /* 167*4 */
160 dev_priv->fsb_freq = 400; /* 100*4 */
164 switch (tmp & CLKCFG_MEM_MASK) {
166 dev_priv->mem_freq = 533;
169 dev_priv->mem_freq = 667;
172 dev_priv->mem_freq = 800;
176 /* detect pineview DDR3 setting */
177 tmp = I915_READ(CSHRDDR3CTL);
178 dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
181 static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
185 ddrpll = I915_READ16(DDRMPLL1);
186 csipll = I915_READ16(CSIPLL0);
188 switch (ddrpll & 0xff) {
190 dev_priv->mem_freq = 800;
193 dev_priv->mem_freq = 1066;
196 dev_priv->mem_freq = 1333;
199 dev_priv->mem_freq = 1600;
202 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
204 dev_priv->mem_freq = 0;
208 dev_priv->ips.r_t = dev_priv->mem_freq;
210 switch (csipll & 0x3ff) {
212 dev_priv->fsb_freq = 3200;
215 dev_priv->fsb_freq = 3733;
218 dev_priv->fsb_freq = 4266;
221 dev_priv->fsb_freq = 4800;
224 dev_priv->fsb_freq = 5333;
227 dev_priv->fsb_freq = 5866;
230 dev_priv->fsb_freq = 6400;
233 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
235 dev_priv->fsb_freq = 0;
239 if (dev_priv->fsb_freq == 3200) {
240 dev_priv->ips.c_m = 0;
241 } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
242 dev_priv->ips.c_m = 1;
244 dev_priv->ips.c_m = 2;
248 static const struct cxsr_latency cxsr_latency_table[] = {
249 {1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
250 {1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
251 {1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
252 {1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
253 {1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
255 {1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
256 {1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
257 {1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
258 {1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
259 {1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
261 {1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
262 {1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
263 {1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
264 {1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
265 {1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
267 {0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
268 {0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
269 {0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
270 {0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
271 {0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
273 {0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
274 {0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
275 {0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
276 {0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
277 {0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
279 {0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
280 {0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
281 {0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
282 {0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
283 {0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
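/*
 * Each row above describes one platform/memory configuration (desktop vs.
 * mobile, DDR2 vs. DDR3, FSB and memory frequency) together with the
 * self-refresh latencies that pineview_update_wm() consumes further down
 * (display_sr, cursor_sr and their HPLL-off variants). The exact field order
 * is defined by struct cxsr_latency; this note is only a reading aid.
 */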
286 static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
291 const struct cxsr_latency *latency;
294 if (fsb == 0 || mem == 0)
297 for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
298 latency = &cxsr_latency_table[i];
299 if (is_desktop == latency->is_desktop &&
300 is_ddr3 == latency->is_ddr3 &&
301 fsb == latency->fsb_freq && mem == latency->mem_freq)
305 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
310 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
314 mutex_lock(&dev_priv->pcu_lock);
316 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
318 val &= ~FORCE_DDR_HIGH_FREQ;
320 val |= FORCE_DDR_HIGH_FREQ;
321 val &= ~FORCE_DDR_LOW_FREQ;
322 val |= FORCE_DDR_FREQ_REQ_ACK;
323 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
325 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
326 FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
327 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
329 mutex_unlock(&dev_priv->pcu_lock);
332 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
336 mutex_lock(&dev_priv->pcu_lock);
338 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
340 val |= DSP_MAXFIFO_PM5_ENABLE;
342 val &= ~DSP_MAXFIFO_PM5_ENABLE;
343 vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
345 mutex_unlock(&dev_priv->pcu_lock);
348 #define FW_WM(value, plane) \
349 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
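/*
 * For example, FW_WM(wm, SR) expands to
 * ((wm << DSPFW_SR_SHIFT) & DSPFW_SR_MASK), i.e. the value is shifted into
 * the SR field of the DSPFW register and masked to the field width.
 */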
351 static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
356 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
357 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
358 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
359 POSTING_READ(FW_BLC_SELF_VLV);
360 } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
361 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
362 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
363 POSTING_READ(FW_BLC_SELF);
364 } else if (IS_PINEVIEW(dev_priv)) {
365 val = I915_READ(DSPFW3);
366 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
368 val |= PINEVIEW_SELF_REFRESH_EN;
370 val &= ~PINEVIEW_SELF_REFRESH_EN;
371 I915_WRITE(DSPFW3, val);
372 POSTING_READ(DSPFW3);
373 } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
374 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
375 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
376 _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
377 I915_WRITE(FW_BLC_SELF, val);
378 POSTING_READ(FW_BLC_SELF);
379 } else if (IS_I915GM(dev_priv)) {
381 * FIXME can't find a bit like this for 915G,
382 * and yet it does have the related watermark in
383 * FW_BLC_SELF. What's going on?
385 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
386 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
387 _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
388 I915_WRITE(INSTPM, val);
389 POSTING_READ(INSTPM);
394 trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
396 DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
397 enableddisabled(enable),
398 enableddisabled(was_enabled));
404 * intel_set_memory_cxsr - Configure CxSR state
405 * @dev_priv: i915 device
406 * @enable: Allow vs. disallow CxSR
408 * Allow or disallow the system to enter a special CxSR
409 * (C-state self refresh) state. What typically happens in CxSR mode
410 * is that several display FIFOs may get combined into a single larger
411 * FIFO for a particular plane (so called max FIFO mode) to allow the
412 * system to defer memory fetches longer, and the memory will enter
415 * Note that enabling CxSR does not guarantee that the system enters
416 * this special mode, nor does it guarantee that the system stays
417 * in that mode once entered. So this just allows/disallows the system
418 * to autonomously utilize the CxSR mode. Other factors such as core
419 * C-states will affect when/if the system actually enters/exits the
422 * Note that on VLV/CHV this actually only controls the max FIFO mode,
423 * and the system is free to enter/exit memory self refresh at any time
424 * even when the use of CxSR has been disallowed.
426 * While the system is actually in the CxSR/max FIFO mode, some plane
427 * control registers will not get latched on vblank. Thus in order to
428 * guarantee the system will respond to changes in the plane registers
429 * we must always disallow CxSR prior to making changes to those registers.
430 * Unfortunately the system will re-evaluate the CxSR conditions at
431 * frame start which happens after vblank start (which is when the plane
432 * registers would get latched), so we can't proceed with the plane update
433 * during the same frame where we disallowed CxSR.
435 * Certain platforms also have a deeper HPLL SR mode. Fortunately the
436 * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
437 * the hardware w.r.t. HPLL SR when writing to plane registers.
438 * Disallowing just CxSR is sufficient.
440 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
444 mutex_lock(&dev_priv->wm.wm_mutex);
445 ret = _intel_set_memory_cxsr(dev_priv, enable);
446 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
447 dev_priv->wm.vlv.cxsr = enable;
448 else if (IS_G4X(dev_priv))
449 dev_priv->wm.g4x.cxsr = enable;
450 mutex_unlock(&dev_priv->wm.wm_mutex);
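/*
 * A minimal usage sketch based on the kerneldoc above: before touching plane
 * registers that are affected by max FIFO mode, a caller would do
 * intel_set_memory_cxsr(dev_priv, false), perform the update, and only
 * re-allow CxSR once the new watermarks make it safe again.
 */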
456 * Latency for FIFO fetches is dependent on several factors:
457 * - memory configuration (speed, channels)
459 * - current MCH state
460 * It can be fairly high in some situations, so here we assume a fairly
461 * pessimal value. It's a tradeoff between extra memory fetches (if we
462 * set this value too high, the FIFO will fetch frequently to stay full)
463 * and power consumption (set it too low to save power and we might see
464 * FIFO underruns and display "flicker").
466 * A value of 5us seems to be a good balance; safe for very low end
467 * platforms but not overly aggressive on lower latency configs.
469 static const int pessimal_latency_ns = 5000;
471 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
472 ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
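/*
 * For example, VLV_FIFO_START(dsparb, dsparb2, 0, 0) reassembles a 9-bit FIFO
 * start offset from the low 8 bits held in DSPARB and the single high-order
 * bit held in DSPARB2, which is how the pipe A sprite0/sprite1 split points
 * are read back below.
 */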
474 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
476 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
477 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
478 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
479 enum pipe pipe = crtc->pipe;
480 int sprite0_start, sprite1_start;
483 uint32_t dsparb, dsparb2, dsparb3;
485 dsparb = I915_READ(DSPARB);
486 dsparb2 = I915_READ(DSPARB2);
487 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
488 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
491 dsparb = I915_READ(DSPARB);
492 dsparb2 = I915_READ(DSPARB2);
493 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
494 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
497 dsparb2 = I915_READ(DSPARB2);
498 dsparb3 = I915_READ(DSPARB3);
499 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
500 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
507 fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
508 fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
509 fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
510 fifo_state->plane[PLANE_CURSOR] = 63;
513 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
514 enum i9xx_plane_id i9xx_plane)
516 uint32_t dsparb = I915_READ(DSPARB);
519 size = dsparb & 0x7f;
520 if (i9xx_plane == PLANE_B)
521 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
523 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
524 dsparb, plane_name(i9xx_plane), size);
529 static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
530 enum i9xx_plane_id i9xx_plane)
532 uint32_t dsparb = I915_READ(DSPARB);
535 size = dsparb & 0x1ff;
536 if (i9xx_plane == PLANE_B)
537 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
538 size >>= 1; /* Convert to cachelines */
540 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
541 dsparb, plane_name(i9xx_plane), size);
546 static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
547 enum i9xx_plane_id i9xx_plane)
549 uint32_t dsparb = I915_READ(DSPARB);
552 size = dsparb & 0x7f;
553 size >>= 2; /* Convert to cachelines */
555 DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
556 dsparb, plane_name(i9xx_plane), size);
561 /* Pineview has different values for various configs */
562 static const struct intel_watermark_params pineview_display_wm = {
563 .fifo_size = PINEVIEW_DISPLAY_FIFO,
564 .max_wm = PINEVIEW_MAX_WM,
565 .default_wm = PINEVIEW_DFT_WM,
566 .guard_size = PINEVIEW_GUARD_WM,
567 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
569 static const struct intel_watermark_params pineview_display_hplloff_wm = {
570 .fifo_size = PINEVIEW_DISPLAY_FIFO,
571 .max_wm = PINEVIEW_MAX_WM,
572 .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
573 .guard_size = PINEVIEW_GUARD_WM,
574 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
576 static const struct intel_watermark_params pineview_cursor_wm = {
577 .fifo_size = PINEVIEW_CURSOR_FIFO,
578 .max_wm = PINEVIEW_CURSOR_MAX_WM,
579 .default_wm = PINEVIEW_CURSOR_DFT_WM,
580 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
581 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
583 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
584 .fifo_size = PINEVIEW_CURSOR_FIFO,
585 .max_wm = PINEVIEW_CURSOR_MAX_WM,
586 .default_wm = PINEVIEW_CURSOR_DFT_WM,
587 .guard_size = PINEVIEW_CURSOR_GUARD_WM,
588 .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
590 static const struct intel_watermark_params i965_cursor_wm_info = {
591 .fifo_size = I965_CURSOR_FIFO,
592 .max_wm = I965_CURSOR_MAX_WM,
593 .default_wm = I965_CURSOR_DFT_WM,
595 .cacheline_size = I915_FIFO_LINE_SIZE,
597 static const struct intel_watermark_params i945_wm_info = {
598 .fifo_size = I945_FIFO_SIZE,
599 .max_wm = I915_MAX_WM,
602 .cacheline_size = I915_FIFO_LINE_SIZE,
604 static const struct intel_watermark_params i915_wm_info = {
605 .fifo_size = I915_FIFO_SIZE,
606 .max_wm = I915_MAX_WM,
609 .cacheline_size = I915_FIFO_LINE_SIZE,
611 static const struct intel_watermark_params i830_a_wm_info = {
612 .fifo_size = I855GM_FIFO_SIZE,
613 .max_wm = I915_MAX_WM,
616 .cacheline_size = I830_FIFO_LINE_SIZE,
618 static const struct intel_watermark_params i830_bc_wm_info = {
619 .fifo_size = I855GM_FIFO_SIZE,
620 .max_wm = I915_MAX_WM/2,
623 .cacheline_size = I830_FIFO_LINE_SIZE,
625 static const struct intel_watermark_params i845_wm_info = {
626 .fifo_size = I830_FIFO_SIZE,
627 .max_wm = I915_MAX_WM,
630 .cacheline_size = I830_FIFO_LINE_SIZE,
634 * intel_wm_method1 - Method 1 / "small buffer" watermark formula
635 * @pixel_rate: Pipe pixel rate in kHz
636 * @cpp: Plane bytes per pixel
637 * @latency: Memory wakeup latency in 0.1us units
639 * Compute the watermark using the method 1 or "small buffer"
640 * formula. The caller may additionally add extra cachelines
641 * to account for TLB misses and clock crossings.
643 * This method is concerned with the short term drain rate
644 * of the FIFO, ie. it does not account for blanking periods
645 * which would effectively reduce the average drain rate across
646 * a longer period. The name "small" refers to the fact that the
647 * FIFO is relatively small compared to the amount of data
650 * The FIFO level vs. time graph might look something like:
654 * __---__---__ (- plane active, _ blanking)
657 * or perhaps like this:
660 * __----__----__ (- plane active, _ blanking)
664 * The watermark in bytes
666 static unsigned int intel_wm_method1(unsigned int pixel_rate,
668 unsigned int latency)
672 ret = (uint64_t) pixel_rate * cpp * latency;
673 ret = DIV_ROUND_UP_ULL(ret, 10000);
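/*
 * Worked example (not from the spec, just the formula above): a 100 MHz
 * pixel clock (pixel_rate = 100000 kHz), cpp = 4 and a 5 usec latency
 * (latency = 50 in 0.1us units) give 100000 * 4 * 50 / 10000 = 2000 bytes
 * drained from the FIFO while the memory wakes up.
 */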
679 * intel_wm_method2 - Method 2 / "large buffer" watermark formula
680 * @pixel_rate: Pipe pixel rate in kHz
681 * @htotal: Pipe horizontal total
682 * @width: Plane width in pixels
683 * @cpp: Plane bytes per pixel
684 * @latency: Memory wakeup latency in 0.1us units
686 * Compute the watermark using the method 2 or "large buffer"
687 * formula. The caller may additionally add extra cachelines
688 * to account for TLB misses and clock crossings.
690 * This method is concerned with the long term drain rate
691 * of the FIFO, ie. it does account for blanking periods
692 * which effectively reduce the average drain rate across
693 * a longer period. The name "large" refers to the fact that the
694 * FIFO is relatively large compared to the amount of data
697 * The FIFO level vs. time graph might look something like:
702 * __ --__--__--__--__--__--__ (- plane active, _ blanking)
706 * The watermark in bytes
708 static unsigned int intel_wm_method2(unsigned int pixel_rate,
712 unsigned int latency)
717 * FIXME remove once all users are computing
718 * watermarks in the correct place.
720 if (WARN_ON_ONCE(htotal == 0))
723 ret = (latency * pixel_rate) / (htotal * 10000);
724 ret = (ret + 1) * width * cpp;
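/*
 * Worked example for method 2: with pixel_rate = 100000 kHz, htotal = 2000
 * and latency = 200 (20 usec), latency * pixel_rate / (htotal * 10000) = 1
 * full scanline elapses during the wakeup; adding one line for rounding and
 * multiplying by width * cpp (say 1920 * 4) yields 2 * 7680 = 15360 bytes.
 */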
730 * intel_calculate_wm - calculate watermark level
731 * @pixel_rate: pixel clock
732 * @wm: chip FIFO params
733 * @fifo_size: size of the FIFO buffer
734 * @cpp: bytes per pixel
735 * @latency_ns: memory latency for the platform
737 * Calculate the watermark level (the level at which the display plane will
738 * start fetching from memory again). Each chip has a different display
739 * FIFO size and allocation, so the caller needs to figure that out and pass
740 * in the correct intel_watermark_params structure.
742 * As the pixel clock runs, the FIFO will be drained at a rate that depends
743 * on the pixel size. When it reaches the watermark level, it'll start
744 * fetching FIFO line sized chunks from memory until the FIFO fills
745 * past the watermark point. If the FIFO drains completely, a FIFO underrun
746 * will occur, and a display engine hang could result.
748 static unsigned int intel_calculate_wm(int pixel_rate,
749 const struct intel_watermark_params *wm,
750 int fifo_size, int cpp,
751 unsigned int latency_ns)
753 int entries, wm_size;
756 * Note: we need to make sure we don't overflow for various clock &
758 * clocks go from a few thousand to several hundred thousand.
759 * latency is usually a few thousand
761 entries = intel_wm_method1(pixel_rate, cpp,
763 entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
765 DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
767 wm_size = fifo_size - entries;
768 DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
770 /* Don't promote wm_size to unsigned... */
771 if (wm_size > wm->max_wm)
772 wm_size = wm->max_wm;
774 wm_size = wm->default_wm;
777 * Bspec seems to indicate that the value shouldn't be lower than
778 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
779 * Let's go for 8 which is the burst size since certain platforms
780 * already use a hardcoded 8 (which is what the spec says should be
789 static bool is_disabling(int old, int new, int threshold)
791 return old >= threshold && new < threshold;
794 static bool is_enabling(int old, int new, int threshold)
796 return old < threshold && new >= threshold;
799 static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
801 return dev_priv->wm.max_level + 1;
804 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
805 const struct intel_plane_state *plane_state)
807 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
809 /* FIXME check the 'enable' instead */
810 if (!crtc_state->base.active)
814 * Treat cursor with fb as always visible since cursor updates
815 * can happen faster than the vrefresh rate, and the current
816 * watermark code doesn't handle that correctly. Cursor updates
817 * which set/clear the fb or change the cursor size are going
818 * to get throttled by intel_legacy_cursor_update() to work
819 * around this problem with the watermark code.
821 if (plane->id == PLANE_CURSOR)
822 return plane_state->base.fb != NULL;
824 return plane_state->base.visible;
827 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
829 struct intel_crtc *crtc, *enabled = NULL;
831 for_each_intel_crtc(&dev_priv->drm, crtc) {
832 if (intel_crtc_active(crtc)) {
842 static void pineview_update_wm(struct intel_crtc *unused_crtc)
844 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
845 struct intel_crtc *crtc;
846 const struct cxsr_latency *latency;
850 latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
855 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
856 intel_set_memory_cxsr(dev_priv, false);
860 crtc = single_enabled_crtc(dev_priv);
862 const struct drm_display_mode *adjusted_mode =
863 &crtc->config->base.adjusted_mode;
864 const struct drm_framebuffer *fb =
865 crtc->base.primary->state->fb;
866 int cpp = fb->format->cpp[0];
867 int clock = adjusted_mode->crtc_clock;
870 wm = intel_calculate_wm(clock, &pineview_display_wm,
871 pineview_display_wm.fifo_size,
872 cpp, latency->display_sr);
873 reg = I915_READ(DSPFW1);
874 reg &= ~DSPFW_SR_MASK;
875 reg |= FW_WM(wm, SR);
876 I915_WRITE(DSPFW1, reg);
877 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
880 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
881 pineview_display_wm.fifo_size,
882 4, latency->cursor_sr);
883 reg = I915_READ(DSPFW3);
884 reg &= ~DSPFW_CURSOR_SR_MASK;
885 reg |= FW_WM(wm, CURSOR_SR);
886 I915_WRITE(DSPFW3, reg);
888 /* Display HPLL off SR */
889 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
890 pineview_display_hplloff_wm.fifo_size,
891 cpp, latency->display_hpll_disable);
892 reg = I915_READ(DSPFW3);
893 reg &= ~DSPFW_HPLL_SR_MASK;
894 reg |= FW_WM(wm, HPLL_SR);
895 I915_WRITE(DSPFW3, reg);
897 /* cursor HPLL off SR */
898 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
899 pineview_display_hplloff_wm.fifo_size,
900 4, latency->cursor_hpll_disable);
901 reg = I915_READ(DSPFW3);
902 reg &= ~DSPFW_HPLL_CURSOR_MASK;
903 reg |= FW_WM(wm, HPLL_CURSOR);
904 I915_WRITE(DSPFW3, reg);
905 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
907 intel_set_memory_cxsr(dev_priv, true);
909 intel_set_memory_cxsr(dev_priv, false);
914 * Documentation says:
915 * "If the line size is small, the TLB fetches can get in the way of the
916 * data fetches, causing some lag in the pixel data return which is not
917 * accounted for in the above formulas. The following adjustment only
918 * needs to be applied if eight whole lines fit in the buffer at once.
919 * The WM is adjusted upwards by the difference between the FIFO size
920 * and the size of 8 whole lines. This adjustment is always performed
921 * in the actual pixel depth regardless of whether FBC is enabled or not."
923 static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
925 int tlb_miss = fifo_size * 64 - width * cpp * 8;
927 return max(0, tlb_miss);
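/*
 * Example: with the 511 cacheline SR FIFO (511 * 64 = 32704 bytes) and a
 * 256 pixel wide 4 bpp plane, eight lines occupy 256 * 4 * 8 = 8192 bytes,
 * so the watermark is bumped by 32704 - 8192 = 24512 bytes. For typical
 * full-width planes eight lines exceed the FIFO and no adjustment is made.
 */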
930 static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
931 const struct g4x_wm_values *wm)
935 for_each_pipe(dev_priv, pipe)
936 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
939 FW_WM(wm->sr.plane, SR) |
940 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
941 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
942 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
944 (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
945 FW_WM(wm->sr.fbc, FBC_SR) |
946 FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
947 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
948 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
949 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
951 (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
952 FW_WM(wm->sr.cursor, CURSOR_SR) |
953 FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
954 FW_WM(wm->hpll.plane, HPLL_SR));
956 POSTING_READ(DSPFW1);
959 #define FW_WM_VLV(value, plane) \
960 (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
962 static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
963 const struct vlv_wm_values *wm)
967 for_each_pipe(dev_priv, pipe) {
968 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
970 I915_WRITE(VLV_DDL(pipe),
971 (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
972 (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
973 (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
974 (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
978 * Zero the (unused) WM1 watermarks, and also clear all the
979 * high order bits so that there are no out of bounds values
980 * present in the registers during the reprogramming.
982 I915_WRITE(DSPHOWM, 0);
983 I915_WRITE(DSPHOWM1, 0);
984 I915_WRITE(DSPFW4, 0);
985 I915_WRITE(DSPFW5, 0);
986 I915_WRITE(DSPFW6, 0);
989 FW_WM(wm->sr.plane, SR) |
990 FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
991 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
992 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
994 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
995 FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
996 FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
998 FW_WM(wm->sr.cursor, CURSOR_SR));
1000 if (IS_CHERRYVIEW(dev_priv)) {
1001 I915_WRITE(DSPFW7_CHV,
1002 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1003 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1004 I915_WRITE(DSPFW8_CHV,
1005 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1006 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
1007 I915_WRITE(DSPFW9_CHV,
1008 FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1009 FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
1011 FW_WM(wm->sr.plane >> 9, SR_HI) |
1012 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1013 FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1014 FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1015 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1016 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1017 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1018 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1019 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1020 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1023 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1024 FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1026 FW_WM(wm->sr.plane >> 9, SR_HI) |
1027 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1028 FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1029 FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1030 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1031 FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1032 FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1035 POSTING_READ(DSPFW1);
1040 static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1042 /* all latencies in usec */
1043 dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1044 dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
1045 dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
1047 dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
1050 static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1053 * DSPCNTR[13] supposedly controls whether the
1054 * primary plane can use the FIFO space otherwise
1055 * reserved for the sprite plane. It's not 100% clear
1056 * what the actual FIFO size is, but it looks like we
1057 * can happily set both primary and sprite watermarks
1058 * up to 127 cachelines. So that would seem to mean
1059 * that either DSPCNTR[13] doesn't do anything, or that
1060 * the total FIFO is >= 256 cachelines in size. Either
1061 * way, we don't seem to have to worry about this
1062 * repartitioning as the maximum watermark value the
1063 * register can hold for each plane is lower than the
1064 * minimum FIFO size.
1070 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1072 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1074 MISSING_CASE(plane_id);
1079 static int g4x_fbc_fifo_size(int level)
1082 case G4X_WM_LEVEL_SR:
1084 case G4X_WM_LEVEL_HPLL:
1087 MISSING_CASE(level);
1092 static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1093 const struct intel_plane_state *plane_state,
1096 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1097 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1098 const struct drm_display_mode *adjusted_mode =
1099 &crtc_state->base.adjusted_mode;
1100 unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
1101 unsigned int clock, htotal, cpp, width, wm;
1106 if (!intel_wm_plane_visible(crtc_state, plane_state))
1110 * Not 100% sure which way ELK should go here as the
1111 * spec only says CL/CTG should assume 32bpp and BW
1112 * doesn't need to. But as these things followed the
1113 * mobile vs. desktop lines on gen3 as well, let's
1114 * assume ELK doesn't need this.
1116 * The spec also fails to list such a restriction for
1117 * the HPLL watermark, which seems a little strange.
1118 * Let's use 32bpp for the HPLL watermark as well.
1120 if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1121 level != G4X_WM_LEVEL_NORMAL)
1124 cpp = plane_state->base.fb->format->cpp[0];
1126 clock = adjusted_mode->crtc_clock;
1127 htotal = adjusted_mode->crtc_htotal;
1129 if (plane->id == PLANE_CURSOR)
1130 width = plane_state->base.crtc_w;
1132 width = drm_rect_width(&plane_state->base.dst);
1134 if (plane->id == PLANE_CURSOR) {
1135 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1136 } else if (plane->id == PLANE_PRIMARY &&
1137 level == G4X_WM_LEVEL_NORMAL) {
1138 wm = intel_wm_method1(clock, cpp, latency);
1140 unsigned int small, large;
1142 small = intel_wm_method1(clock, cpp, latency);
1143 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1145 wm = min(small, large);
1148 wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1151 wm = DIV_ROUND_UP(wm, 64) + 2;
1153 return min_t(unsigned int, wm, USHRT_MAX);
1156 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1157 int level, enum plane_id plane_id, u16 value)
1159 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1162 for (; level < intel_wm_num_levels(dev_priv); level++) {
1163 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1165 dirty |= raw->plane[plane_id] != value;
1166 raw->plane[plane_id] = value;
1172 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1173 int level, u16 value)
1175 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1178 /* NORMAL level doesn't have an FBC watermark */
1179 level = max(level, G4X_WM_LEVEL_SR);
1181 for (; level < intel_wm_num_levels(dev_priv); level++) {
1182 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1184 dirty |= raw->fbc != value;
1191 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1192 const struct intel_plane_state *pstate,
1195 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1196 const struct intel_plane_state *plane_state)
1198 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1199 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1200 enum plane_id plane_id = plane->id;
1204 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1205 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1206 if (plane_id == PLANE_PRIMARY)
1207 dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1211 for (level = 0; level < num_levels; level++) {
1212 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1215 wm = g4x_compute_wm(crtc_state, plane_state, level);
1216 max_wm = g4x_plane_fifo_size(plane_id, level);
1221 dirty |= raw->plane[plane_id] != wm;
1222 raw->plane[plane_id] = wm;
1224 if (plane_id != PLANE_PRIMARY ||
1225 level == G4X_WM_LEVEL_NORMAL)
1228 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1229 raw->plane[plane_id]);
1230 max_wm = g4x_fbc_fifo_size(level);
1233 * FBC wm is not mandatory as we
1234 * can always just disable its use.
1239 dirty |= raw->fbc != wm;
1243 /* mark watermarks as invalid */
1244 dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1246 if (plane_id == PLANE_PRIMARY)
1247 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1251 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1253 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1254 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1255 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1257 if (plane_id == PLANE_PRIMARY)
1258 DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1259 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1260 crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1266 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1267 enum plane_id plane_id, int level)
1269 const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1271 return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1274 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1277 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1279 if (level > dev_priv->wm.max_level)
1282 return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1283 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1284 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1287 /* mark all levels starting from 'level' as invalid */
1288 static void g4x_invalidate_wms(struct intel_crtc *crtc,
1289 struct g4x_wm_state *wm_state, int level)
1291 if (level <= G4X_WM_LEVEL_NORMAL) {
1292 enum plane_id plane_id;
1294 for_each_plane_id_on_crtc(crtc, plane_id)
1295 wm_state->wm.plane[plane_id] = USHRT_MAX;
1298 if (level <= G4X_WM_LEVEL_SR) {
1299 wm_state->cxsr = false;
1300 wm_state->sr.cursor = USHRT_MAX;
1301 wm_state->sr.plane = USHRT_MAX;
1302 wm_state->sr.fbc = USHRT_MAX;
1305 if (level <= G4X_WM_LEVEL_HPLL) {
1306 wm_state->hpll_en = false;
1307 wm_state->hpll.cursor = USHRT_MAX;
1308 wm_state->hpll.plane = USHRT_MAX;
1309 wm_state->hpll.fbc = USHRT_MAX;
1313 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1315 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1316 struct intel_atomic_state *state =
1317 to_intel_atomic_state(crtc_state->base.state);
1318 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1319 int num_active_planes = hweight32(crtc_state->active_planes &
1320 ~BIT(PLANE_CURSOR));
1321 const struct g4x_pipe_wm *raw;
1322 const struct intel_plane_state *old_plane_state;
1323 const struct intel_plane_state *new_plane_state;
1324 struct intel_plane *plane;
1325 enum plane_id plane_id;
1327 unsigned int dirty = 0;
1329 for_each_oldnew_intel_plane_in_state(state, plane,
1331 new_plane_state, i) {
1332 if (new_plane_state->base.crtc != &crtc->base &&
1333 old_plane_state->base.crtc != &crtc->base)
1336 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1337 dirty |= BIT(plane->id);
1343 level = G4X_WM_LEVEL_NORMAL;
1344 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1347 raw = &crtc_state->wm.g4x.raw[level];
1348 for_each_plane_id_on_crtc(crtc, plane_id)
1349 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1351 level = G4X_WM_LEVEL_SR;
1353 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1356 raw = &crtc_state->wm.g4x.raw[level];
1357 wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1358 wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1359 wm_state->sr.fbc = raw->fbc;
1361 wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1363 level = G4X_WM_LEVEL_HPLL;
1365 if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1368 raw = &crtc_state->wm.g4x.raw[level];
1369 wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1370 wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1371 wm_state->hpll.fbc = raw->fbc;
1373 wm_state->hpll_en = wm_state->cxsr;
1378 if (level == G4X_WM_LEVEL_NORMAL)
1381 /* invalidate the higher levels */
1382 g4x_invalidate_wms(crtc, wm_state, level);
1385 * Determine if the FBC watermark(s) can be used. If
1386 * this isn't the case we prefer to disable the FBC
1387 * watermark(s) rather than disable the SR/HPLL
1388 * level(s) entirely.
1390 wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1392 if (level >= G4X_WM_LEVEL_SR &&
1393 wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1394 wm_state->fbc_en = false;
1395 else if (level >= G4X_WM_LEVEL_HPLL &&
1396 wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1397 wm_state->fbc_en = false;
1402 static int g4x_compute_intermediate_wm(struct drm_device *dev,
1403 struct intel_crtc *crtc,
1404 struct intel_crtc_state *new_crtc_state)
1406 struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1407 const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1408 struct intel_atomic_state *intel_state =
1409 to_intel_atomic_state(new_crtc_state->base.state);
1410 const struct intel_crtc_state *old_crtc_state =
1411 intel_atomic_get_old_crtc_state(intel_state, crtc);
1412 const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1413 enum plane_id plane_id;
1415 if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1416 *intermediate = *optimal;
1418 intermediate->cxsr = false;
1419 intermediate->hpll_en = false;
1423 intermediate->cxsr = optimal->cxsr && active->cxsr &&
1424 !new_crtc_state->disable_cxsr;
1425 intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1426 !new_crtc_state->disable_cxsr;
1427 intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1429 for_each_plane_id_on_crtc(crtc, plane_id) {
1430 intermediate->wm.plane[plane_id] =
1431 max(optimal->wm.plane[plane_id],
1432 active->wm.plane[plane_id]);
1434 WARN_ON(intermediate->wm.plane[plane_id] >
1435 g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1438 intermediate->sr.plane = max(optimal->sr.plane,
1440 intermediate->sr.cursor = max(optimal->sr.cursor,
1442 intermediate->sr.fbc = max(optimal->sr.fbc,
1445 intermediate->hpll.plane = max(optimal->hpll.plane,
1446 active->hpll.plane);
1447 intermediate->hpll.cursor = max(optimal->hpll.cursor,
1448 active->hpll.cursor);
1449 intermediate->hpll.fbc = max(optimal->hpll.fbc,
1452 WARN_ON((intermediate->sr.plane >
1453 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1454 intermediate->sr.cursor >
1455 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1456 intermediate->cxsr);
1457 WARN_ON((intermediate->sr.plane >
1458 g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1459 intermediate->sr.cursor >
1460 g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1461 intermediate->hpll_en);
1463 WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(1) &&
1464 intermediate->fbc_en && intermediate->cxsr);
1465 WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(2) &&
1466 intermediate->fbc_en && intermediate->hpll_en);
1470 * If our intermediate WMs are identical to the final WMs, then we can
1471 * omit the post-vblank programming; only update if it's different.
1473 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1474 new_crtc_state->wm.need_postvbl_update = true;
1479 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1480 struct g4x_wm_values *wm)
1482 struct intel_crtc *crtc;
1483 int num_active_crtcs = 0;
1489 for_each_intel_crtc(&dev_priv->drm, crtc) {
1490 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1495 if (!wm_state->cxsr)
1497 if (!wm_state->hpll_en)
1498 wm->hpll_en = false;
1499 if (!wm_state->fbc_en)
1505 if (num_active_crtcs != 1) {
1507 wm->hpll_en = false;
1511 for_each_intel_crtc(&dev_priv->drm, crtc) {
1512 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1513 enum pipe pipe = crtc->pipe;
1515 wm->pipe[pipe] = wm_state->wm;
1516 if (crtc->active && wm->cxsr)
1517 wm->sr = wm_state->sr;
1518 if (crtc->active && wm->hpll_en)
1519 wm->hpll = wm_state->hpll;
1523 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1525 struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1526 struct g4x_wm_values new_wm = {};
1528 g4x_merge_wm(dev_priv, &new_wm);
1530 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1533 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1534 _intel_set_memory_cxsr(dev_priv, false);
1536 g4x_write_wm_values(dev_priv, &new_wm);
1538 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1539 _intel_set_memory_cxsr(dev_priv, true);
1544 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1545 struct intel_crtc_state *crtc_state)
1547 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1548 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1550 mutex_lock(&dev_priv->wm.wm_mutex);
1551 crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1552 g4x_program_watermarks(dev_priv);
1553 mutex_unlock(&dev_priv->wm.wm_mutex);
1556 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1557 struct intel_crtc_state *crtc_state)
1559 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1560 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1562 if (!crtc_state->wm.need_postvbl_update)
1565 mutex_lock(&dev_priv->wm.wm_mutex);
1566 intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1567 g4x_program_watermarks(dev_priv);
1568 mutex_unlock(&dev_priv->wm.wm_mutex);
1571 /* latency must be in 0.1us units. */
1572 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1573 unsigned int htotal,
1576 unsigned int latency)
1580 ret = intel_wm_method2(pixel_rate, htotal,
1581 width, cpp, latency);
1582 ret = DIV_ROUND_UP(ret, 64);
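/*
 * The byte count from intel_wm_method2() is rounded up to 64 byte FIFO
 * cachelines here, matching the units used for the DSPARB FIFO split and
 * the raw watermarks below (an assumption based on the 511/63 cacheline
 * FIFO sizes used elsewhere in this file).
 */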
1587 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1589 /* all latencies in usec */
1590 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1592 dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1594 if (IS_CHERRYVIEW(dev_priv)) {
1595 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1596 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1598 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1602 static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1603 const struct intel_plane_state *plane_state,
1606 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1607 struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1608 const struct drm_display_mode *adjusted_mode =
1609 &crtc_state->base.adjusted_mode;
1610 unsigned int clock, htotal, cpp, width, wm;
1612 if (dev_priv->wm.pri_latency[level] == 0)
1615 if (!intel_wm_plane_visible(crtc_state, plane_state))
1618 cpp = plane_state->base.fb->format->cpp[0];
1619 clock = adjusted_mode->crtc_clock;
1620 htotal = adjusted_mode->crtc_htotal;
1621 width = crtc_state->pipe_src_w;
1623 if (plane->id == PLANE_CURSOR) {
1625 * FIXME the formula gives values that are
1626 * too big for the cursor FIFO, and hence we
1627 * would never be able to use cursors. For
1628 * now just hardcode the watermark.
1632 wm = vlv_wm_method2(clock, htotal, width, cpp,
1633 dev_priv->wm.pri_latency[level] * 10);
1636 return min_t(unsigned int, wm, USHRT_MAX);
1639 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1641 return (active_planes & (BIT(PLANE_SPRITE0) |
1642 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1645 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1647 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1648 const struct g4x_pipe_wm *raw =
1649 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1650 struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1651 unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1652 int num_active_planes = hweight32(active_planes);
1653 const int fifo_size = 511;
1654 int fifo_extra, fifo_left = fifo_size;
1655 int sprite0_fifo_extra = 0;
1656 unsigned int total_rate;
1657 enum plane_id plane_id;
1660 * When enabling sprite0 after sprite1 has already been enabled
1661 * we tend to get an underrun unless sprite0 already has some
1662 * FIFO space allocated. Hence we always allocate at least one
1663 * cacheline for sprite0 whenever sprite1 is enabled.
1665 * All other plane enable sequences appear immune to this problem.
1667 if (vlv_need_sprite0_fifo_workaround(active_planes))
1668 sprite0_fifo_extra = 1;
1670 total_rate = raw->plane[PLANE_PRIMARY] +
1671 raw->plane[PLANE_SPRITE0] +
1672 raw->plane[PLANE_SPRITE1] +
1675 if (total_rate > fifo_size)
1678 if (total_rate == 0)
1681 for_each_plane_id_on_crtc(crtc, plane_id) {
1684 if ((active_planes & BIT(plane_id)) == 0) {
1685 fifo_state->plane[plane_id] = 0;
1689 rate = raw->plane[plane_id];
1690 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1691 fifo_left -= fifo_state->plane[plane_id];
1694 fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1695 fifo_left -= sprite0_fifo_extra;
1697 fifo_state->plane[PLANE_CURSOR] = 63;
1699 fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1701 /* spread the remainder evenly */
1702 for_each_plane_id_on_crtc(crtc, plane_id) {
1708 if ((active_planes & BIT(plane_id)) == 0)
1711 plane_extra = min(fifo_extra, fifo_left);
1712 fifo_state->plane[plane_id] += plane_extra;
1713 fifo_left -= plane_extra;
1716 WARN_ON(active_planes != 0 && fifo_left != 0);
1718 /* give it all to the first plane if none are active */
1719 if (active_planes == 0) {
1720 WARN_ON(fifo_left != fifo_size);
1721 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
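/*
 * Worked example of the split above: raw rates of 100/50/50 cachelines for
 * primary/sprite0/sprite1 against the 511 entry FIFO yield 255/127/127 from
 * the proportional pass, leaving 2 entries which the "spread the remainder"
 * loop hands out one at a time, for a final 256/128/127 allocation.
 */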
1727 /* mark all levels starting from 'level' as invalid */
1728 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1729 struct vlv_wm_state *wm_state, int level)
1731 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1733 for (; level < intel_wm_num_levels(dev_priv); level++) {
1734 enum plane_id plane_id;
1736 for_each_plane_id_on_crtc(crtc, plane_id)
1737 wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1739 wm_state->sr[level].cursor = USHRT_MAX;
1740 wm_state->sr[level].plane = USHRT_MAX;
1744 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1749 return fifo_size - wm;
1753 * Starting from 'level' set all higher
1754 * levels to 'value' in the "raw" watermarks.
1756 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1757 int level, enum plane_id plane_id, u16 value)
1759 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1760 int num_levels = intel_wm_num_levels(dev_priv);
1763 for (; level < num_levels; level++) {
1764 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1766 dirty |= raw->plane[plane_id] != value;
1767 raw->plane[plane_id] = value;
1773 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1774 const struct intel_plane_state *plane_state)
1776 struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1777 enum plane_id plane_id = plane->id;
1778 int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1782 if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1783 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1787 for (level = 0; level < num_levels; level++) {
1788 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1789 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1790 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1795 dirty |= raw->plane[plane_id] != wm;
1796 raw->plane[plane_id] = wm;
1799 /* mark all higher levels as invalid */
1800 dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1804 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1806 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1807 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1808 crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1813 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1814 enum plane_id plane_id, int level)
1816 const struct g4x_pipe_wm *raw =
1817 &crtc_state->wm.vlv.raw[level];
1818 const struct vlv_fifo_state *fifo_state =
1819 &crtc_state->wm.vlv.fifo_state;
1821 return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1824 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1826 return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1827 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1828 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1829 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1832 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1834 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1835 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1836 struct intel_atomic_state *state =
1837 to_intel_atomic_state(crtc_state->base.state);
1838 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1839 const struct vlv_fifo_state *fifo_state =
1840 &crtc_state->wm.vlv.fifo_state;
1841 int num_active_planes = hweight32(crtc_state->active_planes &
1842 ~BIT(PLANE_CURSOR));
1843 bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1844 const struct intel_plane_state *old_plane_state;
1845 const struct intel_plane_state *new_plane_state;
1846 struct intel_plane *plane;
1847 enum plane_id plane_id;
1849 unsigned int dirty = 0;
1851 for_each_oldnew_intel_plane_in_state(state, plane,
1853 new_plane_state, i) {
1854 if (new_plane_state->base.crtc != &crtc->base &&
1855 old_plane_state->base.crtc != &crtc->base)
1858 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1859 dirty |= BIT(plane->id);
1863 * DSPARB registers may have been reset due to the
1864 * power well being turned off. Make sure we restore
1865 * them to a consistent state even if no primary/sprite
1866 * planes are initially active.
1869 crtc_state->fifo_changed = true;
1874 /* cursor changes don't warrant a FIFO recompute */
1875 if (dirty & ~BIT(PLANE_CURSOR)) {
1876 const struct intel_crtc_state *old_crtc_state =
1877 intel_atomic_get_old_crtc_state(state, crtc);
1878 const struct vlv_fifo_state *old_fifo_state =
1879 &old_crtc_state->wm.vlv.fifo_state;
1881 ret = vlv_compute_fifo(crtc_state);
1885 if (needs_modeset ||
1886 memcmp(old_fifo_state, fifo_state,
1887 sizeof(*fifo_state)) != 0)
1888 crtc_state->fifo_changed = true;
1891 /* initially allow all levels */
1892 wm_state->num_levels = intel_wm_num_levels(dev_priv);
1894 * Note that enabling cxsr with no primary/sprite planes
1895 * enabled can wedge the pipe. Hence we only allow cxsr
1896 * with exactly one enabled primary/sprite plane.
1898 wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1900 for (level = 0; level < wm_state->num_levels; level++) {
1901 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1902 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1904 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1907 for_each_plane_id_on_crtc(crtc, plane_id) {
1908 wm_state->wm[level].plane[plane_id] =
1909 vlv_invert_wm_value(raw->plane[plane_id],
1910 fifo_state->plane[plane_id]);
1913 wm_state->sr[level].plane =
1914 vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1915 raw->plane[PLANE_SPRITE0],
1916 raw->plane[PLANE_SPRITE1]),
1919 wm_state->sr[level].cursor =
1920 vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1927 /* limit to only levels we can actually handle */
1928 wm_state->num_levels = level;
1930 /* invalidate the higher levels */
1931 vlv_invalidate_wms(crtc, wm_state, level);
1936 #define VLV_FIFO(plane, value) \
1937 (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
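/*
 * For example, VLV_FIFO(SPRITEA, sprite0_start) places the low bits of the
 * sprite A FIFO start offset into its DSPARB field, while the matching
 * VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) write below carries the high
 * bit(s) in DSPARB2, mirroring how VLV_FIFO_START() reads them back.
 */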
1939 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1940 struct intel_crtc_state *crtc_state)
1942 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1943 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1944 const struct vlv_fifo_state *fifo_state =
1945 &crtc_state->wm.vlv.fifo_state;
1946 int sprite0_start, sprite1_start, fifo_size;
1948 if (!crtc_state->fifo_changed)
1951 sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1952 sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1953 fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1955 WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1956 WARN_ON(fifo_size != 511);
1958 trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1961 * uncore.lock serves a double purpose here. It allows us to
1962 * use the less expensive I915_{READ,WRITE}_FW() functions, and
1963 * it protects the DSPARB registers from getting clobbered by
1964 * parallel updates from multiple pipes.
1966 * intel_pipe_update_start() has already disabled interrupts
1967 * for us, so a plain spin_lock() is sufficient here.
1969 spin_lock(&dev_priv->uncore.lock);
1971 switch (crtc->pipe) {
1972 uint32_t dsparb, dsparb2, dsparb3;
1974 dsparb = I915_READ_FW(DSPARB);
1975 dsparb2 = I915_READ_FW(DSPARB2);
1977 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1978 VLV_FIFO(SPRITEB, 0xff));
1979 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1980 VLV_FIFO(SPRITEB, sprite1_start));
1982 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1983 VLV_FIFO(SPRITEB_HI, 0x1));
1984 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1985 VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1987 I915_WRITE_FW(DSPARB, dsparb);
1988 I915_WRITE_FW(DSPARB2, dsparb2);
1991 dsparb = I915_READ_FW(DSPARB);
1992 dsparb2 = I915_READ_FW(DSPARB2);
1994 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1995 VLV_FIFO(SPRITED, 0xff));
1996 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1997 VLV_FIFO(SPRITED, sprite1_start));
1999 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2000 VLV_FIFO(SPRITED_HI, 0xff));
2001 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2002 VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2004 I915_WRITE_FW(DSPARB, dsparb);
2005 I915_WRITE_FW(DSPARB2, dsparb2);
2008 dsparb3 = I915_READ_FW(DSPARB3);
2009 dsparb2 = I915_READ_FW(DSPARB2);
2011 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2012 VLV_FIFO(SPRITEF, 0xff));
2013 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2014 VLV_FIFO(SPRITEF, sprite1_start));
2016 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2017 VLV_FIFO(SPRITEF_HI, 0xff));
2018 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2019 VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2021 I915_WRITE_FW(DSPARB3, dsparb3);
2022 I915_WRITE_FW(DSPARB2, dsparb2);
2028 POSTING_READ_FW(DSPARB);
2030 spin_unlock(&dev_priv->uncore.lock);
2035 static int vlv_compute_intermediate_wm(struct drm_device *dev,
2036 struct intel_crtc *crtc,
2037 struct intel_crtc_state *new_crtc_state)
2039 struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2040 const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2041 struct intel_atomic_state *intel_state =
2042 to_intel_atomic_state(new_crtc_state->base.state);
2043 const struct intel_crtc_state *old_crtc_state =
2044 intel_atomic_get_old_crtc_state(intel_state, crtc);
2045 const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2048 if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2049 *intermediate = *optimal;
2051 intermediate->cxsr = false;
2055 intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2056 intermediate->cxsr = optimal->cxsr && active->cxsr &&
2057 !new_crtc_state->disable_cxsr;
2059 for (level = 0; level < intermediate->num_levels; level++) {
2060 enum plane_id plane_id;
2062 for_each_plane_id_on_crtc(crtc, plane_id) {
2063 intermediate->wm[level].plane[plane_id] =
2064 min(optimal->wm[level].plane[plane_id],
2065 active->wm[level].plane[plane_id]);
2068 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2069 active->sr[level].plane);
2070 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2071 active->sr[level].cursor);
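/*
 * Example of the merge above (hypothetical values): if the old state
 * computed 28 for a plane at this level and the new state computed 35,
 * the intermediate value is min(28, 35) = 28, so whatever is programmed
 * before the vblank never exceeds what either state's computation
 * produced.
 */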
2074 vlv_invalidate_wms(crtc, intermediate, level);
2078 * If our intermediate WM are identical to the final WM, then we can
2079 * omit the post-vblank programming; only update if it's different.
2081 if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2082 new_crtc_state->wm.need_postvbl_update = true;
2087 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2088 struct vlv_wm_values *wm)
2090 struct intel_crtc *crtc;
2091 int num_active_crtcs = 0;
2093 wm->level = dev_priv->wm.max_level;
2096 for_each_intel_crtc(&dev_priv->drm, crtc) {
2097 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2102 if (!wm_state->cxsr)
2106 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2109 if (num_active_crtcs != 1)
2112 if (num_active_crtcs > 1)
2113 wm->level = VLV_WM_LEVEL_PM2;
2115 for_each_intel_crtc(&dev_priv->drm, crtc) {
2116 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2117 enum pipe pipe = crtc->pipe;
2119 wm->pipe[pipe] = wm_state->wm[wm->level];
2120 if (crtc->active && wm->cxsr)
2121 wm->sr = wm_state->sr[wm->level];
2123 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2124 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2125 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2126 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2130 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2132 struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2133 struct vlv_wm_values new_wm = {};
2135 vlv_merge_wm(dev_priv, &new_wm);
2137 if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2140 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2141 chv_set_memory_dvfs(dev_priv, false);
2143 if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2144 chv_set_memory_pm5(dev_priv, false);
2146 if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2147 _intel_set_memory_cxsr(dev_priv, false);
2149 vlv_write_wm_values(dev_priv, &new_wm);
2151 if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2152 _intel_set_memory_cxsr(dev_priv, true);
2154 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2155 chv_set_memory_pm5(dev_priv, true);
2157 if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2158 chv_set_memory_dvfs(dev_priv, true);
2163 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2164 struct intel_crtc_state *crtc_state)
2166 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2167 struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2169 mutex_lock(&dev_priv->wm.wm_mutex);
2170 crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2171 vlv_program_watermarks(dev_priv);
2172 mutex_unlock(&dev_priv->wm.wm_mutex);
2175 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2176 struct intel_crtc_state *crtc_state)
2178 struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2179 struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2181 if (!crtc_state->wm.need_postvbl_update)
2184 mutex_lock(&dev_priv->wm.wm_mutex);
2185 intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2186 vlv_program_watermarks(dev_priv);
2187 mutex_unlock(&dev_priv->wm.wm_mutex);
2190 static void i965_update_wm(struct intel_crtc *unused_crtc)
2192 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2193 struct intel_crtc *crtc;
2198 /* Calc sr entries for one plane configs */
2199 crtc = single_enabled_crtc(dev_priv);
2201 /* self-refresh has much higher latency */
2202 static const int sr_latency_ns = 12000;
2203 const struct drm_display_mode *adjusted_mode =
2204 &crtc->config->base.adjusted_mode;
2205 const struct drm_framebuffer *fb =
2206 crtc->base.primary->state->fb;
2207 int clock = adjusted_mode->crtc_clock;
2208 int htotal = adjusted_mode->crtc_htotal;
2209 int hdisplay = crtc->config->pipe_src_w;
2210 int cpp = fb->format->cpp[0];
2213 entries = intel_wm_method2(clock, htotal,
2214 hdisplay, cpp, sr_latency_ns / 100);
2215 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2216 srwm = I965_FIFO_SIZE - entries;
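/*
 * Rough worked example (illustrative numbers, assuming I965_FIFO_SIZE is
 * 512 and I915_FIFO_LINE_SIZE is 64 bytes): if the display fetches about
 * 12800 bytes during the 12us self-refresh latency, that is
 * DIV_ROUND_UP(12800, 64) = 200 FIFO lines, leaving srwm = 512 - 200 = 312.
 */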
2220 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2223 entries = intel_wm_method2(clock, htotal,
2224 crtc->base.cursor->state->crtc_w, 4,
2225 sr_latency_ns / 100);
2226 entries = DIV_ROUND_UP(entries,
2227 i965_cursor_wm_info.cacheline_size) +
2228 i965_cursor_wm_info.guard_size;
2230 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2231 if (cursor_sr > i965_cursor_wm_info.max_wm)
2232 cursor_sr = i965_cursor_wm_info.max_wm;
2234 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2235 "cursor %d\n", srwm, cursor_sr);
2237 cxsr_enabled = true;
2239 cxsr_enabled = false;
2240 /* Turn off self refresh if both pipes are enabled */
2241 intel_set_memory_cxsr(dev_priv, false);
2244 DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2247 /* 965 has limitations... */
2248 I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2252 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2253 FW_WM(8, PLANEC_OLD));
2254 /* update cursor SR watermark */
2255 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2258 intel_set_memory_cxsr(dev_priv, true);
2263 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2265 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2266 const struct intel_watermark_params *wm_info;
2271 int planea_wm, planeb_wm;
2272 struct intel_crtc *crtc, *enabled = NULL;
2274 if (IS_I945GM(dev_priv))
2275 wm_info = &i945_wm_info;
2276 else if (!IS_GEN2(dev_priv))
2277 wm_info = &i915_wm_info;
2279 wm_info = &i830_a_wm_info;
2281 fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2282 crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2283 if (intel_crtc_active(crtc)) {
2284 const struct drm_display_mode *adjusted_mode =
2285 &crtc->config->base.adjusted_mode;
2286 const struct drm_framebuffer *fb =
2287 crtc->base.primary->state->fb;
2290 if (IS_GEN2(dev_priv))
2293 cpp = fb->format->cpp[0];
2295 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2296 wm_info, fifo_size, cpp,
2297 pessimal_latency_ns);
2300 planea_wm = fifo_size - wm_info->guard_size;
2301 if (planea_wm > (long)wm_info->max_wm)
2302 planea_wm = wm_info->max_wm;
2305 if (IS_GEN2(dev_priv))
2306 wm_info = &i830_bc_wm_info;
2308 fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2309 crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2310 if (intel_crtc_active(crtc)) {
2311 const struct drm_display_mode *adjusted_mode =
2312 &crtc->config->base.adjusted_mode;
2313 const struct drm_framebuffer *fb =
2314 crtc->base.primary->state->fb;
2317 if (IS_GEN2(dev_priv))
2320 cpp = fb->format->cpp[0];
2322 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2323 wm_info, fifo_size, cpp,
2324 pessimal_latency_ns);
2325 if (enabled == NULL)
2330 planeb_wm = fifo_size - wm_info->guard_size;
2331 if (planeb_wm > (long)wm_info->max_wm)
2332 planeb_wm = wm_info->max_wm;
2335 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2337 if (IS_I915GM(dev_priv) && enabled) {
2338 struct drm_i915_gem_object *obj;
2340 obj = intel_fb_obj(enabled->base.primary->state->fb);
2342 /* self-refresh seems busted with untiled */
2343 if (!i915_gem_object_is_tiled(obj))
2348 * Overlay gets an aggressive default since video jitter is bad.
2352 /* Play safe and disable self-refresh before adjusting watermarks. */
2353 intel_set_memory_cxsr(dev_priv, false);
2355 /* Calc sr entries for one plane configs */
2356 if (HAS_FW_BLC(dev_priv) && enabled) {
2357 /* self-refresh has much higher latency */
2358 static const int sr_latency_ns = 6000;
2359 const struct drm_display_mode *adjusted_mode =
2360 &enabled->config->base.adjusted_mode;
2361 const struct drm_framebuffer *fb =
2362 enabled->base.primary->state->fb;
2363 int clock = adjusted_mode->crtc_clock;
2364 int htotal = adjusted_mode->crtc_htotal;
2365 int hdisplay = enabled->config->pipe_src_w;
2369 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2372 cpp = fb->format->cpp[0];
2374 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2375 sr_latency_ns / 100);
2376 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2377 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2378 srwm = wm_info->fifo_size - entries;
2382 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2383 I915_WRITE(FW_BLC_SELF,
2384 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2386 I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2389 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2390 planea_wm, planeb_wm, cwm, srwm);
2392 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2393 fwater_hi = (cwm & 0x1f);
2395 /* Set request length to 8 cachelines per fetch */
2396 fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2397 fwater_hi = fwater_hi | (1 << 8);
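/*
 * Packing example (hypothetical watermark values): with planea_wm = 25,
 * planeb_wm = 18 and cwm = 2, the registers end up as
 * fwater_lo = (18 << 16) | 25 | (1 << 24) | (1 << 8) = 0x01120119 and
 * fwater_hi = 2 | (1 << 8) = 0x102.
 */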
2399 I915_WRITE(FW_BLC, fwater_lo);
2400 I915_WRITE(FW_BLC2, fwater_hi);
2403 intel_set_memory_cxsr(dev_priv, true);
2406 static void i845_update_wm(struct intel_crtc *unused_crtc)
2408 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2409 struct intel_crtc *crtc;
2410 const struct drm_display_mode *adjusted_mode;
2414 crtc = single_enabled_crtc(dev_priv);
2418 adjusted_mode = &crtc->config->base.adjusted_mode;
2419 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2421 dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2422 4, pessimal_latency_ns);
2423 fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2424 fwater_lo |= (3 << 8) | planea_wm;
2426 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2428 I915_WRITE(FW_BLC, fwater_lo);
2431 /* latency must be in 0.1us units. */
2432 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2434 unsigned int latency)
2438 ret = intel_wm_method1(pixel_rate, cpp, latency);
2439 ret = DIV_ROUND_UP(ret, 64) + 2;
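/*
 * Sketch of the arithmetic, assuming intel_wm_method1() returns the bytes
 * fetched during the latency window, i.e. roughly
 * pixel_rate[kHz] * cpp * latency[0.1us] / 10000: with a 148500 kHz pixel
 * clock, cpp = 4 and a 0.7us (latency = 7) level that is ~416 bytes, so
 * the result above becomes DIV_ROUND_UP(416, 64) + 2 = 9 cachelines.
 */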
2444 /* latency must be in 0.1us units. */
2445 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2446 unsigned int htotal,
2449 unsigned int latency)
2453 ret = intel_wm_method2(pixel_rate, htotal,
2454 width, cpp, latency);
2455 ret = DIV_ROUND_UP(ret, 64) + 2;
2460 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2464 * Neither of these should be possible since this function shouldn't be
2465 * called if the CRTC is off or the plane is invisible. But let's be
2466 * extra paranoid to avoid a potential divide-by-zero if we screw up
2467 * elsewhere in the driver.
2471 if (WARN_ON(!horiz_pixels))
2474 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
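/*
 * Worked example with hypothetical values: pri_val = 9 cachelines,
 * horiz_pixels = 1920 and cpp = 4 gives
 * DIV_ROUND_UP(9 * 64, 1920 * 4) + 2 = 1 + 2 = 3 for the FBC watermark.
 */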
2477 struct ilk_wm_maximums {
2485 * For both WM_PIPE and WM_LP.
2486 * mem_value must be in 0.1us units.
2488 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2489 const struct intel_plane_state *pstate,
2493 uint32_t method1, method2;
2499 if (!intel_wm_plane_visible(cstate, pstate))
2502 cpp = pstate->base.fb->format->cpp[0];
2504 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2509 method2 = ilk_wm_method2(cstate->pixel_rate,
2510 cstate->base.adjusted_mode.crtc_htotal,
2511 drm_rect_width(&pstate->base.dst),
2514 return min(method1, method2);
2518 * For both WM_PIPE and WM_LP.
2519 * mem_value must be in 0.1us units.
2521 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2522 const struct intel_plane_state *pstate,
2525 uint32_t method1, method2;
2531 if (!intel_wm_plane_visible(cstate, pstate))
2534 cpp = pstate->base.fb->format->cpp[0];
2536 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2537 method2 = ilk_wm_method2(cstate->pixel_rate,
2538 cstate->base.adjusted_mode.crtc_htotal,
2539 drm_rect_width(&pstate->base.dst),
2541 return min(method1, method2);
2545 * For both WM_PIPE and WM_LP.
2546 * mem_value must be in 0.1us units.
2548 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2549 const struct intel_plane_state *pstate,
2557 if (!intel_wm_plane_visible(cstate, pstate))
2560 cpp = pstate->base.fb->format->cpp[0];
2562 return ilk_wm_method2(cstate->pixel_rate,
2563 cstate->base.adjusted_mode.crtc_htotal,
2564 pstate->base.crtc_w, cpp, mem_value);
2567 /* Only for WM_LP. */
2568 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2569 const struct intel_plane_state *pstate,
2574 if (!intel_wm_plane_visible(cstate, pstate))
2577 cpp = pstate->base.fb->format->cpp[0];
2579 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2583 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2585 if (INTEL_GEN(dev_priv) >= 8)
2587 else if (INTEL_GEN(dev_priv) >= 7)
2594 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2595 int level, bool is_sprite)
2597 if (INTEL_GEN(dev_priv) >= 8)
2598 /* BDW primary/sprite plane watermarks */
2599 return level == 0 ? 255 : 2047;
2600 else if (INTEL_GEN(dev_priv) >= 7)
2601 /* IVB/HSW primary/sprite plane watermarks */
2602 return level == 0 ? 127 : 1023;
2603 else if (!is_sprite)
2604 /* ILK/SNB primary plane watermarks */
2605 return level == 0 ? 127 : 511;
2607 /* ILK/SNB sprite plane watermarks */
2608 return level == 0 ? 63 : 255;
2612 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2614 if (INTEL_GEN(dev_priv) >= 7)
2615 return level == 0 ? 63 : 255;
2617 return level == 0 ? 31 : 63;
2620 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2622 if (INTEL_GEN(dev_priv) >= 8)
2628 /* Calculate the maximum primary/sprite plane watermark */
2629 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
2631 const struct intel_wm_config *config,
2632 enum intel_ddb_partitioning ddb_partitioning,
2635 struct drm_i915_private *dev_priv = to_i915(dev);
2636 unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2638 /* if sprites aren't enabled, sprites get nothing */
2639 if (is_sprite && !config->sprites_enabled)
2642 /* HSW allows LP1+ watermarks even with multiple pipes */
2643 if (level == 0 || config->num_pipes_active > 1) {
2644 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2647 * For some reason the non self refresh
2648 * FIFO size is only half of the self
2649 * refresh FIFO size on ILK/SNB.
2651 if (INTEL_GEN(dev_priv) <= 6)
2655 if (config->sprites_enabled) {
2656 /* level 0 is always calculated with 1:1 split */
2657 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2666 /* clamp to max that the registers can hold */
2667 return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2670 /* Calculate the maximum cursor plane watermark */
2671 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
2673 const struct intel_wm_config *config)
2675 /* HSW LP1+ watermarks w/ multiple pipes */
2676 if (level > 0 && config->num_pipes_active > 1)
2679 /* otherwise just report max that registers can hold */
2680 return ilk_cursor_wm_reg_max(to_i915(dev), level);
2683 static void ilk_compute_wm_maximums(const struct drm_device *dev,
2685 const struct intel_wm_config *config,
2686 enum intel_ddb_partitioning ddb_partitioning,
2687 struct ilk_wm_maximums *max)
2689 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
2690 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
2691 max->cur = ilk_cursor_wm_max(dev, level, config);
2692 max->fbc = ilk_fbc_wm_reg_max(to_i915(dev));
2695 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2697 struct ilk_wm_maximums *max)
2699 max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2700 max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2701 max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2702 max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2705 static bool ilk_validate_wm_level(int level,
2706 const struct ilk_wm_maximums *max,
2707 struct intel_wm_level *result)
2711 /* already determined to be invalid? */
2712 if (!result->enable)
2715 result->enable = result->pri_val <= max->pri &&
2716 result->spr_val <= max->spr &&
2717 result->cur_val <= max->cur;
2719 ret = result->enable;
2722 * HACK until we can pre-compute everything,
2723 * and thus fail gracefully if LP0 watermarks are invalid.
2726 if (level == 0 && !result->enable) {
2727 if (result->pri_val > max->pri)
2728 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2729 level, result->pri_val, max->pri);
2730 if (result->spr_val > max->spr)
2731 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2732 level, result->spr_val, max->spr);
2733 if (result->cur_val > max->cur)
2734 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2735 level, result->cur_val, max->cur);
2737 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2738 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2739 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2740 result->enable = true;
2746 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2747 const struct intel_crtc *intel_crtc,
2749 struct intel_crtc_state *cstate,
2750 const struct intel_plane_state *pristate,
2751 const struct intel_plane_state *sprstate,
2752 const struct intel_plane_state *curstate,
2753 struct intel_wm_level *result)
2755 uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2756 uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2757 uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2759 /* WM1+ latency values stored in 0.5us units */
2767 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2768 pri_latency, level);
2769 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2773 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2776 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2778 result->enable = true;
2782 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2784 const struct intel_atomic_state *intel_state =
2785 to_intel_atomic_state(cstate->base.state);
2786 const struct drm_display_mode *adjusted_mode =
2787 &cstate->base.adjusted_mode;
2788 u32 linetime, ips_linetime;
2790 if (!cstate->base.active)
2792 if (WARN_ON(adjusted_mode->crtc_clock == 0))
2794 if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2797 /* The linetime WM is computed from how long it takes to fill a single
2798 * row at the given clock rate, multiplied by 8.
2800 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2801 adjusted_mode->crtc_clock);
2802 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2803 intel_state->cdclk.logical.cdclk);
2805 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2806 PIPE_WM_LINETIME_TIME(linetime);
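/*
 * Worked example (illustrative timing): with crtc_htotal = 2200 and a
 * 148500 kHz crtc_clock, linetime = DIV_ROUND_CLOSEST(2200 * 1000 * 8,
 * 148500) = 119, i.e. the ~14.8us line time expressed in 0.125us units;
 * ips_linetime uses the logical cdclk instead of the pixel clock.
 */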
2809 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2812 if (INTEL_GEN(dev_priv) >= 9) {
2815 int level, max_level = ilk_wm_max_level(dev_priv);
2817 /* read the first set of memory latencies[0:3] */
2818 val = 0; /* data0 to be programmed to 0 for first set */
2819 mutex_lock(&dev_priv->pcu_lock);
2820 ret = sandybridge_pcode_read(dev_priv,
2821 GEN9_PCODE_READ_MEM_LATENCY,
2823 mutex_unlock(&dev_priv->pcu_lock);
2826 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2830 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2831 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2832 GEN9_MEM_LATENCY_LEVEL_MASK;
2833 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2834 GEN9_MEM_LATENCY_LEVEL_MASK;
2835 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2836 GEN9_MEM_LATENCY_LEVEL_MASK;
2838 /* read the second set of memory latencies[4:7] */
2839 val = 1; /* data0 to be programmed to 1 for second set */
2840 mutex_lock(&dev_priv->pcu_lock);
2841 ret = sandybridge_pcode_read(dev_priv,
2842 GEN9_PCODE_READ_MEM_LATENCY,
2844 mutex_unlock(&dev_priv->pcu_lock);
2846 DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2850 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2851 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2852 GEN9_MEM_LATENCY_LEVEL_MASK;
2853 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2854 GEN9_MEM_LATENCY_LEVEL_MASK;
2855 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2856 GEN9_MEM_LATENCY_LEVEL_MASK;
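/*
 * Decoding example (hypothetical pcode reply, assuming 8-bit level fields
 * at shifts 0/8/16/24): val = 0x0d0a0704 would yield wm[4] = 4, wm[5] = 7,
 * wm[6] = 10 and wm[7] = 13 microseconds.
 */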
2859 * If a level n (n > 1) has a 0us latency, all levels m (m >= n)
2860 * need to be disabled. We make sure to sanitize the values out
2861 * of the punit to satisfy this requirement.
2863 for (level = 1; level <= max_level; level++) {
2864 if (wm[level] == 0) {
2865 for (i = level + 1; i <= max_level; i++)
2872 * WaWmMemoryReadLatency:skl+,glk
2874 * punit doesn't take into account the read latency so we need
2875 * to add 2us to the various latency levels we retrieve from the
2876 * punit when level 0 response data is 0us.
2880 for (level = 1; level <= max_level; level++) {
2888 * WA Level-0 adjustment for 16GB DIMMs: SKL+
2889 * If we could not get the DIMM info, assume 16GB DIMMs (the
2890 * conservative choice) and apply this WA to prevent any
2891 * underruns.
2893 if (dev_priv->dram_info.is_16gb_dimm)
2896 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2897 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2899 wm[0] = (sskpd >> 56) & 0xFF;
2901 wm[0] = sskpd & 0xF;
2902 wm[1] = (sskpd >> 4) & 0xFF;
2903 wm[2] = (sskpd >> 12) & 0xFF;
2904 wm[3] = (sskpd >> 20) & 0x1FF;
2905 wm[4] = (sskpd >> 32) & 0x1FF;
2906 } else if (INTEL_GEN(dev_priv) >= 6) {
2907 uint32_t sskpd = I915_READ(MCH_SSKPD);
2909 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2910 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2911 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2912 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2913 } else if (INTEL_GEN(dev_priv) >= 5) {
2914 uint32_t mltr = I915_READ(MLTR_ILK);
2916 /* ILK primary LP0 latency is 700 ns */
2918 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2919 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2921 MISSING_CASE(INTEL_DEVID(dev_priv));
2925 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2928 /* ILK sprite LP0 latency is 1300 ns */
2929 if (IS_GEN5(dev_priv))
2933 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2936 /* ILK cursor LP0 latency is 1300 ns */
2937 if (IS_GEN5(dev_priv))
2941 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2943 /* how many WM levels are we expecting */
2944 if (INTEL_GEN(dev_priv) >= 9)
2946 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2948 else if (INTEL_GEN(dev_priv) >= 6)
2954 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2956 const uint16_t wm[8])
2958 int level, max_level = ilk_wm_max_level(dev_priv);
2960 for (level = 0; level <= max_level; level++) {
2961 unsigned int latency = wm[level];
2964 DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2970 * - latencies are in us on gen9.
2971 * - before then, WM1+ latency values are in 0.5us units
2973 if (INTEL_GEN(dev_priv) >= 9)
2978 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2979 name, level, wm[level],
2980 latency / 10, latency % 10);
2984 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2985 uint16_t wm[5], uint16_t min)
2987 int level, max_level = ilk_wm_max_level(dev_priv);
2992 wm[0] = max(wm[0], min);
2993 for (level = 1; level <= max_level; level++)
2994 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
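/*
 * Example with the value used by snb_wm_latency_quirk() below: min = 12
 * bumps WM0 to at least 12 (1.2us, 0.1us units) and WM1+ to at least
 * DIV_ROUND_UP(12, 5) = 3 (1.5us, since those levels are stored in
 * 0.5us units).
 */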
2999 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
3004 * The BIOS provided WM memory latency values are often
3005 * inadequate for high resolution displays. Adjust them.
3007 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3008 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3009 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3014 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
3015 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3016 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3017 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3020 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3023 * On some SNB machines (Thinkpad X220 Tablet at least)
3024 * LP3 usage can cause vblank interrupts to be lost.
3025 * The DEIIR bit will go high but it looks like the CPU
3026 * never gets interrupted.
3028 * It's not clear whether other interrupt sources could
3029 * be affected or if this is somehow limited to vblank
3030 * interrupts only. To play it safe we disable LP3
3031 * watermarks entirely.
3033 if (dev_priv->wm.pri_latency[3] == 0 &&
3034 dev_priv->wm.spr_latency[3] == 0 &&
3035 dev_priv->wm.cur_latency[3] == 0)
3038 dev_priv->wm.pri_latency[3] = 0;
3039 dev_priv->wm.spr_latency[3] = 0;
3040 dev_priv->wm.cur_latency[3] = 0;
3042 DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3043 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3044 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3045 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3048 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3050 intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3052 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3053 sizeof(dev_priv->wm.pri_latency));
3054 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3055 sizeof(dev_priv->wm.pri_latency));
3057 intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3058 intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3060 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3061 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3062 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3064 if (IS_GEN6(dev_priv)) {
3065 snb_wm_latency_quirk(dev_priv);
3066 snb_wm_lp3_irq_quirk(dev_priv);
3070 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3072 intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3073 intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3076 static bool ilk_validate_pipe_wm(struct drm_device *dev,
3077 struct intel_pipe_wm *pipe_wm)
3079 /* LP0 watermark maximums depend on this pipe alone */
3080 const struct intel_wm_config config = {
3081 .num_pipes_active = 1,
3082 .sprites_enabled = pipe_wm->sprites_enabled,
3083 .sprites_scaled = pipe_wm->sprites_scaled,
3085 struct ilk_wm_maximums max;
3087 /* LP0 watermarks always use 1/2 DDB partitioning */
3088 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
3090 /* At least LP0 must be valid */
3091 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3092 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3099 /* Compute new watermarks for the pipe */
3100 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
3102 struct drm_atomic_state *state = cstate->base.state;
3103 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3104 struct intel_pipe_wm *pipe_wm;
3105 struct drm_device *dev = state->dev;
3106 const struct drm_i915_private *dev_priv = to_i915(dev);
3107 struct drm_plane *plane;
3108 const struct drm_plane_state *plane_state;
3109 const struct intel_plane_state *pristate = NULL;
3110 const struct intel_plane_state *sprstate = NULL;
3111 const struct intel_plane_state *curstate = NULL;
3112 int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3113 struct ilk_wm_maximums max;
3115 pipe_wm = &cstate->wm.ilk.optimal;
3117 drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3118 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3120 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3122 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3124 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3128 pipe_wm->pipe_enabled = cstate->base.active;
3130 pipe_wm->sprites_enabled = sprstate->base.visible;
3131 pipe_wm->sprites_scaled = sprstate->base.visible &&
3132 (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3133 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3136 usable_level = max_level;
3138 /* ILK/SNB: LP2+ watermarks only w/o sprites */
3139 if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3142 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3143 if (pipe_wm->sprites_scaled)
3146 memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3147 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3148 pristate, sprstate, curstate, &pipe_wm->wm[0]);
3150 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3151 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
3153 if (!ilk_validate_pipe_wm(dev, pipe_wm))
3156 ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3158 for (level = 1; level <= usable_level; level++) {
3159 struct intel_wm_level *wm = &pipe_wm->wm[level];
3161 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
3162 pristate, sprstate, curstate, wm);
3165 * Disable any watermark level that exceeds the
3166 * register maximums since such watermarks are
3169 if (!ilk_validate_wm_level(level, &max, wm)) {
3170 memset(wm, 0, sizeof(*wm));
3179 * Build a set of 'intermediate' watermark values that satisfy both the old
3180 * state and the new state. These can be programmed to the hardware immediately.
3183 static int ilk_compute_intermediate_wm(struct drm_device *dev,
3184 struct intel_crtc *intel_crtc,
3185 struct intel_crtc_state *newstate)
3187 struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3188 struct intel_atomic_state *intel_state =
3189 to_intel_atomic_state(newstate->base.state);
3190 const struct intel_crtc_state *oldstate =
3191 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3192 const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3193 int level, max_level = ilk_wm_max_level(to_i915(dev));
3196 * Start with the final, target watermarks, then combine with the
3197 * currently active watermarks to get values that are safe both before
3198 * and after the vblank.
3200 *a = newstate->wm.ilk.optimal;
3201 if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base))
3204 a->pipe_enabled |= b->pipe_enabled;
3205 a->sprites_enabled |= b->sprites_enabled;
3206 a->sprites_scaled |= b->sprites_scaled;
3208 for (level = 0; level <= max_level; level++) {
3209 struct intel_wm_level *a_wm = &a->wm[level];
3210 const struct intel_wm_level *b_wm = &b->wm[level];
3212 a_wm->enable &= b_wm->enable;
3213 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3214 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3215 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3216 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3220 * We need to make sure that these merged watermark values are
3221 * actually a valid configuration themselves. If they're not,
3222 * there's no safe way to transition from the old state to
3223 * the new state, so we need to fail the atomic transaction.
3225 if (!ilk_validate_pipe_wm(dev, a))
3229 * If our intermediate WM are identical to the final WM, then we can
3230 * omit the post-vblank programming; only update if it's different.
3232 if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3233 newstate->wm.need_postvbl_update = true;
3239 * Merge the watermarks from all active pipes for a specific level.
3241 static void ilk_merge_wm_level(struct drm_device *dev,
3243 struct intel_wm_level *ret_wm)
3245 const struct intel_crtc *intel_crtc;
3247 ret_wm->enable = true;
3249 for_each_intel_crtc(dev, intel_crtc) {
3250 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3251 const struct intel_wm_level *wm = &active->wm[level];
3253 if (!active->pipe_enabled)
3257 * The watermark values may have been used in the past,
3258 * so we must maintain them in the registers for some
3259 * time even if the level is now disabled.
3262 ret_wm->enable = false;
3264 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3265 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3266 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3267 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3272 * Merge all low power watermarks for all active pipes.
3274 static void ilk_wm_merge(struct drm_device *dev,
3275 const struct intel_wm_config *config,
3276 const struct ilk_wm_maximums *max,
3277 struct intel_pipe_wm *merged)
3279 struct drm_i915_private *dev_priv = to_i915(dev);
3280 int level, max_level = ilk_wm_max_level(dev_priv);
3281 int last_enabled_level = max_level;
3283 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3284 if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3285 config->num_pipes_active > 1)
3286 last_enabled_level = 0;
3288 /* ILK: FBC WM must be disabled always */
3289 merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3291 /* merge each WM1+ level */
3292 for (level = 1; level <= max_level; level++) {
3293 struct intel_wm_level *wm = &merged->wm[level];
3295 ilk_merge_wm_level(dev, level, wm);
3297 if (level > last_enabled_level)
3299 else if (!ilk_validate_wm_level(level, max, wm))
3300 /* make sure all following levels get disabled */
3301 last_enabled_level = level - 1;
3304 * The spec says it is preferred to disable
3305 * FBC WMs instead of disabling a WM level.
3307 if (wm->fbc_val > max->fbc) {
3309 merged->fbc_wm_enabled = false;
3314 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3316 * FIXME this is racy. FBC might get enabled later.
3317 * What we should check here is whether FBC can be
3318 * enabled sometime later.
3320 if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled &&
3321 intel_fbc_is_active(dev_priv)) {
3322 for (level = 2; level <= max_level; level++) {
3323 struct intel_wm_level *wm = &merged->wm[level];
3330 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3332 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3333 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
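/*
 * Concretely: with wm[4] enabled this maps LP1/LP2/LP3 to levels 1/3/4,
 * otherwise to levels 1/2/3, matching the comment above.
 */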
3336 /* The value we need to program into the WM_LPx latency field */
3337 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
3339 struct drm_i915_private *dev_priv = to_i915(dev);
3341 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3344 return dev_priv->wm.pri_latency[level];
3347 static void ilk_compute_wm_results(struct drm_device *dev,
3348 const struct intel_pipe_wm *merged,
3349 enum intel_ddb_partitioning partitioning,
3350 struct ilk_wm_values *results)
3352 struct drm_i915_private *dev_priv = to_i915(dev);
3353 struct intel_crtc *intel_crtc;
3356 results->enable_fbc_wm = merged->fbc_wm_enabled;
3357 results->partitioning = partitioning;
3359 /* LP1+ register values */
3360 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3361 const struct intel_wm_level *r;
3363 level = ilk_wm_lp_to_level(wm_lp, merged);
3365 r = &merged->wm[level];
3368 * Maintain the watermark values even if the level is
3369 * disabled. Doing otherwise could cause underruns.
3371 results->wm_lp[wm_lp - 1] =
3372 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
3373 (r->pri_val << WM1_LP_SR_SHIFT) |
3377 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3379 if (INTEL_GEN(dev_priv) >= 8)
3380 results->wm_lp[wm_lp - 1] |=
3381 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3383 results->wm_lp[wm_lp - 1] |=
3384 r->fbc_val << WM1_LP_FBC_SHIFT;
3387 * Always set WM1S_LP_EN when spr_val != 0, even if the
3388 * level is disabled. Doing otherwise could cause underruns.
3390 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3391 WARN_ON(wm_lp != 1);
3392 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3394 results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3397 /* LP0 register values */
3398 for_each_intel_crtc(dev, intel_crtc) {
3399 enum pipe pipe = intel_crtc->pipe;
3400 const struct intel_wm_level *r =
3401 &intel_crtc->wm.active.ilk.wm[0];
3403 if (WARN_ON(!r->enable))
3406 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3408 results->wm_pipe[pipe] =
3409 (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3410 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3415 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
3416 * case both are at the same level. Prefer r1 in case they're the same. */
3417 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
3418 struct intel_pipe_wm *r1,
3419 struct intel_pipe_wm *r2)
3421 int level, max_level = ilk_wm_max_level(to_i915(dev));
3422 int level1 = 0, level2 = 0;
3424 for (level = 1; level <= max_level; level++) {
3425 if (r1->wm[level].enable)
3427 if (r2->wm[level].enable)
3431 if (level1 == level2) {
3432 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3436 } else if (level1 > level2) {
3443 /* dirty bits used to track which watermarks need changes */
3444 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3445 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3446 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3447 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3448 #define WM_DIRTY_FBC (1 << 24)
3449 #define WM_DIRTY_DDB (1 << 25)
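/*
 * Example of the resulting bitmask layout (derived from the macros above):
 * a WM0 change on pipe B sets bit 1 via WM_DIRTY_PIPE(PIPE_B) and, because
 * LP1+ must be redone as well, bits 16-18 via WM_DIRTY_LP_ALL; FBC and DDB
 * changes use bits 24 and 25.
 */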
3451 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3452 const struct ilk_wm_values *old,
3453 const struct ilk_wm_values *new)
3455 unsigned int dirty = 0;
3459 for_each_pipe(dev_priv, pipe) {
3460 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3461 dirty |= WM_DIRTY_LINETIME(pipe);
3462 /* Must disable LP1+ watermarks too */
3463 dirty |= WM_DIRTY_LP_ALL;
3466 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3467 dirty |= WM_DIRTY_PIPE(pipe);
3468 /* Must disable LP1+ watermarks too */
3469 dirty |= WM_DIRTY_LP_ALL;
3473 if (old->enable_fbc_wm != new->enable_fbc_wm) {
3474 dirty |= WM_DIRTY_FBC;
3475 /* Must disable LP1+ watermarks too */
3476 dirty |= WM_DIRTY_LP_ALL;
3479 if (old->partitioning != new->partitioning) {
3480 dirty |= WM_DIRTY_DDB;
3481 /* Must disable LP1+ watermarks too */
3482 dirty |= WM_DIRTY_LP_ALL;
3485 /* LP1+ watermarks already deemed dirty, no need to continue */
3486 if (dirty & WM_DIRTY_LP_ALL)
3489 /* Find the lowest numbered LP1+ watermark in need of an update... */
3490 for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3491 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3492 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3496 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3497 for (; wm_lp <= 3; wm_lp++)
3498 dirty |= WM_DIRTY_LP(wm_lp);
3503 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3506 struct ilk_wm_values *previous = &dev_priv->wm.hw;
3507 bool changed = false;
3509 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3510 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3511 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3514 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3515 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3516 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3519 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3520 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3521 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3526 * Don't touch WM1S_LP_EN here.
3527 * Doing so could cause underruns.
3534 * The spec says we shouldn't write when we don't need, because every write
3535 * causes WMs to be re-evaluated, expending some power.
3537 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3538 struct ilk_wm_values *results)
3540 struct ilk_wm_values *previous = &dev_priv->wm.hw;
3544 dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3548 _ilk_disable_lp_wm(dev_priv, dirty);
3550 if (dirty & WM_DIRTY_PIPE(PIPE_A))
3551 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3552 if (dirty & WM_DIRTY_PIPE(PIPE_B))
3553 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3554 if (dirty & WM_DIRTY_PIPE(PIPE_C))
3555 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3557 if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3558 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3559 if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3560 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3561 if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3562 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3564 if (dirty & WM_DIRTY_DDB) {
3565 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3566 val = I915_READ(WM_MISC);
3567 if (results->partitioning == INTEL_DDB_PART_1_2)
3568 val &= ~WM_MISC_DATA_PARTITION_5_6;
3570 val |= WM_MISC_DATA_PARTITION_5_6;
3571 I915_WRITE(WM_MISC, val);
3573 val = I915_READ(DISP_ARB_CTL2);
3574 if (results->partitioning == INTEL_DDB_PART_1_2)
3575 val &= ~DISP_DATA_PARTITION_5_6;
3577 val |= DISP_DATA_PARTITION_5_6;
3578 I915_WRITE(DISP_ARB_CTL2, val);
3582 if (dirty & WM_DIRTY_FBC) {
3583 val = I915_READ(DISP_ARB_CTL);
3584 if (results->enable_fbc_wm)
3585 val &= ~DISP_FBC_WM_DIS;
3587 val |= DISP_FBC_WM_DIS;
3588 I915_WRITE(DISP_ARB_CTL, val);
3591 if (dirty & WM_DIRTY_LP(1) &&
3592 previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3593 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3595 if (INTEL_GEN(dev_priv) >= 7) {
3596 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3597 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3598 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3599 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3602 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3603 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3604 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3605 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3606 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3607 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3609 dev_priv->wm.hw = *results;
3612 bool ilk_disable_lp_wm(struct drm_device *dev)
3614 struct drm_i915_private *dev_priv = to_i915(dev);
3616 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3619 static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3623 /* Slice 1 will always be enabled */
3626 /* Gens prior to GEN11 have only one DBuf slice */
3627 if (INTEL_GEN(dev_priv) < 11)
3628 return enabled_slices;
3630 if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
3633 return enabled_slices;
3637 * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3638 * so assume we'll always need it in order to avoid underruns.
3640 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
3642 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
3644 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
3651 intel_has_sagv(struct drm_i915_private *dev_priv)
3653 if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
3654 IS_CANNONLAKE(dev_priv))
3657 if (IS_SKYLAKE(dev_priv) &&
3658 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED)
3665 * SAGV dynamically adjusts the system agent voltage and clock frequencies
3666 * depending on power and performance requirements. The display engine access
3667 * to system memory is blocked during the adjustment time. Because of the
3668 * blocking time, having this enabled can cause full system hangs and/or pipe
3669 * underruns if we don't meet all of the following requirements:
3671 * - <= 1 pipe enabled
3672 * - All planes can enable watermarks for latencies >= SAGV engine block time
3673 * - We're not using an interlaced display configuration
3676 intel_enable_sagv(struct drm_i915_private *dev_priv)
3680 if (!intel_has_sagv(dev_priv))
3683 if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3686 DRM_DEBUG_KMS("Enabling the SAGV\n");
3687 mutex_lock(&dev_priv->pcu_lock);
3689 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3692 /* We don't need to wait for the SAGV when enabling */
3693 mutex_unlock(&dev_priv->pcu_lock);
3696 * Some skl systems, pre-release machines in particular,
3697 * don't actually have an SAGV.
3699 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3700 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3701 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3703 } else if (ret < 0) {
3704 DRM_ERROR("Failed to enable the SAGV\n");
3708 dev_priv->sagv_status = I915_SAGV_ENABLED;
3713 intel_disable_sagv(struct drm_i915_private *dev_priv)
3717 if (!intel_has_sagv(dev_priv))
3720 if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3723 DRM_DEBUG_KMS("Disabling the SAGV\n");
3724 mutex_lock(&dev_priv->pcu_lock);
3726 /* bspec says to keep retrying for at least 1 ms */
3727 ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3729 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3731 mutex_unlock(&dev_priv->pcu_lock);
3734 * Some skl systems, pre-release machines in particular,
3735 * don't actually have an SAGV.
3737 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3738 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3739 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3741 } else if (ret < 0) {
3742 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret);
3746 dev_priv->sagv_status = I915_SAGV_DISABLED;
3750 bool intel_can_enable_sagv(struct drm_atomic_state *state)
3752 struct drm_device *dev = state->dev;
3753 struct drm_i915_private *dev_priv = to_i915(dev);
3754 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3755 struct intel_crtc *crtc;
3756 struct intel_plane *plane;
3757 struct intel_crtc_state *cstate;
3760 int sagv_block_time_us;
3762 if (!intel_has_sagv(dev_priv))
3765 if (IS_GEN9(dev_priv))
3766 sagv_block_time_us = 30;
3767 else if (IS_GEN10(dev_priv))
3768 sagv_block_time_us = 20;
3770 sagv_block_time_us = 10;
3773 * SKL+ workaround: bspec recommends we disable the SAGV when we have
3774 * more than one pipe enabled
3776 * If there are no active CRTCs, no additional checks need be performed
3778 if (hweight32(intel_state->active_crtcs) == 0)
3780 else if (hweight32(intel_state->active_crtcs) > 1)
3783 /* Since we're now guaranteed to only have one active CRTC... */
3784 pipe = ffs(intel_state->active_crtcs) - 1;
3785 crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3786 cstate = to_intel_crtc_state(crtc->base.state);
3788 if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3791 for_each_intel_plane_on_crtc(dev, crtc, plane) {
3792 struct skl_plane_wm *wm =
3793 &cstate->wm.skl.optimal.planes[plane->id];
3795 /* Skip this plane if it's not enabled */
3796 if (!wm->wm[0].plane_en)
3799 /* Find the highest enabled wm level for this plane */
3800 for (level = ilk_wm_max_level(dev_priv);
3801 !wm->wm[level].plane_en; --level)
3804 latency = dev_priv->wm.skl_latency[level];
3806 if (skl_needs_memory_bw_wa(intel_state) &&
3807 plane->base.state->fb->modifier ==
3808 I915_FORMAT_MOD_X_TILED)
3812 * If any of the planes on this pipe don't enable wm levels that
3813 * cover memory latencies at least as high as sagv_block_time_us,
3814 * we can't enable the SAGV.
3816 if (latency < sagv_block_time_us)
3823 static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
3824 const struct intel_crtc_state *cstate,
3825 const unsigned int total_data_rate,
3826 const int num_active,
3827 struct skl_ddb_allocation *ddb)
3829 const struct drm_display_mode *adjusted_mode;
3831 u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3833 WARN_ON(ddb_size == 0);
3835 if (INTEL_GEN(dev_priv) < 11)
3836 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3838 adjusted_mode = &cstate->base.adjusted_mode;
3839 total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode);
3842 * 12GB/s is the maximum BW supported by a single DBuf slice.
3844 if (total_data_bw >= GBps(12) || num_active > 1) {
3845 ddb->enabled_slices = 2;
3847 ddb->enabled_slices = 1;
3855 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
3856 const struct intel_crtc_state *cstate,
3857 const unsigned int total_data_rate,
3858 struct skl_ddb_allocation *ddb,
3859 struct skl_ddb_entry *alloc, /* out */
3860 int *num_active /* out */)
3862 struct drm_atomic_state *state = cstate->base.state;
3863 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3864 struct drm_i915_private *dev_priv = to_i915(dev);
3865 struct drm_crtc *for_crtc = cstate->base.crtc;
3866 const struct drm_crtc_state *crtc_state;
3867 const struct drm_crtc *crtc;
3868 u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
3869 enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3873 if (WARN_ON(!state) || !cstate->base.active) {
3876 *num_active = hweight32(dev_priv->active_crtcs);
3880 if (intel_state->active_pipe_changes)
3881 *num_active = hweight32(intel_state->active_crtcs);
3883 *num_active = hweight32(dev_priv->active_crtcs);
3885 ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
3889 * If the state doesn't change the active CRTCs or there is no
3890 * modeset request, then there's no need to recalculate;
3891 * the existing pipe allocation limits should remain unchanged.
3892 * Note that we're safe from racing commits since any racing commit
3893 * that changes the active CRTC list or does a modeset would need to
3894 * grab _all_ crtc locks, including the one we currently hold.
3896 if (!intel_state->active_pipe_changes && !intel_state->modeset) {
3898 * alloc may be cleared by clear_intel_crtc_state,
3899 * copy from old state to be sure
3901 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3906 * Watermark/DDB requirements depend heavily on the width of the
3907 * framebuffer, so instead of allocating the DDB equally among pipes,
3908 * distribute it based on the resolution/width of each display.
3910 for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
3911 const struct drm_display_mode *adjusted_mode;
3912 int hdisplay, vdisplay;
3915 if (!crtc_state->enable)
3918 pipe = to_intel_crtc(crtc)->pipe;
3919 adjusted_mode = &crtc_state->adjusted_mode;
3920 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3921 total_width += hdisplay;
3923 if (pipe < for_pipe)
3924 width_before_pipe += hdisplay;
3925 else if (pipe == for_pipe)
3926 pipe_width = hdisplay;
3929 alloc->start = ddb_size * width_before_pipe / total_width;
3930 alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
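/*
 * Worked example (hypothetical geometry): with ddb_size = 892 blocks and
 * two pipes of width 1920 and 3840, where our pipe is the 3840-wide one,
 * total_width = 5760 and width_before_pipe = 1920, so
 * alloc->start = 892 * 1920 / 5760 = 297 and
 * alloc->end = 892 * 5760 / 5760 = 892.
 */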
3933 static unsigned int skl_cursor_allocation(int num_active)
3935 if (num_active == 1)
3941 static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3942 struct skl_ddb_entry *entry, u32 reg)
3946 if (INTEL_GEN(dev_priv) >= 11)
3947 mask = ICL_DDB_ENTRY_MASK;
3949 mask = SKL_DDB_ENTRY_MASK;
3950 entry->start = reg & mask;
3951 entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask;
3958 skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3959 const enum pipe pipe,
3960 const enum plane_id plane_id,
3961 struct skl_ddb_allocation *ddb /* out */)
3964 int fourcc, pixel_format;
3966 /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3967 if (plane_id == PLANE_CURSOR) {
3968 val = I915_READ(CUR_BUF_CFG(pipe));
3969 skl_ddb_entry_init_from_hw(dev_priv,
3970 &ddb->plane[pipe][plane_id], val);
3974 val = I915_READ(PLANE_CTL(pipe, plane_id));
3976 /* No DDB allocated for disabled planes */
3977 if (!(val & PLANE_CTL_ENABLE))
3980 pixel_format = val & PLANE_CTL_FORMAT_MASK;
3981 fourcc = skl_format_to_fourcc(pixel_format,
3982 val & PLANE_CTL_ORDER_RGBX,
3983 val & PLANE_CTL_ALPHA_MASK);
3985 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3987 * FIXME: add proper NV12 support for ICL. Avoid reading unclaimed
3988 * registers for now.
3990 if (INTEL_GEN(dev_priv) < 11)
3991 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
3993 if (fourcc == DRM_FORMAT_NV12) {
3994 skl_ddb_entry_init_from_hw(dev_priv,
3995 &ddb->plane[pipe][plane_id], val2);
3996 skl_ddb_entry_init_from_hw(dev_priv,
3997 &ddb->uv_plane[pipe][plane_id], val);
3999 skl_ddb_entry_init_from_hw(dev_priv,
4000 &ddb->plane[pipe][plane_id], val);
4004 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
4005 struct skl_ddb_allocation *ddb /* out */)
4007 struct intel_crtc *crtc;
4009 memset(ddb, 0, sizeof(*ddb));
4011 ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
4013 for_each_intel_crtc(&dev_priv->drm, crtc) {
4014 enum intel_display_power_domain power_domain;
4015 enum plane_id plane_id;
4016 enum pipe pipe = crtc->pipe;
4018 power_domain = POWER_DOMAIN_PIPE(pipe);
4019 if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
4022 for_each_plane_id_on_crtc(crtc, plane_id)
4023 skl_ddb_get_hw_plane_state(dev_priv, pipe,
4026 intel_display_power_put(dev_priv, power_domain);
4031 * Determines the downscale amount of a plane for the purposes of watermark calculations.
4032 * The bspec defines downscale amount as:
4035 * Horizontal down scale amount = maximum[1, Horizontal source size /
4036 * Horizontal destination size]
4037 * Vertical down scale amount = maximum[1, Vertical source size /
4038 * Vertical destination size]
4039 * Total down scale amount = Horizontal down scale amount *
4040 * Vertical down scale amount
4043 * Return value is provided in 16.16 fixed point form to retain fractional part.
4044 * Caller should take care of dividing & rounding off the value.
4046 static uint_fixed_16_16_t
4047 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4048 const struct intel_plane_state *pstate)
4050 struct intel_plane *plane = to_intel_plane(pstate->base.plane);
4051 uint32_t src_w, src_h, dst_w, dst_h;
4052 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4053 uint_fixed_16_16_t downscale_h, downscale_w;
4055 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4056 return u32_to_fixed16(0);
4058 /* n.b., src is 16.16 fixed point, dst is whole integer */
4059 if (plane->id == PLANE_CURSOR) {
4061 * Cursors only support 0/180 degree rotation,
4062 * hence no need to account for rotation here.
4064 src_w = pstate->base.src_w >> 16;
4065 src_h = pstate->base.src_h >> 16;
4066 dst_w = pstate->base.crtc_w;
4067 dst_h = pstate->base.crtc_h;
4070 * Src coordinates are already rotated by 270 degrees for
4071 * the 90/270 degree plane rotation cases (to match the
4072 * GTT mapping), hence no need to account for rotation here.
4074 src_w = drm_rect_width(&pstate->base.src) >> 16;
4075 src_h = drm_rect_height(&pstate->base.src) >> 16;
4076 dst_w = drm_rect_width(&pstate->base.dst);
4077 dst_h = drm_rect_height(&pstate->base.dst);
4080 fp_w_ratio = div_fixed16(src_w, dst_w);
4081 fp_h_ratio = div_fixed16(src_h, dst_h);
4082 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4083 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4085 return mul_fixed16(downscale_w, downscale_h);
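/*
 * Worked example (illustrative only, values assumed): a 3840x2160 source
 * scaled down to a 1920x1080 destination gives
 *
 *   fp_w_ratio = 3840/1920 = 2.0, fp_h_ratio = 2160/1080 = 2.0
 *   downscale  = 2.0 * 2.0 = 4.0 -> 4 << 16 = 0x40000 in 16.16 fixed point
 *
 * An upscaled plane clamps both ratios to 1.0, so the result is never
 * below u32_to_fixed16(1).
 */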
4088 static uint_fixed_16_16_t
4089 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4091 uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
4093 if (!crtc_state->base.enable)
4094 return pipe_downscale;
4096 if (crtc_state->pch_pfit.enabled) {
4097 uint32_t src_w, src_h, dst_w, dst_h;
4098 uint32_t pfit_size = crtc_state->pch_pfit.size;
4099 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4100 uint_fixed_16_16_t downscale_h, downscale_w;
4102 src_w = crtc_state->pipe_src_w;
4103 src_h = crtc_state->pipe_src_h;
4104 dst_w = pfit_size >> 16;
4105 dst_h = pfit_size & 0xffff;
4107 if (!dst_w || !dst_h)
4108 return pipe_downscale;
4110 fp_w_ratio = div_fixed16(src_w, dst_w);
4111 fp_h_ratio = div_fixed16(src_h, dst_h);
4112 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4113 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4115 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4118 return pipe_downscale;
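/*
 * Worked example (illustrative only, values assumed): with the panel
 * fitter enabled, pch_pfit.size packs the destination as
 * (dst_w << 16) | dst_h, e.g. 1920x1080 -> 0x07800438.  A 3840x2160
 * source then yields a pipe downscale of (3840/1920) * (2160/1080) = 4.0
 * in 16.16 fixed point.
 */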
4121 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4122 struct intel_crtc_state *cstate)
4124 struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
4125 struct drm_crtc_state *crtc_state = &cstate->base;
4126 struct drm_atomic_state *state = crtc_state->state;
4127 struct drm_plane *plane;
4128 const struct drm_plane_state *pstate;
4129 struct intel_plane_state *intel_pstate;
4130 int crtc_clock, dotclk;
4131 uint32_t pipe_max_pixel_rate;
4132 uint_fixed_16_16_t pipe_downscale;
4133 uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
4135 if (!cstate->base.enable)
4138 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4139 uint_fixed_16_16_t plane_downscale;
4140 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
4143 if (!intel_wm_plane_visible(cstate,
4144 to_intel_plane_state(pstate)))
4147 if (WARN_ON(!pstate->fb))
4150 intel_pstate = to_intel_plane_state(pstate);
4151 plane_downscale = skl_plane_downscale_amount(cstate,
4153 bpp = pstate->fb->format->cpp[0] * 8;
4155 plane_downscale = mul_fixed16(plane_downscale,
4158 max_downscale = max_fixed16(plane_downscale, max_downscale);
4160 pipe_downscale = skl_pipe_downscale_amount(cstate);
4162 pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4164 crtc_clock = crtc_state->adjusted_mode.crtc_clock;
4165 dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4167 if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
4170 pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
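	/*
	 * Illustrative check (numbers assumed): with a logical cdclk of
	 * 432000 kHz, doubled to 864000 assuming the GLK/GEN10+ branch
	 * above doubles dotclk, and a total pipe downscale of 2.25
	 * (1.5 x 1.5), the limit is 864000 / 2.25 = 384000 kHz; a mode
	 * with crtc_clock above that is rejected below.
	 */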
4172 if (pipe_max_pixel_rate < crtc_clock) {
4173 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4181 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4182 const struct drm_plane_state *pstate,
4185 struct intel_plane *intel_plane = to_intel_plane(pstate->plane);
4186 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4188 uint32_t width = 0, height = 0;
4189 struct drm_framebuffer *fb;
4191 uint_fixed_16_16_t down_scale_amount;
4193 if (!intel_pstate->base.visible)
4197 format = fb->format->format;
4199 if (intel_plane->id == PLANE_CURSOR)
4201 if (plane == 1 && format != DRM_FORMAT_NV12)
4205 * Src coordinates are already rotated by 270 degrees for
4206 * the 90/270 degree plane rotation cases (to match the
4207 * GTT mapping), hence no need to account for rotation here.
4209 width = drm_rect_width(&intel_pstate->base.src) >> 16;
4210 height = drm_rect_height(&intel_pstate->base.src) >> 16;
4212 /* UV plane does 1/2 pixel sub-sampling */
4213 if (plane == 1 && format == DRM_FORMAT_NV12) {
4218 data_rate = width * height * fb->format->cpp[plane];
4220 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4222 return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4226 * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
4227 * an 8192x4096@32bpp framebuffer:
4228 * 3 * 4096 * 8192 * 4 = 402,653,184 < 2^32 = 4,294,967,296
4231 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4232 unsigned int *plane_data_rate,
4233 unsigned int *uv_plane_data_rate)
4235 struct drm_crtc_state *cstate = &intel_cstate->base;
4236 struct drm_atomic_state *state = cstate->state;
4237 struct drm_plane *plane;
4238 const struct drm_plane_state *pstate;
4239 unsigned int total_data_rate = 0;
4241 if (WARN_ON(!state))
4244 /* Calculate and cache data rate for each plane */
4245 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4246 enum plane_id plane_id = to_intel_plane(plane)->id;
4250 rate = skl_plane_relative_data_rate(intel_cstate,
4252 plane_data_rate[plane_id] = rate;
4254 total_data_rate += rate;
4257 rate = skl_plane_relative_data_rate(intel_cstate,
4259 uv_plane_data_rate[plane_id] = rate;
4261 total_data_rate += rate;
4264 return total_data_rate;
4268 skl_ddb_min_alloc(const struct drm_plane_state *pstate, const int plane)
4270 struct drm_framebuffer *fb = pstate->fb;
4271 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4272 uint32_t src_w, src_h;
4273 uint32_t min_scanlines = 8;
4279 /* For packed formats, the uv-plane slot (plane 1) has no allocation; return 0 */
4280 if (plane == 1 && fb->format->format != DRM_FORMAT_NV12)
4283 /* For non-Y-tiled formats, return 8 blocks */
4284 if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
4285 fb->modifier != I915_FORMAT_MOD_Yf_TILED &&
4286 fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS &&
4287 fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS)
4291 * Src coordinates are already rotated by 270 degrees for
4292 * the 90/270 degree plane rotation cases (to match the
4293 * GTT mapping), hence no need to account for rotation here.
4295 src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
4296 src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
4298 /* Halve UV plane width and height for NV12 */
4304 plane_bpp = fb->format->cpp[plane];
4306 if (drm_rotation_90_or_270(pstate->rotation)) {
4307 switch (plane_bpp) {
4321 WARN(1, "Unsupported pixel depth %u for rotation",
4327 return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
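/*
 * Worked example (illustrative only, values assumed): a non-rotated,
 * Y-tiled, 32bpp plane 1920 pixels wide uses the default
 * min_scanlines = 8:
 *
 *   DIV_ROUND_UP(4 * 1920 * 4, 512) * 8/4 + 3 = 60 * 2 + 3 = 123 blocks
 */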
4331 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
4332 uint16_t *minimum, uint16_t *uv_minimum)
4334 const struct drm_plane_state *pstate;
4335 struct drm_plane *plane;
4337 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
4338 enum plane_id plane_id = to_intel_plane(plane)->id;
4340 if (plane_id == PLANE_CURSOR)
4343 if (!pstate->visible)
4346 minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
4347 uv_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
4350 minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
4354 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4355 struct skl_ddb_allocation *ddb /* out */)
4357 struct drm_atomic_state *state = cstate->base.state;
4358 struct drm_crtc *crtc = cstate->base.crtc;
4359 struct drm_device *dev = crtc->dev;
4360 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4361 enum pipe pipe = intel_crtc->pipe;
4362 struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4363 uint16_t alloc_size, start;
4364 uint16_t minimum[I915_MAX_PLANES] = {};
4365 uint16_t uv_minimum[I915_MAX_PLANES] = {};
4366 unsigned int total_data_rate;
4367 enum plane_id plane_id;
4369 unsigned int plane_data_rate[I915_MAX_PLANES] = {};
4370 unsigned int uv_plane_data_rate[I915_MAX_PLANES] = {};
4371 uint16_t total_min_blocks = 0;
4373 /* Clear the partitioning for disabled planes. */
4374 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
4375 memset(ddb->uv_plane[pipe], 0, sizeof(ddb->uv_plane[pipe]));
4377 if (WARN_ON(!state))
4380 if (!cstate->base.active) {
4381 alloc->start = alloc->end = 0;
4385 total_data_rate = skl_get_total_relative_data_rate(cstate,
4387 uv_plane_data_rate);
4388 skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb,
4389 alloc, &num_active);
4390 alloc_size = skl_ddb_entry_size(alloc);
4391 if (alloc_size == 0)
4394 skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum);
4397 * 1. Allocate the minimum required blocks for each active plane
4398 * and allocate the cursor; it doesn't require extra allocation
4399 * proportional to the data rate.
4402 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4403 total_min_blocks += minimum[plane_id];
4404 total_min_blocks += uv_minimum[plane_id];
4407 if (total_min_blocks > alloc_size) {
4408 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations");
4409 DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks,
4414 alloc_size -= total_min_blocks;
4415 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
4416 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
4419 * 2. Distribute the remaining space in proportion to the amount of
4420 * data each plane needs to fetch from memory.
4422 * FIXME: we may not allocate every single block here.
4424 if (total_data_rate == 0)
4427 start = alloc->start;
4428 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4429 unsigned int data_rate, uv_data_rate;
4430 uint16_t plane_blocks, uv_plane_blocks;
4432 if (plane_id == PLANE_CURSOR)
4435 data_rate = plane_data_rate[plane_id];
4438 * allocation for (packed formats) or (uv-plane part of planar format):
4439 * promote the expression to 64 bits to avoid overflow; the
4440 * result is < the available space since data_rate / total_data_rate < 1
4442 plane_blocks = minimum[plane_id];
4443 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
4446 /* Leave disabled planes at (0,0) */
4448 ddb->plane[pipe][plane_id].start = start;
4449 ddb->plane[pipe][plane_id].end = start + plane_blocks;
4452 start += plane_blocks;
4454 /* Allocate DDB for UV plane for planar format/NV12 */
4455 uv_data_rate = uv_plane_data_rate[plane_id];
4457 uv_plane_blocks = uv_minimum[plane_id];
4458 uv_plane_blocks += div_u64((uint64_t)alloc_size * uv_data_rate,
4462 ddb->uv_plane[pipe][plane_id].start = start;
4463 ddb->uv_plane[pipe][plane_id].end =
4464 start + uv_plane_blocks;
4467 start += uv_plane_blocks;
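/*
 * Worked example of step 2 (illustrative only, values assumed): with
 * 400 blocks left after the minimums and a plane fetching 300 MB/s out
 * of a 400 MB/s pipe total, that plane gets
 *
 *   minimum[plane] + 400 * 300 / 400 = minimum[plane] + 300 blocks
 *
 * and the remaining planes share what is left in the same proportion.
 */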
4474 * The max latency should be 257 (the max the punit can encode is 255 and we add 2us
4475 * for the read latency) and cpp should always be <= 8, so that
4476 * should allow pixel_rate up to ~2 GHz which seems sufficient since max
4477 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4479 static uint_fixed_16_16_t
4480 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
4481 uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
4483 uint32_t wm_intermediate_val;
4484 uint_fixed_16_16_t ret;
4487 return FP_16_16_MAX;
4489 wm_intermediate_val = latency * pixel_rate * cpp;
4490 ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4492 if (INTEL_GEN(dev_priv) >= 10)
4493 ret = add_fixed16_u32(ret, 1);
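/*
 * Worked method1 example (illustrative only, values assumed): a 32bpp
 * plane at a 148500 kHz pixel rate with 15us latency and a 512 byte
 * dbuf block needs roughly
 *
 *   15 * 148500 * 4 / (1000 * 512) = 8910000 / 512000 ~= 17.4 blocks
 */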
4498 static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
4499 uint32_t pipe_htotal,
4501 uint_fixed_16_16_t plane_blocks_per_line)
4503 uint32_t wm_intermediate_val;
4504 uint_fixed_16_16_t ret;
4507 return FP_16_16_MAX;
4509 wm_intermediate_val = latency * pixel_rate;
4510 wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4511 pipe_htotal * 1000);
4512 ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
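/*
 * Worked method2 example (illustrative only, values assumed): with
 * 15us latency, a 148500 kHz pixel rate and htotal = 2200, the latency
 * covers DIV_ROUND_UP(15 * 148500, 2200 * 1000) = 2 lines; at an assumed
 * 16 blocks per line that is 32 blocks.
 */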
4516 static uint_fixed_16_16_t
4517 intel_get_linetime_us(struct intel_crtc_state *cstate)
4519 uint32_t pixel_rate;
4520 uint32_t crtc_htotal;
4521 uint_fixed_16_16_t linetime_us;
4523 if (!cstate->base.active)
4524 return u32_to_fixed16(0);
4526 pixel_rate = cstate->pixel_rate;
4528 if (WARN_ON(pixel_rate == 0))
4529 return u32_to_fixed16(0);
4531 crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
4532 linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
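/*
 * Worked example (illustrative only, values assumed): htotal = 2200 and
 * a 148500 kHz pixel rate give 2200 * 1000 / 148500 ~= 14.8us per line.
 */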
4538 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4539 const struct intel_plane_state *pstate)
4541 uint64_t adjusted_pixel_rate;
4542 uint_fixed_16_16_t downscale_amount;
4544 /* Shouldn't reach here on disabled planes... */
4545 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4549 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4550 * with additional adjustments for plane-specific scaling.
4552 adjusted_pixel_rate = cstate->pixel_rate;
4553 downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4555 return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4560 skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
4561 struct intel_crtc_state *cstate,
4562 const struct intel_plane_state *intel_pstate,
4563 struct skl_wm_params *wp, int plane_id)
4565 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
4566 const struct drm_plane_state *pstate = &intel_pstate->base;
4567 const struct drm_framebuffer *fb = pstate->fb;
4568 uint32_t interm_pbpl;
4569 struct intel_atomic_state *state =
4570 to_intel_atomic_state(cstate->base.state);
4571 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4573 if (!intel_wm_plane_visible(cstate, intel_pstate))
4576 /* only the NV12 format has two planes */
4577 if (plane_id == 1 && fb->format->format != DRM_FORMAT_NV12) {
4578 DRM_DEBUG_KMS("Non-NV12 formats have a single plane\n");
4582 wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4583 fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4584 fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4585 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4586 wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4587 wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4588 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4589 wp->is_planar = fb->format->format == DRM_FORMAT_NV12;
4591 if (plane->id == PLANE_CURSOR) {
4592 wp->width = intel_pstate->base.crtc_w;
4595 * Src coordinates are already rotated by 270 degrees for
4596 * the 90/270 degree plane rotation cases (to match the
4597 * GTT mapping), hence no need to account for rotation here.
4599 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
4602 if (plane_id == 1 && wp->is_planar)
4605 wp->cpp = fb->format->cpp[plane_id];
4606 wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4609 if (INTEL_GEN(dev_priv) >= 11 &&
4610 fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
4611 wp->dbuf_block_size = 256;
4613 wp->dbuf_block_size = 512;
4615 if (drm_rotation_90_or_270(pstate->rotation)) {
4619 wp->y_min_scanlines = 16;
4622 wp->y_min_scanlines = 8;
4625 wp->y_min_scanlines = 4;
4628 MISSING_CASE(wp->cpp);
4632 wp->y_min_scanlines = 4;
4635 if (apply_memory_bw_wa)
4636 wp->y_min_scanlines *= 2;
4638 wp->plane_bytes_per_line = wp->width * wp->cpp;
4640 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4641 wp->y_min_scanlines,
4642 wp->dbuf_block_size);
4644 if (INTEL_GEN(dev_priv) >= 10)
4647 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4648 wp->y_min_scanlines);
4649 } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
4650 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4651 wp->dbuf_block_size);
4652 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4654 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4655 wp->dbuf_block_size) + 1;
4656 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
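	/*
	 * Worked example (illustrative only, values assumed): a 1920 pixel
	 * wide 32bpp plane has plane_bytes_per_line = 7680.  Y-tiled on GEN9
	 * with y_min_scanlines = 4 that is DIV_ROUND_UP(7680 * 4, 512) / 4 =
	 * 15 blocks per line; X-tiled on GEN9 it is DIV_ROUND_UP(7680, 512) =
	 * 15; otherwise 15 + 1 = 16.
	 */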
4659 wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4660 wp->plane_blocks_per_line);
4661 wp->linetime_us = fixed16_to_u32_round_up(
4662 intel_get_linetime_us(cstate));
4667 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
4668 struct intel_crtc_state *cstate,
4669 const struct intel_plane_state *intel_pstate,
4670 uint16_t ddb_allocation,
4672 const struct skl_wm_params *wp,
4673 const struct skl_wm_level *result_prev,
4674 struct skl_wm_level *result /* out */)
4676 const struct drm_plane_state *pstate = &intel_pstate->base;
4677 uint32_t latency = dev_priv->wm.skl_latency[level];
4678 uint_fixed_16_16_t method1, method2;
4679 uint_fixed_16_16_t selected_result;
4680 uint32_t res_blocks, res_lines;
4681 struct intel_atomic_state *state =
4682 to_intel_atomic_state(cstate->base.state);
4683 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4684 uint32_t min_disp_buf_needed;
4687 !intel_wm_plane_visible(cstate, intel_pstate)) {
4688 result->plane_en = false;
4692 /* Display WA #1141: kbl,cfl */
4693 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4694 IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4695 dev_priv->ipc_enabled)
4698 if (apply_memory_bw_wa && wp->x_tiled)
4701 method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4702 wp->cpp, latency, wp->dbuf_block_size);
4703 method2 = skl_wm_method2(wp->plane_pixel_rate,
4704 cstate->base.adjusted_mode.crtc_htotal,
4706 wp->plane_blocks_per_line);
4709 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4711 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4712 wp->dbuf_block_size < 1) &&
4713 (wp->plane_bytes_per_line / wp->dbuf_block_size < 1))
4714 selected_result = method2;
4715 else if (ddb_allocation >=
4716 fixed16_to_u32_round_up(wp->plane_blocks_per_line))
4717 selected_result = min_fixed16(method1, method2);
4718 else if (latency >= wp->linetime_us)
4719 selected_result = min_fixed16(method1, method2);
4721 selected_result = method1;
4724 res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4725 res_lines = div_round_up_fixed16(selected_result,
4726 wp->plane_blocks_per_line);
4728 /* Display WA #1125: skl,bxt,kbl,glk */
4729 if (level == 0 && wp->rc_surface)
4730 res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
4732 /* Display WA #1126: skl,bxt,kbl,glk */
4733 if (level >= 1 && level <= 7) {
4735 res_blocks += fixed16_to_u32_round_up(
4736 wp->y_tile_minimum);
4737 res_lines += wp->y_min_scanlines;
4743 * Make sure result blocks for higher latency levels are at least
4744 * as high as the level below the current level.
4745 * Assumption in DDB algorithm optimization for special cases.
4746 * Also covers Display WA #1125 for RC.
4748 if (result_prev->plane_res_b > res_blocks)
4749 res_blocks = result_prev->plane_res_b;
4752 if (INTEL_GEN(dev_priv) >= 11) {
4754 uint32_t extra_lines;
4755 uint_fixed_16_16_t fp_min_disp_buf_needed;
4757 if (res_lines % wp->y_min_scanlines == 0)
4758 extra_lines = wp->y_min_scanlines;
4760 extra_lines = wp->y_min_scanlines * 2 -
4761 res_lines % wp->y_min_scanlines;
4763 fp_min_disp_buf_needed = mul_u32_fixed16(res_lines +
4765 wp->plane_blocks_per_line);
4766 min_disp_buf_needed = fixed16_to_u32_round_up(
4767 fp_min_disp_buf_needed);
4769 min_disp_buf_needed = DIV_ROUND_UP(res_blocks * 11, 10);
4772 min_disp_buf_needed = res_blocks;
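	/*
	 * Worked GEN11 example (illustrative only, values assumed): with
	 * res_lines = 6 and y_min_scanlines = 4, extra_lines =
	 * 2 * 4 - 6 % 4 = 6, so a Y-tiled plane needs
	 * (6 + 6) * plane_blocks_per_line blocks of dbuf; a non Y-tiled
	 * plane needs res_blocks * 11 / 10 rounded up.
	 */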
4775 if ((level > 0 && res_lines > 31) ||
4776 res_blocks >= ddb_allocation ||
4777 min_disp_buf_needed >= ddb_allocation) {
4778 result->plane_en = false;
4781 * If there are no valid level 0 watermarks, then we can't
4782 * support this display configuration.
4787 struct drm_plane *plane = pstate->plane;
4789 DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
4790 DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n",
4791 plane->base.id, plane->name,
4792 res_blocks, ddb_allocation, res_lines);
4798 * Display WA #826 (SKL:ALL, BXT:ALL) & #1059 (CNL:A)
4799 * disable wm level 1-7 on NV12 planes
4801 if (wp->is_planar && level >= 1 &&
4802 (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
4803 IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))) {
4804 result->plane_en = false;
4808 /* The number of lines is ignored for the level 0 watermark. */
4809 result->plane_res_b = res_blocks;
4810 result->plane_res_l = res_lines;
4811 result->plane_en = true;
4817 skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
4818 struct skl_ddb_allocation *ddb,
4819 struct intel_crtc_state *cstate,
4820 const struct intel_plane_state *intel_pstate,
4821 const struct skl_wm_params *wm_params,
4822 struct skl_plane_wm *wm,
4825 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4826 struct drm_plane *plane = intel_pstate->base.plane;
4827 struct intel_plane *intel_plane = to_intel_plane(plane);
4828 uint16_t ddb_blocks;
4829 enum pipe pipe = intel_crtc->pipe;
4830 int level, max_level = ilk_wm_max_level(dev_priv);
4831 enum plane_id intel_plane_id = intel_plane->id;
4834 if (WARN_ON(!intel_pstate->base.fb))
4837 ddb_blocks = plane_id ?
4838 skl_ddb_entry_size(&ddb->uv_plane[pipe][intel_plane_id]) :
4839 skl_ddb_entry_size(&ddb->plane[pipe][intel_plane_id]);
4841 for (level = 0; level <= max_level; level++) {
4842 struct skl_wm_level *result = plane_id ? &wm->uv_wm[level] :
4844 struct skl_wm_level *result_prev;
4847 result_prev = plane_id ? &wm->uv_wm[level - 1] :
4850 result_prev = plane_id ? &wm->uv_wm[0] : &wm->wm[0];
4852 ret = skl_compute_plane_wm(dev_priv,
4864 if (intel_pstate->base.fb->format->format == DRM_FORMAT_NV12)
4865 wm->is_planar = true;
4871 skl_compute_linetime_wm(struct intel_crtc_state *cstate)
4873 struct drm_atomic_state *state = cstate->base.state;
4874 struct drm_i915_private *dev_priv = to_i915(state->dev);
4875 uint_fixed_16_16_t linetime_us;
4876 uint32_t linetime_wm;
4878 linetime_us = intel_get_linetime_us(cstate);
4880 if (is_fixed16_zero(linetime_us))
4883 linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4885 /* Display WA #1135: bxt:ALL GLK:ALL */
4886 if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) &&
4887 dev_priv->ipc_enabled)
4893 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
4894 struct skl_wm_params *wp,
4895 struct skl_wm_level *wm_l0,
4896 uint16_t ddb_allocation,
4897 struct skl_wm_level *trans_wm /* out */)
4899 struct drm_device *dev = cstate->base.crtc->dev;
4900 const struct drm_i915_private *dev_priv = to_i915(dev);
4901 uint16_t trans_min, trans_y_tile_min;
4902 const uint16_t trans_amount = 10; /* This is a configurable amount */
4903 uint16_t trans_offset_b, res_blocks;
4905 if (!cstate->base.active)
4908 /* Transition WMs are not recommended by the HW team for GEN9 */
4909 if (INTEL_GEN(dev_priv) <= 9)
4912 /* Transition WMs don't make any sense if IPC is disabled */
4913 if (!dev_priv->ipc_enabled)
4917 if (INTEL_GEN(dev_priv) >= 10)
4920 trans_offset_b = trans_min + trans_amount;
4923 trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
4924 wp->y_tile_minimum);
4925 res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) +
4928 res_blocks = wm_l0->plane_res_b + trans_offset_b;
4930 /* WA BUG:1938466 add one block for non y-tile planes */
4931 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4938 if (res_blocks < ddb_allocation) {
4939 trans_wm->plane_res_b = res_blocks;
4940 trans_wm->plane_en = true;
4945 trans_wm->plane_en = false;
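/*
 * Worked example (illustrative only, values assumed): if trans_min is 4
 * and trans_amount is 10, trans_offset_b = 14.  A non Y-tiled plane with
 * a level 0 watermark of 20 blocks then needs roughly 20 + 14 = 34
 * blocks (plus the CNL A0 workaround block), and the transition
 * watermark is enabled only if that fits in the plane's DDB allocation.
 */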
4948 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4949 struct skl_ddb_allocation *ddb,
4950 struct skl_pipe_wm *pipe_wm)
4952 struct drm_device *dev = cstate->base.crtc->dev;
4953 struct drm_crtc_state *crtc_state = &cstate->base;
4954 const struct drm_i915_private *dev_priv = to_i915(dev);
4955 struct drm_plane *plane;
4956 const struct drm_plane_state *pstate;
4957 struct skl_plane_wm *wm;
4961 * We'll only calculate watermarks for planes that are actually
4962 * enabled, so make sure all other planes are set as disabled.
4964 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4966 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4967 const struct intel_plane_state *intel_pstate =
4968 to_intel_plane_state(pstate);
4969 enum plane_id plane_id = to_intel_plane(plane)->id;
4970 struct skl_wm_params wm_params;
4971 enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe;
4972 uint16_t ddb_blocks;
4974 wm = &pipe_wm->planes[plane_id];
4975 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
4977 ret = skl_compute_plane_wm_params(dev_priv, cstate,
4978 intel_pstate, &wm_params, 0);
4982 ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4983 intel_pstate, &wm_params, wm, 0);
4987 skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
4988 ddb_blocks, &wm->trans_wm);
4990 /* uv plane watermarks must also be validated for NV12/Planar */
4991 if (wm_params.is_planar) {
4992 memset(&wm_params, 0, sizeof(struct skl_wm_params));
4993 wm->is_planar = true;
4995 ret = skl_compute_plane_wm_params(dev_priv, cstate,
5001 ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
5002 intel_pstate, &wm_params,
5009 pipe_wm->linetime = skl_compute_linetime_wm(cstate);
5014 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5016 const struct skl_ddb_entry *entry)
5019 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
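/*
 * Example encoding (illustrative only): a DDB entry covering blocks
 * [0, 160) is written as ((160 - 1) << 16) | 0 = 0x009f0000, i.e. the
 * hardware register holds an inclusive end block.
 */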
5024 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5026 const struct skl_wm_level *level)
5030 if (level->plane_en) {
5032 val |= level->plane_res_b;
5033 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5036 I915_WRITE(reg, val);
5039 static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
5040 const struct skl_plane_wm *wm,
5041 const struct skl_ddb_allocation *ddb,
5042 enum plane_id plane_id)
5044 struct drm_crtc *crtc = &intel_crtc->base;
5045 struct drm_device *dev = crtc->dev;
5046 struct drm_i915_private *dev_priv = to_i915(dev);
5047 int level, max_level = ilk_wm_max_level(dev_priv);
5048 enum pipe pipe = intel_crtc->pipe;
5050 for (level = 0; level <= max_level; level++) {
5051 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5054 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5057 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5058 &ddb->plane[pipe][plane_id]);
5059 /* FIXME: add proper NV12 support for ICL. */
5060 if (INTEL_GEN(dev_priv) >= 11)
5061 return skl_ddb_entry_write(dev_priv,
5062 PLANE_BUF_CFG(pipe, plane_id),
5063 &ddb->plane[pipe][plane_id]);
5064 if (wm->is_planar) {
5065 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5066 &ddb->uv_plane[pipe][plane_id]);
5067 skl_ddb_entry_write(dev_priv,
5068 PLANE_NV12_BUF_CFG(pipe, plane_id),
5069 &ddb->plane[pipe][plane_id]);
5071 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5072 &ddb->plane[pipe][plane_id]);
5073 I915_WRITE(PLANE_NV12_BUF_CFG(pipe, plane_id), 0x0);
5077 static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
5078 const struct skl_plane_wm *wm,
5079 const struct skl_ddb_allocation *ddb)
5081 struct drm_crtc *crtc = &intel_crtc->base;
5082 struct drm_device *dev = crtc->dev;
5083 struct drm_i915_private *dev_priv = to_i915(dev);
5084 int level, max_level = ilk_wm_max_level(dev_priv);
5085 enum pipe pipe = intel_crtc->pipe;
5087 for (level = 0; level <= max_level; level++) {
5088 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5091 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5093 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
5094 &ddb->plane[pipe][PLANE_CURSOR]);
5097 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5098 const struct skl_wm_level *l2)
5100 if (l1->plane_en != l2->plane_en)
5103 /* If the plane is disabled in both levels, the rest shouldn't matter */
5107 return (l1->plane_res_l == l2->plane_res_l &&
5108 l1->plane_res_b == l2->plane_res_b);
5111 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5112 const struct skl_ddb_entry *b)
5114 return a->start < b->end && b->start < a->end;
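/*
 * Example (illustrative only): entries [0, 160) and [160, 320) do not
 * overlap since b->start (160) is not below a->end (160); entries that
 * merely touch at a boundary are therefore allowed to coexist.
 */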
5117 bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
5118 const struct skl_ddb_entry **entries,
5119 const struct skl_ddb_entry *ddb,
5124 for_each_pipe(dev_priv, pipe) {
5125 if (pipe != ignore && entries[pipe] &&
5126 skl_ddb_entries_overlap(ddb, entries[pipe]))
5133 static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
5134 const struct skl_pipe_wm *old_pipe_wm,
5135 struct skl_pipe_wm *pipe_wm, /* out */
5136 struct skl_ddb_allocation *ddb, /* out */
5137 bool *changed /* out */)
5139 struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
5142 ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm);
5146 if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
5155 pipes_modified(struct drm_atomic_state *state)
5157 struct drm_crtc *crtc;
5158 struct drm_crtc_state *cstate;
5159 uint32_t i, ret = 0;
5161 for_each_new_crtc_in_state(state, crtc, cstate, i)
5162 ret |= drm_crtc_mask(crtc);
5168 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
5170 struct drm_atomic_state *state = cstate->base.state;
5171 struct drm_device *dev = state->dev;
5172 struct drm_crtc *crtc = cstate->base.crtc;
5173 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5174 struct drm_i915_private *dev_priv = to_i915(dev);
5175 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5176 struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5177 struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
5178 struct drm_plane_state *plane_state;
5179 struct drm_plane *plane;
5180 enum pipe pipe = intel_crtc->pipe;
5182 drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
5183 enum plane_id plane_id = to_intel_plane(plane)->id;
5185 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
5186 &new_ddb->plane[pipe][plane_id]) &&
5187 skl_ddb_entry_equal(&cur_ddb->uv_plane[pipe][plane_id],
5188 &new_ddb->uv_plane[pipe][plane_id]))
5191 plane_state = drm_atomic_get_plane_state(state, plane);
5192 if (IS_ERR(plane_state))
5193 return PTR_ERR(plane_state);
5200 skl_compute_ddb(struct drm_atomic_state *state)
5202 const struct drm_i915_private *dev_priv = to_i915(state->dev);
5203 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5204 struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
5205 struct intel_crtc *crtc;
5206 struct intel_crtc_state *cstate;
5209 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5211 for_each_new_intel_crtc_in_state(intel_state, crtc, cstate, i) {
5212 ret = skl_allocate_pipe_ddb(cstate, ddb);
5216 ret = skl_ddb_add_affected_planes(cstate);
5225 skl_print_wm_changes(const struct drm_atomic_state *state)
5227 const struct drm_device *dev = state->dev;
5228 const struct drm_i915_private *dev_priv = to_i915(dev);
5229 const struct intel_atomic_state *intel_state =
5230 to_intel_atomic_state(state);
5231 const struct drm_crtc *crtc;
5232 const struct drm_crtc_state *cstate;
5233 const struct intel_plane *intel_plane;
5234 const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
5235 const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5238 for_each_new_crtc_in_state(state, crtc, cstate, i) {
5239 const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5240 enum pipe pipe = intel_crtc->pipe;
5242 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
5243 enum plane_id plane_id = intel_plane->id;
5244 const struct skl_ddb_entry *old, *new;
5246 old = &old_ddb->plane[pipe][plane_id];
5247 new = &new_ddb->plane[pipe][plane_id];
5249 if (skl_ddb_entry_equal(old, new))
5252 DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
5253 intel_plane->base.base.id,
5254 intel_plane->base.name,
5255 old->start, old->end,
5256 new->start, new->end);
5262 skl_ddb_add_affected_pipes(struct drm_atomic_state *state, bool *changed)
5264 struct drm_device *dev = state->dev;
5265 const struct drm_i915_private *dev_priv = to_i915(dev);
5266 const struct drm_crtc *crtc;
5267 const struct drm_crtc_state *cstate;
5268 struct intel_crtc *intel_crtc;
5269 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5270 uint32_t realloc_pipes = pipes_modified(state);
5274 * When we distrust bios wm we always need to recompute to set the
5275 * expected DDB allocations for each CRTC.
5277 if (dev_priv->wm.distrust_bios_wm)
5281 * If this transaction isn't actually touching any CRTC's, don't
5282 * bother with watermark calculation. Note that if we pass this
5283 * test, we're guaranteed to hold at least one CRTC state mutex,
5284 * which means we can safely use values like dev_priv->active_crtcs
5285 * since any racing commits that want to update them would need to
5286 * hold _all_ CRTC state mutexes.
5288 for_each_new_crtc_in_state(state, crtc, cstate, i)
5295 * If this is our first atomic update following hardware readout,
5296 * we can't trust the DDB that the BIOS programmed for us. Let's
5297 * pretend that all pipes switched active status so that we'll
5298 * ensure a full DDB recompute.
5300 if (dev_priv->wm.distrust_bios_wm) {
5301 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
5302 state->acquire_ctx);
5306 intel_state->active_pipe_changes = ~0;
5309 * We usually only initialize intel_state->active_crtcs if
5310 * we're doing a modeset; make sure this field is always
5311 * initialized during the sanitization process that happens
5312 * on the first commit too.
5314 if (!intel_state->modeset)
5315 intel_state->active_crtcs = dev_priv->active_crtcs;
5319 * If the modeset changes which CRTC's are active, we need to
5320 * recompute the DDB allocation for *all* active pipes, even
5321 * those that weren't otherwise being modified in any way by this
5322 * atomic commit. Due to the shrinking of the per-pipe allocations
5323 * when new active CRTC's are added, it's possible for a pipe that
5324 * we were already using and aren't changing at all here to suddenly
5325 * become invalid if its DDB needs exceed its new allocation.
5327 * Note that if we wind up doing a full DDB recompute, we can't let
5328 * any other display updates race with this transaction, so we need
5329 * to grab the lock on *all* CRTC's.
5331 if (intel_state->active_pipe_changes || intel_state->modeset) {
5333 intel_state->wm_results.dirty_pipes = ~0;
5337 * We're not recomputing for the pipes not included in the commit, so
5338 * make sure we start with the current state.
5340 for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
5341 struct intel_crtc_state *cstate;
5343 cstate = intel_atomic_get_crtc_state(state, intel_crtc);
5345 return PTR_ERR(cstate);
5352 skl_compute_wm(struct drm_atomic_state *state)
5354 struct drm_crtc *crtc;
5355 struct drm_crtc_state *cstate;
5356 struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5357 struct skl_ddb_values *results = &intel_state->wm_results;
5358 struct skl_pipe_wm *pipe_wm;
5359 bool changed = false;
5362 /* Clear all dirty flags */
5363 results->dirty_pipes = 0;
5365 ret = skl_ddb_add_affected_pipes(state, &changed);
5366 if (ret || !changed)
5369 ret = skl_compute_ddb(state);
5374 * Calculate WM's for all pipes that are part of this transaction.
5375 * Note that the DDB allocation above may have added more CRTC's that
5376 * weren't otherwise being modified (and set bits in dirty_pipes) if
5377 * pipe allocations had to change.
5379 * FIXME: Now that we're doing this in the atomic check phase, we
5380 * should allow skl_update_pipe_wm() to return failure in cases where
5381 * no suitable watermark values can be found.
5383 for_each_new_crtc_in_state(state, crtc, cstate, i) {
5384 struct intel_crtc_state *intel_cstate =
5385 to_intel_crtc_state(cstate);
5386 const struct skl_pipe_wm *old_pipe_wm =
5387 &to_intel_crtc_state(crtc->state)->wm.skl.optimal;
5389 pipe_wm = &intel_cstate->wm.skl.optimal;
5390 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm,
5391 &results->ddb, &changed);
5396 results->dirty_pipes |= drm_crtc_mask(crtc);
5398 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
5399 /* This pipe's WM's did not change */
5402 intel_cstate->update_wm_pre = true;
5405 skl_print_wm_changes(state);
5410 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5411 struct intel_crtc_state *cstate)
5413 struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5414 struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5415 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5416 const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5417 enum pipe pipe = crtc->pipe;
5418 enum plane_id plane_id;
5420 if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5423 I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5425 for_each_plane_id_on_crtc(crtc, plane_id) {
5426 if (plane_id != PLANE_CURSOR)
5427 skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id],
5430 skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id],
5435 static void skl_initial_wm(struct intel_atomic_state *state,
5436 struct intel_crtc_state *cstate)
5438 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5439 struct drm_device *dev = intel_crtc->base.dev;
5440 struct drm_i915_private *dev_priv = to_i915(dev);
5441 struct skl_ddb_values *results = &state->wm_results;
5442 struct skl_ddb_values *hw_vals = &dev_priv->wm.skl_hw;
5443 enum pipe pipe = intel_crtc->pipe;
5445 if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5448 mutex_lock(&dev_priv->wm.wm_mutex);
5450 if (cstate->base.active_changed)
5451 skl_atomic_update_crtc_wm(state, cstate);
5453 memcpy(hw_vals->ddb.uv_plane[pipe], results->ddb.uv_plane[pipe],
5454 sizeof(hw_vals->ddb.uv_plane[pipe]));
5455 memcpy(hw_vals->ddb.plane[pipe], results->ddb.plane[pipe],
5456 sizeof(hw_vals->ddb.plane[pipe]));
5458 mutex_unlock(&dev_priv->wm.wm_mutex);
5461 static void ilk_compute_wm_config(struct drm_device *dev,
5462 struct intel_wm_config *config)
5464 struct intel_crtc *crtc;
5466 /* Compute the currently _active_ config */
5467 for_each_intel_crtc(dev, crtc) {
5468 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5470 if (!wm->pipe_enabled)
5473 config->sprites_enabled |= wm->sprites_enabled;
5474 config->sprites_scaled |= wm->sprites_scaled;
5475 config->num_pipes_active++;
5479 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5481 struct drm_device *dev = &dev_priv->drm;
5482 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5483 struct ilk_wm_maximums max;
5484 struct intel_wm_config config = {};
5485 struct ilk_wm_values results = {};
5486 enum intel_ddb_partitioning partitioning;
5488 ilk_compute_wm_config(dev, &config);
5490 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
5491 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
5493 /* 5/6 split only in single pipe config on IVB+ */
5494 if (INTEL_GEN(dev_priv) >= 7 &&
5495 config.num_pipes_active == 1 && config.sprites_enabled) {
5496 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
5497 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
5499 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
5501 best_lp_wm = &lp_wm_1_2;
5504 partitioning = (best_lp_wm == &lp_wm_1_2) ?
5505 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5507 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
5509 ilk_write_wm_values(dev_priv, &results);
5512 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5513 struct intel_crtc_state *cstate)
5515 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5516 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5518 mutex_lock(&dev_priv->wm.wm_mutex);
5519 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5520 ilk_program_watermarks(dev_priv);
5521 mutex_unlock(&dev_priv->wm.wm_mutex);
5524 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5525 struct intel_crtc_state *cstate)
5527 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5528 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5530 mutex_lock(&dev_priv->wm.wm_mutex);
5531 if (cstate->wm.need_postvbl_update) {
5532 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5533 ilk_program_watermarks(dev_priv);
5535 mutex_unlock(&dev_priv->wm.wm_mutex);
5538 static inline void skl_wm_level_from_reg_val(uint32_t val,
5539 struct skl_wm_level *level)
5541 level->plane_en = val & PLANE_WM_EN;
5542 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5543 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5544 PLANE_WM_LINES_MASK;
5547 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
5548 struct skl_pipe_wm *out)
5550 struct drm_i915_private *dev_priv = to_i915(crtc->dev);
5551 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5552 enum pipe pipe = intel_crtc->pipe;
5553 int level, max_level;
5554 enum plane_id plane_id;
5557 max_level = ilk_wm_max_level(dev_priv);
5559 for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5560 struct skl_plane_wm *wm = &out->planes[plane_id];
5562 for (level = 0; level <= max_level; level++) {
5563 if (plane_id != PLANE_CURSOR)
5564 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5566 val = I915_READ(CUR_WM(pipe, level));
5568 skl_wm_level_from_reg_val(val, &wm->wm[level]);
5571 if (plane_id != PLANE_CURSOR)
5572 val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5574 val = I915_READ(CUR_WM_TRANS(pipe));
5576 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5579 if (!intel_crtc->active)
5582 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5585 void skl_wm_get_hw_state(struct drm_device *dev)
5587 struct drm_i915_private *dev_priv = to_i915(dev);
5588 struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
5589 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5590 struct drm_crtc *crtc;
5591 struct intel_crtc *intel_crtc;
5592 struct intel_crtc_state *cstate;
5594 skl_ddb_get_hw_state(dev_priv, ddb);
5595 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
5596 intel_crtc = to_intel_crtc(crtc);
5597 cstate = to_intel_crtc_state(crtc->state);
5599 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5601 if (intel_crtc->active)
5602 hw->dirty_pipes |= drm_crtc_mask(crtc);
5605 if (dev_priv->active_crtcs) {
5606 /* Fully recompute DDB on first atomic commit */
5607 dev_priv->wm.distrust_bios_wm = true;
5610 * Easy/common case; just sanitize DDB now if everything is off
5611 * Keep dbuf slice info intact
5613 memset(ddb->plane, 0, sizeof(ddb->plane));
5614 memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane));
5618 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
5620 struct drm_device *dev = crtc->dev;
5621 struct drm_i915_private *dev_priv = to_i915(dev);
5622 struct ilk_wm_values *hw = &dev_priv->wm.hw;
5623 struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5624 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
5625 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5626 enum pipe pipe = intel_crtc->pipe;
5627 static const i915_reg_t wm0_pipe_reg[] = {
5628 [PIPE_A] = WM0_PIPEA_ILK,
5629 [PIPE_B] = WM0_PIPEB_ILK,
5630 [PIPE_C] = WM0_PIPEC_IVB,
5633 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5634 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5635 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5637 memset(active, 0, sizeof(*active));
5639 active->pipe_enabled = intel_crtc->active;
5641 if (active->pipe_enabled) {
5642 u32 tmp = hw->wm_pipe[pipe];
5645 * For active pipes LP0 watermark is marked as
5646 * enabled, and LP1+ watermarks as disabled since
5647 * we can't really reverse compute them in case
5648 * multiple pipes are active.
5650 active->wm[0].enable = true;
5651 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5652 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5653 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5654 active->linetime = hw->wm_linetime[pipe];
5656 int level, max_level = ilk_wm_max_level(dev_priv);
5659 * For inactive pipes, all watermark levels
5660 * should be marked as enabled but zeroed,
5661 * which is what we'd compute them to.
5663 for (level = 0; level <= max_level; level++)
5664 active->wm[level].enable = true;
5667 intel_crtc->wm.active.ilk = *active;
5670 #define _FW_WM(value, plane) \
5671 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5672 #define _FW_WM_VLV(value, plane) \
5673 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5675 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5676 struct g4x_wm_values *wm)
5680 tmp = I915_READ(DSPFW1);
5681 wm->sr.plane = _FW_WM(tmp, SR);
5682 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5683 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5684 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5686 tmp = I915_READ(DSPFW2);
5687 wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5688 wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5689 wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5690 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5691 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5692 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5694 tmp = I915_READ(DSPFW3);
5695 wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5696 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5697 wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5698 wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5701 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5702 struct vlv_wm_values *wm)
5707 for_each_pipe(dev_priv, pipe) {
5708 tmp = I915_READ(VLV_DDL(pipe));
5710 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5711 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5712 wm->ddl[pipe].plane[PLANE_CURSOR] =
5713 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5714 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5715 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5716 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5717 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5720 tmp = I915_READ(DSPFW1);
5721 wm->sr.plane = _FW_WM(tmp, SR);
5722 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5723 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5724 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5726 tmp = I915_READ(DSPFW2);
5727 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5728 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5729 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5731 tmp = I915_READ(DSPFW3);
5732 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5734 if (IS_CHERRYVIEW(dev_priv)) {
5735 tmp = I915_READ(DSPFW7_CHV);
5736 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5737 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5739 tmp = I915_READ(DSPFW8_CHV);
5740 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5741 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5743 tmp = I915_READ(DSPFW9_CHV);
5744 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5745 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5747 tmp = I915_READ(DSPHOWM);
5748 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5749 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5750 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5751 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5752 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5753 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5754 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5755 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5756 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5757 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5759 tmp = I915_READ(DSPFW7);
5760 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5761 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5763 tmp = I915_READ(DSPHOWM);
5764 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5765 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5766 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5767 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5768 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5769 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5770 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5777 void g4x_wm_get_hw_state(struct drm_device *dev)
5779 struct drm_i915_private *dev_priv = to_i915(dev);
5780 struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5781 struct intel_crtc *crtc;
5783 g4x_read_wm_values(dev_priv, wm);
5785 wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5787 for_each_intel_crtc(dev, crtc) {
5788 struct intel_crtc_state *crtc_state =
5789 to_intel_crtc_state(crtc->base.state);
5790 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5791 struct g4x_pipe_wm *raw;
5792 enum pipe pipe = crtc->pipe;
5793 enum plane_id plane_id;
5794 int level, max_level;
5796 active->cxsr = wm->cxsr;
5797 active->hpll_en = wm->hpll_en;
5798 active->fbc_en = wm->fbc_en;
5800 active->sr = wm->sr;
5801 active->hpll = wm->hpll;
5803 for_each_plane_id_on_crtc(crtc, plane_id) {
5804 active->wm.plane[plane_id] =
5805 wm->pipe[pipe].plane[plane_id];
5808 if (wm->cxsr && wm->hpll_en)
5809 max_level = G4X_WM_LEVEL_HPLL;
5811 max_level = G4X_WM_LEVEL_SR;
5813 max_level = G4X_WM_LEVEL_NORMAL;
5815 level = G4X_WM_LEVEL_NORMAL;
5816 raw = &crtc_state->wm.g4x.raw[level];
5817 for_each_plane_id_on_crtc(crtc, plane_id)
5818 raw->plane[plane_id] = active->wm.plane[plane_id];
5820 if (++level > max_level)
5823 raw = &crtc_state->wm.g4x.raw[level];
5824 raw->plane[PLANE_PRIMARY] = active->sr.plane;
5825 raw->plane[PLANE_CURSOR] = active->sr.cursor;
5826 raw->plane[PLANE_SPRITE0] = 0;
5827 raw->fbc = active->sr.fbc;
5829 if (++level > max_level)
5832 raw = &crtc_state->wm.g4x.raw[level];
5833 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5834 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5835 raw->plane[PLANE_SPRITE0] = 0;
5836 raw->fbc = active->hpll.fbc;
5839 for_each_plane_id_on_crtc(crtc, plane_id)
5840 g4x_raw_plane_wm_set(crtc_state, level,
5841 plane_id, USHRT_MAX);
5842 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
5844 crtc_state->wm.g4x.optimal = *active;
5845 crtc_state->wm.g4x.intermediate = *active;
5847 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5849 wm->pipe[pipe].plane[PLANE_PRIMARY],
5850 wm->pipe[pipe].plane[PLANE_CURSOR],
5851 wm->pipe[pipe].plane[PLANE_SPRITE0]);
5854 DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5855 wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
5856 DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, SR cursor=%d fbc=%d\n",
5857 wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
5858 DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5859 yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
5862 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
5864 struct intel_plane *plane;
5865 struct intel_crtc *crtc;
5867 mutex_lock(&dev_priv->wm.wm_mutex);
5869 for_each_intel_plane(&dev_priv->drm, plane) {
5870 struct intel_crtc *crtc =
5871 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5872 struct intel_crtc_state *crtc_state =
5873 to_intel_crtc_state(crtc->base.state);
5874 struct intel_plane_state *plane_state =
5875 to_intel_plane_state(plane->base.state);
5876 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
5877 enum plane_id plane_id = plane->id;
5880 if (plane_state->base.visible)
5883 for (level = 0; level < 3; level++) {
5884 struct g4x_pipe_wm *raw =
5885 &crtc_state->wm.g4x.raw[level];
5887 raw->plane[plane_id] = 0;
5888 wm_state->wm.plane[plane_id] = 0;
5891 if (plane_id == PLANE_PRIMARY) {
5892 for (level = 0; level < 3; level++) {
5893 struct g4x_pipe_wm *raw =
5894 &crtc_state->wm.g4x.raw[level];
5898 wm_state->sr.fbc = 0;
5899 wm_state->hpll.fbc = 0;
5900 wm_state->fbc_en = false;
5904 for_each_intel_crtc(&dev_priv->drm, crtc) {
5905 struct intel_crtc_state *crtc_state =
5906 to_intel_crtc_state(crtc->base.state);
5908 crtc_state->wm.g4x.intermediate =
5909 crtc_state->wm.g4x.optimal;
5910 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
5913 g4x_program_watermarks(dev_priv);
5915 mutex_unlock(&dev_priv->wm.wm_mutex);
5918 void vlv_wm_get_hw_state(struct drm_device *dev)
5920 struct drm_i915_private *dev_priv = to_i915(dev);
5921 struct vlv_wm_values *wm = &dev_priv->wm.vlv;
5922 struct intel_crtc *crtc;
5925 vlv_read_wm_values(dev_priv, wm);
5927 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5928 wm->level = VLV_WM_LEVEL_PM2;
5930 if (IS_CHERRYVIEW(dev_priv)) {
5931 mutex_lock(&dev_priv->pcu_lock);
5933 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
5934 if (val & DSP_MAXFIFO_PM5_ENABLE)
5935 wm->level = VLV_WM_LEVEL_PM5;
5938 * If DDR DVFS is disabled in the BIOS, Punit
5939 * will never ack the request. So if that happens
5940 * assume we don't have to enable/disable DDR DVFS
5941 * dynamically. To test that just set the REQ_ACK
5942 * bit to poke the Punit, but don't change the
5943 * HIGH/LOW bits so that we don't actually change
5944 * the current state.
5946 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5947 val |= FORCE_DDR_FREQ_REQ_ACK;
5948 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
5950 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
5951 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
5952 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
5953 "assuming DDR DVFS is disabled\n");
5954 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
5956 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5957 if ((val & FORCE_DDR_HIGH_FREQ) == 0)
5958 wm->level = VLV_WM_LEVEL_DDR_DVFS;
5961 mutex_unlock(&dev_priv->pcu_lock);
5964 for_each_intel_crtc(dev, crtc) {
5965 struct intel_crtc_state *crtc_state =
5966 to_intel_crtc_state(crtc->base.state);
5967 struct vlv_wm_state *active = &crtc->wm.active.vlv;
5968 const struct vlv_fifo_state *fifo_state =
5969 &crtc_state->wm.vlv.fifo_state;
5970 enum pipe pipe = crtc->pipe;
5971 enum plane_id plane_id;
5974 vlv_get_fifo_size(crtc_state);
5976 active->num_levels = wm->level + 1;
5977 active->cxsr = wm->cxsr;
5979 for (level = 0; level < active->num_levels; level++) {
5980 struct g4x_pipe_wm *raw =
5981 &crtc_state->wm.vlv.raw[level];
5983 active->sr[level].plane = wm->sr.plane;
5984 active->sr[level].cursor = wm->sr.cursor;
5986 for_each_plane_id_on_crtc(crtc, plane_id) {
5987 active->wm[level].plane[plane_id] =
5988 wm->pipe[pipe].plane[plane_id];
5990 raw->plane[plane_id] =
5991 vlv_invert_wm_value(active->wm[level].plane[plane_id],
5992 fifo_state->plane[plane_id]);
5996 for_each_plane_id_on_crtc(crtc, plane_id)
5997 vlv_raw_plane_wm_set(crtc_state, level,
5998 plane_id, USHRT_MAX);
5999 vlv_invalidate_wms(crtc, active, level);
6001 crtc_state->wm.vlv.optimal = *active;
6002 crtc_state->wm.vlv.intermediate = *active;
6004 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6006 wm->pipe[pipe].plane[PLANE_PRIMARY],
6007 wm->pipe[pipe].plane[PLANE_CURSOR],
6008 wm->pipe[pipe].plane[PLANE_SPRITE0],
6009 wm->pipe[pipe].plane[PLANE_SPRITE1]);
6012 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6013 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6016 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6018 struct intel_plane *plane;
6019 struct intel_crtc *crtc;
6021 mutex_lock(&dev_priv->wm.wm_mutex);
6023 for_each_intel_plane(&dev_priv->drm, plane) {
6024 struct intel_crtc *crtc =
6025 intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6026 struct intel_crtc_state *crtc_state =
6027 to_intel_crtc_state(crtc->base.state);
6028 struct intel_plane_state *plane_state =
6029 to_intel_plane_state(plane->base.state);
6030 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6031 const struct vlv_fifo_state *fifo_state =
6032 &crtc_state->wm.vlv.fifo_state;
6033 enum plane_id plane_id = plane->id;
6036 if (plane_state->base.visible)
6039 for (level = 0; level < wm_state->num_levels; level++) {
6040 struct g4x_pipe_wm *raw =
6041 &crtc_state->wm.vlv.raw[level];
6043 raw->plane[plane_id] = 0;
6045 wm_state->wm[level].plane[plane_id] =
6046 vlv_invert_wm_value(raw->plane[plane_id],
6047 fifo_state->plane[plane_id]);
6051 for_each_intel_crtc(&dev_priv->drm, crtc) {
6052 struct intel_crtc_state *crtc_state =
6053 to_intel_crtc_state(crtc->base.state);
6055 crtc_state->wm.vlv.intermediate =
6056 crtc_state->wm.vlv.optimal;
6057 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6060 vlv_program_watermarks(dev_priv);
6062 mutex_unlock(&dev_priv->wm.wm_mutex);
6066 * FIXME should probably kill this and improve
6067 * the real watermark readout/sanitization instead
6069 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6071 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6072 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6073 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6076 * Don't touch WM1S_LP_EN here.
6077 * Doing so could cause underruns.
6081 void ilk_wm_get_hw_state(struct drm_device *dev)
6083 struct drm_i915_private *dev_priv = to_i915(dev);
6084 struct ilk_wm_values *hw = &dev_priv->wm.hw;
6085 struct drm_crtc *crtc;
6087 ilk_init_lp_watermarks(dev_priv);
6089 for_each_crtc(dev, crtc)
6090 ilk_pipe_wm_get_hw_state(crtc);
6092 hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6093 hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6094 hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6096 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6097 if (INTEL_GEN(dev_priv) >= 7) {
6098 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6099 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6102 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6103 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6104 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6105 else if (IS_IVYBRIDGE(dev_priv))
6106 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6107 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6110 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6114 * intel_update_watermarks - update FIFO watermark values based on current modes
6115 * @crtc: the #intel_crtc on which to compute the WM
6117 * Calculate watermark values for the various WM regs based on current mode
6118 * and plane configuration.
6120 * There are several cases to deal with here:
6121 * - normal (i.e. non-self-refresh)
6122 * - self-refresh (SR) mode
6123 * - lines are large relative to FIFO size (buffer can hold up to 2)
6124 * - lines are small relative to FIFO size (buffer can hold more than 2
6125 * lines), so need to account for TLB latency
6127 * The normal calculation is:
6128 * watermark = dotclock * bytes per pixel * latency
6129 * where latency is platform & configuration dependent (we assume pessimal
 * values here).
6132 * The SR calculation is:
6133 * watermark = (trunc(latency/line time)+1) * surface width *
 *   bytes per pixel
 * where
6136 * line time = htotal / dotclock
6137 * surface width = hdisplay for normal plane and 64 for cursor
6138 * and latency is assumed to be high, as above.
6140 * The final value programmed to the register should always be rounded up,
6141 * and include an extra 2 entries to account for clock crossings.
6143 * We don't use the sprite, so we can ignore that. And on Crestline we have
6144 * to set the non-SR watermarks to 8.
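/*
 * Worked example of the formulas above (illustrative numbers only, not taken
 * from any platform's latency tables): for a 1920x1080 mode with htotal 2200,
 * a 148.5 MHz dot clock, 4 bytes per pixel and an assumed 12 us latency:
 *
 *   normal: 148.5 MHz * 4 B * 12 us                ~= 7128 bytes of FIFO data
 *   SR:     line time = 2200 / 148.5 MHz           ~= 14.8 us
 *           (trunc(12 / 14.8) + 1) * 1920 * 4 B     = 7680 bytes
 *
 * The per-platform code then converts this to FIFO entries/lines, rounds up
 * and adds the two extra entries for clock crossings mentioned above.
 */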
6146 void intel_update_watermarks(struct intel_crtc *crtc)
6148 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6150 if (dev_priv->display.update_wm)
6151 dev_priv->display.update_wm(crtc);
6154 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6158 /* Display WA #0477 WaDisableIPC: skl */
6159 if (IS_SKYLAKE(dev_priv))
6160 dev_priv->ipc_enabled = false;
6162 /* Display WA #1141: SKL:all KBL:all CFL */
6163 if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
6164 !dev_priv->dram_info.symmetric_memory)
6165 dev_priv->ipc_enabled = false;
6167 val = I915_READ(DISP_ARB_CTL2);
6169 if (dev_priv->ipc_enabled)
6170 val |= DISP_IPC_ENABLE;
6172 val &= ~DISP_IPC_ENABLE;
6174 I915_WRITE(DISP_ARB_CTL2, val);
6177 void intel_init_ipc(struct drm_i915_private *dev_priv)
6179 dev_priv->ipc_enabled = false;
6180 if (!HAS_IPC(dev_priv))
6183 dev_priv->ipc_enabled = true;
6184 intel_enable_ipc(dev_priv);
6188 * Lock protecting IPS related data structures
6190 DEFINE_SPINLOCK(mchdev_lock);
8192 /* Global for IPS driver to get at the current i915 device. Protected by
 * mchdev_lock. */
6194 static struct drm_i915_private *i915_mch_dev;
6196 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
6200 lockdep_assert_held(&mchdev_lock);
6202 rgvswctl = I915_READ16(MEMSWCTL);
6203 if (rgvswctl & MEMCTL_CMD_STS) {
6204 DRM_DEBUG("gpu busy, RCS change rejected\n");
6205 return false; /* still busy with another command */
6208 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6209 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
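	/*
	 * Descriptive note: the requested frequency bin 'val' is packed into
	 * MEMSWCTL together with the "change frequency" command, and setting
	 * MEMCTL_CMD_STS below kicks off the request; the busy check above
	 * relies on the hardware clearing that bit again once the change has
	 * been serviced.
	 */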
6210 I915_WRITE16(MEMSWCTL, rgvswctl);
6211 POSTING_READ16(MEMSWCTL);
6213 rgvswctl |= MEMCTL_CMD_STS;
6214 I915_WRITE16(MEMSWCTL, rgvswctl);
6219 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
6222 u8 fmax, fmin, fstart, vstart;
6224 spin_lock_irq(&mchdev_lock);
6226 rgvmodectl = I915_READ(MEMMODECTL);
6228 /* Enable temp reporting */
6229 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6230 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6232 /* 100ms RC evaluation intervals */
6233 I915_WRITE(RCUPEI, 100000);
6234 I915_WRITE(RCDNEI, 100000);
6236 /* Set max/min thresholds to 90ms and 80ms respectively */
6237 I915_WRITE(RCBMAXAVG, 90000);
6238 I915_WRITE(RCBMINAVG, 80000);
6240 I915_WRITE(MEMIHYST, 1);
6242 /* Set up min, max, and cur for interrupt handling */
6243 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6244 fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6245 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6246 MEMMODE_FSTART_SHIFT;
6248 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
6251 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6252 dev_priv->ips.fstart = fstart;
6254 dev_priv->ips.max_delay = fstart;
6255 dev_priv->ips.min_delay = fmin;
6256 dev_priv->ips.cur_delay = fstart;
6258 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6259 fmax, fmin, fstart);
6261 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6264 * Interrupts will be enabled in ironlake_irq_postinstall
6267 I915_WRITE(VIDSTART, vstart);
6268 POSTING_READ(VIDSTART);
6270 rgvmodectl |= MEMMODE_SWMODE_EN;
6271 I915_WRITE(MEMMODECTL, rgvmodectl);
6273 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6274 DRM_ERROR("stuck trying to change perf mode\n");
6277 ironlake_set_drps(dev_priv, fstart);
6279 dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6280 I915_READ(DDREC) + I915_READ(CSIEC);
6281 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6282 dev_priv->ips.last_count2 = I915_READ(GFXEC);
6283 dev_priv->ips.last_time2 = ktime_get_raw_ns();
6285 spin_unlock_irq(&mchdev_lock);
6288 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6292 spin_lock_irq(&mchdev_lock);
6294 rgvswctl = I915_READ16(MEMSWCTL);
6296 /* Ack interrupts, disable EFC interrupt */
6297 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6298 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6299 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6300 I915_WRITE(DEIIR, DE_PCU_EVENT);
6301 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6303 /* Go back to the starting frequency */
6304 ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6306 rgvswctl |= MEMCTL_CMD_STS;
6307 I915_WRITE(MEMSWCTL, rgvswctl);
6310 spin_unlock_irq(&mchdev_lock);
6313 /* There's a funny hw issue where the hw returns all 0 when reading from
6314 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
6315 * ourselves, instead of doing a rmw cycle (which might result in us clearing
6316 * all limits and the gpu getting stuck at whatever frequency it is at atm).
6318 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6320 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6323 /* Only set the down limit when we've reached the lowest level to avoid
6324 * getting more interrupts, otherwise leave this clear. This prevents a
6325 * race in the hw when coming out of rc6: There's a tiny window where
6326 * the hw runs at the minimal clock before selecting the desired
6327 * frequency; if the down threshold expires in that window we will not
6328 * receive a down interrupt. */
6329 if (INTEL_GEN(dev_priv) >= 9) {
6330 limits = (rps->max_freq_softlimit) << 23;
6331 if (val <= rps->min_freq_softlimit)
6332 limits |= (rps->min_freq_softlimit) << 14;
6334 limits = rps->max_freq_softlimit << 24;
6335 if (val <= rps->min_freq_softlimit)
6336 limits |= rps->min_freq_softlimit << 16;
6342 static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
6344 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6345 u32 threshold_up = 0, threshold_down = 0; /* in % */
6346 u32 ei_up = 0, ei_down = 0;
6348 lockdep_assert_held(&rps->power.mutex);
6350 if (new_power == rps->power.mode)
6353 /* Note the units here are not exactly 1us, but 1280ns. */
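	/*
	 * Illustrative conversion (assuming the legacy 1.28 us granularity;
	 * the authoritative scaling is whatever GT_INTERVAL_FROM_US()
	 * computes for the platform): an up evaluation interval of 16 ms,
	 * i.e. ei_up = 16000 us, becomes 16000 * 1000 / 1280 = 12500
	 * hardware interval units.
	 */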
6354 switch (new_power) {
6356 /* Upclock if more than 95% busy over 16ms */
6360 /* Downclock if less than 85% busy over 32ms */
6362 threshold_down = 85;
6366 /* Upclock if more than 90% busy over 13ms */
6370 /* Downclock if less than 75% busy over 32ms */
6372 threshold_down = 75;
6376 /* Upclock if more than 85% busy over 10ms */
6380 /* Downclock if less than 60% busy over 32ms */
6382 threshold_down = 60;
6386 /* When byt can survive without a system hang with dynamic
6387 * sw freq adjustments, this restriction can be lifted.
6389 if (IS_VALLEYVIEW(dev_priv))
6392 I915_WRITE(GEN6_RP_UP_EI,
6393 GT_INTERVAL_FROM_US(dev_priv, ei_up));
6394 I915_WRITE(GEN6_RP_UP_THRESHOLD,
6395 GT_INTERVAL_FROM_US(dev_priv,
6396 ei_up * threshold_up / 100));
6398 I915_WRITE(GEN6_RP_DOWN_EI,
6399 GT_INTERVAL_FROM_US(dev_priv, ei_down));
6400 I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6401 GT_INTERVAL_FROM_US(dev_priv,
6402 ei_down * threshold_down / 100));
6404 I915_WRITE(GEN6_RP_CONTROL,
6405 GEN6_RP_MEDIA_TURBO |
6406 GEN6_RP_MEDIA_HW_NORMAL_MODE |
6407 GEN6_RP_MEDIA_IS_GFX |
6409 GEN6_RP_UP_BUSY_AVG |
6410 GEN6_RP_DOWN_IDLE_AVG);
6413 rps->power.mode = new_power;
6414 rps->power.up_threshold = threshold_up;
6415 rps->power.down_threshold = threshold_down;
6418 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6420 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6423 new_power = rps->power.mode;
6424 switch (rps->power.mode) {
6426 if (val > rps->efficient_freq + 1 &&
6427 val > rps->cur_freq)
6428 new_power = BETWEEN;
6432 if (val <= rps->efficient_freq &&
6433 val < rps->cur_freq)
6434 new_power = LOW_POWER;
6435 else if (val >= rps->rp0_freq &&
6436 val > rps->cur_freq)
6437 new_power = HIGH_POWER;
6441 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6442 val < rps->cur_freq)
6443 new_power = BETWEEN;
6446 /* Max/min bins are special */
6447 if (val <= rps->min_freq_softlimit)
6448 new_power = LOW_POWER;
6449 if (val >= rps->max_freq_softlimit)
6450 new_power = HIGH_POWER;
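	/*
	 * Descriptive summary of the above: the softlimit floor/ceiling pin
	 * us to LOW_POWER/HIGH_POWER outright, requests around RPe move us
	 * between LOW_POWER and BETWEEN, and we only enter HIGH_POWER once a
	 * request reaches RP0 (dropping back out below the RP0/RP1 midpoint),
	 * which gives a small hysteresis band between the power modes.
	 */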
6452 mutex_lock(&rps->power.mutex);
6453 if (rps->power.interactive)
6454 new_power = HIGH_POWER;
6455 rps_set_power(dev_priv, new_power);
6456 mutex_unlock(&rps->power.mutex);
6459 void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6461 struct intel_rps *rps = &i915->gt_pm.rps;
6463 if (INTEL_GEN(i915) < 6)
6466 mutex_lock(&rps->power.mutex);
6468 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6469 rps_set_power(i915, HIGH_POWER);
6471 GEM_BUG_ON(!rps->power.interactive);
6472 rps->power.interactive--;
6474 mutex_unlock(&rps->power.mutex);
6477 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6479 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6482 /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6483 if (val > rps->min_freq_softlimit)
6484 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6485 if (val < rps->max_freq_softlimit)
6486 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6488 mask &= dev_priv->pm_rps_events;
6490 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6493 /* gen6_set_rps is called to update the frequency request, but should also be
6494 * called when the range (min_delay and max_delay) is modified so that we can
6495 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6496 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6498 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6500 /* min/max delay may still have been modified so be sure to
6501 * write the limits value.
6503 if (val != rps->cur_freq) {
6504 gen6_set_rps_thresholds(dev_priv, val);
6506 if (INTEL_GEN(dev_priv) >= 9)
6507 I915_WRITE(GEN6_RPNSWREQ,
6508 GEN9_FREQUENCY(val));
6509 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6510 I915_WRITE(GEN6_RPNSWREQ,
6511 HSW_FREQUENCY(val));
6513 I915_WRITE(GEN6_RPNSWREQ,
6514 GEN6_FREQUENCY(val) |
6516 GEN6_AGGRESSIVE_TURBO);
6519 /* Make sure we continue to get interrupts
6520 * until we hit the minimum or maximum frequencies.
6522 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6523 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6525 rps->cur_freq = val;
6526 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6531 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6535 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6536 "Odd GPU freq value\n"))
6539 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6541 if (val != dev_priv->gt_pm.rps.cur_freq) {
6542 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6546 gen6_set_rps_thresholds(dev_priv, val);
6549 dev_priv->gt_pm.rps.cur_freq = val;
6550 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6555 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6557 * * If Gfx is Idle, then
6558 * 1. Forcewake Media well.
6559 * 2. Request idle freq.
6560 * 3. Release Forcewake of Media well.
6562 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6564 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6565 u32 val = rps->idle_freq;
6568 if (rps->cur_freq <= val)
6571 /* The punit delays the write of the frequency and voltage until it
6572 * determines the GPU is awake. During normal usage we don't want to
6573 * waste power changing the frequency if the GPU is sleeping (rc6).
6574 * However, the GPU and driver are now idle and we do not want to delay
6575 * switching to minimum voltage (reducing power whilst idle) as we do
6576 * not expect to be woken in the near future and so must flush the
6577 * change by waking the device.
6579 * We choose to take the media powerwell (either would do to trick the
6580 * punit into committing the voltage change) as that takes a lot less
6581 * power than the render powerwell.
6583 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
6584 err = valleyview_set_rps(dev_priv, val);
6585 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
6588 DRM_ERROR("Failed to set RPS for idle\n");
6591 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6593 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6595 mutex_lock(&dev_priv->pcu_lock);
6599 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6600 gen6_rps_reset_ei(dev_priv);
6601 I915_WRITE(GEN6_PMINTRMSK,
6602 gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6604 gen6_enable_rps_interrupts(dev_priv);
6606 /* Use the user's desired frequency as a guide, but for better
6607 * performance, jump directly to RPe as our starting frequency.
6609 freq = max(rps->cur_freq,
6610 rps->efficient_freq);
6612 if (intel_set_rps(dev_priv,
6614 rps->min_freq_softlimit,
6615 rps->max_freq_softlimit)))
6616 DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
6618 mutex_unlock(&dev_priv->pcu_lock);
6621 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6623 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6625 /* Flush our bottom-half so that it does not race with us
6626 * setting the idle frequency and so that it is bounded by
6627 * our rpm wakeref. And then disable the interrupts to stop any
6628 * further RPS reclocking whilst we are asleep.
6630 gen6_disable_rps_interrupts(dev_priv);
6632 mutex_lock(&dev_priv->pcu_lock);
6634 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6635 vlv_set_rps_idle(dev_priv);
6637 gen6_set_rps(dev_priv, rps->idle_freq);
6639 I915_WRITE(GEN6_PMINTRMSK,
6640 gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6642 mutex_unlock(&dev_priv->pcu_lock);
6645 void gen6_rps_boost(struct i915_request *rq,
6646 struct intel_rps_client *rps_client)
6648 struct intel_rps *rps = &rq->i915->gt_pm.rps;
6649 unsigned long flags;
6652 /* This is intentionally racy! We peek at the state here, then
6653 * validate inside the RPS worker.
6658 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
6661 /* Serializes with i915_request_retire() */
6663 spin_lock_irqsave(&rq->lock, flags);
6664 if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6665 boost = !atomic_fetch_inc(&rps->num_waiters);
6666 rq->waitboost = true;
6668 spin_unlock_irqrestore(&rq->lock, flags);
6672 if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6673 schedule_work(&rps->work);
6675 atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
6678 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6680 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6683 lockdep_assert_held(&dev_priv->pcu_lock);
6684 GEM_BUG_ON(val > rps->max_freq);
6685 GEM_BUG_ON(val < rps->min_freq);
6687 if (!rps->enabled) {
6688 rps->cur_freq = val;
6692 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6693 err = valleyview_set_rps(dev_priv, val);
6695 err = gen6_set_rps(dev_priv, val);
6700 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6702 I915_WRITE(GEN6_RC_CONTROL, 0);
6703 I915_WRITE(GEN9_PG_ENABLE, 0);
6706 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6708 I915_WRITE(GEN6_RP_CONTROL, 0);
6711 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6713 I915_WRITE(GEN6_RC_CONTROL, 0);
6716 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6718 I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6719 I915_WRITE(GEN6_RP_CONTROL, 0);
6722 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6724 I915_WRITE(GEN6_RC_CONTROL, 0);
6727 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6729 I915_WRITE(GEN6_RP_CONTROL, 0);
6732 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6734 /* We're doing forcewake before disabling RC6,
6735 * as this is what the BIOS expects when going into suspend */
6736 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6738 I915_WRITE(GEN6_RC_CONTROL, 0);
6740 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6743 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6745 I915_WRITE(GEN6_RP_CONTROL, 0);
6748 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6750 bool enable_rc6 = true;
6751 unsigned long rc6_ctx_base;
6755 rc_ctl = I915_READ(GEN6_RC_CONTROL);
6756 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6757 RC_SW_TARGET_STATE_SHIFT;
6758 DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6759 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6760 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6761 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6764 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6765 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6770 * The exact context size is not known for BXT, so assume a page size
6773 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6774 if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6775 (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6776 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6780 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6781 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6782 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6783 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6784 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6788 if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6789 !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6790 !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6791 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6795 if (!I915_READ(GEN6_GFXPAUSE)) {
6796 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6800 if (!I915_READ(GEN8_MISC_CTRL0)) {
6801 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
6808 static bool sanitize_rc6(struct drm_i915_private *i915)
6810 struct intel_device_info *info = mkwrite_device_info(i915);
6812 /* Powersaving is controlled by the host when inside a VM */
6813 if (intel_vgpu_active(i915))
6816 if (info->has_rc6 &&
6817 IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
6818 DRM_INFO("RC6 disabled by BIOS\n");
6823 * We assume that we do not have any deep rc6 levels if we don't have
6824 * the previous rc6 level supported, i.e. we use HAS_RC6()
6825 * as the initial coarse check for rc6 in general, moving on to
6826 * progressively finer/deeper levels.
6828 if (!info->has_rc6 && info->has_rc6p)
6831 return info->has_rc6;
6834 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
6836 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6838 /* All of these values are in units of 50MHz */
6840 /* static values from HW: RP0 > RP1 > RPn (min_freq) */
6841 if (IS_GEN9_LP(dev_priv)) {
6842 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
6843 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
6844 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
6845 rps->min_freq = (rp_state_cap >> 0) & 0xff;
6847 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
6848 rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
6849 rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
6850 rps->min_freq = (rp_state_cap >> 16) & 0xff;
6852 /* hw_max = RP0 until we check for overclocking */
6853 rps->max_freq = rps->rp0_freq;
6855 rps->efficient_freq = rps->rp1_freq;
6856 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
6857 IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
6858 u32 ddcc_status = 0;
6860 if (sandybridge_pcode_read(dev_priv,
6861 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
6863 rps->efficient_freq =
6865 ((ddcc_status >> 8) & 0xff),
6870 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
6871 /* Store the frequency values in 16.66 MHz units, which is
6872 * the natural hardware unit for SKL
6874 rps->rp0_freq *= GEN9_FREQ_SCALER;
6875 rps->rp1_freq *= GEN9_FREQ_SCALER;
6876 rps->min_freq *= GEN9_FREQ_SCALER;
6877 rps->max_freq *= GEN9_FREQ_SCALER;
6878 rps->efficient_freq *= GEN9_FREQ_SCALER;
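	/*
	 * Illustrative example (assumed fuse value): an RP0 of 44 in the
	 * 50 MHz units read from RP_STATE_CAP is stored as 44 * 3 = 132 in
	 * 16.66 MHz units, i.e. still ~2200 MHz, assuming GEN9_FREQ_SCALER
	 * is 3 as on current gen9+ parts.
	 */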
6882 static void reset_rps(struct drm_i915_private *dev_priv,
6883 int (*set)(struct drm_i915_private *, u8))
6885 struct intel_rps *rps = &dev_priv->gt_pm.rps;
6886 u8 freq = rps->cur_freq;
6889 rps->power.mode = -1;
6892 if (set(dev_priv, freq))
6893 DRM_ERROR("Failed to reset RPS to initial values\n");
6896 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
6897 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
6899 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6901 /* Program defaults and thresholds for RPS */
6902 if (IS_GEN9(dev_priv))
6903 I915_WRITE(GEN6_RC_VIDEO_FREQ,
6904 GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
6906 /* 1 second timeout */
6907 I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
6908 GT_INTERVAL_FROM_US(dev_priv, 1000000));
6910 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
6912 /* Leaning on the below call to gen6_set_rps to program/setup the
6913 * Up/Down EI & threshold registers, as well as the RP_CONTROL,
6914 * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
6915 reset_rps(dev_priv, gen6_set_rps);
6917 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6920 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
6922 struct intel_engine_cs *engine;
6923 enum intel_engine_id id;
6926 /* 1a: Software RC state - RC0 */
6927 I915_WRITE(GEN6_RC_STATE, 0);
6929 /* 1b: Get forcewake during program sequence. Although the driver
6930 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
6931 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6933 /* 2a: Disable RC states. */
6934 I915_WRITE(GEN6_RC_CONTROL, 0);
6936 /* 2b: Program RC6 thresholds. */
6937 if (INTEL_GEN(dev_priv) >= 10) {
6938 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
6939 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
6940 } else if (IS_SKYLAKE(dev_priv)) {
6942 * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
6943 * when CPG is enabled
6945 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
6947 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
6950 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
6951 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
6952 for_each_engine(engine, dev_priv, id)
6953 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6955 if (HAS_GUC(dev_priv))
6956 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
6958 I915_WRITE(GEN6_RC_SLEEP, 0);
6961 * 2c: Program Coarse Power Gating Policies.
6963 * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
6964 * use instead is a more conservative estimate for the maximum time
6965 * it takes us to service a CS interrupt and submit a new ELSP - that
6966 * is the time which the GPU is idle waiting for the CPU to select the
6967 * next request to execute. If the idle hysteresis is less than that
6968 * interrupt service latency, the hardware will automatically gate
6969 * the power well and we will then incur the wake up cost on top of
6970 * the service latency. A similar guide from intel_pstate is that we
6971 * do not want the enable hysteresis to be less than the wakeup latency.
6973 * igt/gem_exec_nop/sequential provides a rough estimate for the
6974 * service latency, and puts it around 10us for Broadwell (and other
6975 * big cores) and around 40us for Broxton (and other low power cores).
6976 * [Note that for legacy ringbuffer submission, this is less than 1us!]
6977 * However, the wakeup latency on Broxton is closer to 100us. To be
6978 * conservative, we have to factor in a context switch on top (due
6981 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
6982 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
6984 /* 3a: Enable RC6 */
6985 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
6987 /* WaRsUseTimeoutMode:cnl (pre-prod) */
6988 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
6989 rc6_mode = GEN7_RC_CTL_TO_MODE;
6991 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
6993 I915_WRITE(GEN6_RC_CONTROL,
6994 GEN6_RC_CTL_HW_ENABLE |
6995 GEN6_RC_CTL_RC6_ENABLE |
6999 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
7000 * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
7002 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
7003 I915_WRITE(GEN9_PG_ENABLE, 0);
7005 I915_WRITE(GEN9_PG_ENABLE,
7006 GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
7008 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7011 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
7013 struct intel_engine_cs *engine;
7014 enum intel_engine_id id;
7016 /* 1a: Software RC state - RC0 */
7017 I915_WRITE(GEN6_RC_STATE, 0);
7019 /* 1b: Get forcewake during program sequence. Although the driver
7020 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7021 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7023 /* 2a: Disable RC states. */
7024 I915_WRITE(GEN6_RC_CONTROL, 0);
7026 /* 2b: Program RC6 thresholds. */
7027 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7028 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7029 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7030 for_each_engine(engine, dev_priv, id)
7031 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7032 I915_WRITE(GEN6_RC_SLEEP, 0);
7033 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
7037 I915_WRITE(GEN6_RC_CONTROL,
7038 GEN6_RC_CTL_HW_ENABLE |
7039 GEN7_RC_CTL_TO_MODE |
7040 GEN6_RC_CTL_RC6_ENABLE);
7042 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7045 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7047 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7049 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7051 /* 1 Program defaults and thresholds for RPS*/
7052 I915_WRITE(GEN6_RPNSWREQ,
7053 HSW_FREQUENCY(rps->rp1_freq));
7054 I915_WRITE(GEN6_RC_VIDEO_FREQ,
7055 HSW_FREQUENCY(rps->rp1_freq));
7056 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7057 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7059 /* Docs recommend 900MHz, and 300 MHz respectively */
7060 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
7061 rps->max_freq_softlimit << 24 |
7062 rps->min_freq_softlimit << 16);
7064 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7065 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7066 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7067 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
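	/*
	 * Illustrative arithmetic for the values above (assuming the usual
	 * 1.28 us tick): GEN6_RP_UP_EI = 66000 -> 66000 * 1.28 us ~= 84.5 ms,
	 * GEN6_RP_DOWN_EI = 350000 -> ~448 ms, and GEN6_RP_UP_THRESHOLD =
	 * 7600000 / 128 = 59375 -> ~76 ms of busyness, i.e. roughly 90% of
	 * the 84.5 ms up evaluation interval.
	 */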
7069 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7072 I915_WRITE(GEN6_RP_CONTROL,
7073 GEN6_RP_MEDIA_TURBO |
7074 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7075 GEN6_RP_MEDIA_IS_GFX |
7077 GEN6_RP_UP_BUSY_AVG |
7078 GEN6_RP_DOWN_IDLE_AVG);
7080 reset_rps(dev_priv, gen6_set_rps);
7082 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7085 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
7087 struct intel_engine_cs *engine;
7088 enum intel_engine_id id;
7089 u32 rc6vids, rc6_mask;
7093 I915_WRITE(GEN6_RC_STATE, 0);
7095 /* Clear the DBG now so we don't confuse earlier errors */
7096 gtfifodbg = I915_READ(GTFIFODBG);
7098 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7099 I915_WRITE(GTFIFODBG, gtfifodbg);
7102 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7104 /* disable the counters and set deterministic thresholds */
7105 I915_WRITE(GEN6_RC_CONTROL, 0);
7107 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7108 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7109 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7110 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7111 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7113 for_each_engine(engine, dev_priv, id)
7114 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7116 I915_WRITE(GEN6_RC_SLEEP, 0);
7117 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
7118 if (IS_IVYBRIDGE(dev_priv))
7119 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7121 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
7122 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
7123 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7125 /* We don't use those on Haswell */
7126 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7127 if (HAS_RC6p(dev_priv))
7128 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7129 if (HAS_RC6pp(dev_priv))
7130 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
7131 I915_WRITE(GEN6_RC_CONTROL,
7133 GEN6_RC_CTL_EI_MODE(1) |
7134 GEN6_RC_CTL_HW_ENABLE);
7137 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
7138 if (IS_GEN6(dev_priv) && ret) {
7139 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7140 } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
7141 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7142 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7143 rc6vids &= 0xffff00;
7144 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7145 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7147 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7150 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7153 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7155 /* Here begins a magic sequence of register writes to enable
7156 * auto-downclocking.
7158 * Perhaps there might be some value in exposing these to
 * userspace...
 */
7161 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7163 /* Power down if completely idle for over 50ms */
7164 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7165 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7167 reset_rps(dev_priv, gen6_set_rps);
7169 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7172 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
7174 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7175 const int min_freq = 15;
7176 const int scaling_factor = 180;
7177 unsigned int gpu_freq;
7178 unsigned int max_ia_freq, min_ring_freq;
7179 unsigned int max_gpu_freq, min_gpu_freq;
7180 struct cpufreq_policy *policy;
7182 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
7184 if (rps->max_freq <= rps->min_freq)
7187 policy = cpufreq_cpu_get(0);
7189 max_ia_freq = policy->cpuinfo.max_freq;
7190 cpufreq_cpu_put(policy);
7193 * Default to measured freq if none found, PCU will ensure we
7196 max_ia_freq = tsc_khz;
7199 /* Convert from kHz to MHz */
7200 max_ia_freq /= 1000;
7202 min_ring_freq = I915_READ(DCLK) & 0xf;
7203 /* convert DDR frequency from units of 266.6MHz to bandwidth */
7204 min_ring_freq = mult_frac(min_ring_freq, 8, 3);
7206 min_gpu_freq = rps->min_freq;
7207 max_gpu_freq = rps->max_freq;
7208 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7209 /* Convert GT frequency to 50 MHz units */
7210 min_gpu_freq /= GEN9_FREQ_SCALER;
7211 max_gpu_freq /= GEN9_FREQ_SCALER;
7215 * For each potential GPU frequency, load a ring frequency we'd like
7216 * to use for memory access. We do this by specifying the IA frequency
7217 * the PCU should use as a reference to determine the ring frequency.
7219 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
7220 const int diff = max_gpu_freq - gpu_freq;
7221 unsigned int ia_freq = 0, ring_freq = 0;
7223 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7225 * ring_freq = 2 * GT. ring_freq is in 100MHz units
7226 * No floor required for ring frequency on SKL.
7228 ring_freq = gpu_freq;
7229 } else if (INTEL_GEN(dev_priv) >= 8) {
7230 /* max(2 * GT, DDR). NB: GT is 50MHz units */
7231 ring_freq = max(min_ring_freq, gpu_freq);
7232 } else if (IS_HASWELL(dev_priv)) {
7233 ring_freq = mult_frac(gpu_freq, 5, 4);
7234 ring_freq = max(min_ring_freq, ring_freq);
7235 /* leave ia_freq as the default, chosen by cpufreq */
7237 /* On older processors, there is no separate ring
7238 * clock domain, so in order to boost the bandwidth
7239 * of the ring, we need to upclock the CPU (ia_freq).
7241 * For GPU frequencies less than 750MHz,
7242 * just use the lowest ring freq.
7244 if (gpu_freq < min_freq)
7247 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7248 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
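			/*
			 * Illustrative example (assumed CPU max of 3400 MHz):
			 * ten 50 MHz bins below the GPU max gives
			 * ia_freq = 3400 - (10 * 180) / 2 = 2500 MHz, which
			 * DIV_ROUND_CLOSEST(, 100) then encodes as 25 in the
			 * 100 MHz units the PCU expects.
			 */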
7251 sandybridge_pcode_write(dev_priv,
7252 GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
7253 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7254 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7259 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
7263 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7265 switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
7267 /* (2 * 4) config */
7268 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7271 /* (2 * 6) config */
7272 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7275 /* (2 * 8) config */
7277 /* Setting (2 * 8) Min RP0 for any other combination */
7278 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7282 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7287 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7291 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7292 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7297 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7301 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7302 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7307 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7311 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7312 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7313 FB_GFX_FREQ_FUSE_MASK);
7318 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7322 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7324 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7329 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7333 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7335 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7337 rp0 = min_t(u32, rp0, 0xea);
7342 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7346 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7347 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7348 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7349 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7354 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7358 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7360 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7361 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7362 * a BYT-M B0 the above register contains 0xbf. Moreover when setting
7363 * a frequency Punit will not allow values below 0xc0. Clamp it to 0xc0
7364 * to make sure it matches what Punit accepts.
7366 return max_t(u32, val, 0xc0);
7369 /* Check that the pctx buffer wasn't moved under us. */
7370 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7372 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7374 WARN_ON(pctx_addr != dev_priv->dsm.start +
7375 dev_priv->vlv_pctx->stolen->start);
7379 /* Check that the pcbr address is not empty. */
7380 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7382 unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7384 WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7387 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7389 resource_size_t pctx_paddr, paddr;
7390 resource_size_t pctx_size = 32*1024;
7393 pcbr = I915_READ(VLV_PCBR);
7394 if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7395 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7396 paddr = dev_priv->dsm.end + 1 - pctx_size;
7397 GEM_BUG_ON(paddr > U32_MAX);
7399 pctx_paddr = (paddr & (~4095));
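		/*
		 * Masking with ~4095 aligns the base down to a 4 KiB
		 * boundary, e.g. an assumed address of 0x7fff8123 would be
		 * programmed as 0x7fff8000 (illustrative value only).
		 */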
7400 I915_WRITE(VLV_PCBR, pctx_paddr);
7403 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7406 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7408 struct drm_i915_gem_object *pctx;
7409 resource_size_t pctx_paddr;
7410 resource_size_t pctx_size = 24*1024;
7413 pcbr = I915_READ(VLV_PCBR);
7415 /* BIOS set it up already, grab the pre-alloc'd space */
7416 resource_size_t pcbr_offset;
7418 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7419 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7421 I915_GTT_OFFSET_NONE,
7426 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7429 * From the Gunit register HAS:
7430 * The Gfx driver is expected to program this register and ensure
7431 * proper allocation within Gfx stolen memory. For example, this
7432 * register should be programmed such that the PCBR range does not
7433 * overlap with other ranges, such as the frame buffer, protected
7434 * memory, or any other relevant ranges.
7436 pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7438 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7442 GEM_BUG_ON(range_overflows_t(u64,
7443 dev_priv->dsm.start,
7444 pctx->stolen->start,
7446 pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7447 I915_WRITE(VLV_PCBR, pctx_paddr);
7450 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7451 dev_priv->vlv_pctx = pctx;
7454 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7456 struct drm_i915_gem_object *pctx;
7458 pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7460 i915_gem_object_put(pctx);
7463 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7465 dev_priv->gt_pm.rps.gpll_ref_freq =
7466 vlv_get_cck_clock(dev_priv, "GPLL ref",
7467 CCK_GPLL_CLOCK_CONTROL,
7468 dev_priv->czclk_freq);
7470 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7471 dev_priv->gt_pm.rps.gpll_ref_freq);
7474 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7476 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7479 valleyview_setup_pctx(dev_priv);
7481 vlv_init_gpll_ref_freq(dev_priv);
7483 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7484 switch ((val >> 6) & 3) {
7487 dev_priv->mem_freq = 800;
7490 dev_priv->mem_freq = 1066;
7493 dev_priv->mem_freq = 1333;
7496 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7498 rps->max_freq = valleyview_rps_max_freq(dev_priv);
7499 rps->rp0_freq = rps->max_freq;
7500 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7501 intel_gpu_freq(dev_priv, rps->max_freq),
7504 rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7505 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7506 intel_gpu_freq(dev_priv, rps->efficient_freq),
7507 rps->efficient_freq);
7509 rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7510 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7511 intel_gpu_freq(dev_priv, rps->rp1_freq),
7514 rps->min_freq = valleyview_rps_min_freq(dev_priv);
7515 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7516 intel_gpu_freq(dev_priv, rps->min_freq),
7520 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7522 struct intel_rps *rps = &dev_priv->gt_pm.rps;
7525 cherryview_setup_pctx(dev_priv);
7527 vlv_init_gpll_ref_freq(dev_priv);
7529 mutex_lock(&dev_priv->sb_lock);
7530 val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7531 mutex_unlock(&dev_priv->sb_lock);
7533 switch ((val >> 2) & 0x7) {
7535 dev_priv->mem_freq = 2000;
7538 dev_priv->mem_freq = 1600;
7541 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7543 rps->max_freq = cherryview_rps_max_freq(dev_priv);
7544 rps->rp0_freq = rps->max_freq;
7545 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7546 intel_gpu_freq(dev_priv, rps->max_freq),
7549 rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7550 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7551 intel_gpu_freq(dev_priv, rps->efficient_freq),
7552 rps->efficient_freq);
7554 rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7555 DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7556 intel_gpu_freq(dev_priv, rps->rp1_freq),
7559 rps->min_freq = cherryview_rps_min_freq(dev_priv);
7560 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7561 intel_gpu_freq(dev_priv, rps->min_freq),
7564 WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7566 "Odd GPU freq values\n");
7569 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7571 valleyview_cleanup_pctx(dev_priv);
7574 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7576 struct intel_engine_cs *engine;
7577 enum intel_engine_id id;
7578 u32 gtfifodbg, rc6_mode, pcbr;
7580 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7581 GT_FIFO_FREE_ENTRIES_CHV);
7583 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7585 I915_WRITE(GTFIFODBG, gtfifodbg);
7588 cherryview_check_pctx(dev_priv);
7590 /* 1a & 1b: Get forcewake during program sequence. Although the driver
7591 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
7592 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7594 /* Disable RC states. */
7595 I915_WRITE(GEN6_RC_CONTROL, 0);
7597 /* 2a: Program RC6 thresholds. */
7598 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7599 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7600 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7602 for_each_engine(engine, dev_priv, id)
7603 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7604 I915_WRITE(GEN6_RC_SLEEP, 0);
7606 /* TO threshold set to 500 us (0x186 * 1.28 us) */
7607 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7609 /* Allows RC6 residency counter to work */
7610 I915_WRITE(VLV_COUNTER_CONTROL,
7611 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7612 VLV_MEDIA_RC6_COUNT_EN |
7613 VLV_RENDER_RC6_COUNT_EN));
7615 /* For now we assume BIOS is allocating and populating the PCBR */
7616 pcbr = I915_READ(VLV_PCBR);
7620 if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7621 rc6_mode = GEN7_RC_CTL_TO_MODE;
7622 I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7624 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7627 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7631 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7633 /* 1: Program defaults and thresholds for RPS*/
7634 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7635 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7636 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7637 I915_WRITE(GEN6_RP_UP_EI, 66000);
7638 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7640 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7643 I915_WRITE(GEN6_RP_CONTROL,
7644 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7645 GEN6_RP_MEDIA_IS_GFX |
7647 GEN6_RP_UP_BUSY_AVG |
7648 GEN6_RP_DOWN_IDLE_AVG);
7650 /* Setting Fixed Bias */
7651 val = VLV_OVERRIDE_EN |
7653 CHV_BIAS_CPU_50_SOC_50;
7654 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7656 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7658 /* RPS code assumes GPLL is used */
7659 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7661 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7662 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7664 reset_rps(dev_priv, valleyview_set_rps);
7666 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7669 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7671 struct intel_engine_cs *engine;
7672 enum intel_engine_id id;
7675 valleyview_check_pctx(dev_priv);
7677 gtfifodbg = I915_READ(GTFIFODBG);
7679 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7681 I915_WRITE(GTFIFODBG, gtfifodbg);
7684 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7686 /* Disable RC states. */
7687 I915_WRITE(GEN6_RC_CONTROL, 0);
7689 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7690 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7691 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7693 for_each_engine(engine, dev_priv, id)
7694 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7696 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
7698 /* Allows RC6 residency counter to work */
7699 I915_WRITE(VLV_COUNTER_CONTROL,
7700 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7701 VLV_MEDIA_RC0_COUNT_EN |
7702 VLV_RENDER_RC0_COUNT_EN |
7703 VLV_MEDIA_RC6_COUNT_EN |
7704 VLV_RENDER_RC6_COUNT_EN));
7706 I915_WRITE(GEN6_RC_CONTROL,
7707 GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
7709 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7712 static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7716 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7718 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7719 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7720 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7721 I915_WRITE(GEN6_RP_UP_EI, 66000);
7722 I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7724 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7726 I915_WRITE(GEN6_RP_CONTROL,
7727 GEN6_RP_MEDIA_TURBO |
7728 GEN6_RP_MEDIA_HW_NORMAL_MODE |
7729 GEN6_RP_MEDIA_IS_GFX |
7731 GEN6_RP_UP_BUSY_AVG |
7732 GEN6_RP_DOWN_IDLE_CONT);
7734 /* Setting Fixed Bias */
7735 val = VLV_OVERRIDE_EN |
7737 VLV_BIAS_CPU_125_SOC_875;
7738 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7740 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7742 /* RPS code assumes GPLL is used */
7743 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7745 DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7746 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7748 reset_rps(dev_priv, valleyview_set_rps);
7750 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7753 static unsigned long intel_pxfreq(u32 vidfreq)
7756 int div = (vidfreq & 0x3f0000) >> 16;
7757 int post = (vidfreq & 0x3000) >> 12;
7758 int pre = (vidfreq & 0x7);
7763 freq = ((div * 133333) / ((1<<post) * pre));
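	/*
	 * Illustrative decode (assumed vidfreq field values of div = 20,
	 * post = 1, pre = 2): freq = 20 * 133333 / ((1 << 1) * 2) = 666665,
	 * i.e. roughly 666 MHz if the result is taken to be in kHz
	 * (consistent with the divide by 1000 in intel_init_emon()).
	 */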
7768 static const struct cparams {
7774 { 1, 1333, 301, 28664 },
7775 { 1, 1066, 294, 24460 },
7776 { 1, 800, 294, 25192 },
7777 { 0, 1333, 276, 27605 },
7778 { 0, 1066, 276, 27605 },
7779 { 0, 800, 231, 23784 },
7782 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
7784 u64 total_count, diff, ret;
7785 u32 count1, count2, count3, m = 0, c = 0;
7786 unsigned long now = jiffies_to_msecs(jiffies), diff1;
7789 lockdep_assert_held(&mchdev_lock);
7791 diff1 = now - dev_priv->ips.last_time1;
7793 /* Prevent division-by-zero if we are asking too fast.
7794 * Also, we don't get interesting results if we are polling
7795 * faster than once in 10ms, so just return the saved value
7799 return dev_priv->ips.chipset_power;
7801 count1 = I915_READ(DMIEC);
7802 count2 = I915_READ(DDREC);
7803 count3 = I915_READ(CSIEC);
7805 total_count = count1 + count2 + count3;
7807 /* FIXME: handle per-counter overflow */
7808 if (total_count < dev_priv->ips.last_count1) {
7809 diff = ~0UL - dev_priv->ips.last_count1;
7810 diff += total_count;
7812 diff = total_count - dev_priv->ips.last_count1;
7815 for (i = 0; i < ARRAY_SIZE(cparams); i++) {
7816 if (cparams[i].i == dev_priv->ips.c_m &&
7817 cparams[i].t == dev_priv->ips.r_t) {
7824 diff = div_u64(diff, diff1);
7825 ret = ((m * diff) + c);
7826 ret = div_u64(ret, 10);
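	/*
	 * Illustrative example (assumed counter delta): with the first cparams
	 * row matched (m = 301, c = 28664) and an energy-count delta of 1000
	 * per millisecond of elapsed time, this evaluates to
	 * (301 * 1000 + 28664) / 10 ~= 32966 in the driver's chipset power
	 * units.
	 */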
7828 dev_priv->ips.last_count1 = total_count;
7829 dev_priv->ips.last_time1 = now;
7831 dev_priv->ips.chipset_power = ret;
7836 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
7840 if (!IS_GEN5(dev_priv))
7843 spin_lock_irq(&mchdev_lock);
7845 val = __i915_chipset_val(dev_priv);
7847 spin_unlock_irq(&mchdev_lock);
7852 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
7854 unsigned long m, x, b;
7857 tsfs = I915_READ(TSFS);
7859 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
7860 x = I915_READ8(TR1);
7862 b = tsfs & TSFS_INTR_MASK;
7864 return ((m * x) / 127) - b;
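	/*
	 * Illustrative evaluation (assumed register contents): with a TSFS
	 * slope m = 80, a TR1 reading x = 100 and an intercept b = 10, the
	 * reported value is (80 * 100) / 127 - 10 = 52.
	 */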
7867 static int _pxvid_to_vd(u8 pxvid)
7872 if (pxvid >= 8 && pxvid < 31)
7875 return (pxvid + 2) * 125;
7878 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
7880 const int vd = _pxvid_to_vd(pxvid);
7881 const int vm = vd - 1125;
7883 if (INTEL_INFO(dev_priv)->is_mobile)
7884 return vm > 0 ? vm : 0;
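	/*
	 * Illustrative mapping (assumed PXVID value): a raw pxvid of 0x20
	 * decodes to (0x20 + 2) * 125 = 4250 in _pxvid_to_vd(); on mobile
	 * parts 1125 is then subtracted (and clamped at zero), giving 3125
	 * here.
	 */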
7889 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
7891 u64 now, diff, diffms;
7894 lockdep_assert_held(&mchdev_lock);
7896 now = ktime_get_raw_ns();
7897 diffms = now - dev_priv->ips.last_time2;
7898 do_div(diffms, NSEC_PER_MSEC);
7900 /* Don't divide by 0 */
7904 count = I915_READ(GFXEC);
7906 if (count < dev_priv->ips.last_count2) {
7907 diff = ~0UL - dev_priv->ips.last_count2;
7910 diff = count - dev_priv->ips.last_count2;
7913 dev_priv->ips.last_count2 = count;
7914 dev_priv->ips.last_time2 = now;
7916 /* More magic constants... */
7918 diff = div_u64(diff, diffms * 10);
7919 dev_priv->ips.gfx_power = diff;
7922 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
7924 if (!IS_GEN5(dev_priv))
7927 spin_lock_irq(&mchdev_lock);
7929 __i915_update_gfx_val(dev_priv);
7931 spin_unlock_irq(&mchdev_lock);
7934 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
7936 unsigned long t, corr, state1, corr2, state2;
7939 lockdep_assert_held(&mchdev_lock);
7941 pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
7942 pxvid = (pxvid >> 24) & 0x7f;
7943 ext_v = pvid_to_extvid(dev_priv, pxvid);
7947 t = i915_mch_val(dev_priv);
7949 /* Revel in the empirically derived constants */
7951 /* Correction factor in 1/100000 units */
7953 corr = ((t * 2349) + 135940);
7955 corr = ((t * 964) + 29317);
7957 corr = ((t * 301) + 1004);
7959 corr = corr * ((150142 * state1) / 10000 - 78642);
7961 corr2 = (corr * dev_priv->ips.corr);
7963 state2 = (corr2 * state1) / 10000;
7964 state2 /= 100; /* convert to mW */
7966 __i915_update_gfx_val(dev_priv);
7968 return dev_priv->ips.gfx_power + state2;
7971 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
7975 if (!IS_GEN5(dev_priv))
7978 spin_lock_irq(&mchdev_lock);
7980 val = __i915_gfx_val(dev_priv);
7982 spin_unlock_irq(&mchdev_lock);
7988 * i915_read_mch_val - return value for IPS use
7990 * Calculate and return a value for the IPS driver to use when deciding whether
7991 * we have thermal and power headroom to increase CPU or GPU power budget.
7993 unsigned long i915_read_mch_val(void)
7995 struct drm_i915_private *dev_priv;
7996 unsigned long chipset_val, graphics_val, ret = 0;
7998 spin_lock_irq(&mchdev_lock);
8001 dev_priv = i915_mch_dev;
8003 chipset_val = __i915_chipset_val(dev_priv);
8004 graphics_val = __i915_gfx_val(dev_priv);
8006 ret = chipset_val + graphics_val;
8009 spin_unlock_irq(&mchdev_lock);
8013 EXPORT_SYMBOL_GPL(i915_read_mch_val);
8016 * i915_gpu_raise - raise GPU frequency limit
8018 * Raise the limit; IPS indicates we have thermal headroom.
8020 bool i915_gpu_raise(void)
8022 struct drm_i915_private *dev_priv;
8025 spin_lock_irq(&mchdev_lock);
8026 if (!i915_mch_dev) {
8030 dev_priv = i915_mch_dev;
8032 if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
8033 dev_priv->ips.max_delay--;
8036 spin_unlock_irq(&mchdev_lock);
8040 EXPORT_SYMBOL_GPL(i915_gpu_raise);
8043 * i915_gpu_lower - lower GPU frequency limit
8045 * IPS indicates we're close to a thermal limit, so throttle back the GPU
8046 * frequency maximum.
8048 bool i915_gpu_lower(void)
8050 struct drm_i915_private *dev_priv;
8053 spin_lock_irq(&mchdev_lock);
8054 if (!i915_mch_dev) {
8058 dev_priv = i915_mch_dev;
8060 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
8061 dev_priv->ips.max_delay++;
8064 spin_unlock_irq(&mchdev_lock);
8068 EXPORT_SYMBOL_GPL(i915_gpu_lower);
8071 * i915_gpu_busy - indicate GPU business to IPS
8073 * Tell the IPS driver whether or not the GPU is busy.
8075 bool i915_gpu_busy(void)
8079 spin_lock_irq(&mchdev_lock);
8081 ret = i915_mch_dev->gt.awake;
8082 spin_unlock_irq(&mchdev_lock);
8086 EXPORT_SYMBOL_GPL(i915_gpu_busy);
8089 * i915_gpu_turbo_disable - disable graphics turbo
8091 * Disable graphics turbo by resetting the max frequency and setting the
8092 * current frequency to the default.
8094 bool i915_gpu_turbo_disable(void)
8096 struct drm_i915_private *dev_priv;
8099 spin_lock_irq(&mchdev_lock);
8100 if (!i915_mch_dev) {
8104 dev_priv = i915_mch_dev;
8106 dev_priv->ips.max_delay = dev_priv->ips.fstart;
8108 if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
8112 spin_unlock_irq(&mchdev_lock);
8116 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8119 * Tells the intel_ips driver that the i915 driver is now loaded, if
8120 * IPS got loaded first.
8122 * This awkward dance is so that neither module has to depend on the
8123 * other in order for IPS to do the appropriate communication of
8124 * GPU turbo limits to i915.
8127 ips_ping_for_i915_load(void)
8131 link = symbol_get(ips_link_to_i915_driver);
8134 symbol_put(ips_link_to_i915_driver);
8138 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8140 /* We only register the i915 ips part with intel-ips once everything is
8141 * set up, to avoid intel-ips sneaking in and reading bogus values. */
8142 spin_lock_irq(&mchdev_lock);
8143 i915_mch_dev = dev_priv;
8144 spin_unlock_irq(&mchdev_lock);
8146 ips_ping_for_i915_load();
8149 void intel_gpu_ips_teardown(void)
8151 spin_lock_irq(&mchdev_lock);
8152 i915_mch_dev = NULL;
8153 spin_unlock_irq(&mchdev_lock);
8156 static void intel_init_emon(struct drm_i915_private *dev_priv)
8162 /* Disable to program */
8166 /* Program energy weights for various events */
8167 I915_WRITE(SDEW, 0x15040d00);
8168 I915_WRITE(CSIEW0, 0x007f0000);
8169 I915_WRITE(CSIEW1, 0x1e220004);
8170 I915_WRITE(CSIEW2, 0x04000004);
8172 for (i = 0; i < 5; i++)
8173 I915_WRITE(PEW(i), 0);
8174 for (i = 0; i < 3; i++)
8175 I915_WRITE(DEW(i), 0);
8177 /* Program P-state weights to account for frequency power adjustment */
8178 for (i = 0; i < 16; i++) {
8179 u32 pxvidfreq = I915_READ(PXVFREQ(i));
8180 unsigned long freq = intel_pxfreq(pxvidfreq);
8181 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8186 val *= (freq / 1000);
8188 val /= (127*127*900);
8190 DRM_ERROR("bad pxval: %ld\n", val);
8193 /* Render standby states get 0 weight */
8197 for (i = 0; i < 4; i++) {
8198 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8199 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
8200 I915_WRITE(PXW(i), val);
8203 /* Adjust magic regs to magic values (more experimental results) */
8204 I915_WRITE(OGW0, 0);
8205 I915_WRITE(OGW1, 0);
8206 I915_WRITE(EG0, 0x00007f00);
8207 I915_WRITE(EG1, 0x0000000e);
8208 I915_WRITE(EG2, 0x000e0000);
8209 I915_WRITE(EG3, 0x68000300);
8210 I915_WRITE(EG4, 0x42000000);
8211 I915_WRITE(EG5, 0x00140031);
8215 for (i = 0; i < 8; i++)
8216 I915_WRITE(PXWL(i), 0);
8218 /* Enable PMON + select events */
8219 I915_WRITE(ECR, 0x80000019);
8221 lcfuse = I915_READ(LCFUSE02);
8223 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
8226 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8228 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8231 * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
8234 if (!sanitize_rc6(dev_priv)) {
8235 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8236 pm_runtime_get(&dev_priv->drm.pdev->dev);
8239 mutex_lock(&dev_priv->pcu_lock);
8241 /* Initialize RPS limits (for userspace) */
8242 if (IS_CHERRYVIEW(dev_priv))
8243 cherryview_init_gt_powersave(dev_priv);
8244 else if (IS_VALLEYVIEW(dev_priv))
8245 valleyview_init_gt_powersave(dev_priv);
8246 else if (INTEL_GEN(dev_priv) >= 6)
8247 gen6_init_rps_frequencies(dev_priv);
8249 /* Derive initial user preferences/limits from the hardware limits */
8250 rps->idle_freq = rps->min_freq;
8251 rps->cur_freq = rps->idle_freq;
8253 rps->max_freq_softlimit = rps->max_freq;
8254 rps->min_freq_softlimit = rps->min_freq;
8256 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
8257 rps->min_freq_softlimit =
8259 rps->efficient_freq,
8260 intel_freq_opcode(dev_priv, 450));
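/*
 * Hedged worked example: intel_freq_opcode() converts MHz into the RPS
 * opcode space, so with the usual 50 MHz step on HSW/BDW the 450 MHz
 * floor above becomes opcode 450 / 50 = 9.
 */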
8262 /* After setting max-softlimit, find the overclock max freq */
8263 if (IS_GEN6(dev_priv) ||
8264 IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8267 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8268 if (params & BIT(31)) { /* OC supported */
8269 DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8270 (rps->max_freq & 0xff) * 50,
8271 (params & 0xff) * 50);
8272 rps->max_freq = params & 0xff;
8276 /* Finally allow us to boost to max by default */
8277 rps->boost_freq = rps->max_freq;
8279 mutex_unlock(&dev_priv->pcu_lock);
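/*
 * Sketch of how the overclocking reply above decodes (derived from the
 * code, not from Bspec): bit 31 flags OC support and bits 7:0 carry the
 * maximum ratio in 50 MHz units, so a low byte of 0x24 (36) would mean
 * a 36 * 50 = 1800 MHz overclock ceiling.
 */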
8282 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8284 if (IS_VALLEYVIEW(dev_priv))
8285 valleyview_cleanup_gt_powersave(dev_priv);
8287 if (!HAS_RC6(dev_priv))
8288 pm_runtime_put(&dev_priv->drm.pdev->dev);
8292 * intel_suspend_gt_powersave - suspend PM work and helper threads
8293 * @dev_priv: i915 device
8295 * We don't want to disable RC6 or other features here, we just want
8296 * to make sure any work we've queued has finished and won't bother
8297 * us while we're suspended.
8299 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
8301 if (INTEL_GEN(dev_priv) < 6)
8304 /* gen6_rps_idle() will be called later to disable interrupts */
8307 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8309 dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8310 dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8311 intel_disable_gt_powersave(dev_priv);
8313 if (INTEL_GEN(dev_priv) >= 11)
8314 gen11_reset_rps_interrupts(dev_priv);
8315 else if (INTEL_GEN(dev_priv) >= 6)
8316 gen6_reset_rps_interrupts(dev_priv);
8319 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8321 lockdep_assert_held(&i915->pcu_lock);
8323 if (!i915->gt_pm.llc_pstate.enabled)
8326 /* Currently there is no HW configuration to be done to disable. */
8328 i915->gt_pm.llc_pstate.enabled = false;
8331 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8333 lockdep_assert_held(&dev_priv->pcu_lock);
8335 if (!dev_priv->gt_pm.rc6.enabled)
8338 if (INTEL_GEN(dev_priv) >= 9)
8339 gen9_disable_rc6(dev_priv);
8340 else if (IS_CHERRYVIEW(dev_priv))
8341 cherryview_disable_rc6(dev_priv);
8342 else if (IS_VALLEYVIEW(dev_priv))
8343 valleyview_disable_rc6(dev_priv);
8344 else if (INTEL_GEN(dev_priv) >= 6)
8345 gen6_disable_rc6(dev_priv);
8347 dev_priv->gt_pm.rc6.enabled = false;
8350 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8352 lockdep_assert_held(&dev_priv->pcu_lock);
8354 if (!dev_priv->gt_pm.rps.enabled)
8357 if (INTEL_GEN(dev_priv) >= 9)
8358 gen9_disable_rps(dev_priv);
8359 else if (IS_CHERRYVIEW(dev_priv))
8360 cherryview_disable_rps(dev_priv);
8361 else if (IS_VALLEYVIEW(dev_priv))
8362 valleyview_disable_rps(dev_priv);
8363 else if (INTEL_GEN(dev_priv) >= 6)
8364 gen6_disable_rps(dev_priv);
8365 else if (IS_IRONLAKE_M(dev_priv))
8366 ironlake_disable_drps(dev_priv);
8368 dev_priv->gt_pm.rps.enabled = false;
8371 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8373 mutex_lock(&dev_priv->pcu_lock);
8375 intel_disable_rc6(dev_priv);
8376 intel_disable_rps(dev_priv);
8377 if (HAS_LLC(dev_priv))
8378 intel_disable_llc_pstate(dev_priv);
8380 mutex_unlock(&dev_priv->pcu_lock);
8383 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8385 lockdep_assert_held(&i915->pcu_lock);
8387 if (i915->gt_pm.llc_pstate.enabled)
8390 gen6_update_ring_freq(i915);
8392 i915->gt_pm.llc_pstate.enabled = true;
8395 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8397 lockdep_assert_held(&dev_priv->pcu_lock);
8399 if (dev_priv->gt_pm.rc6.enabled)
8402 if (IS_CHERRYVIEW(dev_priv))
8403 cherryview_enable_rc6(dev_priv);
8404 else if (IS_VALLEYVIEW(dev_priv))
8405 valleyview_enable_rc6(dev_priv);
8406 else if (INTEL_GEN(dev_priv) >= 9)
8407 gen9_enable_rc6(dev_priv);
8408 else if (IS_BROADWELL(dev_priv))
8409 gen8_enable_rc6(dev_priv);
8410 else if (INTEL_GEN(dev_priv) >= 6)
8411 gen6_enable_rc6(dev_priv);
8413 dev_priv->gt_pm.rc6.enabled = true;
8416 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8418 struct intel_rps *rps = &dev_priv->gt_pm.rps;
8420 lockdep_assert_held(&dev_priv->pcu_lock);
8425 if (IS_CHERRYVIEW(dev_priv)) {
8426 cherryview_enable_rps(dev_priv);
8427 } else if (IS_VALLEYVIEW(dev_priv)) {
8428 valleyview_enable_rps(dev_priv);
8429 } else if (INTEL_GEN(dev_priv) >= 9) {
8430 gen9_enable_rps(dev_priv);
8431 } else if (IS_BROADWELL(dev_priv)) {
8432 gen8_enable_rps(dev_priv);
8433 } else if (INTEL_GEN(dev_priv) >= 6) {
8434 gen6_enable_rps(dev_priv);
8435 } else if (IS_IRONLAKE_M(dev_priv)) {
8436 ironlake_enable_drps(dev_priv);
8437 intel_init_emon(dev_priv);
8440 WARN_ON(rps->max_freq < rps->min_freq);
8441 WARN_ON(rps->idle_freq > rps->max_freq);
8443 WARN_ON(rps->efficient_freq < rps->min_freq);
8444 WARN_ON(rps->efficient_freq > rps->max_freq);
8446 rps->enabled = true;
8449 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8451 /* Powersaving is controlled by the host when inside a VM */
8452 if (intel_vgpu_active(dev_priv))
8455 mutex_lock(&dev_priv->pcu_lock);
8457 if (HAS_RC6(dev_priv))
8458 intel_enable_rc6(dev_priv);
8459 intel_enable_rps(dev_priv);
8460 if (HAS_LLC(dev_priv))
8461 intel_enable_llc_pstate(dev_priv);
8463 mutex_unlock(&dev_priv->pcu_lock);
8466 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8469 * On Ibex Peak and Cougar Point, we need to disable clock
8470 * gating for the panel power sequencer or it will fail to
8471 * start up when no ports are active.
8473 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8476 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8480 for_each_pipe(dev_priv, pipe) {
8481 I915_WRITE(DSPCNTR(pipe),
8482 I915_READ(DSPCNTR(pipe)) |
8483 DISPPLANE_TRICKLE_FEED_DISABLE);
8485 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8486 POSTING_READ(DSPSURF(pipe));
8490 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8492 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8496 * WaFbcDisableDpfcClockGating:ilk
8498 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8499 ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8500 ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8502 I915_WRITE(PCH_3DCGDIS0,
8503 MARIUNIT_CLOCK_GATE_DISABLE |
8504 SVSMUNIT_CLOCK_GATE_DISABLE);
8505 I915_WRITE(PCH_3DCGDIS1,
8506 VFMUNIT_CLOCK_GATE_DISABLE);
8509 * According to the spec the following bits should be set in
8510 * order to enable memory self-refresh
8511 * The bit 22/21 of 0x42004
8512 * The bit 5 of 0x42020
8513 * The bit 15 of 0x45000
8515 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8516 (I915_READ(ILK_DISPLAY_CHICKEN2) |
8517 ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8518 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8519 I915_WRITE(DISP_ARB_CTL,
8520 (I915_READ(DISP_ARB_CTL) |
8524 * Based on documentation from the hardware team, the following bits
8525 * should be set unconditionally in order to enable FBC.
8526 * The bit 22 of 0x42000
8527 * The bit 22 of 0x42004
8528 * The bit 7,8,9 of 0x42020.
8530 if (IS_IRONLAKE_M(dev_priv)) {
8531 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8532 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8533 I915_READ(ILK_DISPLAY_CHICKEN1) |
8535 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8536 I915_READ(ILK_DISPLAY_CHICKEN2) |
8540 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8542 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8543 I915_READ(ILK_DISPLAY_CHICKEN2) |
8544 ILK_ELPIN_409_SELECT);
8545 I915_WRITE(_3D_CHICKEN2,
8546 _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8547 _3D_CHICKEN2_WM_READ_PIPELINED);
8549 /* WaDisableRenderCachePipelinedFlush:ilk */
8550 I915_WRITE(CACHE_MODE_0,
8551 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8553 /* WaDisable_RenderCache_OperationalFlush:ilk */
8554 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8556 g4x_disable_trickle_feed(dev_priv);
8558 ibx_init_clock_gating(dev_priv);
8561 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8567 * On Ibex Peak and Cougar Point, we need to disable clock
8568 * gating for the panel power sequencer or it will fail to
8569 * start up when no ports are active.
8571 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8572 PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8573 PCH_CPUNIT_CLOCK_GATE_DISABLE);
8574 I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8575 DPLS_EDP_PPS_FIX_DIS);
8576 /* The below fixes the weird display corruption, a few pixels shifted
8577 * downward, on (only) LVDS of some HP laptops with IVY.
8579 for_each_pipe(dev_priv, pipe) {
8580 val = I915_READ(TRANS_CHICKEN2(pipe));
8581 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8582 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8583 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8584 val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8585 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8586 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8587 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8588 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8590 /* WADP0ClockGatingDisable */
8591 for_each_pipe(dev_priv, pipe) {
8592 I915_WRITE(TRANS_CHICKEN1(pipe),
8593 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8597 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8601 tmp = I915_READ(MCH_SSKPD);
8602 if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8603 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
8607 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8609 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8611 I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8613 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8614 I915_READ(ILK_DISPLAY_CHICKEN2) |
8615 ILK_ELPIN_409_SELECT);
8617 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8618 I915_WRITE(_3D_CHICKEN,
8619 _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8621 /* WaDisable_RenderCache_OperationalFlush:snb */
8622 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8625 * BSpec recommends 8x4 when MSAA is used,
8626 * however in practice 16x4 seems fastest.
8628 * Note that PS/WM thread counts depend on the WIZ hashing
8629 * disable bit, which we don't touch here, but it's good
8630 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8632 I915_WRITE(GEN6_GT_MODE,
8633 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8635 I915_WRITE(CACHE_MODE_0,
8636 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8638 I915_WRITE(GEN6_UCGCTL1,
8639 I915_READ(GEN6_UCGCTL1) |
8640 GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8641 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8643 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8644 * gating disable must be set. Failure to set it results in
8645 * flickering pixels due to Z write ordering failures after
8646 * some amount of runtime in the Mesa "fire" demo, and Unigine
8647 * Sanctuary and Tropics, and apparently anything else with
8648 * alpha test or pixel discard.
8650 * According to the spec, bit 11 (RCCUNIT) must also be set,
8651 * but we didn't debug actual testcases to find it out.
8653 * WaDisableRCCUnitClockGating:snb
8654 * WaDisableRCPBUnitClockGating:snb
8656 I915_WRITE(GEN6_UCGCTL2,
8657 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8658 GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8660 /* WaStripsFansDisableFastClipPerformanceFix:snb */
8661 I915_WRITE(_3D_CHICKEN3,
8662 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8666 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8667 * 3DSTATE_SF number of SF output attributes is more than 16."
8669 I915_WRITE(_3D_CHICKEN3,
8670 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8673 * According to the spec the following bits should be
8674 * set in order to enable memory self-refresh and fbc:
8675 * The bit21 and bit22 of 0x42000
8676 * The bit21 and bit22 of 0x42004
8677 * The bit5 and bit7 of 0x42020
8678 * The bit14 of 0x70180
8679 * The bit14 of 0x71180
8681 * WaFbcAsynchFlipDisableFbcQueue:snb
8683 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8684 I915_READ(ILK_DISPLAY_CHICKEN1) |
8685 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8686 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8687 I915_READ(ILK_DISPLAY_CHICKEN2) |
8688 ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8689 I915_WRITE(ILK_DSPCLK_GATE_D,
8690 I915_READ(ILK_DSPCLK_GATE_D) |
8691 ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
8692 ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8694 g4x_disable_trickle_feed(dev_priv);
8696 cpt_init_clock_gating(dev_priv);
8698 gen6_check_mch_setup(dev_priv);
8701 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8703 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
8706 * WaVSThreadDispatchOverride:ivb,vlv
8708 * This actually overrides the dispatch
8709 * mode for all thread types.
8711 reg &= ~GEN7_FF_SCHED_MASK;
8712 reg |= GEN7_FF_TS_SCHED_HW;
8713 reg |= GEN7_FF_VS_SCHED_HW;
8714 reg |= GEN7_FF_DS_SCHED_HW;
8716 I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8719 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
8722 * TODO: this bit should only be enabled when really needed, then
8723 * disabled when not needed anymore in order to save power.
8725 if (HAS_PCH_LPT_LP(dev_priv))
8726 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8727 I915_READ(SOUTH_DSPCLK_GATE_D) |
8728 PCH_LP_PARTITION_LEVEL_DISABLE);
8730 /* WADPOClockGatingDisable:hsw */
8731 I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8732 I915_READ(TRANS_CHICKEN1(PIPE_A)) |
8733 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8736 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
8738 if (HAS_PCH_LPT_LP(dev_priv)) {
8739 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
8741 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8742 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8746 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8747 int general_prio_credits,
8748 int high_prio_credits)
8753 /* WaTempDisableDOPClkGating:bdw */
8754 misccpctl = I915_READ(GEN7_MISCCPCTL);
8755 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8757 val = I915_READ(GEN8_L3SQCREG1);
8758 val &= ~L3_PRIO_CREDITS_MASK;
8759 val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8760 val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8761 I915_WRITE(GEN8_L3SQCREG1, val);
8764 * Wait at least 100 clocks before re-enabling clock gating.
8765 * See the definition of L3SQCREG1 in BSpec.
8767 POSTING_READ(GEN8_L3SQCREG1);
8769 I915_WRITE(GEN7_MISCCPCTL, misccpctl);
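/*
 * Usage note: both callers live later in this file; bdw programs 30
 * general / 2 high priority credits (WaProgramL3SqcReg1Default:bdw),
 * while chv uses 38 / 2 per the LSQC setting recommendations.
 */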
8772 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
8774 /* This is not a Wa. Enable to reduce Sampler power */
8775 I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
8776 I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
8779 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8781 if (!HAS_PCH_CNP(dev_priv))
8784 /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
8785 I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8786 CNP_PWM_CGE_GATING_DISABLE);
8789 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
8792 cnp_init_clock_gating(dev_priv);
8794 /* This is not a Wa. Enable for better image quality */
8795 I915_WRITE(_3D_CHICKEN3,
8796 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
8798 /* WaEnableChickenDCPR:cnl */
8799 I915_WRITE(GEN8_CHICKEN_DCPR_1,
8800 I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
8802 /* WaFbcWakeMemOn:cnl */
8803 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
8804 DISP_FBC_MEMORY_WAKE);
8806 val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
8807 /* ReadHitWriteOnlyDisable:cnl */
8808 val |= RCCUNIT_CLKGATE_DIS;
8809 /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8810 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
8811 val |= SARBUNIT_CLKGATE_DIS;
8812 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
8814 /* Wa_2201832410:cnl */
8815 val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
8816 val |= GWUNIT_CLKGATE_DIS;
8817 I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
8819 /* WaDisableVFclkgate:cnl */
8820 /* WaVFUnitClockGatingDisable:cnl */
8821 val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
8822 val |= VFUNIT_CLKGATE_DIS;
8823 I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
8826 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
8828 cnp_init_clock_gating(dev_priv);
8829 gen9_init_clock_gating(dev_priv);
8831 /* WaFbcNukeOnHostModify:cfl */
8832 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8833 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8836 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
8838 gen9_init_clock_gating(dev_priv);
8840 /* WaDisableSDEUnitClockGating:kbl */
8841 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8842 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8843 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8845 /* WaDisableGamClockGating:kbl */
8846 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8847 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
8848 GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
8850 /* WaFbcNukeOnHostModify:kbl */
8851 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8852 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8855 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
8857 gen9_init_clock_gating(dev_priv);
8859 /* WAC6entrylatency:skl */
8860 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
8861 FBC_LLC_FULLY_OPEN);
8863 /* WaFbcNukeOnHostModify:skl */
8864 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8865 ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8868 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
8870 /* The GTT cache must be disabled if the system is using 2M pages. */
8871 bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
8872 I915_GTT_PAGE_SIZE_2M);
8875 /* WaSwitchSolVfFArbitrationPriority:bdw */
8876 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8878 /* WaPsrDPAMaskVBlankInSRD:bdw */
8879 I915_WRITE(CHICKEN_PAR1_1,
8880 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
8882 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
8883 for_each_pipe(dev_priv, pipe) {
8884 I915_WRITE(CHICKEN_PIPESL_1(pipe),
8885 I915_READ(CHICKEN_PIPESL_1(pipe)) |
8886 BDW_DPRS_MASK_VBLANK_SRD);
8889 /* WaVSRefCountFullforceMissDisable:bdw */
8890 /* WaDSRefCountFullforceMissDisable:bdw */
8891 I915_WRITE(GEN7_FF_THREAD_MODE,
8892 I915_READ(GEN7_FF_THREAD_MODE) &
8893 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
8895 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
8896 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
8898 /* WaDisableSDEUnitClockGating:bdw */
8899 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8900 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8902 /* WaProgramL3SqcReg1Default:bdw */
8903 gen8_set_l3sqc_credits(dev_priv, 30, 2);
8905 /* WaGttCachingOffByDefault:bdw */
8906 I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
8908 /* WaKVMNotificationOnConfigChange:bdw */
8909 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
8910 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
8912 lpt_init_clock_gating(dev_priv);
8914 /* WaDisableDopClockGating:bdw
8916 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
8919 I915_WRITE(GEN6_UCGCTL1,
8920 I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
8923 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
8925 /* L3 caching of data atomics doesn't work -- disable it. */
8926 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
8927 I915_WRITE(HSW_ROW_CHICKEN3,
8928 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
8930 /* This is required by WaCatErrorRejectionIssue:hsw */
8931 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8932 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8933 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8935 /* WaVSRefCountFullforceMissDisable:hsw */
8936 I915_WRITE(GEN7_FF_THREAD_MODE,
8937 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
8939 /* WaDisable_RenderCache_OperationalFlush:hsw */
8940 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8942 /* enable HiZ Raw Stall Optimization */
8943 I915_WRITE(CACHE_MODE_0_GEN7,
8944 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
8946 /* WaDisable4x2SubspanOptimization:hsw */
8947 I915_WRITE(CACHE_MODE_1,
8948 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8951 * BSpec recommends 8x4 when MSAA is used,
8952 * however in practice 16x4 seems fastest.
8954 * Note that PS/WM thread counts depend on the WIZ hashing
8955 * disable bit, which we don't touch here, but it's good
8956 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8958 I915_WRITE(GEN7_GT_MODE,
8959 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8961 /* WaSampleCChickenBitEnable:hsw */
8962 I915_WRITE(HALF_SLICE_CHICKEN3,
8963 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
8965 /* WaSwitchSolVfFArbitrationPriority:hsw */
8966 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8968 lpt_init_clock_gating(dev_priv);
8971 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
8975 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
8977 /* WaDisableEarlyCull:ivb */
8978 I915_WRITE(_3D_CHICKEN3,
8979 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
8981 /* WaDisableBackToBackFlipFix:ivb */
8982 I915_WRITE(IVB_CHICKEN3,
8983 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
8984 CHICKEN3_DGMG_DONE_FIX_DISABLE);
8986 /* WaDisablePSDDualDispatchEnable:ivb */
8987 if (IS_IVB_GT1(dev_priv))
8988 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
8989 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
8991 /* WaDisable_RenderCache_OperationalFlush:ivb */
8992 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8994 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
8995 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
8996 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
8998 /* WaApplyL3ControlAndL3ChickenMode:ivb */
8999 I915_WRITE(GEN7_L3CNTLREG1,
9000 GEN7_WA_FOR_GEN7_L3_CONTROL);
9001 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
9002 GEN7_WA_L3_CHICKEN_MODE);
9003 if (IS_IVB_GT1(dev_priv))
9004 I915_WRITE(GEN7_ROW_CHICKEN2,
9005 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9007 /* must write both registers */
9008 I915_WRITE(GEN7_ROW_CHICKEN2,
9009 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9010 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9011 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9014 /* WaForceL3Serialization:ivb */
9015 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9016 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9019 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9020 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
9022 I915_WRITE(GEN6_UCGCTL2,
9023 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9025 /* This is required by WaCatErrorRejectionIssue:ivb */
9026 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9027 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9028 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9030 g4x_disable_trickle_feed(dev_priv);
9032 gen7_setup_fixed_func_scheduler(dev_priv);
9034 if (0) { /* causes HiZ corruption on ivb:gt1 */
9035 /* enable HiZ Raw Stall Optimization */
9036 I915_WRITE(CACHE_MODE_0_GEN7,
9037 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9040 /* WaDisable4x2SubspanOptimization:ivb */
9041 I915_WRITE(CACHE_MODE_1,
9042 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9045 * BSpec recommends 8x4 when MSAA is used,
9046 * however in practice 16x4 seems fastest.
9048 * Note that PS/WM thread counts depend on the WIZ hashing
9049 * disable bit, which we don't touch here, but it's good
9050 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9052 I915_WRITE(GEN7_GT_MODE,
9053 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9055 snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9056 snpcr &= ~GEN6_MBC_SNPCR_MASK;
9057 snpcr |= GEN6_MBC_SNPCR_MED;
9058 I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
9060 if (!HAS_PCH_NOP(dev_priv))
9061 cpt_init_clock_gating(dev_priv);
9063 gen6_check_mch_setup(dev_priv);
9066 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
9068 /* WaDisableEarlyCull:vlv */
9069 I915_WRITE(_3D_CHICKEN3,
9070 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9072 /* WaDisableBackToBackFlipFix:vlv */
9073 I915_WRITE(IVB_CHICKEN3,
9074 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9075 CHICKEN3_DGMG_DONE_FIX_DISABLE);
9077 /* WaPsdDispatchEnable:vlv */
9078 /* WaDisablePSDDualDispatchEnable:vlv */
9079 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9080 _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9081 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9083 /* WaDisable_RenderCache_OperationalFlush:vlv */
9084 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9086 /* WaForceL3Serialization:vlv */
9087 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9088 ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9090 /* WaDisableDopClockGating:vlv */
9091 I915_WRITE(GEN7_ROW_CHICKEN2,
9092 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9094 /* This is required by WaCatErrorRejectionIssue:vlv */
9095 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9096 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9097 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9099 gen7_setup_fixed_func_scheduler(dev_priv);
9102 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9103 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
9105 I915_WRITE(GEN6_UCGCTL2,
9106 GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9108 /* WaDisableL3Bank2xClockGate:vlv
9109 * Disabling L3 clock gating- MMIO 940c[25] = 1
9110 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
9111 I915_WRITE(GEN7_UCGCTL4,
9112 I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
9115 * BSpec says this must be set, even though
9116 * WaDisable4x2SubspanOptimization isn't listed for VLV.
9118 I915_WRITE(CACHE_MODE_1,
9119 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9122 * BSpec recommends 8x4 when MSAA is used,
9123 * however in practice 16x4 seems fastest.
9125 * Note that PS/WM thread counts depend on the WIZ hashing
9126 * disable bit, which we don't touch here, but it's good
9127 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9129 I915_WRITE(GEN7_GT_MODE,
9130 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9133 * WaIncreaseL3CreditsForVLVB0:vlv
9134 * This is the hardware default actually.
9136 I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9139 * WaDisableVLVClockGating_VBIIssue:vlv
9140 * Disable clock gating on the GCFG unit to prevent a delay
9141 * in the reporting of vblank events.
9143 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9146 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9148 /* WaVSRefCountFullforceMissDisable:chv */
9149 /* WaDSRefCountFullforceMissDisable:chv */
9150 I915_WRITE(GEN7_FF_THREAD_MODE,
9151 I915_READ(GEN7_FF_THREAD_MODE) &
9152 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9154 /* WaDisableSemaphoreAndSyncFlipWait:chv */
9155 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9156 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9158 /* WaDisableCSUnitClockGating:chv */
9159 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9160 GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9162 /* WaDisableSDEUnitClockGating:chv */
9163 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9164 GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9167 * WaProgramL3SqcReg1Default:chv
9168 * See gfxspecs/Related Documents/Performance Guide/
9169 * LSQC Setting Recommendations.
9171 gen8_set_l3sqc_credits(dev_priv, 38, 2);
9174 * GTT cache may not work with big pages, so if those
9175 * are ever enabled GTT cache may need to be disabled.
9177 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
9180 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
9182 uint32_t dspclk_gate;
9184 I915_WRITE(RENCLK_GATE_D1, 0);
9185 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9186 GS_UNIT_CLOCK_GATE_DISABLE |
9187 CL_UNIT_CLOCK_GATE_DISABLE);
9188 I915_WRITE(RAMCLK_GATE_D, 0);
9189 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9190 OVRUNIT_CLOCK_GATE_DISABLE |
9191 OVCUNIT_CLOCK_GATE_DISABLE;
9192 if (IS_GM45(dev_priv))
9193 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9194 I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9196 /* WaDisableRenderCachePipelinedFlush */
9197 I915_WRITE(CACHE_MODE_0,
9198 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9200 /* WaDisable_RenderCache_OperationalFlush:g4x */
9201 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9203 g4x_disable_trickle_feed(dev_priv);
9206 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9208 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9209 I915_WRITE(RENCLK_GATE_D2, 0);
9210 I915_WRITE(DSPCLK_GATE_D, 0);
9211 I915_WRITE(RAMCLK_GATE_D, 0);
9212 I915_WRITE16(DEUC, 0);
9213 I915_WRITE(MI_ARB_STATE,
9214 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9216 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9217 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9220 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9222 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9223 I965_RCC_CLOCK_GATE_DISABLE |
9224 I965_RCPB_CLOCK_GATE_DISABLE |
9225 I965_ISC_CLOCK_GATE_DISABLE |
9226 I965_FBC_CLOCK_GATE_DISABLE);
9227 I915_WRITE(RENCLK_GATE_D2, 0);
9228 I915_WRITE(MI_ARB_STATE,
9229 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9231 /* WaDisable_RenderCache_OperationalFlush:gen4 */
9232 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9235 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9237 u32 dstate = I915_READ(D_STATE);
9239 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9240 DSTATE_DOT_CLOCK_GATING;
9241 I915_WRITE(D_STATE, dstate);
9243 if (IS_PINEVIEW(dev_priv))
9244 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9246 /* IIR "flip pending" means done if this bit is set */
9247 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9249 /* interrupts should cause a wake up from C3 */
9250 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9252 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9253 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9255 I915_WRITE(MI_ARB_STATE,
9256 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9259 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9261 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9263 /* interrupts should cause a wake up from C3 */
9264 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9265 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9267 I915_WRITE(MEM_MODE,
9268 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9271 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9273 I915_WRITE(MEM_MODE,
9274 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9275 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9278 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9280 dev_priv->display.init_clock_gating(dev_priv);
9283 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9285 if (HAS_PCH_LPT(dev_priv))
9286 lpt_suspend_hw(dev_priv);
9289 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9291 DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9295 * intel_init_clock_gating_hooks - setup the clock gating hooks
9296 * @dev_priv: device private
9298 * Setup the hooks that configure which clocks of a given platform can be
9299 * gated and also apply various GT and display specific workarounds for these
9300 * platforms. Note that some GT specific workarounds are applied separately
9301 * when GPU contexts or batchbuffers start their execution.
9303 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9305 if (IS_ICELAKE(dev_priv))
9306 dev_priv->display.init_clock_gating = icl_init_clock_gating;
9307 else if (IS_CANNONLAKE(dev_priv))
9308 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9309 else if (IS_COFFEELAKE(dev_priv))
9310 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9311 else if (IS_SKYLAKE(dev_priv))
9312 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9313 else if (IS_KABYLAKE(dev_priv))
9314 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9315 else if (IS_BROXTON(dev_priv))
9316 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9317 else if (IS_GEMINILAKE(dev_priv))
9318 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9319 else if (IS_BROADWELL(dev_priv))
9320 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9321 else if (IS_CHERRYVIEW(dev_priv))
9322 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9323 else if (IS_HASWELL(dev_priv))
9324 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9325 else if (IS_IVYBRIDGE(dev_priv))
9326 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9327 else if (IS_VALLEYVIEW(dev_priv))
9328 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9329 else if (IS_GEN6(dev_priv))
9330 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9331 else if (IS_GEN5(dev_priv))
9332 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9333 else if (IS_G4X(dev_priv))
9334 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9335 else if (IS_I965GM(dev_priv))
9336 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9337 else if (IS_I965G(dev_priv))
9338 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9339 else if (IS_GEN3(dev_priv))
9340 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9341 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9342 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9343 else if (IS_GEN2(dev_priv))
9344 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9346 MISSING_CASE(INTEL_DEVID(dev_priv));
9347 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9351 /* Set up chip specific power management-related functions */
9352 void intel_init_pm(struct drm_i915_private *dev_priv)
9354 intel_fbc_init(dev_priv);
9357 if (IS_PINEVIEW(dev_priv))
9358 i915_pineview_get_mem_freq(dev_priv);
9359 else if (IS_GEN5(dev_priv))
9360 i915_ironlake_get_mem_freq(dev_priv);
9362 /* For FIFO watermark updates */
9363 if (INTEL_GEN(dev_priv) >= 9) {
9364 skl_setup_wm_latency(dev_priv);
9365 dev_priv->display.initial_watermarks = skl_initial_wm;
9366 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9367 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9368 } else if (HAS_PCH_SPLIT(dev_priv)) {
9369 ilk_setup_wm_latency(dev_priv);
9371 if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] &&
9372 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9373 (!IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[0] &&
9374 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9375 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9376 dev_priv->display.compute_intermediate_wm =
9377 ilk_compute_intermediate_wm;
9378 dev_priv->display.initial_watermarks =
9379 ilk_initial_watermarks;
9380 dev_priv->display.optimize_watermarks =
9381 ilk_optimize_watermarks;
9383 DRM_DEBUG_KMS("Failed to read display plane latency. "
9386 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9387 vlv_setup_wm_latency(dev_priv);
9388 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9389 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9390 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9391 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9392 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9393 } else if (IS_G4X(dev_priv)) {
9394 g4x_setup_wm_latency(dev_priv);
9395 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9396 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9397 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9398 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9399 } else if (IS_PINEVIEW(dev_priv)) {
9400 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
9403 dev_priv->mem_freq)) {
9404 DRM_INFO("failed to find known CxSR latency "
9405 "(found ddr%s fsb freq %d, mem freq %d), "
9407 (dev_priv->is_ddr3 == 1) ? "3" : "2",
9408 dev_priv->fsb_freq, dev_priv->mem_freq);
9409 /* Disable CxSR and never update its watermark again */
9410 intel_set_memory_cxsr(dev_priv, false);
9411 dev_priv->display.update_wm = NULL;
9413 dev_priv->display.update_wm = pineview_update_wm;
9414 } else if (IS_GEN4(dev_priv)) {
9415 dev_priv->display.update_wm = i965_update_wm;
9416 } else if (IS_GEN3(dev_priv)) {
9417 dev_priv->display.update_wm = i9xx_update_wm;
9418 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9419 } else if (IS_GEN2(dev_priv)) {
9420 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9421 dev_priv->display.update_wm = i845_update_wm;
9422 dev_priv->display.get_fifo_size = i845_get_fifo_size;
9424 dev_priv->display.update_wm = i9xx_update_wm;
9425 dev_priv->display.get_fifo_size = i830_get_fifo_size;
9428 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9432 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9435 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9438 case GEN6_PCODE_SUCCESS:
9440 case GEN6_PCODE_UNIMPLEMENTED_CMD:
9442 case GEN6_PCODE_ILLEGAL_CMD:
9444 case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9445 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9447 case GEN6_PCODE_TIMEOUT:
9450 MISSING_CASE(flags);
9455 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9458 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9461 case GEN6_PCODE_SUCCESS:
9463 case GEN6_PCODE_ILLEGAL_CMD:
9465 case GEN7_PCODE_TIMEOUT:
9467 case GEN7_PCODE_ILLEGAL_DATA:
9469 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9472 MISSING_CASE(flags);
9477 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
9481 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9483 /* GEN6_PCODE_* are outside of the forcewake domain, we can
9484 * use the fw I915_READ variants to reduce the amount of work
9485 * required when reading/writing.
9488 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9489 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9490 mbox, __builtin_return_address(0));
9494 I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9495 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9496 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9498 if (__intel_wait_for_register_fw(dev_priv,
9499 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9501 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9502 mbox, __builtin_return_address(0));
9506 *val = I915_READ_FW(GEN6_PCODE_DATA);
9507 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9509 if (INTEL_GEN(dev_priv) > 6)
9510 status = gen7_check_mailbox_status(dev_priv);
9512 status = gen6_check_mailbox_status(dev_priv);
9515 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9516 mbox, __builtin_return_address(0), status);
9523 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
9525 int fast_timeout_us, int slow_timeout_ms)
9529 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9531 /* GEN6_PCODE_* are outside of the forcewake domain, we can
9532 * use the fw I915_READ variants to reduce the amount of work
9533 * required when reading/writing.
9536 if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9537 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9538 val, mbox, __builtin_return_address(0));
9542 I915_WRITE_FW(GEN6_PCODE_DATA, val);
9543 I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9544 I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9546 if (__intel_wait_for_register_fw(dev_priv,
9547 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9548 fast_timeout_us, slow_timeout_ms,
9550 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9551 val, mbox, __builtin_return_address(0));
9555 I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9557 if (INTEL_GEN(dev_priv) > 6)
9558 status = gen7_check_mailbox_status(dev_priv);
9560 status = gen6_check_mailbox_status(dev_priv);
9563 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9564 val, mbox, __builtin_return_address(0), status);
9571 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9572 u32 request, u32 reply_mask, u32 reply,
9577 *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9579 return *status || ((val & reply_mask) == reply);
9583 * skl_pcode_request - send PCODE request until acknowledgment
9584 * @dev_priv: device private
9585 * @mbox: PCODE mailbox ID the request is targeted for
9586 * @request: request ID
9587 * @reply_mask: mask used to check for request acknowledgment
9588 * @reply: value used to check for request acknowledgment
9589 * @timeout_base_ms: timeout for polling with preemption enabled
9591 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9592 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9593 * The request is acknowledged once the PCODE reply dword equals @reply after
9594 * applying @reply_mask. Polling is first attempted with preemption enabled
9595 * for @timeout_base_ms and, if that times out, for another 50 ms with
9596 * preemption disabled.
9598 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9599 * other error as reported by PCODE.
9601 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9602 u32 reply_mask, u32 reply, int timeout_base_ms)
9607 WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9609 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9613 * Prime the PCODE by doing a request first. Normally it guarantees
9614 * that a subsequent request, at most @timeout_base_ms later, succeeds.
9615 * _wait_for() doesn't guarantee when its passed condition is evaluated
9616 * first, so send the first request explicitly.
9622 ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
9627 * The above can time out if the number of requests was low (2 in the
9628 * worst case) _and_ PCODE was busy for some reason even after a
9629 * (queued) request and @timeout_base_ms delay. As a workaround retry
9630 * the poll with preemption disabled to maximize the number of
9631 * requests. Increase the timeout from @timeout_base_ms to 50ms to
9632 * account for interrupts that could reduce the number of these
9633 * requests, and for any quirks of the PCODE firmware that delays
9634 * the request completion.
9636 DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9637 WARN_ON_ONCE(timeout_base_ms > 3);
9639 ret = wait_for_atomic(COND, 50);
9643 return ret ? ret : status;
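/*
 * A hedged usage sketch (mirroring the cdclk code elsewhere in i915, not
 * part of this file): ask pcode to prepare for a cdclk change and poll
 * until it acknowledges:
 *
 *	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 *				SKL_CDCLK_PREPARE_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE, 3);
 */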
9647 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9649 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9653 * Slow = Fast = GPLL ref * N
9655 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9658 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9660 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9662 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9665 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9667 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9671 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9673 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9676 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9678 struct intel_rps *rps = &dev_priv->gt_pm.rps;
9680 /* CHV needs even values */
9681 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9684 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9686 if (INTEL_GEN(dev_priv) >= 9)
9687 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9689 else if (IS_CHERRYVIEW(dev_priv))
9690 return chv_gpu_freq(dev_priv, val);
9691 else if (IS_VALLEYVIEW(dev_priv))
9692 return byt_gpu_freq(dev_priv, val);
9694 return val * GT_FREQUENCY_MULTIPLIER;
9697 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9699 if (INTEL_GEN(dev_priv) >= 9)
9700 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9701 GT_FREQUENCY_MULTIPLIER);
9702 else if (IS_CHERRYVIEW(dev_priv))
9703 return chv_freq_opcode(dev_priv, val);
9704 else if (IS_VALLEYVIEW(dev_priv))
9705 return byt_freq_opcode(dev_priv, val);
9707 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
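/*
 * Worked example (a sketch; assumes the usual GT_FREQUENCY_MULTIPLIER of
 * 50 and GEN9_FREQ_SCALER of 3): on gen9 an RPS opcode of 18 reads back
 * as 18 * 50 / 3 = 300 MHz, and intel_freq_opcode(dev_priv, 300) =
 * 300 * 3 / 50 = 18 round-trips to the same opcode. Pre-gen9 big-core
 * parts simply use a 50 MHz step.
 */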
9710 void intel_pm_setup(struct drm_i915_private *dev_priv)
9712 mutex_init(&dev_priv->pcu_lock);
9713 mutex_init(&dev_priv->gt_pm.rps.power.mutex);
9715 atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9717 dev_priv->runtime_pm.suspended = false;
9718 atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9721 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9722 const i915_reg_t reg)
9724 u32 lower, upper, tmp;
9728 * The registers accessed do not need forcewake. We borrow the
9729 * uncore lock to prevent concurrent access to the register range.
9731 lockdep_assert_held(&dev_priv->uncore.lock);
9734 * vlv and chv residency counters are 40 bits in width.
9735 * With a control bit, we can choose between upper or lower
9736 * 32bit window into this counter.
9738 * Although we always use the counter in high-range mode elsewhere,
9739 * userspace may attempt to read the value before rc6 is initialised,
9740 * before we have set the default VLV_COUNTER_CONTROL value. So always
9741 * set the high bit to be safe.
9743 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9744 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9745 upper = I915_READ_FW(reg);
9749 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9750 _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9751 lower = I915_READ_FW(reg);
9753 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9754 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9755 upper = I915_READ_FW(reg);
9756 } while (upper != tmp && --loop);
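/*
 * Layout this assumes: the low window exposes bits 31:0 of the 40-bit
 * counter and the high window bits 39:8, so the two reads overlap and
 * the full value is reconstructed below as lower | upper << 8.
 */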
9759 * Everywhere else we always use VLV_COUNTER_CONTROL with the
9760 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9764 return lower | (u64)upper << 8;
9767 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
9768 const i915_reg_t reg)
9770 u64 time_hw, prev_hw, overflow_hw;
9771 unsigned int fw_domains;
9772 unsigned long flags;
9776 if (!HAS_RC6(dev_priv))
9780 * Store previous hw counter values for counter wrap-around handling.
9782 * There are only four interesting registers and they live next to each
9783 * other so we can use the relative address, compared to the smallest
9784 * one as the index into driver storage.
9786 i = (i915_mmio_reg_offset(reg) -
9787 i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9788 if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9791 fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
9793 spin_lock_irqsave(&dev_priv->uncore.lock, flags);
9794 intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
9796 /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9797 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9799 div = dev_priv->czclk_freq;
9800 overflow_hw = BIT_ULL(40);
9801 time_hw = vlv_residency_raw(dev_priv, reg);
9803 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9804 if (IS_GEN9_LP(dev_priv)) {
9812 overflow_hw = BIT_ULL(32);
9813 time_hw = I915_READ_FW(reg);
9817 * Counter wrap handling.
9819 * But this relies on a sufficient frequency of queries; otherwise the counters can still wrap.
9822 prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9823 dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9825 /* RC6 delta from last sample. */
9826 if (time_hw >= prev_hw)
9829 time_hw += overflow_hw - prev_hw;
9831 /* Add delta to RC6 extended raw driver copy. */
9832 time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9833 dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9835 intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
9836 spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
9838 return mul_u64_u32_div(time_hw, mul, div);
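/*
 * Worked example (a sketch): with the default 1.28us units noted above,
 * a raw delta of 1000 counter ticks corresponds to 1000 * 1280 =
 * 1,280,000 ns of RC6 residency; on VLV/CHV the tick length is derived
 * from czclk_freq instead.
 */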
9841 u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
9845 if (INTEL_GEN(dev_priv) >= 9)
9846 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
9847 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
9848 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
9850 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;