drivers/gpu/drm/i915/intel_pm.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eugeni Dodonov <[email protected]>
25  *
26  */
27
28 #include <linux/cpufreq.h>
29 #include <linux/pm_runtime.h>
30 #include <drm/drm_plane_helper.h>
31 #include "i915_drv.h"
32 #include "intel_drv.h"
33 #include "../../../platform/x86/intel_ips.h"
34 #include <linux/module.h>
35 #include <drm/drm_atomic_helper.h>
36
37 /**
38  * DOC: RC6
39  *
40  * RC6 is a special power stage which allows the GPU to enter a very
41  * low-voltage mode when idle, using down to 0V while in this stage.  This
42  * stage is entered automatically when the GPU is idle and RC6 support is
43  * enabled; as soon as a new workload arises, the GPU wakes up automatically as well.
44  *
45  * There are different RC6 modes available in Intel GPUs, which differ from
46  * each other in the latency required to enter and leave RC6 and in the
47  * voltage consumed by the GPU in different states.
48  *
49  * The combination of the following flags defines which states the GPU is
50  * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
51  * RC6pp is the deepest RC6. Their support by hardware varies according to the
52  * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
53  * which brings the most power savings; deeper states save more power, but
54  * require higher latency to switch to and wake up.
55  */
56
57 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
58 {
59         if (HAS_LLC(dev_priv)) {
60                 /*
61                  * WaCompressedResourceDisplayNewHashMode:skl,kbl
62                  * Display WA #0390: skl,kbl
63                  *
64                  * Must match Sampler, Pixel Back End, and Media. See
65                  * WaCompressedResourceSamplerPbeMediaNewHashMode.
66                  */
67                 I915_WRITE(CHICKEN_PAR1_1,
68                            I915_READ(CHICKEN_PAR1_1) |
69                            SKL_DE_COMPRESSED_HASH_MODE);
70         }
71
72         /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
73         I915_WRITE(CHICKEN_PAR1_1,
74                    I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
75
76         /* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
77         I915_WRITE(GEN8_CHICKEN_DCPR_1,
78                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
79
80         /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
81         /* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
82         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
83                    DISP_FBC_WM_DIS |
84                    DISP_FBC_MEMORY_WAKE);
85
86         /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
87         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
88                    ILK_DPFC_DISABLE_DUMMY0);
89
90         if (IS_SKYLAKE(dev_priv)) {
91                 /* WaDisableDopClockGating */
92                 I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
93                            & ~GEN7_DOP_CLOCK_GATE_ENABLE);
94         }
95 }
96
97 static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
98 {
99         gen9_init_clock_gating(dev_priv);
100
101         /* WaDisableSDEUnitClockGating:bxt */
102         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
103                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
104
105         /*
106          * FIXME:
107          * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
108          */
109         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
110                    GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
111
112         /*
113          * Wa: Backlight PWM may stop in the asserted state, causing backlight
114          * to stay fully on.
115          */
116         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
117                    PWM1_GATING_DIS | PWM2_GATING_DIS);
118 }
119
120 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
121 {
122         gen9_init_clock_gating(dev_priv);
123
124         /*
125          * WaDisablePWMClockGating:glk
126          * Backlight PWM may stop in the asserted state, causing backlight
127          * to stay fully on.
128          */
129         I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
130                    PWM1_GATING_DIS | PWM2_GATING_DIS);
131
132         /* WaDDIIOTimeout:glk */
133         if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
134                 u32 val = I915_READ(CHICKEN_MISC_2);
135                 val &= ~(GLK_CL0_PWR_DOWN |
136                          GLK_CL1_PWR_DOWN |
137                          GLK_CL2_PWR_DOWN);
138                 I915_WRITE(CHICKEN_MISC_2, val);
139         }
140
141 }
142
143 static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
144 {
145         u32 tmp;
146
147         tmp = I915_READ(CLKCFG);
148
149         switch (tmp & CLKCFG_FSB_MASK) {
150         case CLKCFG_FSB_533:
151                 dev_priv->fsb_freq = 533; /* 133*4 */
152                 break;
153         case CLKCFG_FSB_800:
154                 dev_priv->fsb_freq = 800; /* 200*4 */
155                 break;
156         case CLKCFG_FSB_667:
157                 dev_priv->fsb_freq =  667; /* 167*4 */
158                 break;
159         case CLKCFG_FSB_400:
160                 dev_priv->fsb_freq = 400; /* 100*4 */
161                 break;
162         }
163
164         switch (tmp & CLKCFG_MEM_MASK) {
165         case CLKCFG_MEM_533:
166                 dev_priv->mem_freq = 533;
167                 break;
168         case CLKCFG_MEM_667:
169                 dev_priv->mem_freq = 667;
170                 break;
171         case CLKCFG_MEM_800:
172                 dev_priv->mem_freq = 800;
173                 break;
174         }
175
176         /* detect pineview DDR3 setting */
177         tmp = I915_READ(CSHRDDR3CTL);
178         dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
179 }
180
181 static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
182 {
183         u16 ddrpll, csipll;
184
185         ddrpll = I915_READ16(DDRMPLL1);
186         csipll = I915_READ16(CSIPLL0);
187
188         switch (ddrpll & 0xff) {
189         case 0xc:
190                 dev_priv->mem_freq = 800;
191                 break;
192         case 0x10:
193                 dev_priv->mem_freq = 1066;
194                 break;
195         case 0x14:
196                 dev_priv->mem_freq = 1333;
197                 break;
198         case 0x18:
199                 dev_priv->mem_freq = 1600;
200                 break;
201         default:
202                 DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
203                                  ddrpll & 0xff);
204                 dev_priv->mem_freq = 0;
205                 break;
206         }
207
208         dev_priv->ips.r_t = dev_priv->mem_freq;
209
210         switch (csipll & 0x3ff) {
211         case 0x00c:
212                 dev_priv->fsb_freq = 3200;
213                 break;
214         case 0x00e:
215                 dev_priv->fsb_freq = 3733;
216                 break;
217         case 0x010:
218                 dev_priv->fsb_freq = 4266;
219                 break;
220         case 0x012:
221                 dev_priv->fsb_freq = 4800;
222                 break;
223         case 0x014:
224                 dev_priv->fsb_freq = 5333;
225                 break;
226         case 0x016:
227                 dev_priv->fsb_freq = 5866;
228                 break;
229         case 0x018:
230                 dev_priv->fsb_freq = 6400;
231                 break;
232         default:
233                 DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
234                                  csipll & 0x3ff);
235                 dev_priv->fsb_freq = 0;
236                 break;
237         }
238
239         if (dev_priv->fsb_freq == 3200) {
240                 dev_priv->ips.c_m = 0;
241         } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
242                 dev_priv->ips.c_m = 1;
243         } else {
244                 dev_priv->ips.c_m = 2;
245         }
246 }
247
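/*
 * Each entry below maps to the struct cxsr_latency fields, in order:
 * is_desktop, is_ddr3, fsb_freq (MHz), mem_freq (MHz), and then the four
 * latency values consumed by pineview_update_wm(): display_sr, cursor_sr,
 * display_hpll_disable, cursor_hpll_disable. The latency values appear to
 * be in nanoseconds (intel_calculate_wm() divides them by 100 to get the
 * 0.1us units used by the watermark formulas); e.g. 3382 is roughly 3.4us
 * of self-refresh exit latency for a desktop DDR2-400 configuration.
 */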
248 static const struct cxsr_latency cxsr_latency_table[] = {
249         {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
250         {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
251         {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
252         {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
253         {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
254
255         {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
256         {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
257         {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
258         {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
259         {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
260
261         {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
262         {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
263         {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
264         {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
265         {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
266
267         {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
268         {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
269         {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
270         {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
271         {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
272
273         {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
274         {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
275         {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
276         {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
277         {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
278
279         {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
280         {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
281         {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
282         {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
283         {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
284 };
285
286 static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
287                                                          bool is_ddr3,
288                                                          int fsb,
289                                                          int mem)
290 {
291         const struct cxsr_latency *latency;
292         int i;
293
294         if (fsb == 0 || mem == 0)
295                 return NULL;
296
297         for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
298                 latency = &cxsr_latency_table[i];
299                 if (is_desktop == latency->is_desktop &&
300                     is_ddr3 == latency->is_ddr3 &&
301                     fsb == latency->fsb_freq && mem == latency->mem_freq)
302                         return latency;
303         }
304
305         DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
306
307         return NULL;
308 }
309
310 static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
311 {
312         u32 val;
313
314         mutex_lock(&dev_priv->pcu_lock);
315
316         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
317         if (enable)
318                 val &= ~FORCE_DDR_HIGH_FREQ;
319         else
320                 val |= FORCE_DDR_HIGH_FREQ;
321         val &= ~FORCE_DDR_LOW_FREQ;
322         val |= FORCE_DDR_FREQ_REQ_ACK;
323         vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
324
325         if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
326                       FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
327                 DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
328
329         mutex_unlock(&dev_priv->pcu_lock);
330 }
331
332 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
333 {
334         u32 val;
335
336         mutex_lock(&dev_priv->pcu_lock);
337
338         val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
339         if (enable)
340                 val |= DSP_MAXFIFO_PM5_ENABLE;
341         else
342                 val &= ~DSP_MAXFIFO_PM5_ENABLE;
343         vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
344
345         mutex_unlock(&dev_priv->pcu_lock);
346 }
347
348 #define FW_WM(value, plane) \
349         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
350
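/*
 * For example, FW_WM(wm, SR) (as used in pineview_update_wm() below)
 * expands via token pasting to:
 *
 *   ((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK
 *
 * i.e. the watermark value is shifted into the SR field of the DSPFW
 * register and masked to that field's width.
 */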
351 static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
352 {
353         bool was_enabled;
354         u32 val;
355
356         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
357                 was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
358                 I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
359                 POSTING_READ(FW_BLC_SELF_VLV);
360         } else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
361                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
362                 I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
363                 POSTING_READ(FW_BLC_SELF);
364         } else if (IS_PINEVIEW(dev_priv)) {
365                 val = I915_READ(DSPFW3);
366                 was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
367                 if (enable)
368                         val |= PINEVIEW_SELF_REFRESH_EN;
369                 else
370                         val &= ~PINEVIEW_SELF_REFRESH_EN;
371                 I915_WRITE(DSPFW3, val);
372                 POSTING_READ(DSPFW3);
373         } else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
374                 was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
375                 val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
376                                _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
377                 I915_WRITE(FW_BLC_SELF, val);
378                 POSTING_READ(FW_BLC_SELF);
379         } else if (IS_I915GM(dev_priv)) {
380                 /*
381                  * FIXME can't find a bit like this for 915G, and
382                  * yet it does have the related watermark in
383                  * FW_BLC_SELF. What's going on?
384                  */
385                 was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
386                 val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
387                                _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
388                 I915_WRITE(INSTPM, val);
389                 POSTING_READ(INSTPM);
390         } else {
391                 return false;
392         }
393
394         trace_intel_memory_cxsr(dev_priv, was_enabled, enable);
395
396         DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
397                       enableddisabled(enable),
398                       enableddisabled(was_enabled));
399
400         return was_enabled;
401 }
402
403 /**
404  * intel_set_memory_cxsr - Configure CxSR state
405  * @dev_priv: i915 device
406  * @enable: Allow vs. disallow CxSR
407  *
408  * Allow or disallow the system to enter a special CxSR
409  * (C-state self refresh) state. What typically happens in CxSR mode
410  * is that several display FIFOs may get combined into a single larger
411  * FIFO for a particular plane (so called max FIFO mode) to allow the
412  * system to defer memory fetches longer, and the memory will enter
413  * self refresh.
414  *
415  * Note that enabling CxSR does not guarantee that the system enters
416  * this special mode, nor does it guarantee that the system stays
417  * in that mode once entered. So this just allows/disallows the system
418  * to autonomously utilize the CxSR mode. Other factors such as core
419  * C-states will affect when/if the system actually enters/exits the
420  * CxSR mode.
421  *
422  * Note that on VLV/CHV this actually only controls the max FIFO mode,
423  * and the system is free to enter/exit memory self refresh at any time
424  * even when the use of CxSR has been disallowed.
425  *
426  * While the system is actually in the CxSR/max FIFO mode, some plane
427  * control registers will not get latched on vblank. Thus in order to
428  * guarantee the system will respond to changes in the plane registers
429  * we must always disallow CxSR prior to making changes to those registers.
430  * Unfortunately the system will re-evaluate the CxSR conditions at
431  * frame start which happens after vblank start (which is when the plane
432  * registers would get latched), so we can't proceed with the plane update
433  * during the same frame where we disallowed CxSR.
434  *
435  * Certain platforms also have a deeper HPLL SR mode. Fortunately the
436  * HPLL SR mode depends on CxSR itself, so we don't have to hand hold
437  * the hardware w.r.t. HPLL SR when writing to plane registers.
438  * Disallowing just CxSR is sufficient.
439  */
440 bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
441 {
442         bool ret;
443
444         mutex_lock(&dev_priv->wm.wm_mutex);
445         ret = _intel_set_memory_cxsr(dev_priv, enable);
446         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
447                 dev_priv->wm.vlv.cxsr = enable;
448         else if (IS_G4X(dev_priv))
449                 dev_priv->wm.g4x.cxsr = enable;
450         mutex_unlock(&dev_priv->wm.wm_mutex);
451
452         return ret;
453 }
454
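/*
 * Rough usage sketch following the note above (a sketch only; the real
 * call sites live in the atomic plane update paths):
 *
 *   intel_set_memory_cxsr(dev_priv, false);    <- disallow CxSR
 *   wait for the next vblank so the disable is latched
 *   ... write the plane registers ...
 *   intel_set_memory_cxsr(dev_priv, true);     <- re-allow CxSR afterwards
 */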
455 /*
456  * Latency for FIFO fetches is dependent on several factors:
457  *   - memory configuration (speed, channels)
458  *   - chipset
459  *   - current MCH state
460  * It can be fairly high in some situations, so here we assume a fairly
461  * pessimal value.  It's a tradeoff between extra memory fetches (if we
462  * set this value too high, the FIFO will fetch frequently to stay full)
463  * and power consumption (set it too low to save power and we might see
464  * FIFO underruns and display "flicker").
465  *
466  * A value of 5us seems to be a good balance; safe for very low end
467  * platforms but not overly aggressive on lower latency configs.
468  */
469 static const int pessimal_latency_ns = 5000;
470
471 #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
472         ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))
473
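/*
 * VLV_FIFO_START() reassembles a 9-bit FIFO start offset whose low 8 bits
 * live in DSPARB and whose 9th bit lives in DSPARB2. For example, for pipe A
 * sprite0 in vlv_get_fifo_size() below it evaluates to:
 *
 *   ((dsparb >> 0) & 0xff) | (((dsparb2 >> 0) & 0x1) << 8)
 *
 * giving a start offset in the 0-511 range of the display FIFO.
 */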
474 static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
475 {
476         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
477         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
478         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
479         enum pipe pipe = crtc->pipe;
480         int sprite0_start, sprite1_start;
481
482         switch (pipe) {
483                 uint32_t dsparb, dsparb2, dsparb3;
484         case PIPE_A:
485                 dsparb = I915_READ(DSPARB);
486                 dsparb2 = I915_READ(DSPARB2);
487                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
488                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
489                 break;
490         case PIPE_B:
491                 dsparb = I915_READ(DSPARB);
492                 dsparb2 = I915_READ(DSPARB2);
493                 sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
494                 sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
495                 break;
496         case PIPE_C:
497                 dsparb2 = I915_READ(DSPARB2);
498                 dsparb3 = I915_READ(DSPARB3);
499                 sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
500                 sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
501                 break;
502         default:
503                 MISSING_CASE(pipe);
504                 return;
505         }
506
507         fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
508         fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
509         fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
510         fifo_state->plane[PLANE_CURSOR] = 63;
511 }
512
513 static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv,
514                               enum i9xx_plane_id i9xx_plane)
515 {
516         uint32_t dsparb = I915_READ(DSPARB);
517         int size;
518
519         size = dsparb & 0x7f;
520         if (i9xx_plane == PLANE_B)
521                 size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
522
523         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
524                       dsparb, plane_name(i9xx_plane), size);
525
526         return size;
527 }
528
529 static int i830_get_fifo_size(struct drm_i915_private *dev_priv,
530                               enum i9xx_plane_id i9xx_plane)
531 {
532         uint32_t dsparb = I915_READ(DSPARB);
533         int size;
534
535         size = dsparb & 0x1ff;
536         if (i9xx_plane == PLANE_B)
537                 size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
538         size >>= 1; /* Convert to cachelines */
539
540         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
541                       dsparb, plane_name(i9xx_plane), size);
542
543         return size;
544 }
545
546 static int i845_get_fifo_size(struct drm_i915_private *dev_priv,
547                               enum i9xx_plane_id i9xx_plane)
548 {
549         uint32_t dsparb = I915_READ(DSPARB);
550         int size;
551
552         size = dsparb & 0x7f;
553         size >>= 2; /* Convert to cachelines */
554
555         DRM_DEBUG_KMS("FIFO size - (0x%08x) %c: %d\n",
556                       dsparb, plane_name(i9xx_plane), size);
557
558         return size;
559 }
560
561 /* Pineview has different values for various configs */
562 static const struct intel_watermark_params pineview_display_wm = {
563         .fifo_size = PINEVIEW_DISPLAY_FIFO,
564         .max_wm = PINEVIEW_MAX_WM,
565         .default_wm = PINEVIEW_DFT_WM,
566         .guard_size = PINEVIEW_GUARD_WM,
567         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
568 };
569 static const struct intel_watermark_params pineview_display_hplloff_wm = {
570         .fifo_size = PINEVIEW_DISPLAY_FIFO,
571         .max_wm = PINEVIEW_MAX_WM,
572         .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
573         .guard_size = PINEVIEW_GUARD_WM,
574         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
575 };
576 static const struct intel_watermark_params pineview_cursor_wm = {
577         .fifo_size = PINEVIEW_CURSOR_FIFO,
578         .max_wm = PINEVIEW_CURSOR_MAX_WM,
579         .default_wm = PINEVIEW_CURSOR_DFT_WM,
580         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
581         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
582 };
583 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
584         .fifo_size = PINEVIEW_CURSOR_FIFO,
585         .max_wm = PINEVIEW_CURSOR_MAX_WM,
586         .default_wm = PINEVIEW_CURSOR_DFT_WM,
587         .guard_size = PINEVIEW_CURSOR_GUARD_WM,
588         .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
589 };
590 static const struct intel_watermark_params i965_cursor_wm_info = {
591         .fifo_size = I965_CURSOR_FIFO,
592         .max_wm = I965_CURSOR_MAX_WM,
593         .default_wm = I965_CURSOR_DFT_WM,
594         .guard_size = 2,
595         .cacheline_size = I915_FIFO_LINE_SIZE,
596 };
597 static const struct intel_watermark_params i945_wm_info = {
598         .fifo_size = I945_FIFO_SIZE,
599         .max_wm = I915_MAX_WM,
600         .default_wm = 1,
601         .guard_size = 2,
602         .cacheline_size = I915_FIFO_LINE_SIZE,
603 };
604 static const struct intel_watermark_params i915_wm_info = {
605         .fifo_size = I915_FIFO_SIZE,
606         .max_wm = I915_MAX_WM,
607         .default_wm = 1,
608         .guard_size = 2,
609         .cacheline_size = I915_FIFO_LINE_SIZE,
610 };
611 static const struct intel_watermark_params i830_a_wm_info = {
612         .fifo_size = I855GM_FIFO_SIZE,
613         .max_wm = I915_MAX_WM,
614         .default_wm = 1,
615         .guard_size = 2,
616         .cacheline_size = I830_FIFO_LINE_SIZE,
617 };
618 static const struct intel_watermark_params i830_bc_wm_info = {
619         .fifo_size = I855GM_FIFO_SIZE,
620         .max_wm = I915_MAX_WM/2,
621         .default_wm = 1,
622         .guard_size = 2,
623         .cacheline_size = I830_FIFO_LINE_SIZE,
624 };
625 static const struct intel_watermark_params i845_wm_info = {
626         .fifo_size = I830_FIFO_SIZE,
627         .max_wm = I915_MAX_WM,
628         .default_wm = 1,
629         .guard_size = 2,
630         .cacheline_size = I830_FIFO_LINE_SIZE,
631 };
632
633 /**
634  * intel_wm_method1 - Method 1 / "small buffer" watermark formula
635  * @pixel_rate: Pipe pixel rate in kHz
636  * @cpp: Plane bytes per pixel
637  * @latency: Memory wakeup latency in 0.1us units
638  *
639  * Compute the watermark using the method 1 or "small buffer"
640  * formula. The caller may additionally add extra cachelines
641  * to account for TLB misses and clock crossings.
642  *
643  * This method is concerned with the short-term drain rate
644  * of the FIFO, i.e. it does not account for blanking periods
645  * which would effectively reduce the average drain rate across
646  * a longer period. The name "small" refers to the fact that the
647  * FIFO is relatively small compared to the amount of data
648  * fetched.
649  *
650  * The FIFO level vs. time graph might look something like:
651  *
652  *   |\   |\
653  *   | \  | \
654  * __---__---__ (- plane active, _ blanking)
655  * -> time
656  *
657  * or perhaps like this:
658  *
659  *   |\|\  |\|\
660  * __----__----__ (- plane active, _ blanking)
661  * -> time
662  *
663  * Returns:
664  * The watermark in bytes
665  */
666 static unsigned int intel_wm_method1(unsigned int pixel_rate,
667                                      unsigned int cpp,
668                                      unsigned int latency)
669 {
670         uint64_t ret;
671
672         ret = (uint64_t) pixel_rate * cpp * latency;
673         ret = DIV_ROUND_UP_ULL(ret, 10000);
674
675         return ret;
676 }
677
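/*
 * Worked example for intel_wm_method1() (numbers are illustrative only):
 * a 148500 kHz pixel rate, cpp = 4 and a latency of 50 (i.e. 5 usec in
 * 0.1us units) gives
 *
 *   148500 * 4 * 50 / 10000 = 2970 bytes
 *
 * i.e. roughly 3 KiB must be buffered to ride out the memory wakeup latency.
 */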
678 /**
679  * intel_wm_method2 - Method 2 / "large buffer" watermark formula
680  * @pixel_rate: Pipe pixel rate in kHz
681  * @htotal: Pipe horizontal total
682  * @width: Plane width in pixels
683  * @cpp: Plane bytes per pixel
684  * @latency: Memory wakeup latency in 0.1us units
685  *
686  * Compute the watermark using the method 2 or "large buffer"
687  * formula. The caller may additionally add extra cachelines
688  * to account for TLB misses and clock crossings.
689  *
690  * This method is concerned with the long-term drain rate
691  * of the FIFO, i.e. it does account for blanking periods
692  * which effectively reduce the average drain rate across
693  * a longer period. The name "large" refers to the fact that the
694  * FIFO is relatively large compared to the amount of data
695  * fetched.
696  *
697  * The FIFO level vs. time graph might look something like:
698  *
699  *    |\___       |\___
700  *    |    \___   |    \___
701  *    |        \  |        \
702  * __ --__--__--__--__--__--__ (- plane active, _ blanking)
703  * -> time
704  *
705  * Returns:
706  * The watermark in bytes
707  */
708 static unsigned int intel_wm_method2(unsigned int pixel_rate,
709                                      unsigned int htotal,
710                                      unsigned int width,
711                                      unsigned int cpp,
712                                      unsigned int latency)
713 {
714         unsigned int ret;
715
716         /*
717          * FIXME remove once all users are computing
718          * watermarks in the correct place.
719          */
720         if (WARN_ON_ONCE(htotal == 0))
721                 htotal = 1;
722
723         ret = (latency * pixel_rate) / (htotal * 10000);
724         ret = (ret + 1) * width * cpp;
725
726         return ret;
727 }
728
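/*
 * Worked example for intel_wm_method2() (illustrative numbers): with
 * pixel_rate = 148500 kHz, htotal = 2200, width = 1920, cpp = 4 and
 * latency = 50 (5 usec):
 *
 *   lines = (50 * 148500) / (2200 * 10000) = 0   (latency < one line time)
 *   wm    = (0 + 1) * 1920 * 4 = 7680 bytes
 *
 * i.e. at least one full line worth of data must fit in the FIFO.
 */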
729 /**
730  * intel_calculate_wm - calculate watermark level
731  * @pixel_rate: pixel clock
732  * @wm: chip FIFO params
733  * @fifo_size: size of the FIFO buffer
734  * @cpp: bytes per pixel
735  * @latency_ns: memory latency for the platform
736  *
737  * Calculate the watermark level (the level at which the display plane will
738  * start fetching from memory again).  Each chip has a different display
739  * FIFO size and allocation, so the caller needs to figure that out and pass
740  * in the correct intel_watermark_params structure.
741  *
742  * As the pixel clock runs, the FIFO will be drained at a rate that depends
743  * on the pixel size.  When it reaches the watermark level, it'll start
744  * fetching FIFO-line-sized chunks from memory until the FIFO fills
745  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
746  * will occur, and a display engine hang could result.
747  */
748 static unsigned int intel_calculate_wm(int pixel_rate,
749                                        const struct intel_watermark_params *wm,
750                                        int fifo_size, int cpp,
751                                        unsigned int latency_ns)
752 {
753         int entries, wm_size;
754
755         /*
756          * Note: we need to make sure we don't overflow for various clock &
757          * latency values.
758          * Clocks go from a few thousand to several hundred thousand kHz;
759          * latency is usually a few thousand ns.
760          */
761         entries = intel_wm_method1(pixel_rate, cpp,
762                                    latency_ns / 100);
763         entries = DIV_ROUND_UP(entries, wm->cacheline_size) +
764                 wm->guard_size;
765         DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries);
766
767         wm_size = fifo_size - entries;
768         DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
769
770         /* Don't promote wm_size to unsigned... */
771         if (wm_size > wm->max_wm)
772                 wm_size = wm->max_wm;
773         if (wm_size <= 0)
774                 wm_size = wm->default_wm;
775
776         /*
777          * Bspec seems to indicate that the value shouldn't be lower than
778          * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
779          * Let's go for 8, which is the burst size, since certain platforms
780          * already use a hardcoded 8 (which is what the spec says should be
781          * done).
782          */
783         if (wm_size <= 8)
784                 wm_size = 8;
785
786         return wm_size;
787 }
788
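/*
 * Worked example for intel_calculate_wm() (illustrative numbers, assuming
 * a 64 byte cacheline, guard_size = 2 and a hypothetical 96 entry FIFO):
 * method1(148500, 4, 50) = 2970 bytes, so
 *
 *   entries = DIV_ROUND_UP(2970, 64) + 2 = 47 + 2 = 49
 *   wm_size = 96 - 49 = 47
 *
 * i.e. the plane starts refilling once the FIFO drops to 47 entries.
 */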
789 static bool is_disabling(int old, int new, int threshold)
790 {
791         return old >= threshold && new < threshold;
792 }
793
794 static bool is_enabling(int old, int new, int threshold)
795 {
796         return old < threshold && new >= threshold;
797 }
798
799 static int intel_wm_num_levels(struct drm_i915_private *dev_priv)
800 {
801         return dev_priv->wm.max_level + 1;
802 }
803
804 static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
805                                    const struct intel_plane_state *plane_state)
806 {
807         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
808
809         /* FIXME check the 'enable' instead */
810         if (!crtc_state->base.active)
811                 return false;
812
813         /*
814          * Treat cursor with fb as always visible since cursor updates
815          * can happen faster than the vrefresh rate, and the current
816          * watermark code doesn't handle that correctly. Cursor updates
817          * which set/clear the fb or change the cursor size are going
818          * to get throttled by intel_legacy_cursor_update() to work
819          * around this problem with the watermark code.
820          */
821         if (plane->id == PLANE_CURSOR)
822                 return plane_state->base.fb != NULL;
823         else
824                 return plane_state->base.visible;
825 }
826
827 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
828 {
829         struct intel_crtc *crtc, *enabled = NULL;
830
831         for_each_intel_crtc(&dev_priv->drm, crtc) {
832                 if (intel_crtc_active(crtc)) {
833                         if (enabled)
834                                 return NULL;
835                         enabled = crtc;
836                 }
837         }
838
839         return enabled;
840 }
841
842 static void pineview_update_wm(struct intel_crtc *unused_crtc)
843 {
844         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
845         struct intel_crtc *crtc;
846         const struct cxsr_latency *latency;
847         u32 reg;
848         unsigned int wm;
849
850         latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
851                                          dev_priv->is_ddr3,
852                                          dev_priv->fsb_freq,
853                                          dev_priv->mem_freq);
854         if (!latency) {
855                 DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
856                 intel_set_memory_cxsr(dev_priv, false);
857                 return;
858         }
859
860         crtc = single_enabled_crtc(dev_priv);
861         if (crtc) {
862                 const struct drm_display_mode *adjusted_mode =
863                         &crtc->config->base.adjusted_mode;
864                 const struct drm_framebuffer *fb =
865                         crtc->base.primary->state->fb;
866                 int cpp = fb->format->cpp[0];
867                 int clock = adjusted_mode->crtc_clock;
868
869                 /* Display SR */
870                 wm = intel_calculate_wm(clock, &pineview_display_wm,
871                                         pineview_display_wm.fifo_size,
872                                         cpp, latency->display_sr);
873                 reg = I915_READ(DSPFW1);
874                 reg &= ~DSPFW_SR_MASK;
875                 reg |= FW_WM(wm, SR);
876                 I915_WRITE(DSPFW1, reg);
877                 DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
878
879                 /* cursor SR */
880                 wm = intel_calculate_wm(clock, &pineview_cursor_wm,
881                                         pineview_display_wm.fifo_size,
882                                         4, latency->cursor_sr);
883                 reg = I915_READ(DSPFW3);
884                 reg &= ~DSPFW_CURSOR_SR_MASK;
885                 reg |= FW_WM(wm, CURSOR_SR);
886                 I915_WRITE(DSPFW3, reg);
887
888                 /* Display HPLL off SR */
889                 wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
890                                         pineview_display_hplloff_wm.fifo_size,
891                                         cpp, latency->display_hpll_disable);
892                 reg = I915_READ(DSPFW3);
893                 reg &= ~DSPFW_HPLL_SR_MASK;
894                 reg |= FW_WM(wm, HPLL_SR);
895                 I915_WRITE(DSPFW3, reg);
896
897                 /* cursor HPLL off SR */
898                 wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
899                                         pineview_display_hplloff_wm.fifo_size,
900                                         4, latency->cursor_hpll_disable);
901                 reg = I915_READ(DSPFW3);
902                 reg &= ~DSPFW_HPLL_CURSOR_MASK;
903                 reg |= FW_WM(wm, HPLL_CURSOR);
904                 I915_WRITE(DSPFW3, reg);
905                 DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
906
907                 intel_set_memory_cxsr(dev_priv, true);
908         } else {
909                 intel_set_memory_cxsr(dev_priv, false);
910         }
911 }
912
913 /*
914  * Documentation says:
915  * "If the line size is small, the TLB fetches can get in the way of the
916  *  data fetches, causing some lag in the pixel data return which is not
917  *  accounted for in the above formulas. The following adjustment only
918  *  needs to be applied if eight whole lines fit in the buffer at once.
919  *  The WM is adjusted upwards by the difference between the FIFO size
920  *  and the size of 8 whole lines. This adjustment is always performed
921  *  in the actual pixel depth regardless of whether FBC is enabled or not."
922  */
923 static unsigned int g4x_tlb_miss_wa(int fifo_size, int width, int cpp)
924 {
925         int tlb_miss = fifo_size * 64 - width * cpp * 8;
926
927         return max(0, tlb_miss);
928 }
929
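/*
 * Worked example for g4x_tlb_miss_wa() (illustrative numbers): with a
 * 511 cacheline FIFO, fifo_size * 64 = 32704 bytes. A 1920 pixel wide
 * 32bpp plane needs 1920 * 4 * 8 = 61440 bytes for eight lines, so no
 * adjustment is made (tlb_miss is negative). A 640 pixel wide plane
 * needs only 20480 bytes, so 32704 - 20480 = 12224 bytes are added to
 * the watermark.
 */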
930 static void g4x_write_wm_values(struct drm_i915_private *dev_priv,
931                                 const struct g4x_wm_values *wm)
932 {
933         enum pipe pipe;
934
935         for_each_pipe(dev_priv, pipe)
936                 trace_g4x_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
937
938         I915_WRITE(DSPFW1,
939                    FW_WM(wm->sr.plane, SR) |
940                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
941                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
942                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
943         I915_WRITE(DSPFW2,
944                    (wm->fbc_en ? DSPFW_FBC_SR_EN : 0) |
945                    FW_WM(wm->sr.fbc, FBC_SR) |
946                    FW_WM(wm->hpll.fbc, FBC_HPLL_SR) |
947                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEB) |
948                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
949                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
950         I915_WRITE(DSPFW3,
951                    (wm->hpll_en ? DSPFW_HPLL_SR_EN : 0) |
952                    FW_WM(wm->sr.cursor, CURSOR_SR) |
953                    FW_WM(wm->hpll.cursor, HPLL_CURSOR) |
954                    FW_WM(wm->hpll.plane, HPLL_SR));
955
956         POSTING_READ(DSPFW1);
957 }
958
959 #define FW_WM_VLV(value, plane) \
960         (((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)
961
962 static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
963                                 const struct vlv_wm_values *wm)
964 {
965         enum pipe pipe;
966
967         for_each_pipe(dev_priv, pipe) {
968                 trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);
969
970                 I915_WRITE(VLV_DDL(pipe),
971                            (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
972                            (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
973                            (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
974                            (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
975         }
976
977         /*
978          * Zero the (unused) WM1 watermarks, and also clear all the
979          * high order bits so that there are no out of bounds values
980          * present in the registers during the reprogramming.
981          */
982         I915_WRITE(DSPHOWM, 0);
983         I915_WRITE(DSPHOWM1, 0);
984         I915_WRITE(DSPFW4, 0);
985         I915_WRITE(DSPFW5, 0);
986         I915_WRITE(DSPFW6, 0);
987
988         I915_WRITE(DSPFW1,
989                    FW_WM(wm->sr.plane, SR) |
990                    FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
991                    FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
992                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
993         I915_WRITE(DSPFW2,
994                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
995                    FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
996                    FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
997         I915_WRITE(DSPFW3,
998                    FW_WM(wm->sr.cursor, CURSOR_SR));
999
1000         if (IS_CHERRYVIEW(dev_priv)) {
1001                 I915_WRITE(DSPFW7_CHV,
1002                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1003                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1004                 I915_WRITE(DSPFW8_CHV,
1005                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
1006                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
1007                 I915_WRITE(DSPFW9_CHV,
1008                            FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
1009                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
1010                 I915_WRITE(DSPHOWM,
1011                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1012                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
1013                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
1014                            FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
1015                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1016                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1017                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1018                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1019                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1020                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1021         } else {
1022                 I915_WRITE(DSPFW7,
1023                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
1024                            FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
1025                 I915_WRITE(DSPHOWM,
1026                            FW_WM(wm->sr.plane >> 9, SR_HI) |
1027                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
1028                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
1029                            FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
1030                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
1031                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
1032                            FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
1033         }
1034
1035         POSTING_READ(DSPFW1);
1036 }
1037
1038 #undef FW_WM_VLV
1039
1040 static void g4x_setup_wm_latency(struct drm_i915_private *dev_priv)
1041 {
1042         /* all latencies in usec */
1043         dev_priv->wm.pri_latency[G4X_WM_LEVEL_NORMAL] = 5;
1044         dev_priv->wm.pri_latency[G4X_WM_LEVEL_SR] = 12;
1045         dev_priv->wm.pri_latency[G4X_WM_LEVEL_HPLL] = 35;
1046
1047         dev_priv->wm.max_level = G4X_WM_LEVEL_HPLL;
1048 }
1049
1050 static int g4x_plane_fifo_size(enum plane_id plane_id, int level)
1051 {
1052         /*
1053          * DSPCNTR[13] supposedly controls whether the
1054          * primary plane can use the FIFO space otherwise
1055          * reserved for the sprite plane. It's not 100% clear
1056          * what the actual FIFO size is, but it looks like we
1057          * can happily set both primary and sprite watermarks
1058          * up to 127 cachelines. So that would seem to mean
1059          * that either DSPCNTR[13] doesn't do anything, or that
1060          * the total FIFO is >= 256 cachelines in size. Either
1061          * way, we don't seem to have to worry about this
1062          * repartitioning as the maximum watermark value the
1063          * register can hold for each plane is lower than the
1064          * minimum FIFO size.
1065          */
1066         switch (plane_id) {
1067         case PLANE_CURSOR:
1068                 return 63;
1069         case PLANE_PRIMARY:
1070                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 511;
1071         case PLANE_SPRITE0:
1072                 return level == G4X_WM_LEVEL_NORMAL ? 127 : 0;
1073         default:
1074                 MISSING_CASE(plane_id);
1075                 return 0;
1076         }
1077 }
1078
1079 static int g4x_fbc_fifo_size(int level)
1080 {
1081         switch (level) {
1082         case G4X_WM_LEVEL_SR:
1083                 return 7;
1084         case G4X_WM_LEVEL_HPLL:
1085                 return 15;
1086         default:
1087                 MISSING_CASE(level);
1088                 return 0;
1089         }
1090 }
1091
1092 static uint16_t g4x_compute_wm(const struct intel_crtc_state *crtc_state,
1093                                const struct intel_plane_state *plane_state,
1094                                int level)
1095 {
1096         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1097         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1098         const struct drm_display_mode *adjusted_mode =
1099                 &crtc_state->base.adjusted_mode;
1100         unsigned int latency = dev_priv->wm.pri_latency[level] * 10;
1101         unsigned int clock, htotal, cpp, width, wm;
1102
1103         if (latency == 0)
1104                 return USHRT_MAX;
1105
1106         if (!intel_wm_plane_visible(crtc_state, plane_state))
1107                 return 0;
1108
1109         /*
1110          * Not 100% sure which way ELK should go here as the
1111          * spec only says CL/CTG should assume 32bpp and BW
1112          * doesn't need to. But as these things followed the
1113          * mobile vs. desktop lines on gen3 as well, let's
1114          * assume ELK doesn't need this.
1115          *
1116          * The spec also fails to list such a restriction for
1117          * the HPLL watermark, which seems a little strange.
1118          * Let's use 32bpp for the HPLL watermark as well.
1119          */
1120         if (IS_GM45(dev_priv) && plane->id == PLANE_PRIMARY &&
1121             level != G4X_WM_LEVEL_NORMAL)
1122                 cpp = 4;
1123         else
1124                 cpp = plane_state->base.fb->format->cpp[0];
1125
1126         clock = adjusted_mode->crtc_clock;
1127         htotal = adjusted_mode->crtc_htotal;
1128
1129         if (plane->id == PLANE_CURSOR)
1130                 width = plane_state->base.crtc_w;
1131         else
1132                 width = drm_rect_width(&plane_state->base.dst);
1133
1134         if (plane->id == PLANE_CURSOR) {
1135                 wm = intel_wm_method2(clock, htotal, width, cpp, latency);
1136         } else if (plane->id == PLANE_PRIMARY &&
1137                    level == G4X_WM_LEVEL_NORMAL) {
1138                 wm = intel_wm_method1(clock, cpp, latency);
1139         } else {
1140                 unsigned int small, large;
1141
1142                 small = intel_wm_method1(clock, cpp, latency);
1143                 large = intel_wm_method2(clock, htotal, width, cpp, latency);
1144
1145                 wm = min(small, large);
1146         }
1147
1148         wm += g4x_tlb_miss_wa(g4x_plane_fifo_size(plane->id, level),
1149                               width, cpp);
1150
1151         wm = DIV_ROUND_UP(wm, 64) + 2;
1152
1153         return min_t(unsigned int, wm, USHRT_MAX);
1154 }
1155
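/*
 * Continuing the worked example above for g4x_compute_wm() (illustrative
 * numbers): a sprite plane takes min(method1, method2) = min(2970, 7680)
 * = 2970 bytes, the TLB miss adjustment is 0 for a 1920 pixel wide plane,
 * and the result is converted to cachelines plus a small guard:
 *
 *   wm = DIV_ROUND_UP(2970, 64) + 2 = 49 cachelines
 */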
1156 static bool g4x_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1157                                  int level, enum plane_id plane_id, u16 value)
1158 {
1159         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1160         bool dirty = false;
1161
1162         for (; level < intel_wm_num_levels(dev_priv); level++) {
1163                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1164
1165                 dirty |= raw->plane[plane_id] != value;
1166                 raw->plane[plane_id] = value;
1167         }
1168
1169         return dirty;
1170 }
1171
1172 static bool g4x_raw_fbc_wm_set(struct intel_crtc_state *crtc_state,
1173                                int level, u16 value)
1174 {
1175         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1176         bool dirty = false;
1177
1178         /* NORMAL level doesn't have an FBC watermark */
1179         level = max(level, G4X_WM_LEVEL_SR);
1180
1181         for (; level < intel_wm_num_levels(dev_priv); level++) {
1182                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1183
1184                 dirty |= raw->fbc != value;
1185                 raw->fbc = value;
1186         }
1187
1188         return dirty;
1189 }
1190
1191 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
1192                                    const struct intel_plane_state *pstate,
1193                                    uint32_t pri_val);
1194
1195 static bool g4x_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1196                                      const struct intel_plane_state *plane_state)
1197 {
1198         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1199         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1200         enum plane_id plane_id = plane->id;
1201         bool dirty = false;
1202         int level;
1203
1204         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1205                 dirty |= g4x_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1206                 if (plane_id == PLANE_PRIMARY)
1207                         dirty |= g4x_raw_fbc_wm_set(crtc_state, 0, 0);
1208                 goto out;
1209         }
1210
1211         for (level = 0; level < num_levels; level++) {
1212                 struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1213                 int wm, max_wm;
1214
1215                 wm = g4x_compute_wm(crtc_state, plane_state, level);
1216                 max_wm = g4x_plane_fifo_size(plane_id, level);
1217
1218                 if (wm > max_wm)
1219                         break;
1220
1221                 dirty |= raw->plane[plane_id] != wm;
1222                 raw->plane[plane_id] = wm;
1223
1224                 if (plane_id != PLANE_PRIMARY ||
1225                     level == G4X_WM_LEVEL_NORMAL)
1226                         continue;
1227
1228                 wm = ilk_compute_fbc_wm(crtc_state, plane_state,
1229                                         raw->plane[plane_id]);
1230                 max_wm = g4x_fbc_fifo_size(level);
1231
1232                 /*
1233                  * FBC wm is not mandatory as we
1234                  * can always just disable its use.
1235                  */
1236                 if (wm > max_wm)
1237                         wm = USHRT_MAX;
1238
1239                 dirty |= raw->fbc != wm;
1240                 raw->fbc = wm;
1241         }
1242
1243         /* mark watermarks as invalid */
1244         dirty |= g4x_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1245
1246         if (plane_id == PLANE_PRIMARY)
1247                 dirty |= g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
1248
1249  out:
1250         if (dirty) {
1251                 DRM_DEBUG_KMS("%s watermarks: normal=%d, SR=%d, HPLL=%d\n",
1252                               plane->base.name,
1253                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_NORMAL].plane[plane_id],
1254                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].plane[plane_id],
1255                               crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].plane[plane_id]);
1256
1257                 if (plane_id == PLANE_PRIMARY)
1258                         DRM_DEBUG_KMS("FBC watermarks: SR=%d, HPLL=%d\n",
1259                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_SR].fbc,
1260                                       crtc_state->wm.g4x.raw[G4X_WM_LEVEL_HPLL].fbc);
1261         }
1262
1263         return dirty;
1264 }
1265
1266 static bool g4x_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1267                                       enum plane_id plane_id, int level)
1268 {
1269         const struct g4x_pipe_wm *raw = &crtc_state->wm.g4x.raw[level];
1270
1271         return raw->plane[plane_id] <= g4x_plane_fifo_size(plane_id, level);
1272 }
1273
1274 static bool g4x_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state,
1275                                      int level)
1276 {
1277         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1278
1279         if (level > dev_priv->wm.max_level)
1280                 return false;
1281
1282         return g4x_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1283                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1284                 g4x_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1285 }
1286
1287 /* mark all levels starting from 'level' as invalid */
1288 static void g4x_invalidate_wms(struct intel_crtc *crtc,
1289                                struct g4x_wm_state *wm_state, int level)
1290 {
1291         if (level <= G4X_WM_LEVEL_NORMAL) {
1292                 enum plane_id plane_id;
1293
1294                 for_each_plane_id_on_crtc(crtc, plane_id)
1295                         wm_state->wm.plane[plane_id] = USHRT_MAX;
1296         }
1297
1298         if (level <= G4X_WM_LEVEL_SR) {
1299                 wm_state->cxsr = false;
1300                 wm_state->sr.cursor = USHRT_MAX;
1301                 wm_state->sr.plane = USHRT_MAX;
1302                 wm_state->sr.fbc = USHRT_MAX;
1303         }
1304
1305         if (level <= G4X_WM_LEVEL_HPLL) {
1306                 wm_state->hpll_en = false;
1307                 wm_state->hpll.cursor = USHRT_MAX;
1308                 wm_state->hpll.plane = USHRT_MAX;
1309                 wm_state->hpll.fbc = USHRT_MAX;
1310         }
1311 }
1312
1313 static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1314 {
1315         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1316         struct intel_atomic_state *state =
1317                 to_intel_atomic_state(crtc_state->base.state);
1318         struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
1319         int num_active_planes = hweight32(crtc_state->active_planes &
1320                                           ~BIT(PLANE_CURSOR));
1321         const struct g4x_pipe_wm *raw;
1322         const struct intel_plane_state *old_plane_state;
1323         const struct intel_plane_state *new_plane_state;
1324         struct intel_plane *plane;
1325         enum plane_id plane_id;
1326         int i, level;
1327         unsigned int dirty = 0;
1328
1329         for_each_oldnew_intel_plane_in_state(state, plane,
1330                                              old_plane_state,
1331                                              new_plane_state, i) {
1332                 if (new_plane_state->base.crtc != &crtc->base &&
1333                     old_plane_state->base.crtc != &crtc->base)
1334                         continue;
1335
1336                 if (g4x_raw_plane_wm_compute(crtc_state, new_plane_state))
1337                         dirty |= BIT(plane->id);
1338         }
1339
1340         if (!dirty)
1341                 return 0;
1342
1343         level = G4X_WM_LEVEL_NORMAL;
1344         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1345                 goto out;
1346
1347         raw = &crtc_state->wm.g4x.raw[level];
1348         for_each_plane_id_on_crtc(crtc, plane_id)
1349                 wm_state->wm.plane[plane_id] = raw->plane[plane_id];
1350
1351         level = G4X_WM_LEVEL_SR;
1352
1353         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1354                 goto out;
1355
1356         raw = &crtc_state->wm.g4x.raw[level];
1357         wm_state->sr.plane = raw->plane[PLANE_PRIMARY];
1358         wm_state->sr.cursor = raw->plane[PLANE_CURSOR];
1359         wm_state->sr.fbc = raw->fbc;
1360
1361         wm_state->cxsr = num_active_planes == BIT(PLANE_PRIMARY);
1362
1363         level = G4X_WM_LEVEL_HPLL;
1364
1365         if (!g4x_raw_crtc_wm_is_valid(crtc_state, level))
1366                 goto out;
1367
1368         raw = &crtc_state->wm.g4x.raw[level];
1369         wm_state->hpll.plane = raw->plane[PLANE_PRIMARY];
1370         wm_state->hpll.cursor = raw->plane[PLANE_CURSOR];
1371         wm_state->hpll.fbc = raw->fbc;
1372
1373         wm_state->hpll_en = wm_state->cxsr;
1374
1375         level++;
1376
1377  out:
1378         if (level == G4X_WM_LEVEL_NORMAL)
1379                 return -EINVAL;
1380
1381         /* invalidate the higher levels */
1382         g4x_invalidate_wms(crtc, wm_state, level);
1383
1384         /*
1385          * Determine if the FBC watermark(s) can be used. If
1386          * this isn't the case we prefer to disable the FBC
1387          * watermark(s) rather than disable the SR/HPLL
1388          * level(s) entirely.
1389          */
1390         wm_state->fbc_en = level > G4X_WM_LEVEL_NORMAL;
1391
1392         if (level >= G4X_WM_LEVEL_SR &&
1393             wm_state->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR))
1394                 wm_state->fbc_en = false;
1395         else if (level >= G4X_WM_LEVEL_HPLL &&
1396                  wm_state->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL))
1397                 wm_state->fbc_en = false;
1398
1399         return 0;
1400 }
1401
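     /*
      * The intermediate watermarks are in effect from the start of the
      * commit until the post-vblank update, so they must satisfy both the
      * old (active) and new (optimal) state; hence the max() of the two
      * for each value, with cxsr/HPLL/FBC only left enabled when both
      * states allow it.
      */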
1402 static int g4x_compute_intermediate_wm(struct drm_device *dev,
1403                                        struct intel_crtc *crtc,
1404                                        struct intel_crtc_state *new_crtc_state)
1405 {
1406         struct g4x_wm_state *intermediate = &new_crtc_state->wm.g4x.intermediate;
1407         const struct g4x_wm_state *optimal = &new_crtc_state->wm.g4x.optimal;
1408         struct intel_atomic_state *intel_state =
1409                 to_intel_atomic_state(new_crtc_state->base.state);
1410         const struct intel_crtc_state *old_crtc_state =
1411                 intel_atomic_get_old_crtc_state(intel_state, crtc);
1412         const struct g4x_wm_state *active = &old_crtc_state->wm.g4x.optimal;
1413         enum plane_id plane_id;
1414
1415         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
1416                 *intermediate = *optimal;
1417
1418                 intermediate->cxsr = false;
1419                 intermediate->hpll_en = false;
1420                 goto out;
1421         }
1422
1423         intermediate->cxsr = optimal->cxsr && active->cxsr &&
1424                 !new_crtc_state->disable_cxsr;
1425         intermediate->hpll_en = optimal->hpll_en && active->hpll_en &&
1426                 !new_crtc_state->disable_cxsr;
1427         intermediate->fbc_en = optimal->fbc_en && active->fbc_en;
1428
1429         for_each_plane_id_on_crtc(crtc, plane_id) {
1430                 intermediate->wm.plane[plane_id] =
1431                         max(optimal->wm.plane[plane_id],
1432                             active->wm.plane[plane_id]);
1433
1434                 WARN_ON(intermediate->wm.plane[plane_id] >
1435                         g4x_plane_fifo_size(plane_id, G4X_WM_LEVEL_NORMAL));
1436         }
1437
1438         intermediate->sr.plane = max(optimal->sr.plane,
1439                                      active->sr.plane);
1440         intermediate->sr.cursor = max(optimal->sr.cursor,
1441                                       active->sr.cursor);
1442         intermediate->sr.fbc = max(optimal->sr.fbc,
1443                                    active->sr.fbc);
1444
1445         intermediate->hpll.plane = max(optimal->hpll.plane,
1446                                        active->hpll.plane);
1447         intermediate->hpll.cursor = max(optimal->hpll.cursor,
1448                                         active->hpll.cursor);
1449         intermediate->hpll.fbc = max(optimal->hpll.fbc,
1450                                      active->hpll.fbc);
1451
1452         WARN_ON((intermediate->sr.plane >
1453                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_SR) ||
1454                  intermediate->sr.cursor >
1455                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_SR)) &&
1456                 intermediate->cxsr);
1457         WARN_ON((intermediate->sr.plane >
1458                  g4x_plane_fifo_size(PLANE_PRIMARY, G4X_WM_LEVEL_HPLL) ||
1459                  intermediate->sr.cursor >
1460                  g4x_plane_fifo_size(PLANE_CURSOR, G4X_WM_LEVEL_HPLL)) &&
1461                 intermediate->hpll_en);
1462
1463         WARN_ON(intermediate->sr.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_SR) &&
1464                 intermediate->fbc_en && intermediate->cxsr);
1465         WARN_ON(intermediate->hpll.fbc > g4x_fbc_fifo_size(G4X_WM_LEVEL_HPLL) &&
1466                 intermediate->fbc_en && intermediate->hpll_en);
1467
1468 out:
1469         /*
1470          * If our intermediate WM are identical to the final WM, then we can
1471          * omit the post-vblank programming; only update if it's different.
1472          */
1473         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
1474                 new_crtc_state->wm.need_postvbl_update = true;
1475
1476         return 0;
1477 }
1478
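     /*
      * Merge the per-crtc watermark state into the global values that get
      * written to the hardware; cxsr/HPLL/FBC are only usable when exactly
      * one crtc is active.
      */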
1479 static void g4x_merge_wm(struct drm_i915_private *dev_priv,
1480                          struct g4x_wm_values *wm)
1481 {
1482         struct intel_crtc *crtc;
1483         int num_active_crtcs = 0;
1484
1485         wm->cxsr = true;
1486         wm->hpll_en = true;
1487         wm->fbc_en = true;
1488
1489         for_each_intel_crtc(&dev_priv->drm, crtc) {
1490                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1491
1492                 if (!crtc->active)
1493                         continue;
1494
1495                 if (!wm_state->cxsr)
1496                         wm->cxsr = false;
1497                 if (!wm_state->hpll_en)
1498                         wm->hpll_en = false;
1499                 if (!wm_state->fbc_en)
1500                         wm->fbc_en = false;
1501
1502                 num_active_crtcs++;
1503         }
1504
1505         if (num_active_crtcs != 1) {
1506                 wm->cxsr = false;
1507                 wm->hpll_en = false;
1508                 wm->fbc_en = false;
1509         }
1510
1511         for_each_intel_crtc(&dev_priv->drm, crtc) {
1512                 const struct g4x_wm_state *wm_state = &crtc->wm.active.g4x;
1513                 enum pipe pipe = crtc->pipe;
1514
1515                 wm->pipe[pipe] = wm_state->wm;
1516                 if (crtc->active && wm->cxsr)
1517                         wm->sr = wm_state->sr;
1518                 if (crtc->active && wm->hpll_en)
1519                         wm->hpll = wm_state->hpll;
1520         }
1521 }
1522
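     /*
      * Program the merged watermarks, disabling cxsr before the register
      * update and re-enabling it afterwards when appropriate.
      */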
1523 static void g4x_program_watermarks(struct drm_i915_private *dev_priv)
1524 {
1525         struct g4x_wm_values *old_wm = &dev_priv->wm.g4x;
1526         struct g4x_wm_values new_wm = {};
1527
1528         g4x_merge_wm(dev_priv, &new_wm);
1529
1530         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
1531                 return;
1532
1533         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
1534                 _intel_set_memory_cxsr(dev_priv, false);
1535
1536         g4x_write_wm_values(dev_priv, &new_wm);
1537
1538         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
1539                 _intel_set_memory_cxsr(dev_priv, true);
1540
1541         *old_wm = new_wm;
1542 }
1543
1544 static void g4x_initial_watermarks(struct intel_atomic_state *state,
1545                                    struct intel_crtc_state *crtc_state)
1546 {
1547         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1548         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1549
1550         mutex_lock(&dev_priv->wm.wm_mutex);
1551         crtc->wm.active.g4x = crtc_state->wm.g4x.intermediate;
1552         g4x_program_watermarks(dev_priv);
1553         mutex_unlock(&dev_priv->wm.wm_mutex);
1554 }
1555
1556 static void g4x_optimize_watermarks(struct intel_atomic_state *state,
1557                                     struct intel_crtc_state *crtc_state)
1558 {
1559         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1560         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
1561
1562         if (!crtc_state->wm.need_postvbl_update)
1563                 return;
1564
1565         mutex_lock(&dev_priv->wm.wm_mutex);
1566         intel_crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
1567         g4x_program_watermarks(dev_priv);
1568         mutex_unlock(&dev_priv->wm.wm_mutex);
1569 }
1570
1571 /* latency must be in 0.1us units. */
1572 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
1573                                    unsigned int htotal,
1574                                    unsigned int width,
1575                                    unsigned int cpp,
1576                                    unsigned int latency)
1577 {
1578         unsigned int ret;
1579
1580         ret = intel_wm_method2(pixel_rate, htotal,
1581                                width, cpp, latency);
1582         ret = DIV_ROUND_UP(ret, 64);
1583
1584         return ret;
1585 }
1586
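     /*
      * VLV exposes only the PM2 level; CHV adds the deeper PM5 and
      * DDR DVFS levels with correspondingly higher latencies.
      */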
1587 static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
1588 {
1589         /* all latencies in usec */
1590         dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;
1591
1592         dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;
1593
1594         if (IS_CHERRYVIEW(dev_priv)) {
1595                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
1596                 dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;
1597
1598                 dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
1599         }
1600 }
1601
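     /*
      * Compute the raw watermark for one plane at the given level; returns
      * 0 for invisible planes and USHRT_MAX when no latency is defined for
      * the level.
      */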
1602 static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
1603                                      const struct intel_plane_state *plane_state,
1604                                      int level)
1605 {
1606         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1607         struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
1608         const struct drm_display_mode *adjusted_mode =
1609                 &crtc_state->base.adjusted_mode;
1610         unsigned int clock, htotal, cpp, width, wm;
1611
1612         if (dev_priv->wm.pri_latency[level] == 0)
1613                 return USHRT_MAX;
1614
1615         if (!intel_wm_plane_visible(crtc_state, plane_state))
1616                 return 0;
1617
1618         cpp = plane_state->base.fb->format->cpp[0];
1619         clock = adjusted_mode->crtc_clock;
1620         htotal = adjusted_mode->crtc_htotal;
1621         width = crtc_state->pipe_src_w;
1622
1623         if (plane->id == PLANE_CURSOR) {
1624                 /*
1625                  * FIXME the formula gives values that are
1626                  * too big for the cursor FIFO, and hence we
1627                  * would never be able to use cursors. For
1628                  * now just hardcode the watermark.
1629                  */
1630                 wm = 63;
1631         } else {
1632                 wm = vlv_wm_method2(clock, htotal, width, cpp,
1633                                     dev_priv->wm.pri_latency[level] * 10);
1634         }
1635
1636         return min_t(unsigned int, wm, USHRT_MAX);
1637 }
1638
1639 static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
1640 {
1641         return (active_planes & (BIT(PLANE_SPRITE0) |
1642                                  BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
1643 }
1644
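     /*
      * Carve up the 511 entry DSPARB FIFO between the active primary and
      * sprite planes in proportion to their PM2 raw watermarks, then
      * spread any leftover entries evenly. The cursor has its own fixed
      * 63 entry FIFO.
      */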
1645 static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
1646 {
1647         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1648         const struct g4x_pipe_wm *raw =
1649                 &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
1650         struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
1651         unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
1652         int num_active_planes = hweight32(active_planes);
1653         const int fifo_size = 511;
1654         int fifo_extra, fifo_left = fifo_size;
1655         int sprite0_fifo_extra = 0;
1656         unsigned int total_rate;
1657         enum plane_id plane_id;
1658
1659         /*
1660          * When enabling sprite0 after sprite1 has already been enabled
1661          * we tend to get an underrun unless sprite0 already has some
1662          * FIFO space allocated. Hence we always allocate at least one
1663          * cacheline for sprite0 whenever sprite1 is enabled.
1664          *
1665          * All other plane enable sequences appear immune to this problem.
1666          */
1667         if (vlv_need_sprite0_fifo_workaround(active_planes))
1668                 sprite0_fifo_extra = 1;
1669
1670         total_rate = raw->plane[PLANE_PRIMARY] +
1671                 raw->plane[PLANE_SPRITE0] +
1672                 raw->plane[PLANE_SPRITE1] +
1673                 sprite0_fifo_extra;
1674
1675         if (total_rate > fifo_size)
1676                 return -EINVAL;
1677
1678         if (total_rate == 0)
1679                 total_rate = 1;
1680
1681         for_each_plane_id_on_crtc(crtc, plane_id) {
1682                 unsigned int rate;
1683
1684                 if ((active_planes & BIT(plane_id)) == 0) {
1685                         fifo_state->plane[plane_id] = 0;
1686                         continue;
1687                 }
1688
1689                 rate = raw->plane[plane_id];
1690                 fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
1691                 fifo_left -= fifo_state->plane[plane_id];
1692         }
1693
1694         fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
1695         fifo_left -= sprite0_fifo_extra;
1696
1697         fifo_state->plane[PLANE_CURSOR] = 63;
1698
1699         fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);
1700
1701         /* spread the remainder evenly */
1702         for_each_plane_id_on_crtc(crtc, plane_id) {
1703                 int plane_extra;
1704
1705                 if (fifo_left == 0)
1706                         break;
1707
1708                 if ((active_planes & BIT(plane_id)) == 0)
1709                         continue;
1710
1711                 plane_extra = min(fifo_extra, fifo_left);
1712                 fifo_state->plane[plane_id] += plane_extra;
1713                 fifo_left -= plane_extra;
1714         }
1715
1716         WARN_ON(active_planes != 0 && fifo_left != 0);
1717
1718         /* give it all to the first plane if none are active */
1719         if (active_planes == 0) {
1720                 WARN_ON(fifo_left != fifo_size);
1721                 fifo_state->plane[PLANE_PRIMARY] = fifo_left;
1722         }
1723
1724         return 0;
1725 }
1726
1727 /* mark all levels starting from 'level' as invalid */
1728 static void vlv_invalidate_wms(struct intel_crtc *crtc,
1729                                struct vlv_wm_state *wm_state, int level)
1730 {
1731         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1732
1733         for (; level < intel_wm_num_levels(dev_priv); level++) {
1734                 enum plane_id plane_id;
1735
1736                 for_each_plane_id_on_crtc(crtc, plane_id)
1737                         wm_state->wm[level].plane[plane_id] = USHRT_MAX;
1738
1739                 wm_state->sr[level].cursor = USHRT_MAX;
1740                 wm_state->sr[level].plane = USHRT_MAX;
1741         }
1742 }
1743
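     /*
      * Convert a raw watermark (FIFO entries needed) into the value
      * written to the registers: the remaining space in the plane's FIFO,
      * with USHRT_MAX flagging values that don't fit.
      */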
1744 static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
1745 {
1746         if (wm > fifo_size)
1747                 return USHRT_MAX;
1748         else
1749                 return fifo_size - wm;
1750 }
1751
1752 /*
1753  * Starting from 'level' set all higher
1754  * levels to 'value' in the "raw" watermarks.
1755  */
1756 static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
1757                                  int level, enum plane_id plane_id, u16 value)
1758 {
1759         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
1760         int num_levels = intel_wm_num_levels(dev_priv);
1761         bool dirty = false;
1762
1763         for (; level < num_levels; level++) {
1764                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1765
1766                 dirty |= raw->plane[plane_id] != value;
1767                 raw->plane[plane_id] = value;
1768         }
1769
1770         return dirty;
1771 }
1772
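     /*
      * Recompute the raw watermarks of one plane for all levels. Any level
      * whose value exceeds the hardware maximum, and everything above it,
      * is marked invalid. Returns true if anything changed.
      */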
1773 static bool vlv_raw_plane_wm_compute(struct intel_crtc_state *crtc_state,
1774                                      const struct intel_plane_state *plane_state)
1775 {
1776         struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
1777         enum plane_id plane_id = plane->id;
1778         int num_levels = intel_wm_num_levels(to_i915(plane->base.dev));
1779         int level;
1780         bool dirty = false;
1781
1782         if (!intel_wm_plane_visible(crtc_state, plane_state)) {
1783                 dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
1784                 goto out;
1785         }
1786
1787         for (level = 0; level < num_levels; level++) {
1788                 struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1789                 int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
1790                 int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;
1791
1792                 if (wm > max_wm)
1793                         break;
1794
1795                 dirty |= raw->plane[plane_id] != wm;
1796                 raw->plane[plane_id] = wm;
1797         }
1798
1799         /* mark all higher levels as invalid */
1800         dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);
1801
1802 out:
1803         if (dirty)
1804                 DRM_DEBUG_KMS("%s watermarks: PM2=%d, PM5=%d, DDR DVFS=%d\n",
1805                               plane->base.name,
1806                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
1807                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
1808                               crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);
1809
1810         return dirty;
1811 }
1812
1813 static bool vlv_raw_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
1814                                       enum plane_id plane_id, int level)
1815 {
1816         const struct g4x_pipe_wm *raw =
1817                 &crtc_state->wm.vlv.raw[level];
1818         const struct vlv_fifo_state *fifo_state =
1819                 &crtc_state->wm.vlv.fifo_state;
1820
1821         return raw->plane[plane_id] <= fifo_state->plane[plane_id];
1822 }
1823
1824 static bool vlv_raw_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
1825 {
1826         return vlv_raw_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
1827                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
1828                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
1829                 vlv_raw_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
1830 }
1831
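     /*
      * Compute the optimal watermarks for this pipe: update the raw
      * per-plane values, recompute the FIFO split when necessary, and
      * invert the raw values into register form for every level that
      * still fits its FIFO allocation; higher levels are invalidated.
      */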
1832 static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
1833 {
1834         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1835         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1836         struct intel_atomic_state *state =
1837                 to_intel_atomic_state(crtc_state->base.state);
1838         struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
1839         const struct vlv_fifo_state *fifo_state =
1840                 &crtc_state->wm.vlv.fifo_state;
1841         int num_active_planes = hweight32(crtc_state->active_planes &
1842                                           ~BIT(PLANE_CURSOR));
1843         bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
1844         const struct intel_plane_state *old_plane_state;
1845         const struct intel_plane_state *new_plane_state;
1846         struct intel_plane *plane;
1847         enum plane_id plane_id;
1848         int level, ret, i;
1849         unsigned int dirty = 0;
1850
1851         for_each_oldnew_intel_plane_in_state(state, plane,
1852                                              old_plane_state,
1853                                              new_plane_state, i) {
1854                 if (new_plane_state->base.crtc != &crtc->base &&
1855                     old_plane_state->base.crtc != &crtc->base)
1856                         continue;
1857
1858                 if (vlv_raw_plane_wm_compute(crtc_state, new_plane_state))
1859                         dirty |= BIT(plane->id);
1860         }
1861
1862         /*
1863          * DSPARB registers may have been reset due to the
1864          * power well being turned off. Make sure we restore
1865          * them to a consistent state even if no primary/sprite
1866          * planes are initially active.
1867          */
1868         if (needs_modeset)
1869                 crtc_state->fifo_changed = true;
1870
1871         if (!dirty)
1872                 return 0;
1873
1874         /* cursor changes don't warrant a FIFO recompute */
1875         if (dirty & ~BIT(PLANE_CURSOR)) {
1876                 const struct intel_crtc_state *old_crtc_state =
1877                         intel_atomic_get_old_crtc_state(state, crtc);
1878                 const struct vlv_fifo_state *old_fifo_state =
1879                         &old_crtc_state->wm.vlv.fifo_state;
1880
1881                 ret = vlv_compute_fifo(crtc_state);
1882                 if (ret)
1883                         return ret;
1884
1885                 if (needs_modeset ||
1886                     memcmp(old_fifo_state, fifo_state,
1887                            sizeof(*fifo_state)) != 0)
1888                         crtc_state->fifo_changed = true;
1889         }
1890
1891         /* initially allow all levels */
1892         wm_state->num_levels = intel_wm_num_levels(dev_priv);
1893         /*
1894          * Note that enabling cxsr with no primary/sprite planes
1895          * enabled can wedge the pipe. Hence we only allow cxsr
1896          * with exactly one enabled primary/sprite plane.
1897          */
1898         wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;
1899
1900         for (level = 0; level < wm_state->num_levels; level++) {
1901                 const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
1902                 const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;
1903
1904                 if (!vlv_raw_crtc_wm_is_valid(crtc_state, level))
1905                         break;
1906
1907                 for_each_plane_id_on_crtc(crtc, plane_id) {
1908                         wm_state->wm[level].plane[plane_id] =
1909                                 vlv_invert_wm_value(raw->plane[plane_id],
1910                                                     fifo_state->plane[plane_id]);
1911                 }
1912
1913                 wm_state->sr[level].plane =
1914                         vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
1915                                                  raw->plane[PLANE_SPRITE0],
1916                                                  raw->plane[PLANE_SPRITE1]),
1917                                             sr_fifo_size);
1918
1919                 wm_state->sr[level].cursor =
1920                         vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
1921                                             63);
1922         }
1923
1924         if (level == 0)
1925                 return -EINVAL;
1926
1927         /* limit to only levels we can actually handle */
1928         wm_state->num_levels = level;
1929
1930         /* invalidate the higher levels */
1931         vlv_invalidate_wms(crtc, wm_state, level);
1932
1933         return 0;
1934 }
1935
1936 #define VLV_FIFO(plane, value) \
1937         (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)
1938
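     /*
      * Apply the new DSPARB FIFO split for this pipe. The caller has
      * already entered the vblank evasion critical section, hence the
      * _FW register accessors under uncore.lock (see below).
      */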
1939 static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
1940                                    struct intel_crtc_state *crtc_state)
1941 {
1942         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
1943         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
1944         const struct vlv_fifo_state *fifo_state =
1945                 &crtc_state->wm.vlv.fifo_state;
1946         int sprite0_start, sprite1_start, fifo_size;
1947
1948         if (!crtc_state->fifo_changed)
1949                 return;
1950
1951         sprite0_start = fifo_state->plane[PLANE_PRIMARY];
1952         sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
1953         fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;
1954
1955         WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
1956         WARN_ON(fifo_size != 511);
1957
1958         trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);
1959
1960         /*
1961          * uncore.lock serves a double purpose here. It allows us to
1962          * use the less expensive I915_{READ,WRITE}_FW() functions, and
1963          * it protects the DSPARB registers from getting clobbered by
1964          * parallel updates from multiple pipes.
1965          *
1966          * intel_pipe_update_start() has already disabled interrupts
1967          * for us, so a plain spin_lock() is sufficient here.
1968          */
1969         spin_lock(&dev_priv->uncore.lock);
1970
1971         switch (crtc->pipe) {
1972                 uint32_t dsparb, dsparb2, dsparb3;
1973         case PIPE_A:
1974                 dsparb = I915_READ_FW(DSPARB);
1975                 dsparb2 = I915_READ_FW(DSPARB2);
1976
1977                 dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
1978                             VLV_FIFO(SPRITEB, 0xff));
1979                 dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
1980                            VLV_FIFO(SPRITEB, sprite1_start));
1981
1982                 dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
1983                              VLV_FIFO(SPRITEB_HI, 0x1));
1984                 dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
1985                            VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
1986
1987                 I915_WRITE_FW(DSPARB, dsparb);
1988                 I915_WRITE_FW(DSPARB2, dsparb2);
1989                 break;
1990         case PIPE_B:
1991                 dsparb = I915_READ_FW(DSPARB);
1992                 dsparb2 = I915_READ_FW(DSPARB2);
1993
1994                 dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
1995                             VLV_FIFO(SPRITED, 0xff));
1996                 dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
1997                            VLV_FIFO(SPRITED, sprite1_start));
1998
1999                 dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
2000                              VLV_FIFO(SPRITED_HI, 0xff));
2001                 dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
2002                            VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
2003
2004                 I915_WRITE_FW(DSPARB, dsparb);
2005                 I915_WRITE_FW(DSPARB2, dsparb2);
2006                 break;
2007         case PIPE_C:
2008                 dsparb3 = I915_READ_FW(DSPARB3);
2009                 dsparb2 = I915_READ_FW(DSPARB2);
2010
2011                 dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
2012                              VLV_FIFO(SPRITEF, 0xff));
2013                 dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
2014                             VLV_FIFO(SPRITEF, sprite1_start));
2015
2016                 dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
2017                              VLV_FIFO(SPRITEF_HI, 0xff));
2018                 dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
2019                            VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
2020
2021                 I915_WRITE_FW(DSPARB3, dsparb3);
2022                 I915_WRITE_FW(DSPARB2, dsparb2);
2023                 break;
2024         default:
2025                 break;
2026         }
2027
2028         POSTING_READ_FW(DSPARB);
2029
2030         spin_unlock(&dev_priv->uncore.lock);
2031 }
2032
2033 #undef VLV_FIFO
2034
2035 static int vlv_compute_intermediate_wm(struct drm_device *dev,
2036                                        struct intel_crtc *crtc,
2037                                        struct intel_crtc_state *new_crtc_state)
2038 {
2039         struct vlv_wm_state *intermediate = &new_crtc_state->wm.vlv.intermediate;
2040         const struct vlv_wm_state *optimal = &new_crtc_state->wm.vlv.optimal;
2041         struct intel_atomic_state *intel_state =
2042                 to_intel_atomic_state(new_crtc_state->base.state);
2043         const struct intel_crtc_state *old_crtc_state =
2044                 intel_atomic_get_old_crtc_state(intel_state, crtc);
2045         const struct vlv_wm_state *active = &old_crtc_state->wm.vlv.optimal;
2046         int level;
2047
2048         if (!new_crtc_state->base.active || drm_atomic_crtc_needs_modeset(&new_crtc_state->base)) {
2049                 *intermediate = *optimal;
2050
2051                 intermediate->cxsr = false;
2052                 goto out;
2053         }
2054
2055         intermediate->num_levels = min(optimal->num_levels, active->num_levels);
2056         intermediate->cxsr = optimal->cxsr && active->cxsr &&
2057                 !new_crtc_state->disable_cxsr;
2058
2059         for (level = 0; level < intermediate->num_levels; level++) {
2060                 enum plane_id plane_id;
2061
2062                 for_each_plane_id_on_crtc(crtc, plane_id) {
2063                         intermediate->wm[level].plane[plane_id] =
2064                                 min(optimal->wm[level].plane[plane_id],
2065                                     active->wm[level].plane[plane_id]);
2066                 }
2067
2068                 intermediate->sr[level].plane = min(optimal->sr[level].plane,
2069                                                     active->sr[level].plane);
2070                 intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
2071                                                      active->sr[level].cursor);
2072         }
2073
2074         vlv_invalidate_wms(crtc, intermediate, level);
2075
2076 out:
2077         /*
2078          * If our intermediate WM are identical to the final WM, then we can
2079          * omit the post-vblank programming; only update if it's different.
2080          */
2081         if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
2082                 new_crtc_state->wm.need_postvbl_update = true;
2083
2084         return 0;
2085 }
2086
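     /*
      * Merge the per-crtc watermark state into the global values: pick
      * the deepest level every active crtc supports, and only allow cxsr
      * when exactly one crtc is active.
      */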
2087 static void vlv_merge_wm(struct drm_i915_private *dev_priv,
2088                          struct vlv_wm_values *wm)
2089 {
2090         struct intel_crtc *crtc;
2091         int num_active_crtcs = 0;
2092
2093         wm->level = dev_priv->wm.max_level;
2094         wm->cxsr = true;
2095
2096         for_each_intel_crtc(&dev_priv->drm, crtc) {
2097                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2098
2099                 if (!crtc->active)
2100                         continue;
2101
2102                 if (!wm_state->cxsr)
2103                         wm->cxsr = false;
2104
2105                 num_active_crtcs++;
2106                 wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
2107         }
2108
2109         if (num_active_crtcs != 1)
2110                 wm->cxsr = false;
2111
2112         if (num_active_crtcs > 1)
2113                 wm->level = VLV_WM_LEVEL_PM2;
2114
2115         for_each_intel_crtc(&dev_priv->drm, crtc) {
2116                 const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
2117                 enum pipe pipe = crtc->pipe;
2118
2119                 wm->pipe[pipe] = wm_state->wm[wm->level];
2120                 if (crtc->active && wm->cxsr)
2121                         wm->sr = wm_state->sr[wm->level];
2122
2123                 wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
2124                 wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
2125                 wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
2126                 wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
2127         }
2128 }
2129
2130 static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
2131 {
2132         struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
2133         struct vlv_wm_values new_wm = {};
2134
2135         vlv_merge_wm(dev_priv, &new_wm);
2136
2137         if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
2138                 return;
2139
2140         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2141                 chv_set_memory_dvfs(dev_priv, false);
2142
2143         if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2144                 chv_set_memory_pm5(dev_priv, false);
2145
2146         if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
2147                 _intel_set_memory_cxsr(dev_priv, false);
2148
2149         vlv_write_wm_values(dev_priv, &new_wm);
2150
2151         if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
2152                 _intel_set_memory_cxsr(dev_priv, true);
2153
2154         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
2155                 chv_set_memory_pm5(dev_priv, true);
2156
2157         if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
2158                 chv_set_memory_dvfs(dev_priv, true);
2159
2160         *old_wm = new_wm;
2161 }
2162
2163 static void vlv_initial_watermarks(struct intel_atomic_state *state,
2164                                    struct intel_crtc_state *crtc_state)
2165 {
2166         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2167         struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
2168
2169         mutex_lock(&dev_priv->wm.wm_mutex);
2170         crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
2171         vlv_program_watermarks(dev_priv);
2172         mutex_unlock(&dev_priv->wm.wm_mutex);
2173 }
2174
2175 static void vlv_optimize_watermarks(struct intel_atomic_state *state,
2176                                     struct intel_crtc_state *crtc_state)
2177 {
2178         struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
2179         struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
2180
2181         if (!crtc_state->wm.need_postvbl_update)
2182                 return;
2183
2184         mutex_lock(&dev_priv->wm.wm_mutex);
2185         intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
2186         vlv_program_watermarks(dev_priv);
2187         mutex_unlock(&dev_priv->wm.wm_mutex);
2188 }
2189
2190 static void i965_update_wm(struct intel_crtc *unused_crtc)
2191 {
2192         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2193         struct intel_crtc *crtc;
2194         int srwm = 1;
2195         int cursor_sr = 16;
2196         bool cxsr_enabled;
2197
2198         /* Calc sr entries for single plane configs */
2199         crtc = single_enabled_crtc(dev_priv);
2200         if (crtc) {
2201                 /* self-refresh has much higher latency */
2202                 static const int sr_latency_ns = 12000;
2203                 const struct drm_display_mode *adjusted_mode =
2204                         &crtc->config->base.adjusted_mode;
2205                 const struct drm_framebuffer *fb =
2206                         crtc->base.primary->state->fb;
2207                 int clock = adjusted_mode->crtc_clock;
2208                 int htotal = adjusted_mode->crtc_htotal;
2209                 int hdisplay = crtc->config->pipe_src_w;
2210                 int cpp = fb->format->cpp[0];
2211                 int entries;
2212
2213                 entries = intel_wm_method2(clock, htotal,
2214                                            hdisplay, cpp, sr_latency_ns / 100);
2215                 entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
2216                 srwm = I965_FIFO_SIZE - entries;
2217                 if (srwm < 0)
2218                         srwm = 1;
2219                 srwm &= 0x1ff;
2220                 DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
2221                               entries, srwm);
2222
2223                 entries = intel_wm_method2(clock, htotal,
2224                                            crtc->base.cursor->state->crtc_w, 4,
2225                                            sr_latency_ns / 100);
2226                 entries = DIV_ROUND_UP(entries,
2227                                        i965_cursor_wm_info.cacheline_size) +
2228                         i965_cursor_wm_info.guard_size;
2229
2230                 cursor_sr = i965_cursor_wm_info.fifo_size - entries;
2231                 if (cursor_sr > i965_cursor_wm_info.max_wm)
2232                         cursor_sr = i965_cursor_wm_info.max_wm;
2233
2234                 DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
2235                               "cursor %d\n", srwm, cursor_sr);
2236
2237                 cxsr_enabled = true;
2238         } else {
2239                 cxsr_enabled = false;
2240                 /* Turn off self refresh if both pipes are enabled */
2241                 intel_set_memory_cxsr(dev_priv, false);
2242         }
2243
2244         DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
2245                       srwm);
2246
2247         /* 965 has limitations... */
2248         I915_WRITE(DSPFW1, FW_WM(srwm, SR) |
2249                    FW_WM(8, CURSORB) |
2250                    FW_WM(8, PLANEB) |
2251                    FW_WM(8, PLANEA));
2252         I915_WRITE(DSPFW2, FW_WM(8, CURSORA) |
2253                    FW_WM(8, PLANEC_OLD));
2254         /* update cursor SR watermark */
2255         I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR));
2256
2257         if (cxsr_enabled)
2258                 intel_set_memory_cxsr(dev_priv, true);
2259 }
2260
2261 #undef FW_WM
2262
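     /*
      * i9xx/i8xx: compute the FIFO watermarks for planes A and B from the
      * current FIFO split, plus a self-refresh watermark when only one
      * crtc is enabled and FW_BLC self-refresh is available.
      */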
2263 static void i9xx_update_wm(struct intel_crtc *unused_crtc)
2264 {
2265         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2266         const struct intel_watermark_params *wm_info;
2267         uint32_t fwater_lo;
2268         uint32_t fwater_hi;
2269         int cwm, srwm = 1;
2270         int fifo_size;
2271         int planea_wm, planeb_wm;
2272         struct intel_crtc *crtc, *enabled = NULL;
2273
2274         if (IS_I945GM(dev_priv))
2275                 wm_info = &i945_wm_info;
2276         else if (!IS_GEN2(dev_priv))
2277                 wm_info = &i915_wm_info;
2278         else
2279                 wm_info = &i830_a_wm_info;
2280
2281         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
2282         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
2283         if (intel_crtc_active(crtc)) {
2284                 const struct drm_display_mode *adjusted_mode =
2285                         &crtc->config->base.adjusted_mode;
2286                 const struct drm_framebuffer *fb =
2287                         crtc->base.primary->state->fb;
2288                 int cpp;
2289
2290                 if (IS_GEN2(dev_priv))
2291                         cpp = 4;
2292                 else
2293                         cpp = fb->format->cpp[0];
2294
2295                 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2296                                                wm_info, fifo_size, cpp,
2297                                                pessimal_latency_ns);
2298                 enabled = crtc;
2299         } else {
2300                 planea_wm = fifo_size - wm_info->guard_size;
2301                 if (planea_wm > (long)wm_info->max_wm)
2302                         planea_wm = wm_info->max_wm;
2303         }
2304
2305         if (IS_GEN2(dev_priv))
2306                 wm_info = &i830_bc_wm_info;
2307
2308         fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
2309         crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
2310         if (intel_crtc_active(crtc)) {
2311                 const struct drm_display_mode *adjusted_mode =
2312                         &crtc->config->base.adjusted_mode;
2313                 const struct drm_framebuffer *fb =
2314                         crtc->base.primary->state->fb;
2315                 int cpp;
2316
2317                 if (IS_GEN2(dev_priv))
2318                         cpp = 4;
2319                 else
2320                         cpp = fb->format->cpp[0];
2321
2322                 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2323                                                wm_info, fifo_size, cpp,
2324                                                pessimal_latency_ns);
2325                 if (enabled == NULL)
2326                         enabled = crtc;
2327                 else
2328                         enabled = NULL;
2329         } else {
2330                 planeb_wm = fifo_size - wm_info->guard_size;
2331                 if (planeb_wm > (long)wm_info->max_wm)
2332                         planeb_wm = wm_info->max_wm;
2333         }
2334
2335         DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
2336
2337         if (IS_I915GM(dev_priv) && enabled) {
2338                 struct drm_i915_gem_object *obj;
2339
2340                 obj = intel_fb_obj(enabled->base.primary->state->fb);
2341
2342                 /* self-refresh seems busted with untiled */
2343                 if (!i915_gem_object_is_tiled(obj))
2344                         enabled = NULL;
2345         }
2346
2347         /*
2348          * Overlay gets an aggressive default since video jitter is bad.
2349          */
2350         cwm = 2;
2351
2352         /* Play safe and disable self-refresh before adjusting watermarks. */
2353         intel_set_memory_cxsr(dev_priv, false);
2354
2355         /* Calc sr entries for single plane configs */
2356         if (HAS_FW_BLC(dev_priv) && enabled) {
2357                 /* self-refresh has much higher latency */
2358                 static const int sr_latency_ns = 6000;
2359                 const struct drm_display_mode *adjusted_mode =
2360                         &enabled->config->base.adjusted_mode;
2361                 const struct drm_framebuffer *fb =
2362                         enabled->base.primary->state->fb;
2363                 int clock = adjusted_mode->crtc_clock;
2364                 int htotal = adjusted_mode->crtc_htotal;
2365                 int hdisplay = enabled->config->pipe_src_w;
2366                 int cpp;
2367                 int entries;
2368
2369                 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv))
2370                         cpp = 4;
2371                 else
2372                         cpp = fb->format->cpp[0];
2373
2374                 entries = intel_wm_method2(clock, htotal, hdisplay, cpp,
2375                                            sr_latency_ns / 100);
2376                 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
2377                 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
2378                 srwm = wm_info->fifo_size - entries;
2379                 if (srwm < 0)
2380                         srwm = 1;
2381
2382                 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv))
2383                         I915_WRITE(FW_BLC_SELF,
2384                                    FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
2385                 else
2386                         I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
2387         }
2388
2389         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
2390                       planea_wm, planeb_wm, cwm, srwm);
2391
2392         fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
2393         fwater_hi = (cwm & 0x1f);
2394
2395         /* Set request length to 8 cachelines per fetch */
2396         fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
2397         fwater_hi = fwater_hi | (1 << 8);
2398
2399         I915_WRITE(FW_BLC, fwater_lo);
2400         I915_WRITE(FW_BLC2, fwater_hi);
2401
2402         if (enabled)
2403                 intel_set_memory_cxsr(dev_priv, true);
2404 }
2405
2406 static void i845_update_wm(struct intel_crtc *unused_crtc)
2407 {
2408         struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
2409         struct intel_crtc *crtc;
2410         const struct drm_display_mode *adjusted_mode;
2411         uint32_t fwater_lo;
2412         int planea_wm;
2413
2414         crtc = single_enabled_crtc(dev_priv);
2415         if (crtc == NULL)
2416                 return;
2417
2418         adjusted_mode = &crtc->config->base.adjusted_mode;
2419         planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
2420                                        &i845_wm_info,
2421                                        dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
2422                                        4, pessimal_latency_ns);
2423         fwater_lo = I915_READ(FW_BLC) & ~0xfff;
2424         fwater_lo |= (3<<8) | planea_wm;
2425
2426         DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
2427
2428         I915_WRITE(FW_BLC, fwater_lo);
2429 }
2430
2431 /* latency must be in 0.1us units. */
2432 static unsigned int ilk_wm_method1(unsigned int pixel_rate,
2433                                    unsigned int cpp,
2434                                    unsigned int latency)
2435 {
2436         unsigned int ret;
2437
2438         ret = intel_wm_method1(pixel_rate, cpp, latency);
2439         ret = DIV_ROUND_UP(ret, 64) + 2;
2440
2441         return ret;
2442 }
2443
2444 /* latency must be in 0.1us units. */
2445 static unsigned int ilk_wm_method2(unsigned int pixel_rate,
2446                                    unsigned int htotal,
2447                                    unsigned int width,
2448                                    unsigned int cpp,
2449                                    unsigned int latency)
2450 {
2451         unsigned int ret;
2452
2453         ret = intel_wm_method2(pixel_rate, htotal,
2454                                width, cpp, latency);
2455         ret = DIV_ROUND_UP(ret, 64) + 2;
2456
2457         return ret;
2458 }
2459
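     /*
      * The FBC watermark is expressed in lines rather than FIFO entries:
      * convert the primary watermark (64 byte entries) into lines of the
      * plane and add 2.
      */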
2460 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
2461                            uint8_t cpp)
2462 {
2463         /*
2464          * Neither of these should be possible since this function shouldn't be
2465          * called if the CRTC is off or the plane is invisible.  But let's be
2466          * extra paranoid to avoid a potential divide-by-zero if we screw up
2467          * elsewhere in the driver.
2468          */
2469         if (WARN_ON(!cpp))
2470                 return 0;
2471         if (WARN_ON(!horiz_pixels))
2472                 return 0;
2473
2474         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
2475 }
2476
2477 struct ilk_wm_maximums {
2478         uint16_t pri;
2479         uint16_t spr;
2480         uint16_t cur;
2481         uint16_t fbc;
2482 };
2483
2484 /*
2485  * For both WM_PIPE and WM_LP.
2486  * mem_value must be in 0.1us units.
2487  */
2488 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
2489                                    const struct intel_plane_state *pstate,
2490                                    uint32_t mem_value,
2491                                    bool is_lp)
2492 {
2493         uint32_t method1, method2;
2494         int cpp;
2495
2496         if (mem_value == 0)
2497                 return U32_MAX;
2498
2499         if (!intel_wm_plane_visible(cstate, pstate))
2500                 return 0;
2501
2502         cpp = pstate->base.fb->format->cpp[0];
2503
2504         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2505
2506         if (!is_lp)
2507                 return method1;
2508
2509         method2 = ilk_wm_method2(cstate->pixel_rate,
2510                                  cstate->base.adjusted_mode.crtc_htotal,
2511                                  drm_rect_width(&pstate->base.dst),
2512                                  cpp, mem_value);
2513
2514         return min(method1, method2);
2515 }
2516
2517 /*
2518  * For both WM_PIPE and WM_LP.
2519  * mem_value must be in 0.1us units.
2520  */
2521 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
2522                                    const struct intel_plane_state *pstate,
2523                                    uint32_t mem_value)
2524 {
2525         uint32_t method1, method2;
2526         int cpp;
2527
2528         if (mem_value == 0)
2529                 return U32_MAX;
2530
2531         if (!intel_wm_plane_visible(cstate, pstate))
2532                 return 0;
2533
2534         cpp = pstate->base.fb->format->cpp[0];
2535
2536         method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value);
2537         method2 = ilk_wm_method2(cstate->pixel_rate,
2538                                  cstate->base.adjusted_mode.crtc_htotal,
2539                                  drm_rect_width(&pstate->base.dst),
2540                                  cpp, mem_value);
2541         return min(method1, method2);
2542 }
2543
2544 /*
2545  * For both WM_PIPE and WM_LP.
2546  * mem_value must be in 0.1us units.
2547  */
2548 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
2549                                    const struct intel_plane_state *pstate,
2550                                    uint32_t mem_value)
2551 {
2552         int cpp;
2553
2554         if (mem_value == 0)
2555                 return U32_MAX;
2556
2557         if (!intel_wm_plane_visible(cstate, pstate))
2558                 return 0;
2559
2560         cpp = pstate->base.fb->format->cpp[0];
2561
2562         return ilk_wm_method2(cstate->pixel_rate,
2563                               cstate->base.adjusted_mode.crtc_htotal,
2564                               pstate->base.crtc_w, cpp, mem_value);
2565 }
2566
2567 /* Only for WM_LP. */
2568 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
2569                                    const struct intel_plane_state *pstate,
2570                                    uint32_t pri_val)
2571 {
2572         int cpp;
2573
2574         if (!intel_wm_plane_visible(cstate, pstate))
2575                 return 0;
2576
2577         cpp = pstate->base.fb->format->cpp[0];
2578
2579         return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp);
2580 }
2581
2582 static unsigned int
2583 ilk_display_fifo_size(const struct drm_i915_private *dev_priv)
2584 {
2585         if (INTEL_GEN(dev_priv) >= 8)
2586                 return 3072;
2587         else if (INTEL_GEN(dev_priv) >= 7)
2588                 return 768;
2589         else
2590                 return 512;
2591 }
2592
2593 static unsigned int
2594 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv,
2595                      int level, bool is_sprite)
2596 {
2597         if (INTEL_GEN(dev_priv) >= 8)
2598                 /* BDW primary/sprite plane watermarks */
2599                 return level == 0 ? 255 : 2047;
2600         else if (INTEL_GEN(dev_priv) >= 7)
2601                 /* IVB/HSW primary/sprite plane watermarks */
2602                 return level == 0 ? 127 : 1023;
2603         else if (!is_sprite)
2604                 /* ILK/SNB primary plane watermarks */
2605                 return level == 0 ? 127 : 511;
2606         else
2607                 /* ILK/SNB sprite plane watermarks */
2608                 return level == 0 ? 63 : 255;
2609 }
2610
2611 static unsigned int
2612 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level)
2613 {
2614         if (INTEL_GEN(dev_priv) >= 7)
2615                 return level == 0 ? 63 : 255;
2616         else
2617                 return level == 0 ? 31 : 63;
2618 }
2619
2620 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv)
2621 {
2622         if (INTEL_GEN(dev_priv) >= 8)
2623                 return 31;
2624         else
2625                 return 15;
2626 }
2627
2628 /* Calculate the maximum primary/sprite plane watermark */
2629 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
2630                                      int level,
2631                                      const struct intel_wm_config *config,
2632                                      enum intel_ddb_partitioning ddb_partitioning,
2633                                      bool is_sprite)
2634 {
2635         struct drm_i915_private *dev_priv = to_i915(dev);
2636         unsigned int fifo_size = ilk_display_fifo_size(dev_priv);
2637
2638         /* if sprites aren't enabled, sprites get nothing */
2639         if (is_sprite && !config->sprites_enabled)
2640                 return 0;
2641
2642         /* HSW allows LP1+ watermarks even with multiple pipes */
2643         if (level == 0 || config->num_pipes_active > 1) {
2644                 fifo_size /= INTEL_INFO(dev_priv)->num_pipes;
2645
2646                 /*
2647                  * For some reason the non self refresh
2648                  * FIFO size is only half of the self
2649                  * refresh FIFO size on ILK/SNB.
2650                  */
2651                 if (INTEL_GEN(dev_priv) <= 6)
2652                         fifo_size /= 2;
2653         }
2654
2655         if (config->sprites_enabled) {
2656                 /* level 0 is always calculated with 1:1 split */
2657                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
2658                         if (is_sprite)
2659                                 fifo_size *= 5;
2660                         fifo_size /= 6;
2661                 } else {
2662                         fifo_size /= 2;
2663                 }
2664         }
2665
2666         /* clamp to max that the registers can hold */
2667         return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite));
2668 }
2669
2670 /* Calculate the maximum cursor plane watermark */
2671 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
2672                                       int level,
2673                                       const struct intel_wm_config *config)
2674 {
2675         /* HSW LP1+ watermarks w/ multiple pipes */
2676         if (level > 0 && config->num_pipes_active > 1)
2677                 return 64;
2678
2679         /* otherwise just report max that registers can hold */
2680         return ilk_cursor_wm_reg_max(to_i915(dev), level);
2681 }
2682
2683 static void ilk_compute_wm_maximums(const struct drm_device *dev,
2684                                     int level,
2685                                     const struct intel_wm_config *config,
2686                                     enum intel_ddb_partitioning ddb_partitioning,
2687                                     struct ilk_wm_maximums *max)
2688 {
2689         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
2690         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
2691         max->cur = ilk_cursor_wm_max(dev, level, config);
2692         max->fbc = ilk_fbc_wm_reg_max(to_i915(dev));
2693 }
2694
2695 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv,
2696                                         int level,
2697                                         struct ilk_wm_maximums *max)
2698 {
2699         max->pri = ilk_plane_wm_reg_max(dev_priv, level, false);
2700         max->spr = ilk_plane_wm_reg_max(dev_priv, level, true);
2701         max->cur = ilk_cursor_wm_reg_max(dev_priv, level);
2702         max->fbc = ilk_fbc_wm_reg_max(dev_priv);
2703 }
2704
2705 static bool ilk_validate_wm_level(int level,
2706                                   const struct ilk_wm_maximums *max,
2707                                   struct intel_wm_level *result)
2708 {
2709         bool ret;
2710
2711         /* already determined to be invalid? */
2712         if (!result->enable)
2713                 return false;
2714
2715         result->enable = result->pri_val <= max->pri &&
2716                          result->spr_val <= max->spr &&
2717                          result->cur_val <= max->cur;
2718
2719         ret = result->enable;
2720
2721         /*
2722          * HACK until we can pre-compute everything,
2723          * and thus fail gracefully if LP0 watermarks
2724          * are exceeded...
2725          */
2726         if (level == 0 && !result->enable) {
2727                 if (result->pri_val > max->pri)
2728                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
2729                                       level, result->pri_val, max->pri);
2730                 if (result->spr_val > max->spr)
2731                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
2732                                       level, result->spr_val, max->spr);
2733                 if (result->cur_val > max->cur)
2734                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
2735                                       level, result->cur_val, max->cur);
2736
2737                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
2738                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
2739                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
2740                 result->enable = true;
2741         }
2742
2743         return ret;
2744 }
2745
2746 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
2747                                  const struct intel_crtc *intel_crtc,
2748                                  int level,
2749                                  struct intel_crtc_state *cstate,
2750                                  const struct intel_plane_state *pristate,
2751                                  const struct intel_plane_state *sprstate,
2752                                  const struct intel_plane_state *curstate,
2753                                  struct intel_wm_level *result)
2754 {
2755         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
2756         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
2757         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
2758
2759         /* WM1+ latency values stored in 0.5us units */
2760         if (level > 0) {
2761                 pri_latency *= 5;
2762                 spr_latency *= 5;
2763                 cur_latency *= 5;
2764         }
2765
2766         if (pristate) {
2767                 result->pri_val = ilk_compute_pri_wm(cstate, pristate,
2768                                                      pri_latency, level);
2769                 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
2770         }
2771
2772         if (sprstate)
2773                 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
2774
2775         if (curstate)
2776                 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
2777
2778         result->enable = true;
2779 }
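
/*
 * Note on units (inferred from the conversion above and from
 * intel_print_wm_latency()): on the ILK-style paths, WM0 latencies are
 * kept in 0.1us units while the raw WM1+ values are in 0.5us units, so
 * multiplying by 5 brings every level onto the same 0.1us scale before
 * the watermarks are computed.
 */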
2780
2781 static uint32_t
2782 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
2783 {
2784         const struct intel_atomic_state *intel_state =
2785                 to_intel_atomic_state(cstate->base.state);
2786         const struct drm_display_mode *adjusted_mode =
2787                 &cstate->base.adjusted_mode;
2788         u32 linetime, ips_linetime;
2789
2790         if (!cstate->base.active)
2791                 return 0;
2792         if (WARN_ON(adjusted_mode->crtc_clock == 0))
2793                 return 0;
2794         if (WARN_ON(intel_state->cdclk.logical.cdclk == 0))
2795                 return 0;
2796
2797         /* The watermarks are computed based on how long it takes to fill
2798          * a single row at the given clock rate, multiplied by 8.
2799          */
2800         linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2801                                      adjusted_mode->crtc_clock);
2802         ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8,
2803                                          intel_state->cdclk.logical.cdclk);
2804
2805         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
2806                PIPE_WM_LINETIME_TIME(linetime);
2807 }
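
/*
 * Worked example with illustrative numbers: for a 1080p mode with
 * crtc_htotal = 2200 and crtc_clock = 148500 (kHz),
 * linetime = DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119, i.e. the
 * line time in 1/8 us units (~14.8 us per line). ips_linetime is the same
 * calculation using the logical CD clock instead of the pixel clock.
 */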
2808
2809 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
2810                                   uint16_t wm[8])
2811 {
2812         if (INTEL_GEN(dev_priv) >= 9) {
2813                 uint32_t val;
2814                 int ret, i;
2815                 int level, max_level = ilk_wm_max_level(dev_priv);
2816
2817                 /* read the first set of memory latencies[0:3] */
2818                 val = 0; /* data0 to be programmed to 0 for first set */
2819                 mutex_lock(&dev_priv->pcu_lock);
2820                 ret = sandybridge_pcode_read(dev_priv,
2821                                              GEN9_PCODE_READ_MEM_LATENCY,
2822                                              &val);
2823                 mutex_unlock(&dev_priv->pcu_lock);
2824
2825                 if (ret) {
2826                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2827                         return;
2828                 }
2829
2830                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2831                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2832                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2833                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2834                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2835                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2836                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2837
2838                 /* read the second set of memory latencies[4:7] */
2839                 val = 1; /* data0 to be programmed to 1 for second set */
2840                 mutex_lock(&dev_priv->pcu_lock);
2841                 ret = sandybridge_pcode_read(dev_priv,
2842                                              GEN9_PCODE_READ_MEM_LATENCY,
2843                                              &val);
2844                 mutex_unlock(&dev_priv->pcu_lock);
2845                 if (ret) {
2846                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
2847                         return;
2848                 }
2849
2850                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
2851                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
2852                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2853                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
2854                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2855                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
2856                                 GEN9_MEM_LATENCY_LEVEL_MASK;
2857
2858                 /*
2859                  * If a level n (n >= 1) has a 0us latency, all levels m (m >= n)
2860                  * need to be disabled. We make sure to sanitize the values out
2861                  * of the punit to satisfy this requirement.
2862                  */
2863                 for (level = 1; level <= max_level; level++) {
2864                         if (wm[level] == 0) {
2865                                 for (i = level + 1; i <= max_level; i++)
2866                                         wm[i] = 0;
2867                                 break;
2868                         }
2869                 }
2870
2871                 /*
2872                  * WaWmMemoryReadLatency:skl+,glk
2873                  *
2874                  * punit doesn't take into account the read latency so we need
2875                  * to add 2us to the various latency levels we retrieve from the
2876                  * punit when the level 0 response data is 0us.
2877                  */
2878                 if (wm[0] == 0) {
2879                         wm[0] += 2;
2880                         for (level = 1; level <= max_level; level++) {
2881                                 if (wm[level] == 0)
2882                                         break;
2883                                 wm[level] += 2;
2884                         }
2885                 }
2886
2887                 /*
2888                  * WA Level-0 adjustment for 16GB DIMMs: SKL+
2889                  * If we could not get the DIMM info, assume 16GB DIMMs and
2890                  * enable this WA anyway so that we do not risk any
2891                  * underruns.
2892                  */
2893                 if (dev_priv->dram_info.is_16gb_dimm)
2894                         wm[0] += 1;
2895
2896         } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
2897                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
2898
2899                 wm[0] = (sskpd >> 56) & 0xFF;
2900                 if (wm[0] == 0)
2901                         wm[0] = sskpd & 0xF;
2902                 wm[1] = (sskpd >> 4) & 0xFF;
2903                 wm[2] = (sskpd >> 12) & 0xFF;
2904                 wm[3] = (sskpd >> 20) & 0x1FF;
2905                 wm[4] = (sskpd >> 32) & 0x1FF;
2906         } else if (INTEL_GEN(dev_priv) >= 6) {
2907                 uint32_t sskpd = I915_READ(MCH_SSKPD);
2908
2909                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2910                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2911                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2912                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2913         } else if (INTEL_GEN(dev_priv) >= 5) {
2914                 uint32_t mltr = I915_READ(MLTR_ILK);
2915
2916                 /* ILK primary LP0 latency is 700 ns */
2917                 wm[0] = 7;
2918                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2919                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2920         } else {
2921                 MISSING_CASE(INTEL_DEVID(dev_priv));
2922         }
2923 }
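
/*
 * Illustrative unpacking of a GEN9 mailbox response (hypothetical value,
 * assuming the usual 8-bit latency fields): if the first read returned
 * val = 0x0d0a0704, the code above would yield wm[0] = 4, wm[1] = 7,
 * wm[2] = 10 and wm[3] = 13 microseconds; the second read fills wm[4..7]
 * the same way.
 */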
2924
2925 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv,
2926                                        uint16_t wm[5])
2927 {
2928         /* ILK sprite LP0 latency is 1300 ns */
2929         if (IS_GEN5(dev_priv))
2930                 wm[0] = 13;
2931 }
2932
2933 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv,
2934                                        uint16_t wm[5])
2935 {
2936         /* ILK cursor LP0 latency is 1300 ns */
2937         if (IS_GEN5(dev_priv))
2938                 wm[0] = 13;
2939 }
2940
2941 int ilk_wm_max_level(const struct drm_i915_private *dev_priv)
2942 {
2943         /* how many WM levels are we expecting */
2944         if (INTEL_GEN(dev_priv) >= 9)
2945                 return 7;
2946         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
2947                 return 4;
2948         else if (INTEL_GEN(dev_priv) >= 6)
2949                 return 3;
2950         else
2951                 return 2;
2952 }
2953
2954 static void intel_print_wm_latency(struct drm_i915_private *dev_priv,
2955                                    const char *name,
2956                                    const uint16_t wm[8])
2957 {
2958         int level, max_level = ilk_wm_max_level(dev_priv);
2959
2960         for (level = 0; level <= max_level; level++) {
2961                 unsigned int latency = wm[level];
2962
2963                 if (latency == 0) {
2964                         DRM_DEBUG_KMS("%s WM%d latency not provided\n",
2965                                       name, level);
2966                         continue;
2967                 }
2968
2969                 /*
2970                  * - latencies are in us on gen9.
2971                  * - before then, WM1+ latency values are in 0.5us units
2972                  */
2973                 if (INTEL_GEN(dev_priv) >= 9)
2974                         latency *= 10;
2975                 else if (level > 0)
2976                         latency *= 5;
2977
2978                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2979                               name, level, wm[level],
2980                               latency / 10, latency % 10);
2981         }
2982 }
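
/*
 * Example of the unit handling above (illustrative raw values): a WM1
 * value of 4 on SNB is in 0.5us units, so latency becomes 4 * 5 = 20
 * tenths of a microsecond and prints as "2.0 usec"; on gen9 the same raw
 * value is already in microseconds and prints as "4.0 usec".
 */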
2983
2984 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
2985                                     uint16_t wm[5], uint16_t min)
2986 {
2987         int level, max_level = ilk_wm_max_level(dev_priv);
2988
2989         if (wm[0] >= min)
2990                 return false;
2991
2992         wm[0] = max(wm[0], min);
2993         for (level = 1; level <= max_level; level++)
2994                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
2995
2996         return true;
2997 }
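
/*
 * Example (illustrative BIOS value): with min = 12 (1.2us in 0.1us units)
 * and a BIOS-provided wm[0] of 7 (0.7us), wm[0] is raised to 12 and every
 * WM1+ value is raised to at least DIV_ROUND_UP(12, 5) = 3, i.e. 1.5us in
 * the 0.5us units used for the higher levels.
 */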
2998
2999 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
3000 {
3001         bool changed;
3002
3003         /*
3004          * The BIOS provided WM memory latency values are often
3005          * inadequate for high resolution displays. Adjust them.
3006          */
3007         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
3008                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
3009                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
3010
3011         if (!changed)
3012                 return;
3013
3014         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
3015         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3016         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3017         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3018 }
3019
3020 static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
3021 {
3022         /*
3023          * On some SNB machines (Thinkpad X220 Tablet at least)
3024          * LP3 usage can cause vblank interrupts to be lost.
3025          * The DEIIR bit will go high but it looks like the CPU
3026          * never gets interrupted.
3027          *
3028          * It's not clear whether other interrupt sources could
3029          * be affected or if this is somehow limited to vblank
3030          * interrupts only. To play it safe we disable LP3
3031          * watermarks entirely.
3032          */
3033         if (dev_priv->wm.pri_latency[3] == 0 &&
3034             dev_priv->wm.spr_latency[3] == 0 &&
3035             dev_priv->wm.cur_latency[3] == 0)
3036                 return;
3037
3038         dev_priv->wm.pri_latency[3] = 0;
3039         dev_priv->wm.spr_latency[3] = 0;
3040         dev_priv->wm.cur_latency[3] = 0;
3041
3042         DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
3043         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3044         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3045         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3046 }
3047
3048 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
3049 {
3050         intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
3051
3052         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
3053                sizeof(dev_priv->wm.pri_latency));
3054         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
3055                sizeof(dev_priv->wm.pri_latency));
3056
3057         intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency);
3058         intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency);
3059
3060         intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
3061         intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
3062         intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
3063
3064         if (IS_GEN6(dev_priv)) {
3065                 snb_wm_latency_quirk(dev_priv);
3066                 snb_wm_lp3_irq_quirk(dev_priv);
3067         }
3068 }
3069
3070 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
3071 {
3072         intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency);
3073         intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency);
3074 }
3075
3076 static bool ilk_validate_pipe_wm(struct drm_device *dev,
3077                                  struct intel_pipe_wm *pipe_wm)
3078 {
3079         /* LP0 watermark maximums depend on this pipe alone */
3080         const struct intel_wm_config config = {
3081                 .num_pipes_active = 1,
3082                 .sprites_enabled = pipe_wm->sprites_enabled,
3083                 .sprites_scaled = pipe_wm->sprites_scaled,
3084         };
3085         struct ilk_wm_maximums max;
3086
3087         /* LP0 watermarks always use 1/2 DDB partitioning */
3088         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
3089
3090         /* At least LP0 must be valid */
3091         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) {
3092                 DRM_DEBUG_KMS("LP0 watermark invalid\n");
3093                 return false;
3094         }
3095
3096         return true;
3097 }
3098
3099 /* Compute new watermarks for the pipe */
3100 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate)
3101 {
3102         struct drm_atomic_state *state = cstate->base.state;
3103         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
3104         struct intel_pipe_wm *pipe_wm;
3105         struct drm_device *dev = state->dev;
3106         const struct drm_i915_private *dev_priv = to_i915(dev);
3107         struct drm_plane *plane;
3108         const struct drm_plane_state *plane_state;
3109         const struct intel_plane_state *pristate = NULL;
3110         const struct intel_plane_state *sprstate = NULL;
3111         const struct intel_plane_state *curstate = NULL;
3112         int level, max_level = ilk_wm_max_level(dev_priv), usable_level;
3113         struct ilk_wm_maximums max;
3114
3115         pipe_wm = &cstate->wm.ilk.optimal;
3116
3117         drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &cstate->base) {
3118                 const struct intel_plane_state *ps = to_intel_plane_state(plane_state);
3119
3120                 if (plane->type == DRM_PLANE_TYPE_PRIMARY)
3121                         pristate = ps;
3122                 else if (plane->type == DRM_PLANE_TYPE_OVERLAY)
3123                         sprstate = ps;
3124                 else if (plane->type == DRM_PLANE_TYPE_CURSOR)
3125                         curstate = ps;
3126         }
3127
3128         pipe_wm->pipe_enabled = cstate->base.active;
3129         if (sprstate) {
3130                 pipe_wm->sprites_enabled = sprstate->base.visible;
3131                 pipe_wm->sprites_scaled = sprstate->base.visible &&
3132                         (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 ||
3133                          drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16);
3134         }
3135
3136         usable_level = max_level;
3137
3138         /* ILK/SNB: LP2+ watermarks only w/o sprites */
3139         if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled)
3140                 usable_level = 1;
3141
3142         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
3143         if (pipe_wm->sprites_scaled)
3144                 usable_level = 0;
3145
3146         memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm));
3147         ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
3148                              pristate, sprstate, curstate, &pipe_wm->wm[0]);
3149
3150         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3151                 pipe_wm->linetime = hsw_compute_linetime_wm(cstate);
3152
3153         if (!ilk_validate_pipe_wm(dev, pipe_wm))
3154                 return -EINVAL;
3155
3156         ilk_compute_wm_reg_maximums(dev_priv, 1, &max);
3157
3158         for (level = 1; level <= usable_level; level++) {
3159                 struct intel_wm_level *wm = &pipe_wm->wm[level];
3160
3161                 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
3162                                      pristate, sprstate, curstate, wm);
3163
3164                 /*
3165                  * Disable any watermark level that exceeds the
3166                  * register maximums since such watermarks are
3167                  * always invalid.
3168                  */
3169                 if (!ilk_validate_wm_level(level, &max, wm)) {
3170                         memset(wm, 0, sizeof(*wm));
3171                         break;
3172                 }
3173         }
3174
3175         return 0;
3176 }
3177
3178 /*
3179  * Build a set of 'intermediate' watermark values that satisfy both the old
3180  * state and the new state.  These can be programmed to the hardware
3181  * immediately.
3182  */
3183 static int ilk_compute_intermediate_wm(struct drm_device *dev,
3184                                        struct intel_crtc *intel_crtc,
3185                                        struct intel_crtc_state *newstate)
3186 {
3187         struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate;
3188         struct intel_atomic_state *intel_state =
3189                 to_intel_atomic_state(newstate->base.state);
3190         const struct intel_crtc_state *oldstate =
3191                 intel_atomic_get_old_crtc_state(intel_state, intel_crtc);
3192         const struct intel_pipe_wm *b = &oldstate->wm.ilk.optimal;
3193         int level, max_level = ilk_wm_max_level(to_i915(dev));
3194
3195         /*
3196          * Start with the final, target watermarks, then combine with the
3197          * currently active watermarks to get values that are safe both before
3198          * and after the vblank.
3199          */
3200         *a = newstate->wm.ilk.optimal;
3201         if (!newstate->base.active || drm_atomic_crtc_needs_modeset(&newstate->base))
3202                 return 0;
3203
3204         a->pipe_enabled |= b->pipe_enabled;
3205         a->sprites_enabled |= b->sprites_enabled;
3206         a->sprites_scaled |= b->sprites_scaled;
3207
3208         for (level = 0; level <= max_level; level++) {
3209                 struct intel_wm_level *a_wm = &a->wm[level];
3210                 const struct intel_wm_level *b_wm = &b->wm[level];
3211
3212                 a_wm->enable &= b_wm->enable;
3213                 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val);
3214                 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val);
3215                 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val);
3216                 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val);
3217         }
3218
3219         /*
3220          * We need to make sure that these merged watermark values are
3221          * actually a valid configuration themselves.  If they're not,
3222          * there's no safe way to transition from the old state to
3223          * the new state, so we need to fail the atomic transaction.
3224          */
3225         if (!ilk_validate_pipe_wm(dev, a))
3226                 return -EINVAL;
3227
3228         /*
3229          * If our intermediate WMs are identical to the final WMs, then we can
3230          * omit the post-vblank programming; only update if it's different.
3231          */
3232         if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0)
3233                 newstate->wm.need_postvbl_update = true;
3234
3235         return 0;
3236 }
3237
3238 /*
3239  * Merge the watermarks from all active pipes for a specific level.
3240  */
3241 static void ilk_merge_wm_level(struct drm_device *dev,
3242                                int level,
3243                                struct intel_wm_level *ret_wm)
3244 {
3245         const struct intel_crtc *intel_crtc;
3246
3247         ret_wm->enable = true;
3248
3249         for_each_intel_crtc(dev, intel_crtc) {
3250                 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk;
3251                 const struct intel_wm_level *wm = &active->wm[level];
3252
3253                 if (!active->pipe_enabled)
3254                         continue;
3255
3256                 /*
3257                  * The watermark values may have been used in the past,
3258                  * so we must maintain them in the registers for some
3259                  * time even if the level is now disabled.
3260                  */
3261                 if (!wm->enable)
3262                         ret_wm->enable = false;
3263
3264                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
3265                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
3266                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
3267                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
3268         }
3269 }
3270
3271 /*
3272  * Merge all low power watermarks for all active pipes.
3273  */
3274 static void ilk_wm_merge(struct drm_device *dev,
3275                          const struct intel_wm_config *config,
3276                          const struct ilk_wm_maximums *max,
3277                          struct intel_pipe_wm *merged)
3278 {
3279         struct drm_i915_private *dev_priv = to_i915(dev);
3280         int level, max_level = ilk_wm_max_level(dev_priv);
3281         int last_enabled_level = max_level;
3282
3283         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
3284         if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) &&
3285             config->num_pipes_active > 1)
3286                 last_enabled_level = 0;
3287
3288         /* ILK: FBC WM must be disabled always */
3289         merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6;
3290
3291         /* merge each WM1+ level */
3292         for (level = 1; level <= max_level; level++) {
3293                 struct intel_wm_level *wm = &merged->wm[level];
3294
3295                 ilk_merge_wm_level(dev, level, wm);
3296
3297                 if (level > last_enabled_level)
3298                         wm->enable = false;
3299                 else if (!ilk_validate_wm_level(level, max, wm))
3300                         /* make sure all following levels get disabled */
3301                         last_enabled_level = level - 1;
3302
3303                 /*
3304                  * The spec says it is preferred to disable
3305                  * FBC WMs instead of disabling a WM level.
3306                  */
3307                 if (wm->fbc_val > max->fbc) {
3308                         if (wm->enable)
3309                                 merged->fbc_wm_enabled = false;
3310                         wm->fbc_val = 0;
3311                 }
3312         }
3313
3314         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
3315         /*
3316          * FIXME this is racy. FBC might get enabled later.
3317          * What we should check here is whether FBC can be
3318          * enabled sometime later.
3319          */
3320         if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled &&
3321             intel_fbc_is_active(dev_priv)) {
3322                 for (level = 2; level <= max_level; level++) {
3323                         struct intel_wm_level *wm = &merged->wm[level];
3324
3325                         wm->enable = false;
3326                 }
3327         }
3328 }
3329
3330 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
3331 {
3332         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
3333         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
3334 }
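
/*
 * Example: on HSW/BDW with five levels, if WM4 (pipe_wm->wm[4]) is enabled
 * the LP1/LP2/LP3 registers map to levels 1/3/4; otherwise they map to
 * levels 1/2/3, matching the comment above.
 */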
3335
3336 /* The value we need to program into the WM_LPx latency field */
3337 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
3338 {
3339         struct drm_i915_private *dev_priv = to_i915(dev);
3340
3341         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
3342                 return 2 * level;
3343         else
3344                 return dev_priv->wm.pri_latency[level];
3345 }
3346
3347 static void ilk_compute_wm_results(struct drm_device *dev,
3348                                    const struct intel_pipe_wm *merged,
3349                                    enum intel_ddb_partitioning partitioning,
3350                                    struct ilk_wm_values *results)
3351 {
3352         struct drm_i915_private *dev_priv = to_i915(dev);
3353         struct intel_crtc *intel_crtc;
3354         int level, wm_lp;
3355
3356         results->enable_fbc_wm = merged->fbc_wm_enabled;
3357         results->partitioning = partitioning;
3358
3359         /* LP1+ register values */
3360         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3361                 const struct intel_wm_level *r;
3362
3363                 level = ilk_wm_lp_to_level(wm_lp, merged);
3364
3365                 r = &merged->wm[level];
3366
3367                 /*
3368                  * Maintain the watermark values even if the level is
3369                  * disabled. Doing otherwise could cause underruns.
3370                  */
3371                 results->wm_lp[wm_lp - 1] =
3372                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
3373                         (r->pri_val << WM1_LP_SR_SHIFT) |
3374                         r->cur_val;
3375
3376                 if (r->enable)
3377                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
3378
3379                 if (INTEL_GEN(dev_priv) >= 8)
3380                         results->wm_lp[wm_lp - 1] |=
3381                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
3382                 else
3383                         results->wm_lp[wm_lp - 1] |=
3384                                 r->fbc_val << WM1_LP_FBC_SHIFT;
3385
3386                 /*
3387                  * Always set WM1S_LP_EN when spr_val != 0, even if the
3388                  * level is disabled. Doing otherwise could cause underruns.
3389                  */
3390                 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) {
3391                         WARN_ON(wm_lp != 1);
3392                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
3393                 } else
3394                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
3395         }
3396
3397         /* LP0 register values */
3398         for_each_intel_crtc(dev, intel_crtc) {
3399                 enum pipe pipe = intel_crtc->pipe;
3400                 const struct intel_wm_level *r =
3401                         &intel_crtc->wm.active.ilk.wm[0];
3402
3403                 if (WARN_ON(!r->enable))
3404                         continue;
3405
3406                 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime;
3407
3408                 results->wm_pipe[pipe] =
3409                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
3410                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
3411                         r->cur_val;
3412         }
3413 }
3414
3415 /* Find the result with the highest level enabled. If both have the same
3416  * highest level, prefer the one with FBC WM enabled; prefer r1 if otherwise tied. */
3417 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
3418                                                   struct intel_pipe_wm *r1,
3419                                                   struct intel_pipe_wm *r2)
3420 {
3421         int level, max_level = ilk_wm_max_level(to_i915(dev));
3422         int level1 = 0, level2 = 0;
3423
3424         for (level = 1; level <= max_level; level++) {
3425                 if (r1->wm[level].enable)
3426                         level1 = level;
3427                 if (r2->wm[level].enable)
3428                         level2 = level;
3429         }
3430
3431         if (level1 == level2) {
3432                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
3433                         return r2;
3434                 else
3435                         return r1;
3436         } else if (level1 > level2) {
3437                 return r1;
3438         } else {
3439                 return r2;
3440         }
3441 }
3442
3443 /* dirty bits used to track which watermarks need changes */
3444 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
3445 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
3446 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
3447 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
3448 #define WM_DIRTY_FBC (1 << 24)
3449 #define WM_DIRTY_DDB (1 << 25)
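
/*
 * Resulting bit layout (for reference): bits 0-7 mark per-pipe WM0 values
 * dirty, bits 8-15 per-pipe linetime, bits 16-18 the LP1-LP3 registers,
 * bit 24 the FBC WM enable and bit 25 the DDB partitioning mode.
 */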
3450
3451 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
3452                                          const struct ilk_wm_values *old,
3453                                          const struct ilk_wm_values *new)
3454 {
3455         unsigned int dirty = 0;
3456         enum pipe pipe;
3457         int wm_lp;
3458
3459         for_each_pipe(dev_priv, pipe) {
3460                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
3461                         dirty |= WM_DIRTY_LINETIME(pipe);
3462                         /* Must disable LP1+ watermarks too */
3463                         dirty |= WM_DIRTY_LP_ALL;
3464                 }
3465
3466                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
3467                         dirty |= WM_DIRTY_PIPE(pipe);
3468                         /* Must disable LP1+ watermarks too */
3469                         dirty |= WM_DIRTY_LP_ALL;
3470                 }
3471         }
3472
3473         if (old->enable_fbc_wm != new->enable_fbc_wm) {
3474                 dirty |= WM_DIRTY_FBC;
3475                 /* Must disable LP1+ watermarks too */
3476                 dirty |= WM_DIRTY_LP_ALL;
3477         }
3478
3479         if (old->partitioning != new->partitioning) {
3480                 dirty |= WM_DIRTY_DDB;
3481                 /* Must disable LP1+ watermarks too */
3482                 dirty |= WM_DIRTY_LP_ALL;
3483         }
3484
3485         /* LP1+ watermarks already deemed dirty, no need to continue */
3486         if (dirty & WM_DIRTY_LP_ALL)
3487                 return dirty;
3488
3489         /* Find the lowest numbered LP1+ watermark in need of an update... */
3490         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
3491                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
3492                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
3493                         break;
3494         }
3495
3496         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
3497         for (; wm_lp <= 3; wm_lp++)
3498                 dirty |= WM_DIRTY_LP(wm_lp);
3499
3500         return dirty;
3501 }
3502
3503 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
3504                                unsigned int dirty)
3505 {
3506         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3507         bool changed = false;
3508
3509         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
3510                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
3511                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
3512                 changed = true;
3513         }
3514         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
3515                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
3516                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
3517                 changed = true;
3518         }
3519         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
3520                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
3521                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
3522                 changed = true;
3523         }
3524
3525         /*
3526          * Don't touch WM1S_LP_EN here.
3527          * Doing so could cause underruns.
3528          */
3529
3530         return changed;
3531 }
3532
3533 /*
3534  * The spec says we shouldn't write when we don't need to, because every write
3535  * causes WMs to be re-evaluated, expending some power.
3536  */
3537 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
3538                                 struct ilk_wm_values *results)
3539 {
3540         struct ilk_wm_values *previous = &dev_priv->wm.hw;
3541         unsigned int dirty;
3542         uint32_t val;
3543
3544         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
3545         if (!dirty)
3546                 return;
3547
3548         _ilk_disable_lp_wm(dev_priv, dirty);
3549
3550         if (dirty & WM_DIRTY_PIPE(PIPE_A))
3551                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
3552         if (dirty & WM_DIRTY_PIPE(PIPE_B))
3553                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
3554         if (dirty & WM_DIRTY_PIPE(PIPE_C))
3555                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
3556
3557         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
3558                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
3559         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
3560                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
3561         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
3562                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
3563
3564         if (dirty & WM_DIRTY_DDB) {
3565                 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
3566                         val = I915_READ(WM_MISC);
3567                         if (results->partitioning == INTEL_DDB_PART_1_2)
3568                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
3569                         else
3570                                 val |= WM_MISC_DATA_PARTITION_5_6;
3571                         I915_WRITE(WM_MISC, val);
3572                 } else {
3573                         val = I915_READ(DISP_ARB_CTL2);
3574                         if (results->partitioning == INTEL_DDB_PART_1_2)
3575                                 val &= ~DISP_DATA_PARTITION_5_6;
3576                         else
3577                                 val |= DISP_DATA_PARTITION_5_6;
3578                         I915_WRITE(DISP_ARB_CTL2, val);
3579                 }
3580         }
3581
3582         if (dirty & WM_DIRTY_FBC) {
3583                 val = I915_READ(DISP_ARB_CTL);
3584                 if (results->enable_fbc_wm)
3585                         val &= ~DISP_FBC_WM_DIS;
3586                 else
3587                         val |= DISP_FBC_WM_DIS;
3588                 I915_WRITE(DISP_ARB_CTL, val);
3589         }
3590
3591         if (dirty & WM_DIRTY_LP(1) &&
3592             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
3593                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
3594
3595         if (INTEL_GEN(dev_priv) >= 7) {
3596                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
3597                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
3598                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
3599                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
3600         }
3601
3602         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
3603                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
3604         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
3605                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
3606         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
3607                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
3608
3609         dev_priv->wm.hw = *results;
3610 }
3611
3612 bool ilk_disable_lp_wm(struct drm_device *dev)
3613 {
3614         struct drm_i915_private *dev_priv = to_i915(dev);
3615
3616         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
3617 }
3618
3619 static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
3620 {
3621         u8 enabled_slices;
3622
3623         /* Slice 1 will always be enabled */
3624         enabled_slices = 1;
3625
3626         /* Gens prior to GEN11 have only one DBuf slice */
3627         if (INTEL_GEN(dev_priv) < 11)
3628                 return enabled_slices;
3629
3630         if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
3631                 enabled_slices++;
3632
3633         return enabled_slices;
3634 }
3635
3636 /*
3637  * FIXME: We still don't have the proper code to detect if we need to apply the WA,
3638  * so assume we'll always need it in order to avoid underruns.
3639  */
3640 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
3641 {
3642         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
3643
3644         if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv))
3645                 return true;
3646
3647         return false;
3648 }
3649
3650 static bool
3651 intel_has_sagv(struct drm_i915_private *dev_priv)
3652 {
3653         if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
3654             IS_CANNONLAKE(dev_priv))
3655                 return true;
3656
3657         if (IS_SKYLAKE(dev_priv) &&
3658             dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED)
3659                 return true;
3660
3661         return false;
3662 }
3663
3664 /*
3665  * SAGV dynamically adjusts the system agent voltage and clock frequencies
3666  * depending on power and performance requirements. The display engine access
3667  * to system memory is blocked during the adjustment time. Because of the
3668  * blocking time, having this enabled can cause full system hangs and/or pipe
3669  * underruns if we don't meet all of the following requirements:
3670  *
3671  *  - <= 1 pipe enabled
3672  *  - All planes can enable watermarks for latencies >= SAGV engine block time
3673  *  - We're not using an interlaced display configuration
3674  */
3675 int
3676 intel_enable_sagv(struct drm_i915_private *dev_priv)
3677 {
3678         int ret;
3679
3680         if (!intel_has_sagv(dev_priv))
3681                 return 0;
3682
3683         if (dev_priv->sagv_status == I915_SAGV_ENABLED)
3684                 return 0;
3685
3686         DRM_DEBUG_KMS("Enabling the SAGV\n");
3687         mutex_lock(&dev_priv->pcu_lock);
3688
3689         ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3690                                       GEN9_SAGV_ENABLE);
3691
3692         /* We don't need to wait for the SAGV when enabling */
3693         mutex_unlock(&dev_priv->pcu_lock);
3694
3695         /*
3696          * Some skl systems, pre-release machines in particular,
3697          * don't actually have an SAGV.
3698          */
3699         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3700                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3701                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3702                 return 0;
3703         } else if (ret < 0) {
3704                 DRM_ERROR("Failed to enable the SAGV\n");
3705                 return ret;
3706         }
3707
3708         dev_priv->sagv_status = I915_SAGV_ENABLED;
3709         return 0;
3710 }
3711
3712 int
3713 intel_disable_sagv(struct drm_i915_private *dev_priv)
3714 {
3715         int ret;
3716
3717         if (!intel_has_sagv(dev_priv))
3718                 return 0;
3719
3720         if (dev_priv->sagv_status == I915_SAGV_DISABLED)
3721                 return 0;
3722
3723         DRM_DEBUG_KMS("Disabling the SAGV\n");
3724         mutex_lock(&dev_priv->pcu_lock);
3725
3726         /* bspec says to keep retrying for at least 1 ms */
3727         ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
3728                                 GEN9_SAGV_DISABLE,
3729                                 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
3730                                 1);
3731         mutex_unlock(&dev_priv->pcu_lock);
3732
3733         /*
3734          * Some skl systems, pre-release machines in particular,
3735          * don't actually have an SAGV.
3736          */
3737         if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) {
3738                 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
3739                 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
3740                 return 0;
3741         } else if (ret < 0) {
3742                 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret);
3743                 return ret;
3744         }
3745
3746         dev_priv->sagv_status = I915_SAGV_DISABLED;
3747         return 0;
3748 }
3749
3750 bool intel_can_enable_sagv(struct drm_atomic_state *state)
3751 {
3752         struct drm_device *dev = state->dev;
3753         struct drm_i915_private *dev_priv = to_i915(dev);
3754         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3755         struct intel_crtc *crtc;
3756         struct intel_plane *plane;
3757         struct intel_crtc_state *cstate;
3758         enum pipe pipe;
3759         int level, latency;
3760         int sagv_block_time_us;
3761
3762         if (!intel_has_sagv(dev_priv))
3763                 return false;
3764
3765         if (IS_GEN9(dev_priv))
3766                 sagv_block_time_us = 30;
3767         else if (IS_GEN10(dev_priv))
3768                 sagv_block_time_us = 20;
3769         else
3770                 sagv_block_time_us = 10;
3771
3772         /*
3773          * SKL+ workaround: bspec recommends we disable the SAGV when we have
3774          * more than one pipe enabled
3775          *
3776          * If there are no active CRTCs, no additional checks need be performed
3777          */
3778         if (hweight32(intel_state->active_crtcs) == 0)
3779                 return true;
3780         else if (hweight32(intel_state->active_crtcs) > 1)
3781                 return false;
3782
3783         /* Since we're now guaranteed to only have one active CRTC... */
3784         pipe = ffs(intel_state->active_crtcs) - 1;
3785         crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
3786         cstate = to_intel_crtc_state(crtc->base.state);
3787
3788         if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE)
3789                 return false;
3790
3791         for_each_intel_plane_on_crtc(dev, crtc, plane) {
3792                 struct skl_plane_wm *wm =
3793                         &cstate->wm.skl.optimal.planes[plane->id];
3794
3795                 /* Skip this plane if it's not enabled */
3796                 if (!wm->wm[0].plane_en)
3797                         continue;
3798
3799                 /* Find the highest enabled wm level for this plane */
3800                 for (level = ilk_wm_max_level(dev_priv);
3801                      !wm->wm[level].plane_en; --level)
3802                      { }
3803
3804                 latency = dev_priv->wm.skl_latency[level];
3805
3806                 if (skl_needs_memory_bw_wa(intel_state) &&
3807                     plane->base.state->fb->modifier ==
3808                     I915_FORMAT_MOD_X_TILED)
3809                         latency += 15;
3810
3811                 /*
3812                  * If any plane on this pipe doesn't enable a wm level that
3813                  * covers a memory latency of at least sagv_block_time_us, we
3814                  * can't enable the SAGV.
3815                  */
3816                 if (latency < sagv_block_time_us)
3817                         return false;
3818         }
3819
3820         return true;
3821 }
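
/*
 * Example (illustrative latencies): on a gen9 part sagv_block_time_us is
 * 30, so a plane whose highest enabled watermark level covers only a 20us
 * latency blocks SAGV (ignoring the +15us X-tiled adjustment), while one
 * covering 45us allows it.
 */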
3822
3823 static u16 intel_get_ddb_size(struct drm_i915_private *dev_priv,
3824                               const struct intel_crtc_state *cstate,
3825                               const unsigned int total_data_rate,
3826                               const int num_active,
3827                               struct skl_ddb_allocation *ddb)
3828 {
3829         const struct drm_display_mode *adjusted_mode;
3830         u64 total_data_bw;
3831         u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
3832
3833         WARN_ON(ddb_size == 0);
3834
3835         if (INTEL_GEN(dev_priv) < 11)
3836                 return ddb_size - 4; /* 4 blocks for bypass path allocation */
3837
3838         adjusted_mode = &cstate->base.adjusted_mode;
3839         total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode);
3840
3841         /*
3842          * 12GB/s is maximum BW supported by single DBuf slice.
3843          */
3844         if (total_data_bw >= GBps(12) || num_active > 1) {
3845                 ddb->enabled_slices = 2;
3846         } else {
3847                 ddb->enabled_slices = 1;
3848                 ddb_size /= 2;
3849         }
3850
3851         return ddb_size;
3852 }
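
/*
 * In short: on gen11+ the DDB is backed by two slices; both are enabled
 * when the total display bandwidth (data rate * refresh) reaches 12 GB/s
 * or more than one pipe is active, otherwise a single slice (half the
 * DDB) is used.
 */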
3853
3854 static void
3855 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
3856                                    const struct intel_crtc_state *cstate,
3857                                    const unsigned int total_data_rate,
3858                                    struct skl_ddb_allocation *ddb,
3859                                    struct skl_ddb_entry *alloc, /* out */
3860                                    int *num_active /* out */)
3861 {
3862         struct drm_atomic_state *state = cstate->base.state;
3863         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
3864         struct drm_i915_private *dev_priv = to_i915(dev);
3865         struct drm_crtc *for_crtc = cstate->base.crtc;
3866         const struct drm_crtc_state *crtc_state;
3867         const struct drm_crtc *crtc;
3868         u32 pipe_width = 0, total_width = 0, width_before_pipe = 0;
3869         enum pipe for_pipe = to_intel_crtc(for_crtc)->pipe;
3870         u16 ddb_size;
3871         u32 i;
3872
3873         if (WARN_ON(!state) || !cstate->base.active) {
3874                 alloc->start = 0;
3875                 alloc->end = 0;
3876                 *num_active = hweight32(dev_priv->active_crtcs);
3877                 return;
3878         }
3879
3880         if (intel_state->active_pipe_changes)
3881                 *num_active = hweight32(intel_state->active_crtcs);
3882         else
3883                 *num_active = hweight32(dev_priv->active_crtcs);
3884
3885         ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
3886                                       *num_active, ddb);
3887
3888         /*
3889          * If the state doesn't change the active CRTCs or there is no
3890          * modeset request, then there's no need to recalculate;
3891          * the existing pipe allocation limits should remain unchanged.
3892          * Note that we're safe from racing commits since any racing commit
3893          * that changes the active CRTC list or does a modeset would need to
3894          * grab _all_ crtc locks, including the one we currently hold.
3895          */
3896         if (!intel_state->active_pipe_changes && !intel_state->modeset) {
3897                 /*
3898                  * alloc may be cleared by clear_intel_crtc_state,
3899                  * so copy from the old state to be sure
3900                  */
3901                 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb;
3902                 return;
3903         }
3904
3905         /*
3906          * The watermark/DDB requirement depends heavily on the width of the
3907          * framebuffer, so instead of allocating DDB equally among pipes,
3908          * distribute it based on the resolution/width of each display.
3909          */
3910         for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
3911                 const struct drm_display_mode *adjusted_mode;
3912                 int hdisplay, vdisplay;
3913                 enum pipe pipe;
3914
3915                 if (!crtc_state->enable)
3916                         continue;
3917
3918                 pipe = to_intel_crtc(crtc)->pipe;
3919                 adjusted_mode = &crtc_state->adjusted_mode;
3920                 drm_mode_get_hv_timing(adjusted_mode, &hdisplay, &vdisplay);
3921                 total_width += hdisplay;
3922
3923                 if (pipe < for_pipe)
3924                         width_before_pipe += hdisplay;
3925                 else if (pipe == for_pipe)
3926                         pipe_width = hdisplay;
3927         }
3928
3929         alloc->start = ddb_size * width_before_pipe / total_width;
3930         alloc->end = ddb_size * (width_before_pipe + pipe_width) / total_width;
3931 }
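
/*
 * Worked example (illustrative sizes): with ddb_size = 1024 blocks and two
 * active pipes of hdisplay 1920 and 2560 (total_width = 4480), the pipe
 * with the 2560-wide mode (assuming it is the higher-numbered one) gets
 * width_before_pipe = 1920 and pipe_width = 2560, so
 * alloc->start = 1024 * 1920 / 4480 = 438 and
 * alloc->end   = 1024 * 4480 / 4480 = 1024.
 */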
3932
3933 static unsigned int skl_cursor_allocation(int num_active)
3934 {
3935         if (num_active == 1)
3936                 return 32;
3937
3938         return 8;
3939 }
3940
3941 static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
3942                                        struct skl_ddb_entry *entry, u32 reg)
3943 {
3944         u16 mask;
3945
3946         if (INTEL_GEN(dev_priv) >= 11)
3947                 mask = ICL_DDB_ENTRY_MASK;
3948         else
3949                 mask = SKL_DDB_ENTRY_MASK;
3950         entry->start = reg & mask;
3951         entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask;
3952
3953         if (entry->end)
3954                 entry->end += 1;
3955 }
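
/*
 * Decoding example (hypothetical register contents): if the start field
 * reads back as 0x40 and the end field as 0x1ff, entry->end is stored as
 * 0x200 so that the entry describes the half-open block range
 * [0x40, 0x200).
 */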
3956
3957 static void
3958 skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
3959                            const enum pipe pipe,
3960                            const enum plane_id plane_id,
3961                            struct skl_ddb_allocation *ddb /* out */)
3962 {
3963         u32 val, val2 = 0;
3964         int fourcc, pixel_format;
3965
3966         /* Cursor doesn't support NV12/planar, so no extra calculation needed */
3967         if (plane_id == PLANE_CURSOR) {
3968                 val = I915_READ(CUR_BUF_CFG(pipe));
3969                 skl_ddb_entry_init_from_hw(dev_priv,
3970                                            &ddb->plane[pipe][plane_id], val);
3971                 return;
3972         }
3973
3974         val = I915_READ(PLANE_CTL(pipe, plane_id));
3975
3976         /* No DDB allocated for disabled planes */
3977         if (!(val & PLANE_CTL_ENABLE))
3978                 return;
3979
3980         pixel_format = val & PLANE_CTL_FORMAT_MASK;
3981         fourcc = skl_format_to_fourcc(pixel_format,
3982                                       val & PLANE_CTL_ORDER_RGBX,
3983                                       val & PLANE_CTL_ALPHA_MASK);
3984
3985         val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
3986         /*
3987          * FIXME: add proper NV12 support for ICL. Avoid reading unclaimed
3988          * registers for now.
3989          */
3990         if (INTEL_GEN(dev_priv) < 11)
3991                 val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
3992
3993         if (fourcc == DRM_FORMAT_NV12) {
3994                 skl_ddb_entry_init_from_hw(dev_priv,
3995                                            &ddb->plane[pipe][plane_id], val2);
3996                 skl_ddb_entry_init_from_hw(dev_priv,
3997                                            &ddb->uv_plane[pipe][plane_id], val);
3998         } else {
3999                 skl_ddb_entry_init_from_hw(dev_priv,
4000                                            &ddb->plane[pipe][plane_id], val);
4001         }
4002 }
4003
4004 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
4005                           struct skl_ddb_allocation *ddb /* out */)
4006 {
4007         struct intel_crtc *crtc;
4008
4009         memset(ddb, 0, sizeof(*ddb));
4010
4011         ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
4012
4013         for_each_intel_crtc(&dev_priv->drm, crtc) {
4014                 enum intel_display_power_domain power_domain;
4015                 enum plane_id plane_id;
4016                 enum pipe pipe = crtc->pipe;
4017
4018                 power_domain = POWER_DOMAIN_PIPE(pipe);
4019                 if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
4020                         continue;
4021
4022                 for_each_plane_id_on_crtc(crtc, plane_id)
4023                         skl_ddb_get_hw_plane_state(dev_priv, pipe,
4024                                                    plane_id, ddb);
4025
4026                 intel_display_power_put(dev_priv, power_domain);
4027         }
4028 }
4029
4030 /*
4031  * Determines the downscale amount of a plane for the purposes of watermark calculations.
4032  * The bspec defines downscale amount as:
4033  *
4034  * """
4035  * Horizontal down scale amount = maximum[1, Horizontal source size /
4036  *                                           Horizontal destination size]
4037  * Vertical down scale amount = maximum[1, Vertical source size /
4038  *                                         Vertical destination size]
4039  * Total down scale amount = Horizontal down scale amount *
4040  *                           Vertical down scale amount
4041  * """
4042  *
4043  * Return value is provided in 16.16 fixed point form to retain fractional part.
4044  * Caller should take care of dividing & rounding off the value.
4045  */
4046 static uint_fixed_16_16_t
4047 skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
4048                            const struct intel_plane_state *pstate)
4049 {
4050         struct intel_plane *plane = to_intel_plane(pstate->base.plane);
4051         uint32_t src_w, src_h, dst_w, dst_h;
4052         uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4053         uint_fixed_16_16_t downscale_h, downscale_w;
4054
4055         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4056                 return u32_to_fixed16(0);
4057
4058         /* n.b., src is 16.16 fixed point, dst is whole integer */
4059         if (plane->id == PLANE_CURSOR) {
4060                 /*
4061                  * Cursors only support 0/180 degree rotation,
4062                  * hence no need to account for rotation here.
4063                  */
4064                 src_w = pstate->base.src_w >> 16;
4065                 src_h = pstate->base.src_h >> 16;
4066                 dst_w = pstate->base.crtc_w;
4067                 dst_h = pstate->base.crtc_h;
4068         } else {
4069                 /*
4070                  * Src coordinates are already rotated by 270 degrees for
4071                  * the 90/270 degree plane rotation cases (to match the
4072                  * GTT mapping), hence no need to account for rotation here.
4073                  */
4074                 src_w = drm_rect_width(&pstate->base.src) >> 16;
4075                 src_h = drm_rect_height(&pstate->base.src) >> 16;
4076                 dst_w = drm_rect_width(&pstate->base.dst);
4077                 dst_h = drm_rect_height(&pstate->base.dst);
4078         }
4079
4080         fp_w_ratio = div_fixed16(src_w, dst_w);
4081         fp_h_ratio = div_fixed16(src_h, dst_h);
4082         downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4083         downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4084
4085         return mul_fixed16(downscale_w, downscale_h);
4086 }
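/*
 * Editor's worked example (illustrative only): a 3840x2160 source rect
 * scanned out on a 1920x1080 plane gives fp_w_ratio = fp_h_ratio = 2.0,
 * so the function returns 4.0 in 16.16 fixed point (0x00040000). An
 * upscaled plane clamps both ratios to 1.0 and returns 1.0 (0x00010000).
 */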
4087
4088 static uint_fixed_16_16_t
4089 skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state)
4090 {
4091         uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1);
4092
4093         if (!crtc_state->base.enable)
4094                 return pipe_downscale;
4095
4096         if (crtc_state->pch_pfit.enabled) {
4097                 uint32_t src_w, src_h, dst_w, dst_h;
4098                 uint32_t pfit_size = crtc_state->pch_pfit.size;
4099                 uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
4100                 uint_fixed_16_16_t downscale_h, downscale_w;
4101
4102                 src_w = crtc_state->pipe_src_w;
4103                 src_h = crtc_state->pipe_src_h;
4104                 dst_w = pfit_size >> 16;
4105                 dst_h = pfit_size & 0xffff;
4106
4107                 if (!dst_w || !dst_h)
4108                         return pipe_downscale;
4109
4110                 fp_w_ratio = div_fixed16(src_w, dst_w);
4111                 fp_h_ratio = div_fixed16(src_h, dst_h);
4112                 downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
4113                 downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
4114
4115                 pipe_downscale = mul_fixed16(downscale_w, downscale_h);
4116         }
4117
4118         return pipe_downscale;
4119 }
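/*
 * Editor's worked example (illustrative only): with the panel fitter
 * enabled, pch_pfit.size packs the destination width in the high 16 bits
 * and the height in the low 16 bits. A 3840x2160 pipe source fitted into
 * a 1920x1080 window (pfit_size = 1920 << 16 | 1080 = 0x07800438) yields
 * a pipe downscale of 2.0 * 2.0 = 4.0 in 16.16 fixed point.
 */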
4120
4121 int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
4122                                   struct intel_crtc_state *cstate)
4123 {
4124         struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
4125         struct drm_crtc_state *crtc_state = &cstate->base;
4126         struct drm_atomic_state *state = crtc_state->state;
4127         struct drm_plane *plane;
4128         const struct drm_plane_state *pstate;
4129         struct intel_plane_state *intel_pstate;
4130         int crtc_clock, dotclk;
4131         uint32_t pipe_max_pixel_rate;
4132         uint_fixed_16_16_t pipe_downscale;
4133         uint_fixed_16_16_t max_downscale = u32_to_fixed16(1);
4134
4135         if (!cstate->base.enable)
4136                 return 0;
4137
4138         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4139                 uint_fixed_16_16_t plane_downscale;
4140                 uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8);
4141                 int bpp;
4142
4143                 if (!intel_wm_plane_visible(cstate,
4144                                             to_intel_plane_state(pstate)))
4145                         continue;
4146
4147                 if (WARN_ON(!pstate->fb))
4148                         return -EINVAL;
4149
4150                 intel_pstate = to_intel_plane_state(pstate);
4151                 plane_downscale = skl_plane_downscale_amount(cstate,
4152                                                              intel_pstate);
4153                 bpp = pstate->fb->format->cpp[0] * 8;
4154                 if (bpp == 64)
4155                         plane_downscale = mul_fixed16(plane_downscale,
4156                                                       fp_9_div_8);
4157
4158                 max_downscale = max_fixed16(plane_downscale, max_downscale);
4159         }
4160         pipe_downscale = skl_pipe_downscale_amount(cstate);
4161
4162         pipe_downscale = mul_fixed16(pipe_downscale, max_downscale);
4163
4164         crtc_clock = crtc_state->adjusted_mode.crtc_clock;
4165         dotclk = to_intel_atomic_state(state)->cdclk.logical.cdclk;
4166
4167         if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10)
4168                 dotclk *= 2;
4169
4170         pipe_max_pixel_rate = div_round_up_u32_fixed16(dotclk, pipe_downscale);
4171
4172         if (pipe_max_pixel_rate < crtc_clock) {
4173                 DRM_DEBUG_KMS("Max supported pixel clock with scaling exceeded\n");
4174                 return -EINVAL;
4175         }
4176
4177         return 0;
4178 }
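/*
 * Editor's worked example (illustrative numbers): with a logical cdclk of
 * 540000 kHz and a combined pipe/plane downscale of 1.5, the maximum
 * supported pixel rate is 540000 / 1.5 = 360000 kHz, so a mode with
 * crtc_clock = 400000 kHz would be rejected with -EINVAL. 64bpp planes
 * are further derated by the 9/8 factor above, and GLK/GEN10+ start from
 * a budget of 2 * cdclk.
 */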
4179
4180 static unsigned int
4181 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
4182                              const struct drm_plane_state *pstate,
4183                              const int plane)
4184 {
4185         struct intel_plane *intel_plane = to_intel_plane(pstate->plane);
4186         struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4187         uint32_t data_rate;
4188         uint32_t width = 0, height = 0;
4189         struct drm_framebuffer *fb;
4190         u32 format;
4191         uint_fixed_16_16_t down_scale_amount;
4192
4193         if (!intel_pstate->base.visible)
4194                 return 0;
4195
4196         fb = pstate->fb;
4197         format = fb->format->format;
4198
4199         if (intel_plane->id == PLANE_CURSOR)
4200                 return 0;
4201         if (plane == 1 && format != DRM_FORMAT_NV12)
4202                 return 0;
4203
4204         /*
4205          * Src coordinates are already rotated by 270 degrees for
4206          * the 90/270 degree plane rotation cases (to match the
4207          * GTT mapping), hence no need to account for rotation here.
4208          */
4209         width = drm_rect_width(&intel_pstate->base.src) >> 16;
4210         height = drm_rect_height(&intel_pstate->base.src) >> 16;
4211
4212         /* UV plane does 1/2 pixel sub-sampling */
4213         if (plane == 1 && format == DRM_FORMAT_NV12) {
4214                 width /= 2;
4215                 height /= 2;
4216         }
4217
4218         data_rate = width * height * fb->format->cpp[plane];
4219
4220         down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
4221
4222         return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
4223 }
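/*
 * Editor's worked example (illustrative only): a 1920x1080 XRGB8888 plane
 * (cpp = 4) with no downscaling has a relative data rate of
 * 1920 * 1080 * 4 = 8,294,400. For NV12, the plane == 1 (UV) pass covers a
 * quarter of the pixels (width and height halved) at cpp[1] = 2 bytes.
 */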
4224
4225 /*
4226  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
4227  * an 8192x4096@32bpp framebuffer:
4228  *   3 * 4096 * 8192 * 4 = 402,653,184 < 2^32
4229  */
4230 static unsigned int
4231 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
4232                                  unsigned int *plane_data_rate,
4233                                  unsigned int *uv_plane_data_rate)
4234 {
4235         struct drm_crtc_state *cstate = &intel_cstate->base;
4236         struct drm_atomic_state *state = cstate->state;
4237         struct drm_plane *plane;
4238         const struct drm_plane_state *pstate;
4239         unsigned int total_data_rate = 0;
4240
4241         if (WARN_ON(!state))
4242                 return 0;
4243
4244         /* Calculate and cache data rate for each plane */
4245         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) {
4246                 enum plane_id plane_id = to_intel_plane(plane)->id;
4247                 unsigned int rate;
4248
4249                 /* packed/y */
4250                 rate = skl_plane_relative_data_rate(intel_cstate,
4251                                                     pstate, 0);
4252                 plane_data_rate[plane_id] = rate;
4253
4254                 total_data_rate += rate;
4255
4256                 /* uv-plane */
4257                 rate = skl_plane_relative_data_rate(intel_cstate,
4258                                                     pstate, 1);
4259                 uv_plane_data_rate[plane_id] = rate;
4260
4261                 total_data_rate += rate;
4262         }
4263
4264         return total_data_rate;
4265 }
4266
4267 static uint16_t
4268 skl_ddb_min_alloc(const struct drm_plane_state *pstate, const int plane)
4269 {
4270         struct drm_framebuffer *fb = pstate->fb;
4271         struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
4272         uint32_t src_w, src_h;
4273         uint32_t min_scanlines = 8;
4274         uint8_t plane_bpp;
4275
4276         if (WARN_ON(!fb))
4277                 return 0;
4278
4279         /* For the UV plane (plane == 1) of packed formats, return 0 */
4280         if (plane == 1 && fb->format->format != DRM_FORMAT_NV12)
4281                 return 0;
4282
4283         /* For non Y-tiled formats, return 8 blocks */
4284         if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
4285             fb->modifier != I915_FORMAT_MOD_Yf_TILED &&
4286             fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS &&
4287             fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS)
4288                 return 8;
4289
4290         /*
4291          * Src coordinates are already rotated by 270 degrees for
4292          * the 90/270 degree plane rotation cases (to match the
4293          * GTT mapping), hence no need to account for rotation here.
4294          */
4295         src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
4296         src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
4297
4298         /* Halve UV plane width and height for NV12 */
4299         if (plane == 1) {
4300                 src_w /= 2;
4301                 src_h /= 2;
4302         }
4303
4304         plane_bpp = fb->format->cpp[plane];
4305
4306         if (drm_rotation_90_or_270(pstate->rotation)) {
4307                 switch (plane_bpp) {
4308                 case 1:
4309                         min_scanlines = 32;
4310                         break;
4311                 case 2:
4312                         min_scanlines = 16;
4313                         break;
4314                 case 4:
4315                         min_scanlines = 8;
4316                         break;
4317                 case 8:
4318                         min_scanlines = 4;
4319                         break;
4320                 default:
4321                         WARN(1, "Unsupported pixel depth %u for rotation",
4322                              plane_bpp);
4323                         min_scanlines = 32;
4324                 }
4325         }
4326
4327         return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
4328 }
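/*
 * Editor's worked example (illustrative only): a non-rotated Y-tiled
 * 1920-wide XRGB8888 plane (plane_bpp = 4, min_scanlines = 8) needs
 * DIV_ROUND_UP(4 * 1920 * 4, 512) * 8 / 4 + 3 = 60 * 2 + 3 = 123 blocks;
 * any linear or X-tiled plane simply gets the 8-block minimum above.
 */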
4329
4330 static void
4331 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
4332                  uint16_t *minimum, uint16_t *uv_minimum)
4333 {
4334         const struct drm_plane_state *pstate;
4335         struct drm_plane *plane;
4336
4337         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
4338                 enum plane_id plane_id = to_intel_plane(plane)->id;
4339
4340                 if (plane_id == PLANE_CURSOR)
4341                         continue;
4342
4343                 if (!pstate->visible)
4344                         continue;
4345
4346                 minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
4347                 uv_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
4348         }
4349
4350         minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
4351 }
4352
4353 static int
4354 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
4355                       struct skl_ddb_allocation *ddb /* out */)
4356 {
4357         struct drm_atomic_state *state = cstate->base.state;
4358         struct drm_crtc *crtc = cstate->base.crtc;
4359         struct drm_device *dev = crtc->dev;
4360         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
4361         enum pipe pipe = intel_crtc->pipe;
4362         struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
4363         uint16_t alloc_size, start;
4364         uint16_t minimum[I915_MAX_PLANES] = {};
4365         uint16_t uv_minimum[I915_MAX_PLANES] = {};
4366         unsigned int total_data_rate;
4367         enum plane_id plane_id;
4368         int num_active;
4369         unsigned int plane_data_rate[I915_MAX_PLANES] = {};
4370         unsigned int uv_plane_data_rate[I915_MAX_PLANES] = {};
4371         uint16_t total_min_blocks = 0;
4372
4373         /* Clear the partitioning for disabled planes. */
4374         memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
4375         memset(ddb->uv_plane[pipe], 0, sizeof(ddb->uv_plane[pipe]));
4376
4377         if (WARN_ON(!state))
4378                 return 0;
4379
4380         if (!cstate->base.active) {
4381                 alloc->start = alloc->end = 0;
4382                 return 0;
4383         }
4384
4385         total_data_rate = skl_get_total_relative_data_rate(cstate,
4386                                                            plane_data_rate,
4387                                                            uv_plane_data_rate);
4388         skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb,
4389                                            alloc, &num_active);
4390         alloc_size = skl_ddb_entry_size(alloc);
4391         if (alloc_size == 0)
4392                 return 0;
4393
4394         skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum);
4395
4396         /*
4397          * 1. Allocate the minimum required blocks for each active plane,
4398          * including the cursor, which doesn't get an extra allocation
4399          * proportional to its data rate.
4400          */
4401
4402         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4403                 total_min_blocks += minimum[plane_id];
4404                 total_min_blocks += uv_minimum[plane_id];
4405         }
4406
4407         if (total_min_blocks > alloc_size) {
4408                 DRM_DEBUG_KMS("Requested display configuration exceeds system DDB limitations\n");
4409                 DRM_DEBUG_KMS("minimum required %d/%d\n", total_min_blocks,
4410                                                         alloc_size);
4411                 return -EINVAL;
4412         }
4413
4414         alloc_size -= total_min_blocks;
4415         ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR];
4416         ddb->plane[pipe][PLANE_CURSOR].end = alloc->end;
4417
4418         /*
4419          * 2. Distribute the remaining space in proportion to the amount of
4420          * data each plane needs to fetch from memory.
4421          *
4422          * FIXME: we may not allocate every single block here.
4423          */
4424         if (total_data_rate == 0)
4425                 return 0;
4426
4427         start = alloc->start;
4428         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
4429                 unsigned int data_rate, uv_data_rate;
4430                 uint16_t plane_blocks, uv_plane_blocks;
4431
4432                 if (plane_id == PLANE_CURSOR)
4433                         continue;
4434
4435                 data_rate = plane_data_rate[plane_id];
4436
4437                 /*
4438                  * allocation for (packed formats) or (uv-plane part of planar format):
4439                  * Allocation for packed formats, or the y-plane part of planar formats:
4440                  * promote the expression to 64 bits to avoid overflow; the
4441                  * result is < alloc_size since data_rate / total_data_rate < 1.
4442                 plane_blocks = minimum[plane_id];
4443                 plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
4444                                         total_data_rate);
4445
4446                 /* Leave disabled planes at (0,0) */
4447                 if (data_rate) {
4448                         ddb->plane[pipe][plane_id].start = start;
4449                         ddb->plane[pipe][plane_id].end = start + plane_blocks;
4450                 }
4451
4452                 start += plane_blocks;
4453
4454                 /* Allocate DDB for UV plane for planar format/NV12 */
4455                 uv_data_rate = uv_plane_data_rate[plane_id];
4456
4457                 uv_plane_blocks = uv_minimum[plane_id];
4458                 uv_plane_blocks += div_u64((uint64_t)alloc_size * uv_data_rate,
4459                                            total_data_rate);
4460
4461                 if (uv_data_rate) {
4462                         ddb->uv_plane[pipe][plane_id].start = start;
4463                         ddb->uv_plane[pipe][plane_id].end =
4464                                 start + uv_plane_blocks;
4465                 }
4466
4467                 start += uv_plane_blocks;
4468         }
4469
4470         return 0;
4471 }
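/*
 * Editor's worked example (illustrative only): with 400 blocks left after
 * the per-plane minimums and the cursor carve-out, a pipe with two planes
 * whose data rates are 75% and 25% of total_data_rate receives an extra
 * 300 and 100 blocks respectively, each on top of its minimum[] blocks,
 * packed back to back starting at alloc->start.
 */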
4472
4473 /*
4474  * The max latency should be 257us (the max the punit can code is 255 and we
4475  * add 2us for the read latency), and cpp should always be <= 8, so that
4476  * should allow a pixel_rate of up to ~2 GHz, which seems sufficient since the
4477  * max 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
4478  */
4479 static uint_fixed_16_16_t
4480 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
4481                uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
4482 {
4483         uint32_t wm_intermediate_val;
4484         uint_fixed_16_16_t ret;
4485
4486         if (latency == 0)
4487                 return FP_16_16_MAX;
4488
4489         wm_intermediate_val = latency * pixel_rate * cpp;
4490         ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
4491
4492         if (INTEL_GEN(dev_priv) >= 10)
4493                 ret = add_fixed16_u32(ret, 1);
4494
4495         return ret;
4496 }
4497
4498 static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate,
4499                         uint32_t pipe_htotal,
4500                         uint32_t latency,
4501                         uint_fixed_16_16_t plane_blocks_per_line)
4502 {
4503         uint32_t wm_intermediate_val;
4504         uint_fixed_16_16_t ret;
4505
4506         if (latency == 0)
4507                 return FP_16_16_MAX;
4508
4509         wm_intermediate_val = latency * pixel_rate;
4510         wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val,
4511                                            pipe_htotal * 1000);
4512         ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line);
4513         return ret;
4514 }
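/*
 * Editor's worked example (illustrative numbers): for a plane pixel rate
 * of 148500 kHz, cpp = 4, a 5 us latency and a 512-byte block size,
 * method1 = 5 * 148500 * 4 / (1000 * 512) ~= 5.8 blocks (bytes fetched
 * during the latency, in blocks). method2 instead rounds the latency up
 * to whole scanlines (DIV_ROUND_UP(5 * 148500, 2200 * 1000) = 1 line for
 * htotal = 2200) and multiplies by plane_blocks_per_line.
 */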
4515
4516 static uint_fixed_16_16_t
4517 intel_get_linetime_us(struct intel_crtc_state *cstate)
4518 {
4519         uint32_t pixel_rate;
4520         uint32_t crtc_htotal;
4521         uint_fixed_16_16_t linetime_us;
4522
4523         if (!cstate->base.active)
4524                 return u32_to_fixed16(0);
4525
4526         pixel_rate = cstate->pixel_rate;
4527
4528         if (WARN_ON(pixel_rate == 0))
4529                 return u32_to_fixed16(0);
4530
4531         crtc_htotal = cstate->base.adjusted_mode.crtc_htotal;
4532         linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate);
4533
4534         return linetime_us;
4535 }
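/*
 * Editor's worked example (illustrative numbers): a 1920x1080@60 mode with
 * crtc_htotal = 2200 and pixel_rate = 148500 kHz gives
 * 2200 * 1000 / 148500 ~= 14.8 us per scanline; skl_compute_linetime_wm()
 * below then reports 8 * 14.8 rounded up, i.e. 119 (halved on BXT/GLK when
 * IPC is enabled, per WA #1135).
 */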
4536
4537 static uint32_t
4538 skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
4539                               const struct intel_plane_state *pstate)
4540 {
4541         uint64_t adjusted_pixel_rate;
4542         uint_fixed_16_16_t downscale_amount;
4543
4544         /* Shouldn't reach here on disabled planes... */
4545         if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
4546                 return 0;
4547
4548         /*
4549          * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
4550          * with additional adjustments for plane-specific scaling.
4551          */
4552         adjusted_pixel_rate = cstate->pixel_rate;
4553         downscale_amount = skl_plane_downscale_amount(cstate, pstate);
4554
4555         return mul_round_up_u32_fixed16(adjusted_pixel_rate,
4556                                             downscale_amount);
4557 }
4558
4559 static int
4560 skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
4561                             struct intel_crtc_state *cstate,
4562                             const struct intel_plane_state *intel_pstate,
4563                             struct skl_wm_params *wp, int plane_id)
4564 {
4565         struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
4566         const struct drm_plane_state *pstate = &intel_pstate->base;
4567         const struct drm_framebuffer *fb = pstate->fb;
4568         uint32_t interm_pbpl;
4569         struct intel_atomic_state *state =
4570                 to_intel_atomic_state(cstate->base.state);
4571         bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4572
4573         if (!intel_wm_plane_visible(cstate, intel_pstate))
4574                 return 0;
4575
4576         /* only the NV12 format has two planes */
4577         if (plane_id == 1 && fb->format->format != DRM_FORMAT_NV12) {
4578                 DRM_DEBUG_KMS("Non-NV12 formats have a single plane\n");
4579                 return -EINVAL;
4580         }
4581
4582         wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
4583                       fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
4584                       fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4585                       fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4586         wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
4587         wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
4588                          fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
4589         wp->is_planar = fb->format->format == DRM_FORMAT_NV12;
4590
4591         if (plane->id == PLANE_CURSOR) {
4592                 wp->width = intel_pstate->base.crtc_w;
4593         } else {
4594                 /*
4595                  * Src coordinates are already rotated by 270 degrees for
4596                  * the 90/270 degree plane rotation cases (to match the
4597                  * GTT mapping), hence no need to account for rotation here.
4598                  */
4599                 wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
4600         }
4601
4602         if (plane_id == 1 && wp->is_planar)
4603                 wp->width /= 2;
4604
4605         wp->cpp = fb->format->cpp[plane_id];
4606         wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
4607                                                              intel_pstate);
4608
4609         if (INTEL_GEN(dev_priv) >= 11 &&
4610             fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
4611                 wp->dbuf_block_size = 256;
4612         else
4613                 wp->dbuf_block_size = 512;
4614
4615         if (drm_rotation_90_or_270(pstate->rotation)) {
4616
4617                 switch (wp->cpp) {
4618                 case 1:
4619                         wp->y_min_scanlines = 16;
4620                         break;
4621                 case 2:
4622                         wp->y_min_scanlines = 8;
4623                         break;
4624                 case 4:
4625                         wp->y_min_scanlines = 4;
4626                         break;
4627                 default:
4628                         MISSING_CASE(wp->cpp);
4629                         return -EINVAL;
4630                 }
4631         } else {
4632                 wp->y_min_scanlines = 4;
4633         }
4634
4635         if (apply_memory_bw_wa)
4636                 wp->y_min_scanlines *= 2;
4637
4638         wp->plane_bytes_per_line = wp->width * wp->cpp;
4639         if (wp->y_tiled) {
4640                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
4641                                            wp->y_min_scanlines,
4642                                            wp->dbuf_block_size);
4643
4644                 if (INTEL_GEN(dev_priv) >= 10)
4645                         interm_pbpl++;
4646
4647                 wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
4648                                                         wp->y_min_scanlines);
4649         } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
4650                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4651                                            wp->dbuf_block_size);
4652                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4653         } else {
4654                 interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
4655                                            wp->dbuf_block_size) + 1;
4656                 wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
4657         }
4658
4659         wp->y_tile_minimum = mul_u32_fixed16(wp->y_min_scanlines,
4660                                              wp->plane_blocks_per_line);
4661         wp->linetime_us = fixed16_to_u32_round_up(
4662                                         intel_get_linetime_us(cstate));
4663
4664         return 0;
4665 }
4666
4667 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
4668                                 struct intel_crtc_state *cstate,
4669                                 const struct intel_plane_state *intel_pstate,
4670                                 uint16_t ddb_allocation,
4671                                 int level,
4672                                 const struct skl_wm_params *wp,
4673                                 const struct skl_wm_level *result_prev,
4674                                 struct skl_wm_level *result /* out */)
4675 {
4676         const struct drm_plane_state *pstate = &intel_pstate->base;
4677         uint32_t latency = dev_priv->wm.skl_latency[level];
4678         uint_fixed_16_16_t method1, method2;
4679         uint_fixed_16_16_t selected_result;
4680         uint32_t res_blocks, res_lines;
4681         struct intel_atomic_state *state =
4682                 to_intel_atomic_state(cstate->base.state);
4683         bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
4684         uint32_t min_disp_buf_needed;
4685
4686         if (latency == 0 ||
4687             !intel_wm_plane_visible(cstate, intel_pstate)) {
4688                 result->plane_en = false;
4689                 return 0;
4690         }
4691
4692         /* Display WA #1141: kbl,cfl */
4693         if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
4694             IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
4695             dev_priv->ipc_enabled)
4696                 latency += 4;
4697
4698         if (apply_memory_bw_wa && wp->x_tiled)
4699                 latency += 15;
4700
4701         method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
4702                                  wp->cpp, latency, wp->dbuf_block_size);
4703         method2 = skl_wm_method2(wp->plane_pixel_rate,
4704                                  cstate->base.adjusted_mode.crtc_htotal,
4705                                  latency,
4706                                  wp->plane_blocks_per_line);
4707
4708         if (wp->y_tiled) {
4709                 selected_result = max_fixed16(method2, wp->y_tile_minimum);
4710         } else {
4711                 if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
4712                      wp->dbuf_block_size < 1) &&
4713                      (wp->plane_bytes_per_line / wp->dbuf_block_size < 1))
4714                         selected_result = method2;
4715                 else if (ddb_allocation >=
4716                          fixed16_to_u32_round_up(wp->plane_blocks_per_line))
4717                         selected_result = min_fixed16(method1, method2);
4718                 else if (latency >= wp->linetime_us)
4719                         selected_result = min_fixed16(method1, method2);
4720                 else
4721                         selected_result = method1;
4722         }
4723
4724         res_blocks = fixed16_to_u32_round_up(selected_result) + 1;
4725         res_lines = div_round_up_fixed16(selected_result,
4726                                          wp->plane_blocks_per_line);
4727
4728         /* Display WA #1125: skl,bxt,kbl,glk */
4729         if (level == 0 && wp->rc_surface)
4730                 res_blocks += fixed16_to_u32_round_up(wp->y_tile_minimum);
4731
4732         /* Display WA #1126: skl,bxt,kbl,glk */
4733         if (level >= 1 && level <= 7) {
4734                 if (wp->y_tiled) {
4735                         res_blocks += fixed16_to_u32_round_up(
4736                                                         wp->y_tile_minimum);
4737                         res_lines += wp->y_min_scanlines;
4738                 } else {
4739                         res_blocks++;
4740                 }
4741
4742                 /*
4743                  * Make sure result blocks for higher latency levels are at least
4744                  * as high as the level below the current level.
4745                  * Assumption in DDB algorithm optimization for special cases.
4746                  * Also covers Display WA #1125 for RC.
4747                  */
4748                 if (result_prev->plane_res_b > res_blocks)
4749                         res_blocks = result_prev->plane_res_b;
4750         }
4751
4752         if (INTEL_GEN(dev_priv) >= 11) {
4753                 if (wp->y_tiled) {
4754                         uint32_t extra_lines;
4755                         uint_fixed_16_16_t fp_min_disp_buf_needed;
4756
4757                         if (res_lines % wp->y_min_scanlines == 0)
4758                                 extra_lines = wp->y_min_scanlines;
4759                         else
4760                                 extra_lines = wp->y_min_scanlines * 2 -
4761                                               res_lines % wp->y_min_scanlines;
4762
4763                         fp_min_disp_buf_needed = mul_u32_fixed16(res_lines +
4764                                                 extra_lines,
4765                                                 wp->plane_blocks_per_line);
4766                         min_disp_buf_needed = fixed16_to_u32_round_up(
4767                                                 fp_min_disp_buf_needed);
4768                 } else {
4769                         min_disp_buf_needed = DIV_ROUND_UP(res_blocks * 11, 10);
4770                 }
4771         } else {
4772                 min_disp_buf_needed = res_blocks;
4773         }
4774
4775         if ((level > 0 && res_lines > 31) ||
4776             res_blocks >= ddb_allocation ||
4777             min_disp_buf_needed >= ddb_allocation) {
4778                 result->plane_en = false;
4779
4780                 /*
4781                  * If there are no valid level 0 watermarks, then we can't
4782                  * support this display configuration.
4783                  */
4784                 if (level) {
4785                         return 0;
4786                 } else {
4787                         struct drm_plane *plane = pstate->plane;
4788
4789                         DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
4790                         DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n",
4791                                       plane->base.id, plane->name,
4792                                       res_blocks, ddb_allocation, res_lines);
4793                         return -EINVAL;
4794                 }
4795         }
4796
4797         /*
4798          * Display WA #826 (SKL:ALL, BXT:ALL) & #1059 (CNL:A)
4799          * disable wm level 1-7 on NV12 planes
4800          */
4801         if (wp->is_planar && level >= 1 &&
4802             (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
4803              IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))) {
4804                 result->plane_en = false;
4805                 return 0;
4806         }
4807
4808         /* The number of lines is ignored for the level 0 watermark. */
4809         result->plane_res_b = res_blocks;
4810         result->plane_res_l = res_lines;
4811         result->plane_en = true;
4812
4813         return 0;
4814 }
4815
4816 static int
4817 skl_compute_wm_levels(const struct drm_i915_private *dev_priv,
4818                       struct skl_ddb_allocation *ddb,
4819                       struct intel_crtc_state *cstate,
4820                       const struct intel_plane_state *intel_pstate,
4821                       const struct skl_wm_params *wm_params,
4822                       struct skl_plane_wm *wm,
4823                       int plane_id)
4824 {
4825         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
4826         struct drm_plane *plane = intel_pstate->base.plane;
4827         struct intel_plane *intel_plane = to_intel_plane(plane);
4828         uint16_t ddb_blocks;
4829         enum pipe pipe = intel_crtc->pipe;
4830         int level, max_level = ilk_wm_max_level(dev_priv);
4831         enum plane_id intel_plane_id = intel_plane->id;
4832         int ret;
4833
4834         if (WARN_ON(!intel_pstate->base.fb))
4835                 return -EINVAL;
4836
4837         ddb_blocks = plane_id ?
4838                      skl_ddb_entry_size(&ddb->uv_plane[pipe][intel_plane_id]) :
4839                      skl_ddb_entry_size(&ddb->plane[pipe][intel_plane_id]);
4840
4841         for (level = 0; level <= max_level; level++) {
4842                 struct skl_wm_level *result = plane_id ? &wm->uv_wm[level] :
4843                                                           &wm->wm[level];
4844                 struct skl_wm_level *result_prev;
4845
4846                 if (level)
4847                         result_prev = plane_id ? &wm->uv_wm[level - 1] :
4848                                                   &wm->wm[level - 1];
4849                 else
4850                         result_prev = plane_id ? &wm->uv_wm[0] : &wm->wm[0];
4851
4852                 ret = skl_compute_plane_wm(dev_priv,
4853                                            cstate,
4854                                            intel_pstate,
4855                                            ddb_blocks,
4856                                            level,
4857                                            wm_params,
4858                                            result_prev,
4859                                            result);
4860                 if (ret)
4861                         return ret;
4862         }
4863
4864         if (intel_pstate->base.fb->format->format == DRM_FORMAT_NV12)
4865                 wm->is_planar = true;
4866
4867         return 0;
4868 }
4869
4870 static uint32_t
4871 skl_compute_linetime_wm(struct intel_crtc_state *cstate)
4872 {
4873         struct drm_atomic_state *state = cstate->base.state;
4874         struct drm_i915_private *dev_priv = to_i915(state->dev);
4875         uint_fixed_16_16_t linetime_us;
4876         uint32_t linetime_wm;
4877
4878         linetime_us = intel_get_linetime_us(cstate);
4879
4880         if (is_fixed16_zero(linetime_us))
4881                 return 0;
4882
4883         linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us));
4884
4885         /* Display WA #1135: bxt:ALL GLK:ALL */
4886         if ((IS_BROXTON(dev_priv) || IS_GEMINILAKE(dev_priv)) &&
4887             dev_priv->ipc_enabled)
4888                 linetime_wm /= 2;
4889
4890         return linetime_wm;
4891 }
4892
4893 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
4894                                       struct skl_wm_params *wp,
4895                                       struct skl_wm_level *wm_l0,
4896                                       uint16_t ddb_allocation,
4897                                       struct skl_wm_level *trans_wm /* out */)
4898 {
4899         struct drm_device *dev = cstate->base.crtc->dev;
4900         const struct drm_i915_private *dev_priv = to_i915(dev);
4901         uint16_t trans_min, trans_y_tile_min;
4902         const uint16_t trans_amount = 10; /* This is a configurable amount */
4903         uint16_t trans_offset_b, res_blocks;
4904
4905         if (!cstate->base.active)
4906                 goto exit;
4907
4908         /* Transition WMs are not recommended by the HW team for GEN9 */
4909         if (INTEL_GEN(dev_priv) <= 9)
4910                 goto exit;
4911
4912         /* Transition WMs don't make any sense if IPC is disabled */
4913         if (!dev_priv->ipc_enabled)
4914                 goto exit;
4915
4916         trans_min = 0;
4917         if (INTEL_GEN(dev_priv) >= 10)
4918                 trans_min = 4;
4919
4920         trans_offset_b = trans_min + trans_amount;
4921
4922         if (wp->y_tiled) {
4923                 trans_y_tile_min = (uint16_t) mul_round_up_u32_fixed16(2,
4924                                                         wp->y_tile_minimum);
4925                 res_blocks = max(wm_l0->plane_res_b, trans_y_tile_min) +
4926                                 trans_offset_b;
4927         } else {
4928                 res_blocks = wm_l0->plane_res_b + trans_offset_b;
4929
4930                 /* WA BUG:1938466 add one block for non y-tile planes */
4931                 if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))
4932                         res_blocks += 1;
4933
4934         }
4935
4936         res_blocks += 1;
4937
4938         if (res_blocks < ddb_allocation) {
4939                 trans_wm->plane_res_b = res_blocks;
4940                 trans_wm->plane_en = true;
4941                 return;
4942         }
4943
4944 exit:
4945         trans_wm->plane_en = false;
4946 }
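/*
 * Editor's worked example (illustrative only): on GEN10+ with IPC enabled,
 * trans_offset_b = 4 + 10 = 14. A non Y-tiled plane whose level 0
 * watermark is 35 blocks needs 35 + 14 + 1 = 50 blocks (plus one more on
 * CNL A0), so the transition watermark is enabled only if the plane's DDB
 * allocation exceeds 50 blocks.
 */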
4947
4948 static int skl_build_pipe_wm(struct intel_crtc_state *cstate,
4949                              struct skl_ddb_allocation *ddb,
4950                              struct skl_pipe_wm *pipe_wm)
4951 {
4952         struct drm_device *dev = cstate->base.crtc->dev;
4953         struct drm_crtc_state *crtc_state = &cstate->base;
4954         const struct drm_i915_private *dev_priv = to_i915(dev);
4955         struct drm_plane *plane;
4956         const struct drm_plane_state *pstate;
4957         struct skl_plane_wm *wm;
4958         int ret;
4959
4960         /*
4961          * We'll only calculate watermarks for planes that are actually
4962          * enabled, so make sure all other planes are set as disabled.
4963          */
4964         memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes));
4965
4966         drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) {
4967                 const struct intel_plane_state *intel_pstate =
4968                                                 to_intel_plane_state(pstate);
4969                 enum plane_id plane_id = to_intel_plane(plane)->id;
4970                 struct skl_wm_params wm_params;
4971                 enum pipe pipe = to_intel_crtc(cstate->base.crtc)->pipe;
4972                 uint16_t ddb_blocks;
4973
4974                 wm = &pipe_wm->planes[plane_id];
4975                 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
4976
4977                 ret = skl_compute_plane_wm_params(dev_priv, cstate,
4978                                                   intel_pstate, &wm_params, 0);
4979                 if (ret)
4980                         return ret;
4981
4982                 ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
4983                                             intel_pstate, &wm_params, wm, 0);
4984                 if (ret)
4985                         return ret;
4986
4987                 skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
4988                                           ddb_blocks, &wm->trans_wm);
4989
4990                 /* uv plane watermarks must also be validated for NV12/Planar */
4991                 if (wm_params.is_planar) {
4992                         memset(&wm_params, 0, sizeof(struct skl_wm_params));
4993                         wm->is_planar = true;
4994
4995                         ret = skl_compute_plane_wm_params(dev_priv, cstate,
4996                                                           intel_pstate,
4997                                                           &wm_params, 1);
4998                         if (ret)
4999                                 return ret;
5000
5001                         ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
5002                                                     intel_pstate, &wm_params,
5003                                                     wm, 1);
5004                         if (ret)
5005                                 return ret;
5006                 }
5007         }
5008
5009         pipe_wm->linetime = skl_compute_linetime_wm(cstate);
5010
5011         return 0;
5012 }
5013
5014 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
5015                                 i915_reg_t reg,
5016                                 const struct skl_ddb_entry *entry)
5017 {
5018         if (entry->end)
5019                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
5020         else
5021                 I915_WRITE(reg, 0);
5022 }
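/*
 * Editor's illustrative note (not part of the original source): this is
 * the inverse of skl_ddb_entry_init_from_hw() above; an entry of
 * { .start = 0, .end = 160 } is written back as (159 << 16) | 0, restoring
 * the inclusive end block the hardware expects.
 */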
5023
5024 static void skl_write_wm_level(struct drm_i915_private *dev_priv,
5025                                i915_reg_t reg,
5026                                const struct skl_wm_level *level)
5027 {
5028         uint32_t val = 0;
5029
5030         if (level->plane_en) {
5031                 val |= PLANE_WM_EN;
5032                 val |= level->plane_res_b;
5033                 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT;
5034         }
5035
5036         I915_WRITE(reg, val);
5037 }
5038
5039 static void skl_write_plane_wm(struct intel_crtc *intel_crtc,
5040                                const struct skl_plane_wm *wm,
5041                                const struct skl_ddb_allocation *ddb,
5042                                enum plane_id plane_id)
5043 {
5044         struct drm_crtc *crtc = &intel_crtc->base;
5045         struct drm_device *dev = crtc->dev;
5046         struct drm_i915_private *dev_priv = to_i915(dev);
5047         int level, max_level = ilk_wm_max_level(dev_priv);
5048         enum pipe pipe = intel_crtc->pipe;
5049
5050         for (level = 0; level <= max_level; level++) {
5051                 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level),
5052                                    &wm->wm[level]);
5053         }
5054         skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id),
5055                            &wm->trans_wm);
5056
5057         skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5058                             &ddb->plane[pipe][plane_id]);
5059         /* FIXME: add proper NV12 support for ICL. */
5060         if (INTEL_GEN(dev_priv) >= 11)
5061                 return skl_ddb_entry_write(dev_priv,
5062                                            PLANE_BUF_CFG(pipe, plane_id),
5063                                            &ddb->plane[pipe][plane_id]);
5064         if (wm->is_planar) {
5065                 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5066                                     &ddb->uv_plane[pipe][plane_id]);
5067                 skl_ddb_entry_write(dev_priv,
5068                                     PLANE_NV12_BUF_CFG(pipe, plane_id),
5069                                     &ddb->plane[pipe][plane_id]);
5070         } else {
5071                 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
5072                                     &ddb->plane[pipe][plane_id]);
5073                 I915_WRITE(PLANE_NV12_BUF_CFG(pipe, plane_id), 0x0);
5074         }
5075 }
5076
5077 static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
5078                                 const struct skl_plane_wm *wm,
5079                                 const struct skl_ddb_allocation *ddb)
5080 {
5081         struct drm_crtc *crtc = &intel_crtc->base;
5082         struct drm_device *dev = crtc->dev;
5083         struct drm_i915_private *dev_priv = to_i915(dev);
5084         int level, max_level = ilk_wm_max_level(dev_priv);
5085         enum pipe pipe = intel_crtc->pipe;
5086
5087         for (level = 0; level <= max_level; level++) {
5088                 skl_write_wm_level(dev_priv, CUR_WM(pipe, level),
5089                                    &wm->wm[level]);
5090         }
5091         skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm);
5092
5093         skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
5094                             &ddb->plane[pipe][PLANE_CURSOR]);
5095 }
5096
5097 bool skl_wm_level_equals(const struct skl_wm_level *l1,
5098                          const struct skl_wm_level *l2)
5099 {
5100         if (l1->plane_en != l2->plane_en)
5101                 return false;
5102
5103         /* If both planes are disabled, the rest shouldn't matter */
5104         if (!l1->plane_en)
5105                 return true;
5106
5107         return (l1->plane_res_l == l2->plane_res_l &&
5108                 l1->plane_res_b == l2->plane_res_b);
5109 }
5110
5111 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a,
5112                                            const struct skl_ddb_entry *b)
5113 {
5114         return a->start < b->end && b->start < a->end;
5115 }
5116
5117 bool skl_ddb_allocation_overlaps(struct drm_i915_private *dev_priv,
5118                                  const struct skl_ddb_entry **entries,
5119                                  const struct skl_ddb_entry *ddb,
5120                                  int ignore)
5121 {
5122         enum pipe pipe;
5123
5124         for_each_pipe(dev_priv, pipe) {
5125                 if (pipe != ignore && entries[pipe] &&
5126                     skl_ddb_entries_overlap(ddb, entries[pipe]))
5127                         return true;
5128         }
5129
5130         return false;
5131 }
5132
5133 static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
5134                               const struct skl_pipe_wm *old_pipe_wm,
5135                               struct skl_pipe_wm *pipe_wm, /* out */
5136                               struct skl_ddb_allocation *ddb, /* out */
5137                               bool *changed /* out */)
5138 {
5139         struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate);
5140         int ret;
5141
5142         ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm);
5143         if (ret)
5144                 return ret;
5145
5146         if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm)))
5147                 *changed = false;
5148         else
5149                 *changed = true;
5150
5151         return 0;
5152 }
5153
5154 static uint32_t
5155 pipes_modified(struct drm_atomic_state *state)
5156 {
5157         struct drm_crtc *crtc;
5158         struct drm_crtc_state *cstate;
5159         uint32_t i, ret = 0;
5160
5161         for_each_new_crtc_in_state(state, crtc, cstate, i)
5162                 ret |= drm_crtc_mask(crtc);
5163
5164         return ret;
5165 }
5166
5167 static int
5168 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate)
5169 {
5170         struct drm_atomic_state *state = cstate->base.state;
5171         struct drm_device *dev = state->dev;
5172         struct drm_crtc *crtc = cstate->base.crtc;
5173         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5174         struct drm_i915_private *dev_priv = to_i915(dev);
5175         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5176         struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5177         struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
5178         struct drm_plane_state *plane_state;
5179         struct drm_plane *plane;
5180         enum pipe pipe = intel_crtc->pipe;
5181
5182         drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
5183                 enum plane_id plane_id = to_intel_plane(plane)->id;
5184
5185                 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
5186                                         &new_ddb->plane[pipe][plane_id]) &&
5187                     skl_ddb_entry_equal(&cur_ddb->uv_plane[pipe][plane_id],
5188                                         &new_ddb->uv_plane[pipe][plane_id]))
5189                         continue;
5190
5191                 plane_state = drm_atomic_get_plane_state(state, plane);
5192                 if (IS_ERR(plane_state))
5193                         return PTR_ERR(plane_state);
5194         }
5195
5196         return 0;
5197 }
5198
5199 static int
5200 skl_compute_ddb(struct drm_atomic_state *state)
5201 {
5202         const struct drm_i915_private *dev_priv = to_i915(state->dev);
5203         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5204         struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
5205         struct intel_crtc *crtc;
5206         struct intel_crtc_state *cstate;
5207         int ret, i;
5208
5209         memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
5210
5211         for_each_new_intel_crtc_in_state(intel_state, crtc, cstate, i) {
5212                 ret = skl_allocate_pipe_ddb(cstate, ddb);
5213                 if (ret)
5214                         return ret;
5215
5216                 ret = skl_ddb_add_affected_planes(cstate);
5217                 if (ret)
5218                         return ret;
5219         }
5220
5221         return 0;
5222 }
5223
5224 static void
5225 skl_print_wm_changes(const struct drm_atomic_state *state)
5226 {
5227         const struct drm_device *dev = state->dev;
5228         const struct drm_i915_private *dev_priv = to_i915(dev);
5229         const struct intel_atomic_state *intel_state =
5230                 to_intel_atomic_state(state);
5231         const struct drm_crtc *crtc;
5232         const struct drm_crtc_state *cstate;
5233         const struct intel_plane *intel_plane;
5234         const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
5235         const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
5236         int i;
5237
5238         for_each_new_crtc_in_state(state, crtc, cstate, i) {
5239                 const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5240                 enum pipe pipe = intel_crtc->pipe;
5241
5242                 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
5243                         enum plane_id plane_id = intel_plane->id;
5244                         const struct skl_ddb_entry *old, *new;
5245
5246                         old = &old_ddb->plane[pipe][plane_id];
5247                         new = &new_ddb->plane[pipe][plane_id];
5248
5249                         if (skl_ddb_entry_equal(old, new))
5250                                 continue;
5251
5252                         DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
5253                                          intel_plane->base.base.id,
5254                                          intel_plane->base.name,
5255                                          old->start, old->end,
5256                                          new->start, new->end);
5257                 }
5258         }
5259 }
5260
5261 static int
5262 skl_ddb_add_affected_pipes(struct drm_atomic_state *state, bool *changed)
5263 {
5264         struct drm_device *dev = state->dev;
5265         const struct drm_i915_private *dev_priv = to_i915(dev);
5266         const struct drm_crtc *crtc;
5267         const struct drm_crtc_state *cstate;
5268         struct intel_crtc *intel_crtc;
5269         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5270         uint32_t realloc_pipes = pipes_modified(state);
5271         int ret, i;
5272
5273         /*
5274          * When we distrust the BIOS watermarks, we always need to recompute to
5275          * set the expected DDB allocations for each CRTC.
5276          */
5277         if (dev_priv->wm.distrust_bios_wm)
5278                 (*changed) = true;
5279
5280         /*
5281          * If this transaction isn't actually touching any CRTC's, don't
5282          * bother with watermark calculation.  Note that if we pass this
5283          * test, we're guaranteed to hold at least one CRTC state mutex,
5284          * which means we can safely use values like dev_priv->active_crtcs
5285          * since any racing commits that want to update them would need to
5286          * hold _all_ CRTC state mutexes.
5287          */
5288         for_each_new_crtc_in_state(state, crtc, cstate, i)
5289                 (*changed) = true;
5290
5291         if (!*changed)
5292                 return 0;
5293
5294         /*
5295          * If this is our first atomic update following hardware readout,
5296          * we can't trust the DDB that the BIOS programmed for us.  Let's
5297          * pretend that all pipes switched active status so that we'll
5298          * ensure a full DDB recompute.
5299          */
5300         if (dev_priv->wm.distrust_bios_wm) {
5301                 ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
5302                                        state->acquire_ctx);
5303                 if (ret)
5304                         return ret;
5305
5306                 intel_state->active_pipe_changes = ~0;
5307
5308                 /*
5309                  * We usually only initialize intel_state->active_crtcs if
5310                  * we're doing a modeset; make sure this field is always
5311                  * initialized during the sanitization process that happens
5312                  * on the first commit too.
5313                  */
5314                 if (!intel_state->modeset)
5315                         intel_state->active_crtcs = dev_priv->active_crtcs;
5316         }
5317
5318         /*
5319          * If the modeset changes which CRTC's are active, we need to
5320          * recompute the DDB allocation for *all* active pipes, even
5321          * those that weren't otherwise being modified in any way by this
5322          * atomic commit.  Due to the shrinking of the per-pipe allocations
5323          * when new active CRTC's are added, it's possible for a pipe that
5324          * we were already using and aren't changing at all here to suddenly
5325          * become invalid if its DDB needs exceed its new allocation.
5326          *
5327          * Note that if we wind up doing a full DDB recompute, we can't let
5328          * any other display updates race with this transaction, so we need
5329          * to grab the lock on *all* CRTC's.
5330          */
5331         if (intel_state->active_pipe_changes || intel_state->modeset) {
5332                 realloc_pipes = ~0;
5333                 intel_state->wm_results.dirty_pipes = ~0;
5334         }
5335
5336         /*
5337          * We're not recomputing for the pipes not included in the commit, so
5338          * make sure we start with the current state.
5339          */
5340         for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
5341                 struct intel_crtc_state *cstate;
5342
5343                 cstate = intel_atomic_get_crtc_state(state, intel_crtc);
5344                 if (IS_ERR(cstate))
5345                         return PTR_ERR(cstate);
5346         }
5347
5348         return 0;
5349 }
5350
5351 static int
5352 skl_compute_wm(struct drm_atomic_state *state)
5353 {
5354         struct drm_crtc *crtc;
5355         struct drm_crtc_state *cstate;
5356         struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
5357         struct skl_ddb_values *results = &intel_state->wm_results;
5358         struct skl_pipe_wm *pipe_wm;
5359         bool changed = false;
5360         int ret, i;
5361
5362         /* Clear all dirty flags */
5363         results->dirty_pipes = 0;
5364
5365         ret = skl_ddb_add_affected_pipes(state, &changed);
5366         if (ret || !changed)
5367                 return ret;
5368
5369         ret = skl_compute_ddb(state);
5370         if (ret)
5371                 return ret;
5372
5373         /*
5374          * Calculate WMs for all pipes that are part of this transaction.
5375          * Note that the DDB allocation above may have added more CRTCs that
5376          * weren't otherwise being modified (and set bits in dirty_pipes) if
5377          * pipe allocations had to change.
5378          *
5379          * FIXME:  Now that we're doing this in the atomic check phase, we
5380          * should allow skl_update_pipe_wm() to return failure in cases where
5381          * no suitable watermark values can be found.
5382          */
5383         for_each_new_crtc_in_state(state, crtc, cstate, i) {
5384                 struct intel_crtc_state *intel_cstate =
5385                         to_intel_crtc_state(cstate);
5386                 const struct skl_pipe_wm *old_pipe_wm =
5387                         &to_intel_crtc_state(crtc->state)->wm.skl.optimal;
5388
5389                 pipe_wm = &intel_cstate->wm.skl.optimal;
5390                 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm,
5391                                          &results->ddb, &changed);
5392                 if (ret)
5393                         return ret;
5394
5395                 if (changed)
5396                         results->dirty_pipes |= drm_crtc_mask(crtc);
5397
5398                 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
5399                         /* This pipe's WMs did not change */
5400                         continue;
5401
5402                 intel_cstate->update_wm_pre = true;
5403         }
5404
5405         skl_print_wm_changes(state);
5406
5407         return 0;
5408 }
5409
5410 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state,
5411                                       struct intel_crtc_state *cstate)
5412 {
5413         struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
5414         struct drm_i915_private *dev_priv = to_i915(state->base.dev);
5415         struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
5416         const struct skl_ddb_allocation *ddb = &state->wm_results.ddb;
5417         enum pipe pipe = crtc->pipe;
5418         enum plane_id plane_id;
5419
5420         if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base)))
5421                 return;
5422
5423         I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime);
5424
5425         for_each_plane_id_on_crtc(crtc, plane_id) {
5426                 if (plane_id != PLANE_CURSOR)
5427                         skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id],
5428                                            ddb, plane_id);
5429                 else
5430                         skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id],
5431                                             ddb);
5432         }
5433 }
5434
5435 static void skl_initial_wm(struct intel_atomic_state *state,
5436                            struct intel_crtc_state *cstate)
5437 {
5438         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5439         struct drm_device *dev = intel_crtc->base.dev;
5440         struct drm_i915_private *dev_priv = to_i915(dev);
5441         struct skl_ddb_values *results = &state->wm_results;
5442         struct skl_ddb_values *hw_vals = &dev_priv->wm.skl_hw;
5443         enum pipe pipe = intel_crtc->pipe;
5444
5445         if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
5446                 return;
5447
5448         mutex_lock(&dev_priv->wm.wm_mutex);
5449
5450         if (cstate->base.active_changed)
5451                 skl_atomic_update_crtc_wm(state, cstate);
5452
5453         memcpy(hw_vals->ddb.uv_plane[pipe], results->ddb.uv_plane[pipe],
5454                sizeof(hw_vals->ddb.uv_plane[pipe]));
5455         memcpy(hw_vals->ddb.plane[pipe], results->ddb.plane[pipe],
5456                sizeof(hw_vals->ddb.plane[pipe]));
5457
5458         mutex_unlock(&dev_priv->wm.wm_mutex);
5459 }
5460
5461 static void ilk_compute_wm_config(struct drm_device *dev,
5462                                   struct intel_wm_config *config)
5463 {
5464         struct intel_crtc *crtc;
5465
5466         /* Compute the currently _active_ config */
5467         for_each_intel_crtc(dev, crtc) {
5468                 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
5469
5470                 if (!wm->pipe_enabled)
5471                         continue;
5472
5473                 config->sprites_enabled |= wm->sprites_enabled;
5474                 config->sprites_scaled |= wm->sprites_scaled;
5475                 config->num_pipes_active++;
5476         }
5477 }
5478
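/*
 * Recompute and program the global ILK-style watermarks: gather the active
 * pipe config, merge the LP watermarks for the 1/2 DDB partitioning (and
 * the 5/6 partitioning where that is an option), pick the better result
 * and write it to the hardware.
 */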
5479 static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
5480 {
5481         struct drm_device *dev = &dev_priv->drm;
5482         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
5483         struct ilk_wm_maximums max;
5484         struct intel_wm_config config = {};
5485         struct ilk_wm_values results = {};
5486         enum intel_ddb_partitioning partitioning;
5487
5488         ilk_compute_wm_config(dev, &config);
5489
5490         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
5491         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
5492
5493         /* 5/6 split only in single pipe config on IVB+ */
5494         if (INTEL_GEN(dev_priv) >= 7 &&
5495             config.num_pipes_active == 1 && config.sprites_enabled) {
5496                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
5497                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
5498
5499                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
5500         } else {
5501                 best_lp_wm = &lp_wm_1_2;
5502         }
5503
5504         partitioning = (best_lp_wm == &lp_wm_1_2) ?
5505                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
5506
5507         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
5508
5509         ilk_write_wm_values(dev_priv, &results);
5510 }
5511
5512 static void ilk_initial_watermarks(struct intel_atomic_state *state,
5513                                    struct intel_crtc_state *cstate)
5514 {
5515         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5516         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5517
5518         mutex_lock(&dev_priv->wm.wm_mutex);
5519         intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate;
5520         ilk_program_watermarks(dev_priv);
5521         mutex_unlock(&dev_priv->wm.wm_mutex);
5522 }
5523
5524 static void ilk_optimize_watermarks(struct intel_atomic_state *state,
5525                                     struct intel_crtc_state *cstate)
5526 {
5527         struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev);
5528         struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
5529
5530         mutex_lock(&dev_priv->wm.wm_mutex);
5531         if (cstate->wm.need_postvbl_update) {
5532                 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal;
5533                 ilk_program_watermarks(dev_priv);
5534         }
5535         mutex_unlock(&dev_priv->wm.wm_mutex);
5536 }
5537
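/*
 * Decode a single watermark level from its PLANE_WM/CUR_WM register value:
 * the enable bit, the result in blocks and the result in lines.
 */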
5538 static inline void skl_wm_level_from_reg_val(uint32_t val,
5539                                              struct skl_wm_level *level)
5540 {
5541         level->plane_en = val & PLANE_WM_EN;
5542         level->plane_res_b = val & PLANE_WM_BLOCKS_MASK;
5543         level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) &
5544                 PLANE_WM_LINES_MASK;
5545 }
5546
5547 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc,
5548                               struct skl_pipe_wm *out)
5549 {
5550         struct drm_i915_private *dev_priv = to_i915(crtc->dev);
5551         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5552         enum pipe pipe = intel_crtc->pipe;
5553         int level, max_level;
5554         enum plane_id plane_id;
5555         uint32_t val;
5556
5557         max_level = ilk_wm_max_level(dev_priv);
5558
5559         for_each_plane_id_on_crtc(intel_crtc, plane_id) {
5560                 struct skl_plane_wm *wm = &out->planes[plane_id];
5561
5562                 for (level = 0; level <= max_level; level++) {
5563                         if (plane_id != PLANE_CURSOR)
5564                                 val = I915_READ(PLANE_WM(pipe, plane_id, level));
5565                         else
5566                                 val = I915_READ(CUR_WM(pipe, level));
5567
5568                         skl_wm_level_from_reg_val(val, &wm->wm[level]);
5569                 }
5570
5571                 if (plane_id != PLANE_CURSOR)
5572                         val = I915_READ(PLANE_WM_TRANS(pipe, plane_id));
5573                 else
5574                         val = I915_READ(CUR_WM_TRANS(pipe));
5575
5576                 skl_wm_level_from_reg_val(val, &wm->trans_wm);
5577         }
5578
5579         if (!intel_crtc->active)
5580                 return;
5581
5582         out->linetime = I915_READ(PIPE_WM_LINETIME(pipe));
5583 }
5584
5585 void skl_wm_get_hw_state(struct drm_device *dev)
5586 {
5587         struct drm_i915_private *dev_priv = to_i915(dev);
5588         struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
5589         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
5590         struct drm_crtc *crtc;
5591         struct intel_crtc *intel_crtc;
5592         struct intel_crtc_state *cstate;
5593
5594         skl_ddb_get_hw_state(dev_priv, ddb);
5595         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
5596                 intel_crtc = to_intel_crtc(crtc);
5597                 cstate = to_intel_crtc_state(crtc->state);
5598
5599                 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal);
5600
5601                 if (intel_crtc->active)
5602                         hw->dirty_pipes |= drm_crtc_mask(crtc);
5603         }
5604
5605         if (dev_priv->active_crtcs) {
5606                 /* Fully recompute DDB on first atomic commit */
5607                 dev_priv->wm.distrust_bios_wm = true;
5608         } else {
5609                 /*
5610                  * Easy/common case; just sanitize the DDB now if everything
5611                  * is off. Keep the dbuf slice info intact.
5612                  */
5613                 memset(ddb->plane, 0, sizeof(ddb->plane));
5614                 memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane));
5615         }
5616 }
5617
5618 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
5619 {
5620         struct drm_device *dev = crtc->dev;
5621         struct drm_i915_private *dev_priv = to_i915(dev);
5622         struct ilk_wm_values *hw = &dev_priv->wm.hw;
5623         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
5624         struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
5625         struct intel_pipe_wm *active = &cstate->wm.ilk.optimal;
5626         enum pipe pipe = intel_crtc->pipe;
5627         static const i915_reg_t wm0_pipe_reg[] = {
5628                 [PIPE_A] = WM0_PIPEA_ILK,
5629                 [PIPE_B] = WM0_PIPEB_ILK,
5630                 [PIPE_C] = WM0_PIPEC_IVB,
5631         };
5632
5633         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
5634         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
5635                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
5636
5637         memset(active, 0, sizeof(*active));
5638
5639         active->pipe_enabled = intel_crtc->active;
5640
5641         if (active->pipe_enabled) {
5642                 u32 tmp = hw->wm_pipe[pipe];
5643
5644                 /*
5645                  * For active pipes LP0 watermark is marked as
5646                  * enabled, and LP1+ watermarks as disabled since
5647                  * we can't really reverse compute them in case
5648                  * multiple pipes are active.
5649                  */
5650                 active->wm[0].enable = true;
5651                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
5652                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
5653                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
5654                 active->linetime = hw->wm_linetime[pipe];
5655         } else {
5656                 int level, max_level = ilk_wm_max_level(dev_priv);
5657
5658                 /*
5659                  * For inactive pipes, all watermark levels
5660                  * should be marked as enabled but zeroed,
5661                  * which is what we'd compute for them.
5662                  */
5663                 for (level = 0; level <= max_level; level++)
5664                         active->wm[level].enable = true;
5665         }
5666
5667         intel_crtc->wm.active.ilk = *active;
5668 }
5669
5670 #define _FW_WM(value, plane) \
5671         (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT)
5672 #define _FW_WM_VLV(value, plane) \
5673         (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT)
5674
5675 static void g4x_read_wm_values(struct drm_i915_private *dev_priv,
5676                                struct g4x_wm_values *wm)
5677 {
5678         uint32_t tmp;
5679
5680         tmp = I915_READ(DSPFW1);
5681         wm->sr.plane = _FW_WM(tmp, SR);
5682         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5683         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEB);
5684         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM(tmp, PLANEA);
5685
5686         tmp = I915_READ(DSPFW2);
5687         wm->fbc_en = tmp & DSPFW_FBC_SR_EN;
5688         wm->sr.fbc = _FW_WM(tmp, FBC_SR);
5689         wm->hpll.fbc = _FW_WM(tmp, FBC_HPLL_SR);
5690         wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEB);
5691         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5692         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM(tmp, SPRITEA);
5693
5694         tmp = I915_READ(DSPFW3);
5695         wm->hpll_en = tmp & DSPFW_HPLL_SR_EN;
5696         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5697         wm->hpll.cursor = _FW_WM(tmp, HPLL_CURSOR);
5698         wm->hpll.plane = _FW_WM(tmp, HPLL_SR);
5699 }
5700
5701 static void vlv_read_wm_values(struct drm_i915_private *dev_priv,
5702                                struct vlv_wm_values *wm)
5703 {
5704         enum pipe pipe;
5705         uint32_t tmp;
5706
5707         for_each_pipe(dev_priv, pipe) {
5708                 tmp = I915_READ(VLV_DDL(pipe));
5709
5710                 wm->ddl[pipe].plane[PLANE_PRIMARY] =
5711                         (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5712                 wm->ddl[pipe].plane[PLANE_CURSOR] =
5713                         (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5714                 wm->ddl[pipe].plane[PLANE_SPRITE0] =
5715                         (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5716                 wm->ddl[pipe].plane[PLANE_SPRITE1] =
5717                         (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK);
5718         }
5719
5720         tmp = I915_READ(DSPFW1);
5721         wm->sr.plane = _FW_WM(tmp, SR);
5722         wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB);
5723         wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB);
5724         wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA);
5725
5726         tmp = I915_READ(DSPFW2);
5727         wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB);
5728         wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA);
5729         wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA);
5730
5731         tmp = I915_READ(DSPFW3);
5732         wm->sr.cursor = _FW_WM(tmp, CURSOR_SR);
5733
5734         if (IS_CHERRYVIEW(dev_priv)) {
5735                 tmp = I915_READ(DSPFW7_CHV);
5736                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5737                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5738
5739                 tmp = I915_READ(DSPFW8_CHV);
5740                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF);
5741                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE);
5742
5743                 tmp = I915_READ(DSPFW9_CHV);
5744                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC);
5745                 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC);
5746
5747                 tmp = I915_READ(DSPHOWM);
5748                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5749                 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8;
5750                 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8;
5751                 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8;
5752                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5753                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5754                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5755                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5756                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5757                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5758         } else {
5759                 tmp = I915_READ(DSPFW7);
5760                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED);
5761                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC);
5762
5763                 tmp = I915_READ(DSPHOWM);
5764                 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9;
5765                 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8;
5766                 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8;
5767                 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8;
5768                 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8;
5769                 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8;
5770                 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8;
5771         }
5772 }
5773
5774 #undef _FW_WM
5775 #undef _FW_WM_VLV
5776
5777 void g4x_wm_get_hw_state(struct drm_device *dev)
5778 {
5779         struct drm_i915_private *dev_priv = to_i915(dev);
5780         struct g4x_wm_values *wm = &dev_priv->wm.g4x;
5781         struct intel_crtc *crtc;
5782
5783         g4x_read_wm_values(dev_priv, wm);
5784
5785         wm->cxsr = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
5786
5787         for_each_intel_crtc(dev, crtc) {
5788                 struct intel_crtc_state *crtc_state =
5789                         to_intel_crtc_state(crtc->base.state);
5790                 struct g4x_wm_state *active = &crtc->wm.active.g4x;
5791                 struct g4x_pipe_wm *raw;
5792                 enum pipe pipe = crtc->pipe;
5793                 enum plane_id plane_id;
5794                 int level, max_level;
5795
5796                 active->cxsr = wm->cxsr;
5797                 active->hpll_en = wm->hpll_en;
5798                 active->fbc_en = wm->fbc_en;
5799
5800                 active->sr = wm->sr;
5801                 active->hpll = wm->hpll;
5802
5803                 for_each_plane_id_on_crtc(crtc, plane_id) {
5804                         active->wm.plane[plane_id] =
5805                                 wm->pipe[pipe].plane[plane_id];
5806                 }
5807
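                /*
                 * Rebuild the raw per-level watermarks from the hardware
                 * readout: level NORMAL from the per-pipe values, then the
                 * SR and HPLL levels when cxsr/hpll_en indicate they are
                 * in use.
                 */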
5808                 if (wm->cxsr && wm->hpll_en)
5809                         max_level = G4X_WM_LEVEL_HPLL;
5810                 else if (wm->cxsr)
5811                         max_level = G4X_WM_LEVEL_SR;
5812                 else
5813                         max_level = G4X_WM_LEVEL_NORMAL;
5814
5815                 level = G4X_WM_LEVEL_NORMAL;
5816                 raw = &crtc_state->wm.g4x.raw[level];
5817                 for_each_plane_id_on_crtc(crtc, plane_id)
5818                         raw->plane[plane_id] = active->wm.plane[plane_id];
5819
5820                 if (++level > max_level)
5821                         goto out;
5822
5823                 raw = &crtc_state->wm.g4x.raw[level];
5824                 raw->plane[PLANE_PRIMARY] = active->sr.plane;
5825                 raw->plane[PLANE_CURSOR] = active->sr.cursor;
5826                 raw->plane[PLANE_SPRITE0] = 0;
5827                 raw->fbc = active->sr.fbc;
5828
5829                 if (++level > max_level)
5830                         goto out;
5831
5832                 raw = &crtc_state->wm.g4x.raw[level];
5833                 raw->plane[PLANE_PRIMARY] = active->hpll.plane;
5834                 raw->plane[PLANE_CURSOR] = active->hpll.cursor;
5835                 raw->plane[PLANE_SPRITE0] = 0;
5836                 raw->fbc = active->hpll.fbc;
5837
5838         out:
5839                 for_each_plane_id_on_crtc(crtc, plane_id)
5840                         g4x_raw_plane_wm_set(crtc_state, level,
5841                                              plane_id, USHRT_MAX);
5842                 g4x_raw_fbc_wm_set(crtc_state, level, USHRT_MAX);
5843
5844                 crtc_state->wm.g4x.optimal = *active;
5845                 crtc_state->wm.g4x.intermediate = *active;
5846
5847                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite=%d\n",
5848                               pipe_name(pipe),
5849                               wm->pipe[pipe].plane[PLANE_PRIMARY],
5850                               wm->pipe[pipe].plane[PLANE_CURSOR],
5851                               wm->pipe[pipe].plane[PLANE_SPRITE0]);
5852         }
5853
5854         DRM_DEBUG_KMS("Initial SR watermarks: plane=%d, cursor=%d fbc=%d\n",
5855                       wm->sr.plane, wm->sr.cursor, wm->sr.fbc);
5856         DRM_DEBUG_KMS("Initial HPLL watermarks: plane=%d, cursor=%d fbc=%d\n",
5857                       wm->hpll.plane, wm->hpll.cursor, wm->hpll.fbc);
5858         DRM_DEBUG_KMS("Initial SR=%s HPLL=%s FBC=%s\n",
5859                       yesno(wm->cxsr), yesno(wm->hpll_en), yesno(wm->fbc_en));
5860 }
5861
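/*
 * Zero out the software watermark state for planes that are not visible,
 * so the state read back from hardware matches what we would have computed
 * for the current plane configuration.
 */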
5862 void g4x_wm_sanitize(struct drm_i915_private *dev_priv)
5863 {
5864         struct intel_plane *plane;
5865         struct intel_crtc *crtc;
5866
5867         mutex_lock(&dev_priv->wm.wm_mutex);
5868
5869         for_each_intel_plane(&dev_priv->drm, plane) {
5870                 struct intel_crtc *crtc =
5871                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
5872                 struct intel_crtc_state *crtc_state =
5873                         to_intel_crtc_state(crtc->base.state);
5874                 struct intel_plane_state *plane_state =
5875                         to_intel_plane_state(plane->base.state);
5876                 struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal;
5877                 enum plane_id plane_id = plane->id;
5878                 int level;
5879
5880                 if (plane_state->base.visible)
5881                         continue;
5882
5883                 for (level = 0; level < 3; level++) {
5884                         struct g4x_pipe_wm *raw =
5885                                 &crtc_state->wm.g4x.raw[level];
5886
5887                         raw->plane[plane_id] = 0;
5888                         wm_state->wm.plane[plane_id] = 0;
5889                 }
5890
5891                 if (plane_id == PLANE_PRIMARY) {
5892                         for (level = 0; level < 3; level++) {
5893                                 struct g4x_pipe_wm *raw =
5894                                         &crtc_state->wm.g4x.raw[level];
5895                                 raw->fbc = 0;
5896                         }
5897
5898                         wm_state->sr.fbc = 0;
5899                         wm_state->hpll.fbc = 0;
5900                         wm_state->fbc_en = false;
5901                 }
5902         }
5903
5904         for_each_intel_crtc(&dev_priv->drm, crtc) {
5905                 struct intel_crtc_state *crtc_state =
5906                         to_intel_crtc_state(crtc->base.state);
5907
5908                 crtc_state->wm.g4x.intermediate =
5909                         crtc_state->wm.g4x.optimal;
5910                 crtc->wm.active.g4x = crtc_state->wm.g4x.optimal;
5911         }
5912
5913         g4x_program_watermarks(dev_priv);
5914
5915         mutex_unlock(&dev_priv->wm.wm_mutex);
5916 }
5917
5918 void vlv_wm_get_hw_state(struct drm_device *dev)
5919 {
5920         struct drm_i915_private *dev_priv = to_i915(dev);
5921         struct vlv_wm_values *wm = &dev_priv->wm.vlv;
5922         struct intel_crtc *crtc;
5923         u32 val;
5924
5925         vlv_read_wm_values(dev_priv, wm);
5926
5927         wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
5928         wm->level = VLV_WM_LEVEL_PM2;
5929
5930         if (IS_CHERRYVIEW(dev_priv)) {
5931                 mutex_lock(&dev_priv->pcu_lock);
5932
5933                 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
5934                 if (val & DSP_MAXFIFO_PM5_ENABLE)
5935                         wm->level = VLV_WM_LEVEL_PM5;
5936
5937                 /*
5938                  * If DDR DVFS is disabled in the BIOS, the Punit
5939                  * will never ack the request. So if that happens,
5940                  * assume we don't have to enable/disable DDR DVFS
5941                  * dynamically. To test that, just set the REQ_ACK
5942                  * bit to poke the Punit, but don't change the
5943                  * HIGH/LOW bits so that we don't actually change
5944                  * the current state.
5945                  */
5946                 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5947                 val |= FORCE_DDR_FREQ_REQ_ACK;
5948                 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);
5949
5950                 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
5951                               FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) {
5952                         DRM_DEBUG_KMS("Punit not acking DDR DVFS request, "
5953                                       "assuming DDR DVFS is disabled\n");
5954                         dev_priv->wm.max_level = VLV_WM_LEVEL_PM5;
5955                 } else {
5956                         val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
5957                         if ((val & FORCE_DDR_HIGH_FREQ) == 0)
5958                                 wm->level = VLV_WM_LEVEL_DDR_DVFS;
5959                 }
5960
5961                 mutex_unlock(&dev_priv->pcu_lock);
5962         }
5963
5964         for_each_intel_crtc(dev, crtc) {
5965                 struct intel_crtc_state *crtc_state =
5966                         to_intel_crtc_state(crtc->base.state);
5967                 struct vlv_wm_state *active = &crtc->wm.active.vlv;
5968                 const struct vlv_fifo_state *fifo_state =
5969                         &crtc_state->wm.vlv.fifo_state;
5970                 enum pipe pipe = crtc->pipe;
5971                 enum plane_id plane_id;
5972                 int level;
5973
5974                 vlv_get_fifo_size(crtc_state);
5975
5976                 active->num_levels = wm->level + 1;
5977                 active->cxsr = wm->cxsr;
5978
5979                 for (level = 0; level < active->num_levels; level++) {
5980                         struct g4x_pipe_wm *raw =
5981                                 &crtc_state->wm.vlv.raw[level];
5982
5983                         active->sr[level].plane = wm->sr.plane;
5984                         active->sr[level].cursor = wm->sr.cursor;
5985
5986                         for_each_plane_id_on_crtc(crtc, plane_id) {
5987                                 active->wm[level].plane[plane_id] =
5988                                         wm->pipe[pipe].plane[plane_id];
5989
5990                                 raw->plane[plane_id] =
5991                                         vlv_invert_wm_value(active->wm[level].plane[plane_id],
5992                                                             fifo_state->plane[plane_id]);
5993                         }
5994                 }
5995
5996                 for_each_plane_id_on_crtc(crtc, plane_id)
5997                         vlv_raw_plane_wm_set(crtc_state, level,
5998                                              plane_id, USHRT_MAX);
5999                 vlv_invalidate_wms(crtc, active, level);
6000
6001                 crtc_state->wm.vlv.optimal = *active;
6002                 crtc_state->wm.vlv.intermediate = *active;
6003
6004                 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n",
6005                               pipe_name(pipe),
6006                               wm->pipe[pipe].plane[PLANE_PRIMARY],
6007                               wm->pipe[pipe].plane[PLANE_CURSOR],
6008                               wm->pipe[pipe].plane[PLANE_SPRITE0],
6009                               wm->pipe[pipe].plane[PLANE_SPRITE1]);
6010         }
6011
6012         DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n",
6013                       wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr);
6014 }
6015
6016 void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
6017 {
6018         struct intel_plane *plane;
6019         struct intel_crtc *crtc;
6020
6021         mutex_lock(&dev_priv->wm.wm_mutex);
6022
6023         for_each_intel_plane(&dev_priv->drm, plane) {
6024                 struct intel_crtc *crtc =
6025                         intel_get_crtc_for_pipe(dev_priv, plane->pipe);
6026                 struct intel_crtc_state *crtc_state =
6027                         to_intel_crtc_state(crtc->base.state);
6028                 struct intel_plane_state *plane_state =
6029                         to_intel_plane_state(plane->base.state);
6030                 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
6031                 const struct vlv_fifo_state *fifo_state =
6032                         &crtc_state->wm.vlv.fifo_state;
6033                 enum plane_id plane_id = plane->id;
6034                 int level;
6035
6036                 if (plane_state->base.visible)
6037                         continue;
6038
6039                 for (level = 0; level < wm_state->num_levels; level++) {
6040                         struct g4x_pipe_wm *raw =
6041                                 &crtc_state->wm.vlv.raw[level];
6042
6043                         raw->plane[plane_id] = 0;
6044
6045                         wm_state->wm[level].plane[plane_id] =
6046                                 vlv_invert_wm_value(raw->plane[plane_id],
6047                                                     fifo_state->plane[plane_id]);
6048                 }
6049         }
6050
6051         for_each_intel_crtc(&dev_priv->drm, crtc) {
6052                 struct intel_crtc_state *crtc_state =
6053                         to_intel_crtc_state(crtc->base.state);
6054
6055                 crtc_state->wm.vlv.intermediate =
6056                         crtc_state->wm.vlv.optimal;
6057                 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
6058         }
6059
6060         vlv_program_watermarks(dev_priv);
6061
6062         mutex_unlock(&dev_priv->wm.wm_mutex);
6063 }
6064
6065 /*
6066  * FIXME should probably kill this and improve
6067  * the real watermark readout/sanitation instead
6068  */
6069 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
6070 {
6071         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
6072         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
6073         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
6074
6075         /*
6076          * Don't touch WM1S_LP_EN here.
6077          * Doing so could cause underruns.
6078          */
6079 }
6080
6081 void ilk_wm_get_hw_state(struct drm_device *dev)
6082 {
6083         struct drm_i915_private *dev_priv = to_i915(dev);
6084         struct ilk_wm_values *hw = &dev_priv->wm.hw;
6085         struct drm_crtc *crtc;
6086
6087         ilk_init_lp_watermarks(dev_priv);
6088
6089         for_each_crtc(dev, crtc)
6090                 ilk_pipe_wm_get_hw_state(crtc);
6091
6092         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
6093         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
6094         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
6095
6096         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
6097         if (INTEL_GEN(dev_priv) >= 7) {
6098                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
6099                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
6100         }
6101
6102         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6103                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
6104                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6105         else if (IS_IVYBRIDGE(dev_priv))
6106                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
6107                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
6108
6109         hw->enable_fbc_wm =
6110                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
6111 }
6112
6113 /**
6114  * intel_update_watermarks - update FIFO watermark values based on current modes
6115  * @crtc: the #intel_crtc on which to compute the WM
6116  *
6117  * Calculate watermark values for the various WM regs based on current mode
6118  * and plane configuration.
6119  *
6120  * There are several cases to deal with here:
6121  *   - normal (i.e. non-self-refresh)
6122  *   - self-refresh (SR) mode
6123  *   - lines are large relative to FIFO size (buffer can hold up to 2)
6124  *   - lines are small relative to FIFO size (buffer can hold more than 2
6125  *     lines), so need to account for TLB latency
6126  *
6127  *   The normal calculation is:
6128  *     watermark = dotclock * bytes per pixel * latency
6129  *   where latency is platform & configuration dependent (we assume pessimal
6130  *   values here).
6131  *
6132  *   The SR calculation is:
6133  *     watermark = (trunc(latency/line time)+1) * surface width *
6134  *       bytes per pixel
6135  *   where
6136  *     line time = htotal / dotclock
6137  *     surface width = hdisplay for normal plane and 64 for cursor
6138  *   and latency is assumed to be high, as above.
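 *
 *   Purely as an illustration (made-up numbers, not from any particular
 *   platform): a 100 MHz dotclock at 4 bytes per pixel with a 10 usec
 *   latency needs roughly 100e6 * 4 * 10e-6 = 4000 bytes buffered in the
 *   normal case, while the SR case with a 30 usec latency, a 20 usec line
 *   time and a 1920 pixel wide plane needs
 *   (trunc(30/20) + 1) * 1920 * 4 = 15360 bytes, before the rounding and
 *   extra entries described below.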
6139  *
6140  * The final value programmed to the register should always be rounded up,
6141  * and include an extra 2 entries to account for clock crossings.
6142  *
6143  * We don't use the sprite, so we can ignore that.  And on Crestline we have
6144  * to set the non-SR watermarks to 8.
6145  */
6146 void intel_update_watermarks(struct intel_crtc *crtc)
6147 {
6148         struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
6149
6150         if (dev_priv->display.update_wm)
6151                 dev_priv->display.update_wm(crtc);
6152 }
6153
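/*
 * Write the current IPC enable decision to DISP_ARB_CTL2; the workaround
 * checks below may first force IPC off on affected platforms and memory
 * configurations.
 */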
6154 void intel_enable_ipc(struct drm_i915_private *dev_priv)
6155 {
6156         u32 val;
6157
6158         /* Display WA #0477 WaDisableIPC: skl */
6159         if (IS_SKYLAKE(dev_priv))
6160                 dev_priv->ipc_enabled = false;
6161
6162         /* Display WA #1141: SKL:all KBL:all CFL */
6163         if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
6164             !dev_priv->dram_info.symmetric_memory)
6165                 dev_priv->ipc_enabled = false;
6166
6167         val = I915_READ(DISP_ARB_CTL2);
6168
6169         if (dev_priv->ipc_enabled)
6170                 val |= DISP_IPC_ENABLE;
6171         else
6172                 val &= ~DISP_IPC_ENABLE;
6173
6174         I915_WRITE(DISP_ARB_CTL2, val);
6175 }
6176
6177 void intel_init_ipc(struct drm_i915_private *dev_priv)
6178 {
6179         dev_priv->ipc_enabled = false;
6180         if (!HAS_IPC(dev_priv))
6181                 return;
6182
6183         dev_priv->ipc_enabled = true;
6184         intel_enable_ipc(dev_priv);
6185 }
6186
6187 /*
6188  * Lock protecting IPS related data structures
6189  */
6190 DEFINE_SPINLOCK(mchdev_lock);
6191
6192 /* Global for the IPS driver to get at the current i915 device. Protected by
6193  * mchdev_lock. */
6194 static struct drm_i915_private *i915_mch_dev;
6195
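/*
 * Request a new DRPS frequency point via MEMSWCTL. Returns false if the
 * hardware is still busy with a previous command, true once the new
 * request has been issued.
 */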
6196 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
6197 {
6198         u16 rgvswctl;
6199
6200         lockdep_assert_held(&mchdev_lock);
6201
6202         rgvswctl = I915_READ16(MEMSWCTL);
6203         if (rgvswctl & MEMCTL_CMD_STS) {
6204                 DRM_DEBUG("gpu busy, RCS change rejected\n");
6205                 return false; /* still busy with another command */
6206         }
6207
6208         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
6209                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
6210         I915_WRITE16(MEMSWCTL, rgvswctl);
6211         POSTING_READ16(MEMSWCTL);
6212
6213         rgvswctl |= MEMCTL_CMD_STS;
6214         I915_WRITE16(MEMSWCTL, rgvswctl);
6215
6216         return true;
6217 }
6218
6219 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
6220 {
6221         u32 rgvmodectl;
6222         u8 fmax, fmin, fstart, vstart;
6223
6224         spin_lock_irq(&mchdev_lock);
6225
6226         rgvmodectl = I915_READ(MEMMODECTL);
6227
6228         /* Enable temp reporting */
6229         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
6230         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
6231
6232         /* 100ms RC evaluation intervals */
6233         I915_WRITE(RCUPEI, 100000);
6234         I915_WRITE(RCDNEI, 100000);
6235
6236         /* Set max/min thresholds to 90ms and 80ms respectively */
6237         I915_WRITE(RCBMAXAVG, 90000);
6238         I915_WRITE(RCBMINAVG, 80000);
6239
6240         I915_WRITE(MEMIHYST, 1);
6241
6242         /* Set up min, max, and cur for interrupt handling */
6243         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
6244         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
6245         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
6246                 MEMMODE_FSTART_SHIFT;
6247
6248         vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
6249                 PXVFREQ_PX_SHIFT;
6250
6251         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
6252         dev_priv->ips.fstart = fstart;
6253
6254         dev_priv->ips.max_delay = fstart;
6255         dev_priv->ips.min_delay = fmin;
6256         dev_priv->ips.cur_delay = fstart;
6257
6258         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
6259                          fmax, fmin, fstart);
6260
6261         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
6262
6263         /*
6264          * Interrupts will be enabled in ironlake_irq_postinstall
6265          */
6266
6267         I915_WRITE(VIDSTART, vstart);
6268         POSTING_READ(VIDSTART);
6269
6270         rgvmodectl |= MEMMODE_SWMODE_EN;
6271         I915_WRITE(MEMMODECTL, rgvmodectl);
6272
6273         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
6274                 DRM_ERROR("stuck trying to change perf mode\n");
6275         mdelay(1);
6276
6277         ironlake_set_drps(dev_priv, fstart);
6278
6279         dev_priv->ips.last_count1 = I915_READ(DMIEC) +
6280                 I915_READ(DDREC) + I915_READ(CSIEC);
6281         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
6282         dev_priv->ips.last_count2 = I915_READ(GFXEC);
6283         dev_priv->ips.last_time2 = ktime_get_raw_ns();
6284
6285         spin_unlock_irq(&mchdev_lock);
6286 }
6287
6288 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
6289 {
6290         u16 rgvswctl;
6291
6292         spin_lock_irq(&mchdev_lock);
6293
6294         rgvswctl = I915_READ16(MEMSWCTL);
6295
6296         /* Ack interrupts, disable EFC interrupt */
6297         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
6298         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
6299         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
6300         I915_WRITE(DEIIR, DE_PCU_EVENT);
6301         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
6302
6303         /* Go back to the starting frequency */
6304         ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
6305         mdelay(1);
6306         rgvswctl |= MEMCTL_CMD_STS;
6307         I915_WRITE(MEMSWCTL, rgvswctl);
6308         mdelay(1);
6309
6310         spin_unlock_irq(&mchdev_lock);
6311 }
6312
6313 /* There's a funny hw issue where the hw returns all 0 when reading from
6314  * GEN6_RP_INTERRUPT_LIMITS. Hence we always compute the desired value
6315  * ourselves, instead of doing a read-modify-write cycle (which might end up
6316  * clearing all limits and leaving the gpu stuck at its current frequency).
6317  */
6318 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
6319 {
6320         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6321         u32 limits;
6322
6323         /* Only set the down limit when we've reached the lowest level to avoid
6324          * getting more interrupts; otherwise leave this clear. This prevents a
6325          * race in the hw when coming out of rc6: there's a tiny window where
6326          * the hw runs at the minimal clock before selecting the desired
6327          * frequency; if the down threshold expires in that window we will not
6328          * receive a down interrupt. */
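        /*
         * Field layout follows from the shifts used below: gen9+ places
         * the max limit at bit 23 and the min limit at bit 14 of
         * GEN6_RP_INTERRUPT_LIMITS, earlier gens at bits 24 and 16.
         */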
6329         if (INTEL_GEN(dev_priv) >= 9) {
6330                 limits = (rps->max_freq_softlimit) << 23;
6331                 if (val <= rps->min_freq_softlimit)
6332                         limits |= (rps->min_freq_softlimit) << 14;
6333         } else {
6334                 limits = rps->max_freq_softlimit << 24;
6335                 if (val <= rps->min_freq_softlimit)
6336                         limits |= rps->min_freq_softlimit << 16;
6337         }
6338
6339         return limits;
6340 }
6341
6342 static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
6343 {
6344         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6345         u32 threshold_up = 0, threshold_down = 0; /* in % */
6346         u32 ei_up = 0, ei_down = 0;
6347
6348         lockdep_assert_held(&rps->power.mutex);
6349
6350         if (new_power == rps->power.mode)
6351                 return;
6352
6353         /* Note the units here are not exactly 1us, but 1280ns. */
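        /*
         * Illustrative only: with that 1280ns unit, the 16000us LOW_POWER
         * up-interval below is roughly 16000 / 1.28 = 12500 hardware
         * intervals; GT_INTERVAL_FROM_US() does the actual conversion
         * when the registers are written further down.
         */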
6354         switch (new_power) {
6355         case LOW_POWER:
6356                 /* Upclock if more than 95% busy over 16ms */
6357                 ei_up = 16000;
6358                 threshold_up = 95;
6359
6360                 /* Downclock if less than 85% busy over 32ms */
6361                 ei_down = 32000;
6362                 threshold_down = 85;
6363                 break;
6364
6365         case BETWEEN:
6366                 /* Upclock if more than 90% busy over 13ms */
6367                 ei_up = 13000;
6368                 threshold_up = 90;
6369
6370                 /* Downclock if less than 75% busy over 32ms */
6371                 ei_down = 32000;
6372                 threshold_down = 75;
6373                 break;
6374
6375         case HIGH_POWER:
6376                 /* Upclock if more than 85% busy over 10ms */
6377                 ei_up = 10000;
6378                 threshold_up = 85;
6379
6380                 /* Downclock if less than 60% busy over 32ms */
6381                 ei_down = 32000;
6382                 threshold_down = 60;
6383                 break;
6384         }
6385
6386         /* Once byt can survive dynamic sw freq adjustments without
6387          * hanging the system, this restriction can be lifted.
6388          */
6389         if (IS_VALLEYVIEW(dev_priv))
6390                 goto skip_hw_write;
6391
6392         I915_WRITE(GEN6_RP_UP_EI,
6393                    GT_INTERVAL_FROM_US(dev_priv, ei_up));
6394         I915_WRITE(GEN6_RP_UP_THRESHOLD,
6395                    GT_INTERVAL_FROM_US(dev_priv,
6396                                        ei_up * threshold_up / 100));
6397
6398         I915_WRITE(GEN6_RP_DOWN_EI,
6399                    GT_INTERVAL_FROM_US(dev_priv, ei_down));
6400         I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
6401                    GT_INTERVAL_FROM_US(dev_priv,
6402                                        ei_down * threshold_down / 100));
6403
6404         I915_WRITE(GEN6_RP_CONTROL,
6405                    GEN6_RP_MEDIA_TURBO |
6406                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
6407                    GEN6_RP_MEDIA_IS_GFX |
6408                    GEN6_RP_ENABLE |
6409                    GEN6_RP_UP_BUSY_AVG |
6410                    GEN6_RP_DOWN_IDLE_AVG);
6411
6412 skip_hw_write:
6413         rps->power.mode = new_power;
6414         rps->power.up_threshold = threshold_up;
6415         rps->power.down_threshold = threshold_down;
6416 }
6417
6418 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6419 {
6420         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6421         int new_power;
6422
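        /*
         * Choose the new power mode with some hysteresis based on the
         * requested frequency: step up from LOW_POWER/BETWEEN only when
         * val is above the efficient/RP0 thresholds and rising, step
         * down only when it is below them and falling; the softlimit
         * extremes below always force LOW_POWER/HIGH_POWER.
         */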
6423         new_power = rps->power.mode;
6424         switch (rps->power.mode) {
6425         case LOW_POWER:
6426                 if (val > rps->efficient_freq + 1 &&
6427                     val > rps->cur_freq)
6428                         new_power = BETWEEN;
6429                 break;
6430
6431         case BETWEEN:
6432                 if (val <= rps->efficient_freq &&
6433                     val < rps->cur_freq)
6434                         new_power = LOW_POWER;
6435                 else if (val >= rps->rp0_freq &&
6436                          val > rps->cur_freq)
6437                         new_power = HIGH_POWER;
6438                 break;
6439
6440         case HIGH_POWER:
6441                 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6442                     val < rps->cur_freq)
6443                         new_power = BETWEEN;
6444                 break;
6445         }
6446         /* Max/min bins are special */
6447         if (val <= rps->min_freq_softlimit)
6448                 new_power = LOW_POWER;
6449         if (val >= rps->max_freq_softlimit)
6450                 new_power = HIGH_POWER;
6451
6452         mutex_lock(&rps->power.mutex);
6453         if (rps->power.interactive)
6454                 new_power = HIGH_POWER;
6455         rps_set_power(dev_priv, new_power);
6456         mutex_unlock(&rps->power.mutex);
6457 }
6458
6459 void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6460 {
6461         struct intel_rps *rps = &i915->gt_pm.rps;
6462
6463         if (INTEL_GEN(i915) < 6)
6464                 return;
6465
6466         mutex_lock(&rps->power.mutex);
6467         if (interactive) {
6468                 if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6469                         rps_set_power(i915, HIGH_POWER);
6470         } else {
6471                 GEM_BUG_ON(!rps->power.interactive);
6472                 rps->power.interactive--;
6473         }
6474         mutex_unlock(&rps->power.mutex);
6475 }
6476
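/*
 * Build the PM interrupt mask for a given frequency: only leave the up/down
 * events unmasked while they can still move us within the current softlimits,
 * and never unmask events the platform does not support.
 */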
6477 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
6478 {
6479         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6480         u32 mask = 0;
6481
6482         /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
6483         if (val > rps->min_freq_softlimit)
6484                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
6485         if (val < rps->max_freq_softlimit)
6486                 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
6487
6488         mask &= dev_priv->pm_rps_events;
6489
6490         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
6491 }
6492
6493 /* gen6_set_rps is called to update the frequency request, but should also be
6494  * called when the range (min_delay and max_delay) is modified so that we can
6495  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
6496 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
6497 {
6498         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6499
6500         /* The min/max delay may still have been modified, so be sure to
6501          * write the limits value.
6502          */
6503         if (val != rps->cur_freq) {
6504                 gen6_set_rps_thresholds(dev_priv, val);
6505
6506                 if (INTEL_GEN(dev_priv) >= 9)
6507                         I915_WRITE(GEN6_RPNSWREQ,
6508                                    GEN9_FREQUENCY(val));
6509                 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
6510                         I915_WRITE(GEN6_RPNSWREQ,
6511                                    HSW_FREQUENCY(val));
6512                 else
6513                         I915_WRITE(GEN6_RPNSWREQ,
6514                                    GEN6_FREQUENCY(val) |
6515                                    GEN6_OFFSET(0) |
6516                                    GEN6_AGGRESSIVE_TURBO);
6517         }
6518
6519         /* Make sure we continue to get interrupts
6520          * until we hit the minimum or maximum frequencies.
6521          */
6522         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
6523         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6524
6525         rps->cur_freq = val;
6526         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6527
6528         return 0;
6529 }
6530
6531 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
6532 {
6533         int err;
6534
6535         if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1),
6536                       "Odd GPU freq value\n"))
6537                 val &= ~1;
6538
6539         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
6540
6541         if (val != dev_priv->gt_pm.rps.cur_freq) {
6542                 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
6543                 if (err)
6544                         return err;
6545
6546                 gen6_set_rps_thresholds(dev_priv, val);
6547         }
6548
6549         dev_priv->gt_pm.rps.cur_freq = val;
6550         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
6551
6552         return 0;
6553 }
6554
6555 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
6556  *
6557  * If Gfx is Idle, then
6558  * 1. Forcewake Media well.
6559  * 2. Request idle freq.
6560  * 3. Release Forcewake of Media well.
6561  */
6562 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
6563 {
6564         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6565         u32 val = rps->idle_freq;
6566         int err;
6567
6568         if (rps->cur_freq <= val)
6569                 return;
6570
6571         /* The punit delays the write of the frequency and voltage until it
6572          * determines the GPU is awake. During normal usage we don't want to
6573          * waste power changing the frequency if the GPU is sleeping (rc6).
6574          * However, the GPU and driver are now idle and we do not want to delay
6575          * switching to minimum voltage (reducing power whilst idle) as we do
6576          * not expect to be woken in the near future and so must flush the
6577          * change by waking the device.
6578          *
6579          * We choose to take the media powerwell (either would do to trick the
6580          * punit into committing the voltage change) as that takes a lot less
6581          * power than the render powerwell.
6582          */
6583         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
6584         err = valleyview_set_rps(dev_priv, val);
6585         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
6586
6587         if (err)
6588                 DRM_ERROR("Failed to set RPS for idle\n");
6589 }
6590
6591 void gen6_rps_busy(struct drm_i915_private *dev_priv)
6592 {
6593         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6594
6595         mutex_lock(&dev_priv->pcu_lock);
6596         if (rps->enabled) {
6597                 u8 freq;
6598
6599                 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
6600                         gen6_rps_reset_ei(dev_priv);
6601                 I915_WRITE(GEN6_PMINTRMSK,
6602                            gen6_rps_pm_mask(dev_priv, rps->cur_freq));
6603
6604                 gen6_enable_rps_interrupts(dev_priv);
6605
6606                 /* Use the user's desired frequency as a guide, but for better
6607                  * performance, jump directly to RPe as our starting frequency.
6608                  */
6609                 freq = max(rps->cur_freq,
6610                            rps->efficient_freq);
6611
6612                 if (intel_set_rps(dev_priv,
6613                                   clamp(freq,
6614                                         rps->min_freq_softlimit,
6615                                         rps->max_freq_softlimit)))
6616                         DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
6617         }
6618         mutex_unlock(&dev_priv->pcu_lock);
6619 }
6620
6621 void gen6_rps_idle(struct drm_i915_private *dev_priv)
6622 {
6623         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6624
6625         /* Flush our bottom-half so that it does not race with us
6626          * setting the idle frequency and so that it is bounded by
6627          * our rpm wakeref. And then disable the interrupts to stop any
6628          * further RPS reclocking whilst we are asleep.
6629          */
6630         gen6_disable_rps_interrupts(dev_priv);
6631
6632         mutex_lock(&dev_priv->pcu_lock);
6633         if (rps->enabled) {
6634                 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6635                         vlv_set_rps_idle(dev_priv);
6636                 else
6637                         gen6_set_rps(dev_priv, rps->idle_freq);
6638                 rps->last_adj = 0;
6639                 I915_WRITE(GEN6_PMINTRMSK,
6640                            gen6_sanitize_rps_pm_mask(dev_priv, ~0));
6641         }
6642         mutex_unlock(&dev_priv->pcu_lock);
6643 }
6644
6645 void gen6_rps_boost(struct i915_request *rq,
6646                     struct intel_rps_client *rps_client)
6647 {
6648         struct intel_rps *rps = &rq->i915->gt_pm.rps;
6649         unsigned long flags;
6650         bool boost;
6651
6652         /* This is intentionally racy! We peek at the state here, then
6653          * validate inside the RPS worker.
6654          */
6655         if (!rps->enabled)
6656                 return;
6657
6658         if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
6659                 return;
6660
6661         /* Serializes with i915_request_retire() */
6662         boost = false;
6663         spin_lock_irqsave(&rq->lock, flags);
6664         if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) {
6665                 boost = !atomic_fetch_inc(&rps->num_waiters);
6666                 rq->waitboost = true;
6667         }
6668         spin_unlock_irqrestore(&rq->lock, flags);
6669         if (!boost)
6670                 return;
6671
6672         if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
6673                 schedule_work(&rps->work);
6674
6675         atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
6676 }
6677
6678 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
6679 {
6680         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6681         int err;
6682
6683         lockdep_assert_held(&dev_priv->pcu_lock);
6684         GEM_BUG_ON(val > rps->max_freq);
6685         GEM_BUG_ON(val < rps->min_freq);
6686
6687         if (!rps->enabled) {
6688                 rps->cur_freq = val;
6689                 return 0;
6690         }
6691
6692         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
6693                 err = valleyview_set_rps(dev_priv, val);
6694         else
6695                 err = gen6_set_rps(dev_priv, val);
6696
6697         return err;
6698 }
6699
6700 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
6701 {
6702         I915_WRITE(GEN6_RC_CONTROL, 0);
6703         I915_WRITE(GEN9_PG_ENABLE, 0);
6704 }
6705
6706 static void gen9_disable_rps(struct drm_i915_private *dev_priv)
6707 {
6708         I915_WRITE(GEN6_RP_CONTROL, 0);
6709 }
6710
6711 static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
6712 {
6713         I915_WRITE(GEN6_RC_CONTROL, 0);
6714 }
6715
6716 static void gen6_disable_rps(struct drm_i915_private *dev_priv)
6717 {
6718         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
6719         I915_WRITE(GEN6_RP_CONTROL, 0);
6720 }
6721
6722 static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
6723 {
6724         I915_WRITE(GEN6_RC_CONTROL, 0);
6725 }
6726
6727 static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
6728 {
6729         I915_WRITE(GEN6_RP_CONTROL, 0);
6730 }
6731
6732 static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
6733 {
6734         /* We're doing forcewake before disabling RC6,
6735          * as this is what the BIOS expects when going into suspend */
6736         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6737
6738         I915_WRITE(GEN6_RC_CONTROL, 0);
6739
6740         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6741 }
6742
6743 static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
6744 {
6745         I915_WRITE(GEN6_RP_CONTROL, 0);
6746 }
6747
6748 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
6749 {
6750         bool enable_rc6 = true;
6751         unsigned long rc6_ctx_base;
6752         u32 rc_ctl;
6753         int rc_sw_target;
6754
6755         rc_ctl = I915_READ(GEN6_RC_CONTROL);
6756         rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
6757                        RC_SW_TARGET_STATE_SHIFT;
6758         DRM_DEBUG_DRIVER("BIOS enabled RC states: "
6759                          "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
6760                          onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
6761                          onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
6762                          rc_sw_target);
6763
6764         if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
6765                 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
6766                 enable_rc6 = false;
6767         }
6768
6769         /*
6770          * The exact context size is not known for BXT, so assume a page size
6771          * for this check.
6772          */
6773         rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
6774         if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) &&
6775               (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) {
6776                 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
6777                 enable_rc6 = false;
6778         }
6779
6780         if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
6781               ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
6782               ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
6783               ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
6784                 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
6785                 enable_rc6 = false;
6786         }
6787
6788         if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
6789             !I915_READ(GEN8_PUSHBUS_ENABLE) ||
6790             !I915_READ(GEN8_PUSHBUS_SHIFT)) {
6791                 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
6792                 enable_rc6 = false;
6793         }
6794
6795         if (!I915_READ(GEN6_GFXPAUSE)) {
6796                 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
6797                 enable_rc6 = false;
6798         }
6799
6800         if (!I915_READ(GEN8_MISC_CTRL0)) {
6801                 DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
6802                 enable_rc6 = false;
6803         }
6804
6805         return enable_rc6;
6806 }
6807
6808 static bool sanitize_rc6(struct drm_i915_private *i915)
6809 {
6810         struct intel_device_info *info = mkwrite_device_info(i915);
6811
6812         /* Powersaving is controlled by the host when inside a VM */
6813         if (intel_vgpu_active(i915))
6814                 info->has_rc6 = 0;
6815
6816         if (info->has_rc6 &&
6817             IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
6818                 DRM_INFO("RC6 disabled by BIOS\n");
6819                 info->has_rc6 = 0;
6820         }
6821
6822         /*
6823          * We assume that we do not have any deep rc6 levels if we don't
6824          * have the previous rc6 level supported, i.e. we use HAS_RC6()
6825          * as the initial coarse check for rc6 in general, moving on to
6826          * progressively finer/deeper levels.
6827          */
6828         if (!info->has_rc6 && info->has_rc6p)
6829                 info->has_rc6p = 0;
6830
6831         return info->has_rc6;
6832 }
6833
6834 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
6835 {
6836         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6837
6838         /* All of these values are in units of 50MHz */
6839
6840         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
6841         if (IS_GEN9_LP(dev_priv)) {
6842                 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
6843                 rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
6844                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
6845                 rps->min_freq = (rp_state_cap >>  0) & 0xff;
6846         } else {
6847                 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
6848                 rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
6849                 rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
6850                 rps->min_freq = (rp_state_cap >> 16) & 0xff;
6851         }
6852         /* hw_max = RP0 until we check for overclocking */
6853         rps->max_freq = rps->rp0_freq;
6854
6855         rps->efficient_freq = rps->rp1_freq;
6856         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
6857             IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
6858                 u32 ddcc_status = 0;
6859
6860                 if (sandybridge_pcode_read(dev_priv,
6861                                            HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
6862                                            &ddcc_status) == 0)
6863                         rps->efficient_freq =
6864                                 clamp_t(u8,
6865                                         ((ddcc_status >> 8) & 0xff),
6866                                         rps->min_freq,
6867                                         rps->max_freq);
6868         }
6869
6870         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
6871                 /* Store the frequency values in 16.66 MHz units, which is
6872                  * the natural hardware unit for SKL
6873                  */
6874                 rps->rp0_freq *= GEN9_FREQ_SCALER;
6875                 rps->rp1_freq *= GEN9_FREQ_SCALER;
6876                 rps->min_freq *= GEN9_FREQ_SCALER;
6877                 rps->max_freq *= GEN9_FREQ_SCALER;
6878                 rps->efficient_freq *= GEN9_FREQ_SCALER;
6879         }
6880 }
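
/*
 * Worked example (illustrative numbers, assuming GEN9_FREQ_SCALER is 3):
 * an RP0 field of 22 read from RP_STATE_CAP means 22 * 50 MHz = 1100 MHz.
 * On SKL-class hardware the value is then rescaled to 22 * 3 = 66, i.e. the
 * same 1100 MHz expressed in 16.66 MHz units (66 * 16.66 MHz ~= 1100 MHz).
 */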
6881
6882 static void reset_rps(struct drm_i915_private *dev_priv,
6883                       int (*set)(struct drm_i915_private *, u8))
6884 {
6885         struct intel_rps *rps = &dev_priv->gt_pm.rps;
6886         u8 freq = rps->cur_freq;
6887
6888         /* force a reset */
6889         rps->power.mode = -1;
6890         rps->cur_freq = -1;
6891
6892         if (set(dev_priv, freq))
6893                 DRM_ERROR("Failed to reset RPS to initial values\n");
6894 }
6895
6896 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
6897 static void gen9_enable_rps(struct drm_i915_private *dev_priv)
6898 {
6899         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6900
6901         /* Program defaults and thresholds for RPS */
6902         if (IS_GEN9(dev_priv))
6903                 I915_WRITE(GEN6_RC_VIDEO_FREQ,
6904                         GEN9_FREQUENCY(dev_priv->gt_pm.rps.rp1_freq));
6905
6906         /* 1 second timeout*/
6907         I915_WRITE(GEN6_RP_DOWN_TIMEOUT,
6908                 GT_INTERVAL_FROM_US(dev_priv, 1000000));
6909
6910         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
6911
6912         /* Leaning on the below call to gen6_set_rps to program/setup the
6913          * Up/Down EI & threshold registers, as well as the RP_CONTROL,
6914          * RP_INTERRUPT_LIMITS & RPNSWREQ registers */
6915         reset_rps(dev_priv, gen6_set_rps);
6916
6917         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
6918 }
6919
6920 static void gen9_enable_rc6(struct drm_i915_private *dev_priv)
6921 {
6922         struct intel_engine_cs *engine;
6923         enum intel_engine_id id;
6924         u32 rc6_mode;
6925
6926         /* 1a: Software RC state - RC0 */
6927         I915_WRITE(GEN6_RC_STATE, 0);
6928
6929         /* 1b: Get forcewake during program sequence. Although the driver
6930          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
6931         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
6932
6933         /* 2a: Disable RC states. */
6934         I915_WRITE(GEN6_RC_CONTROL, 0);
6935
6936         /* 2b: Program RC6 thresholds.*/
6937         if (INTEL_GEN(dev_priv) >= 10) {
6938                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
6939                 I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
6940         } else if (IS_SKYLAKE(dev_priv)) {
6941                 /*
6942                  * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
6943                  * when CPG is enabled
6944                  */
6945                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
6946         } else {
6947                 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
6948         }
6949
6950         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
6951         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
6952         for_each_engine(engine, dev_priv, id)
6953                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
6954
6955         if (HAS_GUC(dev_priv))
6956                 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA);
6957
6958         I915_WRITE(GEN6_RC_SLEEP, 0);
6959
6960         /*
6961          * 2c: Program Coarse Power Gating Policies.
6962          *
6963          * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
6964          * use instead is a more conservative estimate for the maximum time
6965          * it takes us to service a CS interrupt and submit a new ELSP - that
6966          * is the time which the GPU is idle waiting for the CPU to select the
6967          * next request to execute. If the idle hysteresis is less than that
6968          * interrupt service latency, the hardware will automatically gate
6969          * the power well and we will then incur the wake up cost on top of
6970          * the service latency. A similar guide from intel_pstate is that we
6971          * do not want the enable hysteresis to be less than the wakeup latency.
6972          *
6973          * igt/gem_exec_nop/sequential provides a rough estimate for the
6974          * service latency, and puts it around 10us for Broadwell (and other
6975          * big core) and around 40us for Broxton (and other low power cores).
6976          * [Note that for legacy ringbuffer submission, this is less than 1us!]
6977          * However, the wakeup latency on Broxton is closer to 100us. To be
6978          * conservative, we have to factor in a context switch on top (due
6979          * to ksoftirqd).
6980          */
6981         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
6982         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
6983
6984         /* 3a: Enable RC6 */
6985         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
6986
6987         /* WaRsUseTimeoutMode:cnl (pre-prod) */
6988         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0))
6989                 rc6_mode = GEN7_RC_CTL_TO_MODE;
6990         else
6991                 rc6_mode = GEN6_RC_CTL_EI_MODE(1);
6992
6993         I915_WRITE(GEN6_RC_CONTROL,
6994                    GEN6_RC_CTL_HW_ENABLE |
6995                    GEN6_RC_CTL_RC6_ENABLE |
6996                    rc6_mode);
6997
6998         /*
6999          * 3b: Enable Coarse Power Gating only when RC6 is enabled.
7000          * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6.
7001          */
7002         if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
7003                 I915_WRITE(GEN9_PG_ENABLE, 0);
7004         else
7005                 I915_WRITE(GEN9_PG_ENABLE,
7006                            GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
7007
7008         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7009 }
7010
7011 static void gen8_enable_rc6(struct drm_i915_private *dev_priv)
7012 {
7013         struct intel_engine_cs *engine;
7014         enum intel_engine_id id;
7015
7016         /* 1a: Software RC state - RC0 */
7017         I915_WRITE(GEN6_RC_STATE, 0);
7018
7019         /* 1b: Get forcewake during program sequence. Although the driver
7020          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7021         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7022
7023         /* 2a: Disable RC states. */
7024         I915_WRITE(GEN6_RC_CONTROL, 0);
7025
7026         /* 2b: Program RC6 thresholds.*/
7027         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7028         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7029         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7030         for_each_engine(engine, dev_priv, id)
7031                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7032         I915_WRITE(GEN6_RC_SLEEP, 0);
7033         I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
7034
7035         /* 3: Enable RC6 */
7036
7037         I915_WRITE(GEN6_RC_CONTROL,
7038                    GEN6_RC_CTL_HW_ENABLE |
7039                    GEN7_RC_CTL_TO_MODE |
7040                    GEN6_RC_CTL_RC6_ENABLE);
7041
7042         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7043 }
7044
7045 static void gen8_enable_rps(struct drm_i915_private *dev_priv)
7046 {
7047         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7048
7049         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7050
7051         /* 1 Program defaults and thresholds for RPS*/
7052         I915_WRITE(GEN6_RPNSWREQ,
7053                    HSW_FREQUENCY(rps->rp1_freq));
7054         I915_WRITE(GEN6_RC_VIDEO_FREQ,
7055                    HSW_FREQUENCY(rps->rp1_freq));
7056         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
7057         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
7058
7059         /* Docs recommend 900MHz, and 300 MHz respectively */
7060         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
7061                    rps->max_freq_softlimit << 24 |
7062                    rps->min_freq_softlimit << 16);
7063
7064         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
7065         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
7066         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
7067         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
7068
7069         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7070
7071         /* 2: Enable RPS */
7072         I915_WRITE(GEN6_RP_CONTROL,
7073                    GEN6_RP_MEDIA_TURBO |
7074                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7075                    GEN6_RP_MEDIA_IS_GFX |
7076                    GEN6_RP_ENABLE |
7077                    GEN6_RP_UP_BUSY_AVG |
7078                    GEN6_RP_DOWN_IDLE_AVG);
7079
7080         reset_rps(dev_priv, gen6_set_rps);
7081
7082         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7083 }
7084
7085 static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
7086 {
7087         struct intel_engine_cs *engine;
7088         enum intel_engine_id id;
7089         u32 rc6vids, rc6_mask;
7090         u32 gtfifodbg;
7091         int ret;
7092
7093         I915_WRITE(GEN6_RC_STATE, 0);
7094
7095         /* Clear the DBG now so we don't confuse earlier errors */
7096         gtfifodbg = I915_READ(GTFIFODBG);
7097         if (gtfifodbg) {
7098                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
7099                 I915_WRITE(GTFIFODBG, gtfifodbg);
7100         }
7101
7102         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7103
7104         /* disable the counters and set deterministic thresholds */
7105         I915_WRITE(GEN6_RC_CONTROL, 0);
7106
7107         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
7108         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
7109         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
7110         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7111         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7112
7113         for_each_engine(engine, dev_priv, id)
7114                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7115
7116         I915_WRITE(GEN6_RC_SLEEP, 0);
7117         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
7118         if (IS_IVYBRIDGE(dev_priv))
7119                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
7120         else
7121                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
7122         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
7123         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
7124
7125         /* We don't use those on Haswell */
7126         rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
7127         if (HAS_RC6p(dev_priv))
7128                 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
7129         if (HAS_RC6pp(dev_priv))
7130                 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
7131         I915_WRITE(GEN6_RC_CONTROL,
7132                    rc6_mask |
7133                    GEN6_RC_CTL_EI_MODE(1) |
7134                    GEN6_RC_CTL_HW_ENABLE);
7135
7136         rc6vids = 0;
7137         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
7138         if (IS_GEN6(dev_priv) && ret) {
7139                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
7140         } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
7141                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
7142                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
7143                 rc6vids &= 0xffff00;
7144                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
7145                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
7146                 if (ret)
7147                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
7148         }
7149
7150         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7151 }
7152
7153 static void gen6_enable_rps(struct drm_i915_private *dev_priv)
7154 {
7155         /* Here begins a magic sequence of register writes to enable
7156          * auto-downclocking.
7157          *
7158          * Perhaps there might be some value in exposing these to
7159          * userspace...
7160          */
7161         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7162
7163         /* Power down if completely idle for over 50ms */
7164         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
7165         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7166
7167         reset_rps(dev_priv, gen6_set_rps);
7168
7169         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7170 }
7171
7172 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
7173 {
7174         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7175         const int min_freq = 15;
7176         const int scaling_factor = 180;
7177         unsigned int gpu_freq;
7178         unsigned int max_ia_freq, min_ring_freq;
7179         unsigned int max_gpu_freq, min_gpu_freq;
7180         struct cpufreq_policy *policy;
7181
7182         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
7183
7184         if (rps->max_freq <= rps->min_freq)
7185                 return;
7186
7187         policy = cpufreq_cpu_get(0);
7188         if (policy) {
7189                 max_ia_freq = policy->cpuinfo.max_freq;
7190                 cpufreq_cpu_put(policy);
7191         } else {
7192                 /*
7193                  * Default to measured freq if none found, PCU will ensure we
7194                  * don't go over
7195                  */
7196                 max_ia_freq = tsc_khz;
7197         }
7198
7199         /* Convert from kHz to MHz */
7200         max_ia_freq /= 1000;
7201
7202         min_ring_freq = I915_READ(DCLK) & 0xf;
7203         /* convert DDR frequency from units of 266.6MHz to bandwidth */
7204         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
7205
7206         min_gpu_freq = rps->min_freq;
7207         max_gpu_freq = rps->max_freq;
7208         if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7209                 /* Convert GT frequency to 50 MHz units */
7210                 min_gpu_freq /= GEN9_FREQ_SCALER;
7211                 max_gpu_freq /= GEN9_FREQ_SCALER;
7212         }
7213
7214         /*
7215          * For each potential GPU frequency, load a ring frequency we'd like
7216          * to use for memory access.  We do this by specifying the IA frequency
7217          * the PCU should use as a reference to determine the ring frequency.
7218          */
7219         for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
7220                 const int diff = max_gpu_freq - gpu_freq;
7221                 unsigned int ia_freq = 0, ring_freq = 0;
7222
7223                 if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
7224                         /*
7225                          * ring_freq = 2 * GT. ring_freq is in 100MHz units
7226                          * No floor required for ring frequency on SKL.
7227                          */
7228                         ring_freq = gpu_freq;
7229                 } else if (INTEL_GEN(dev_priv) >= 8) {
7230                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
7231                         ring_freq = max(min_ring_freq, gpu_freq);
7232                 } else if (IS_HASWELL(dev_priv)) {
7233                         ring_freq = mult_frac(gpu_freq, 5, 4);
7234                         ring_freq = max(min_ring_freq, ring_freq);
7235                         /* leave ia_freq as the default, chosen by cpufreq */
7236                 } else {
7237                         /* On older processors, there is no separate ring
7238                          * clock domain, so in order to boost the bandwidth
7239                          * of the ring, we need to upclock the CPU (ia_freq).
7240                          *
7241                          * For GPU frequencies less than 750MHz,
7242                          * just use the lowest ring freq.
7243                          */
7244                         if (gpu_freq < min_freq)
7245                                 ia_freq = 800;
7246                         else
7247                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
7248                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
7249                 }
7250
7251                 sandybridge_pcode_write(dev_priv,
7252                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
7253                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
7254                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
7255                                         gpu_freq);
7256         }
7257 }
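
/*
 * Worked example for the legacy (pre-Haswell) branch above, with purely
 * illustrative numbers: max_ia_freq = 3400 MHz, max_gpu_freq = 22 and
 * gpu_freq = 18 give diff = 4, so
 *   ia_freq = 3400 - (4 * 180) / 2 = 3040 MHz,
 * which DIV_ROUND_CLOSEST(ia_freq, 100) encodes as 30 (100 MHz units) in
 * the GEN6_PCODE_WRITE_MIN_FREQ_TABLE message; ring_freq stays 0 there.
 */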
7258
7259 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
7260 {
7261         u32 val, rp0;
7262
7263         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7264
7265         switch (INTEL_INFO(dev_priv)->sseu.eu_total) {
7266         case 8:
7267                 /* (2 * 4) config */
7268                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
7269                 break;
7270         case 12:
7271                 /* (2 * 6) config */
7272                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
7273                 break;
7274         case 16:
7275                 /* (2 * 8) config */
7276         default:
7277                 /* Setting (2 * 8) Min RP0 for any other combination */
7278                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
7279                 break;
7280         }
7281
7282         rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
7283
7284         return rp0;
7285 }
7286
7287 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7288 {
7289         u32 val, rpe;
7290
7291         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
7292         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
7293
7294         return rpe;
7295 }
7296
7297 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
7298 {
7299         u32 val, rp1;
7300
7301         val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
7302         rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
7303
7304         return rp1;
7305 }
7306
7307 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
7308 {
7309         u32 val, rpn;
7310
7311         val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
7312         rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
7313                        FB_GFX_FREQ_FUSE_MASK);
7314
7315         return rpn;
7316 }
7317
7318 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
7319 {
7320         u32 val, rp1;
7321
7322         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7323
7324         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
7325
7326         return rp1;
7327 }
7328
7329 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
7330 {
7331         u32 val, rp0;
7332
7333         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
7334
7335         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
7336         /* Clamp to max */
7337         rp0 = min_t(u32, rp0, 0xea);
7338
7339         return rp0;
7340 }
7341
7342 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
7343 {
7344         u32 val, rpe;
7345
7346         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
7347         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
7348         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
7349         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
7350
7351         return rpe;
7352 }
7353
7354 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
7355 {
7356         u32 val;
7357
7358         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
7359         /*
7360          * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
7361          * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
7362          * a BYT-M B0 the above register contains 0xbf. Moreover, when setting
7363          * a frequency, Punit will not allow values below 0xc0. Clamp it to 0xc0
7364          * to make sure it matches what Punit accepts.
7365          */
7366         return max_t(u32, val, 0xc0);
7367 }
7368
7369 /* Check that the pctx buffer wasn't moved under us. */
7370 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
7371 {
7372         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7373
7374         WARN_ON(pctx_addr != dev_priv->dsm.start +
7375                              dev_priv->vlv_pctx->stolen->start);
7376 }
7377
7378
7379 /* Check that the pcbr address is not empty. */
7380 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
7381 {
7382         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
7383
7384         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
7385 }
7386
7387 static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
7388 {
7389         resource_size_t pctx_paddr, paddr;
7390         resource_size_t pctx_size = 32*1024;
7391         u32 pcbr;
7392
7393         pcbr = I915_READ(VLV_PCBR);
7394         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
7395                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7396                 paddr = dev_priv->dsm.end + 1 - pctx_size;
7397                 GEM_BUG_ON(paddr > U32_MAX);
7398
7399                 pctx_paddr = (paddr & (~4095));
7400                 I915_WRITE(VLV_PCBR, pctx_paddr);
7401         }
7402
7403         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7404 }
7405
7406 static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
7407 {
7408         struct drm_i915_gem_object *pctx;
7409         resource_size_t pctx_paddr;
7410         resource_size_t pctx_size = 24*1024;
7411         u32 pcbr;
7412
7413         pcbr = I915_READ(VLV_PCBR);
7414         if (pcbr) {
7415                 /* BIOS set it up already, grab the pre-alloc'd space */
7416                 resource_size_t pcbr_offset;
7417
7418                 pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start;
7419                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
7420                                                                       pcbr_offset,
7421                                                                       I915_GTT_OFFSET_NONE,
7422                                                                       pctx_size);
7423                 goto out;
7424         }
7425
7426         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
7427
7428         /*
7429          * From the Gunit register HAS:
7430          * The Gfx driver is expected to program this register and ensure
7431          * proper allocation within Gfx stolen memory.  For example, this
7432          * register should be programmed such that the PCBR range does not
7433          * overlap with other ranges, such as the frame buffer, protected
7434          * memory, or any other relevant ranges.
7435          */
7436         pctx = i915_gem_object_create_stolen(dev_priv, pctx_size);
7437         if (!pctx) {
7438                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
7439                 goto out;
7440         }
7441
7442         GEM_BUG_ON(range_overflows_t(u64,
7443                                      dev_priv->dsm.start,
7444                                      pctx->stolen->start,
7445                                      U32_MAX));
7446         pctx_paddr = dev_priv->dsm.start + pctx->stolen->start;
7447         I915_WRITE(VLV_PCBR, pctx_paddr);
7448
7449 out:
7450         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
7451         dev_priv->vlv_pctx = pctx;
7452 }
7453
7454 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
7455 {
7456         struct drm_i915_gem_object *pctx;
7457
7458         pctx = fetch_and_zero(&dev_priv->vlv_pctx);
7459         if (pctx)
7460                 i915_gem_object_put(pctx);
7461 }
7462
7463 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
7464 {
7465         dev_priv->gt_pm.rps.gpll_ref_freq =
7466                 vlv_get_cck_clock(dev_priv, "GPLL ref",
7467                                   CCK_GPLL_CLOCK_CONTROL,
7468                                   dev_priv->czclk_freq);
7469
7470         DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n",
7471                          dev_priv->gt_pm.rps.gpll_ref_freq);
7472 }
7473
7474 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
7475 {
7476         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7477         u32 val;
7478
7479         valleyview_setup_pctx(dev_priv);
7480
7481         vlv_init_gpll_ref_freq(dev_priv);
7482
7483         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7484         switch ((val >> 6) & 3) {
7485         case 0:
7486         case 1:
7487                 dev_priv->mem_freq = 800;
7488                 break;
7489         case 2:
7490                 dev_priv->mem_freq = 1066;
7491                 break;
7492         case 3:
7493                 dev_priv->mem_freq = 1333;
7494                 break;
7495         }
7496         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7497
7498         rps->max_freq = valleyview_rps_max_freq(dev_priv);
7499         rps->rp0_freq = rps->max_freq;
7500         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7501                          intel_gpu_freq(dev_priv, rps->max_freq),
7502                          rps->max_freq);
7503
7504         rps->efficient_freq = valleyview_rps_rpe_freq(dev_priv);
7505         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7506                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7507                          rps->efficient_freq);
7508
7509         rps->rp1_freq = valleyview_rps_guar_freq(dev_priv);
7510         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
7511                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7512                          rps->rp1_freq);
7513
7514         rps->min_freq = valleyview_rps_min_freq(dev_priv);
7515         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7516                          intel_gpu_freq(dev_priv, rps->min_freq),
7517                          rps->min_freq);
7518 }
7519
7520 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
7521 {
7522         struct intel_rps *rps = &dev_priv->gt_pm.rps;
7523         u32 val;
7524
7525         cherryview_setup_pctx(dev_priv);
7526
7527         vlv_init_gpll_ref_freq(dev_priv);
7528
7529         mutex_lock(&dev_priv->sb_lock);
7530         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
7531         mutex_unlock(&dev_priv->sb_lock);
7532
7533         switch ((val >> 2) & 0x7) {
7534         case 3:
7535                 dev_priv->mem_freq = 2000;
7536                 break;
7537         default:
7538                 dev_priv->mem_freq = 1600;
7539                 break;
7540         }
7541         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
7542
7543         rps->max_freq = cherryview_rps_max_freq(dev_priv);
7544         rps->rp0_freq = rps->max_freq;
7545         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
7546                          intel_gpu_freq(dev_priv, rps->max_freq),
7547                          rps->max_freq);
7548
7549         rps->efficient_freq = cherryview_rps_rpe_freq(dev_priv);
7550         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
7551                          intel_gpu_freq(dev_priv, rps->efficient_freq),
7552                          rps->efficient_freq);
7553
7554         rps->rp1_freq = cherryview_rps_guar_freq(dev_priv);
7555         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
7556                          intel_gpu_freq(dev_priv, rps->rp1_freq),
7557                          rps->rp1_freq);
7558
7559         rps->min_freq = cherryview_rps_min_freq(dev_priv);
7560         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
7561                          intel_gpu_freq(dev_priv, rps->min_freq),
7562                          rps->min_freq);
7563
7564         WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
7565                    rps->min_freq) & 1,
7566                   "Odd GPU freq values\n");
7567 }
7568
7569 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
7570 {
7571         valleyview_cleanup_pctx(dev_priv);
7572 }
7573
7574 static void cherryview_enable_rc6(struct drm_i915_private *dev_priv)
7575 {
7576         struct intel_engine_cs *engine;
7577         enum intel_engine_id id;
7578         u32 gtfifodbg, rc6_mode, pcbr;
7579
7580         gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV |
7581                                              GT_FIFO_FREE_ENTRIES_CHV);
7582         if (gtfifodbg) {
7583                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7584                                  gtfifodbg);
7585                 I915_WRITE(GTFIFODBG, gtfifodbg);
7586         }
7587
7588         cherryview_check_pctx(dev_priv);
7589
7590         /* 1a & 1b: Get forcewake during program sequence. Although the driver
7591          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
7592         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7593
7594         /*  Disable RC states. */
7595         I915_WRITE(GEN6_RC_CONTROL, 0);
7596
7597         /* 2a: Program RC6 thresholds.*/
7598         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
7599         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
7600         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
7601
7602         for_each_engine(engine, dev_priv, id)
7603                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7604         I915_WRITE(GEN6_RC_SLEEP, 0);
7605
7606         /* TO threshold set to 500 us ( 0x186 * 1.28 us) */
7607         I915_WRITE(GEN6_RC6_THRESHOLD, 0x186);
7608
7609         /* Allows RC6 residency counter to work */
7610         I915_WRITE(VLV_COUNTER_CONTROL,
7611                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7612                                       VLV_MEDIA_RC6_COUNT_EN |
7613                                       VLV_RENDER_RC6_COUNT_EN));
7614
7615         /* For now we assume BIOS is allocating and populating the PCBR */
7616         pcbr = I915_READ(VLV_PCBR);
7617
7618         /* 3: Enable RC6 */
7619         rc6_mode = 0;
7620         if (pcbr >> VLV_PCBR_ADDR_SHIFT)
7621                 rc6_mode = GEN7_RC_CTL_TO_MODE;
7622         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
7623
7624         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7625 }
7626
7627 static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
7628 {
7629         u32 val;
7630
7631         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7632
7633         /* 1: Program defaults and thresholds for RPS*/
7634         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7635         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7636         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7637         I915_WRITE(GEN6_RP_UP_EI, 66000);
7638         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7639
7640         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7641
7642         /* 2: Enable RPS */
7643         I915_WRITE(GEN6_RP_CONTROL,
7644                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7645                    GEN6_RP_MEDIA_IS_GFX |
7646                    GEN6_RP_ENABLE |
7647                    GEN6_RP_UP_BUSY_AVG |
7648                    GEN6_RP_DOWN_IDLE_AVG);
7649
7650         /* Setting Fixed Bias */
7651         val = VLV_OVERRIDE_EN |
7652                   VLV_SOC_TDP_EN |
7653                   CHV_BIAS_CPU_50_SOC_50;
7654         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7655
7656         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7657
7658         /* RPS code assumes GPLL is used */
7659         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7660
7661         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7662         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7663
7664         reset_rps(dev_priv, valleyview_set_rps);
7665
7666         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7667 }
7668
7669 static void valleyview_enable_rc6(struct drm_i915_private *dev_priv)
7670 {
7671         struct intel_engine_cs *engine;
7672         enum intel_engine_id id;
7673         u32 gtfifodbg;
7674
7675         valleyview_check_pctx(dev_priv);
7676
7677         gtfifodbg = I915_READ(GTFIFODBG);
7678         if (gtfifodbg) {
7679                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
7680                                  gtfifodbg);
7681                 I915_WRITE(GTFIFODBG, gtfifodbg);
7682         }
7683
7684         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7685
7686         /*  Disable RC states. */
7687         I915_WRITE(GEN6_RC_CONTROL, 0);
7688
7689         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
7690         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
7691         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
7692
7693         for_each_engine(engine, dev_priv, id)
7694                 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10);
7695
7696         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
7697
7698         /* Allows RC6 residency counter to work */
7699         I915_WRITE(VLV_COUNTER_CONTROL,
7700                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
7701                                       VLV_MEDIA_RC0_COUNT_EN |
7702                                       VLV_RENDER_RC0_COUNT_EN |
7703                                       VLV_MEDIA_RC6_COUNT_EN |
7704                                       VLV_RENDER_RC6_COUNT_EN));
7705
7706         I915_WRITE(GEN6_RC_CONTROL,
7707                    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
7708
7709         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7710 }
7711
7712 static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
7713 {
7714         u32 val;
7715
7716         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
7717
7718         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
7719         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
7720         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
7721         I915_WRITE(GEN6_RP_UP_EI, 66000);
7722         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
7723
7724         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
7725
7726         I915_WRITE(GEN6_RP_CONTROL,
7727                    GEN6_RP_MEDIA_TURBO |
7728                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
7729                    GEN6_RP_MEDIA_IS_GFX |
7730                    GEN6_RP_ENABLE |
7731                    GEN6_RP_UP_BUSY_AVG |
7732                    GEN6_RP_DOWN_IDLE_CONT);
7733
7734         /* Setting Fixed Bias */
7735         val = VLV_OVERRIDE_EN |
7736                   VLV_SOC_TDP_EN |
7737                   VLV_BIAS_CPU_125_SOC_875;
7738         vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
7739
7740         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
7741
7742         /* RPS code assumes GPLL is used */
7743         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
7744
7745         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
7746         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
7747
7748         reset_rps(dev_priv, valleyview_set_rps);
7749
7750         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
7751 }
7752
7753 static unsigned long intel_pxfreq(u32 vidfreq)
7754 {
7755         unsigned long freq;
7756         int div = (vidfreq & 0x3f0000) >> 16;
7757         int post = (vidfreq & 0x3000) >> 12;
7758         int pre = (vidfreq & 0x7);
7759
7760         if (!pre)
7761                 return 0;
7762
7763         freq = ((div * 133333) / ((1<<post) * pre));
7764
7765         return freq;
7766 }
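
/*
 * Worked example (illustrative encoding): for a PXVFREQ value with
 * div = 12, post = 0 and pre = 1, the formula above gives
 *   freq = (12 * 133333) / ((1 << 0) * 1) = 1599996,
 * i.e. roughly 1.6 GHz if the 133333 constant is taken to be in kHz.
 */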
7767
7768 static const struct cparams {
7769         u16 i;
7770         u16 t;
7771         u16 m;
7772         u16 c;
7773 } cparams[] = {
7774         { 1, 1333, 301, 28664 },
7775         { 1, 1066, 294, 24460 },
7776         { 1, 800, 294, 25192 },
7777         { 0, 1333, 276, 27605 },
7778         { 0, 1066, 276, 27605 },
7779         { 0, 800, 231, 23784 },
7780 };
7781
7782 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
7783 {
7784         u64 total_count, diff, ret;
7785         u32 count1, count2, count3, m = 0, c = 0;
7786         unsigned long now = jiffies_to_msecs(jiffies), diff1;
7787         int i;
7788
7789         lockdep_assert_held(&mchdev_lock);
7790
7791         diff1 = now - dev_priv->ips.last_time1;
7792
7793         /* Prevent division-by-zero if we are asking too fast.
7794          * Also, we don't get interesting results if we are polling
7795          * faster than once in 10ms, so just return the saved value
7796          * in such cases.
7797          */
7798         if (diff1 <= 10)
7799                 return dev_priv->ips.chipset_power;
7800
7801         count1 = I915_READ(DMIEC);
7802         count2 = I915_READ(DDREC);
7803         count3 = I915_READ(CSIEC);
7804
7805         total_count = count1 + count2 + count3;
7806
7807         /* FIXME: handle per-counter overflow */
7808         if (total_count < dev_priv->ips.last_count1) {
7809                 diff = ~0UL - dev_priv->ips.last_count1;
7810                 diff += total_count;
7811         } else {
7812                 diff = total_count - dev_priv->ips.last_count1;
7813         }
7814
7815         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
7816                 if (cparams[i].i == dev_priv->ips.c_m &&
7817                     cparams[i].t == dev_priv->ips.r_t) {
7818                         m = cparams[i].m;
7819                         c = cparams[i].c;
7820                         break;
7821                 }
7822         }
7823
7824         diff = div_u64(diff, diff1);
7825         ret = ((m * diff) + c);
7826         ret = div_u64(ret, 10);
7827
7828         dev_priv->ips.last_count1 = total_count;
7829         dev_priv->ips.last_time1 = now;
7830
7831         dev_priv->ips.chipset_power = ret;
7832
7833         return ret;
7834 }
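
/*
 * Worked example with illustrative numbers: using the cparams row
 * { 1, 1333, 301, 28664 } (m = 301, c = 28664), if the summed energy
 * counters advanced by 50 per millisecond (diff = 50), then
 *   ret = (301 * 50 + 28664) / 10 = 4371
 * in the same (undocumented) power units as ips.chipset_power.
 */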
7835
7836 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
7837 {
7838         unsigned long val;
7839
7840         if (!IS_GEN5(dev_priv))
7841                 return 0;
7842
7843         spin_lock_irq(&mchdev_lock);
7844
7845         val = __i915_chipset_val(dev_priv);
7846
7847         spin_unlock_irq(&mchdev_lock);
7848
7849         return val;
7850 }
7851
7852 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
7853 {
7854         unsigned long m, x, b;
7855         u32 tsfs;
7856
7857         tsfs = I915_READ(TSFS);
7858
7859         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
7860         x = I915_READ8(TR1);
7861
7862         b = tsfs & TSFS_INTR_MASK;
7863
7864         return ((m * x) / 127) - b;
7865 }
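
/*
 * Worked example (hypothetical register values): with a TSFS slope of
 * m = 90, a TR1 reading of x = 100 and an intercept of b = 10, the
 * formula above yields (90 * 100) / 127 - 10 = 70 - 10 = 60.
 */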
7866
7867 static int _pxvid_to_vd(u8 pxvid)
7868 {
7869         if (pxvid == 0)
7870                 return 0;
7871
7872         if (pxvid >= 8 && pxvid < 31)
7873                 pxvid = 31;
7874
7875         return (pxvid + 2) * 125;
7876 }
7877
7878 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
7879 {
7880         const int vd = _pxvid_to_vd(pxvid);
7881         const int vm = vd - 1125;
7882
7883         if (INTEL_INFO(dev_priv)->is_mobile)
7884                 return vm > 0 ? vm : 0;
7885
7886         return vd;
7887 }
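
/*
 * Worked example: a PXVID of 20 falls in the [8, 31) range and is clamped
 * to 31, so _pxvid_to_vd() returns (31 + 2) * 125 = 4125; a PXVID of 40
 * maps directly to (40 + 2) * 125 = 5250. On mobile parts the extvid is
 * vd - 1125 (floored at 0), i.e. 3000 for the first case.
 */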
7888
7889 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
7890 {
7891         u64 now, diff, diffms;
7892         u32 count;
7893
7894         lockdep_assert_held(&mchdev_lock);
7895
7896         now = ktime_get_raw_ns();
7897         diffms = now - dev_priv->ips.last_time2;
7898         do_div(diffms, NSEC_PER_MSEC);
7899
7900         /* Don't divide by 0 */
7901         if (!diffms)
7902                 return;
7903
7904         count = I915_READ(GFXEC);
7905
7906         if (count < dev_priv->ips.last_count2) {
7907                 diff = ~0UL - dev_priv->ips.last_count2;
7908                 diff += count;
7909         } else {
7910                 diff = count - dev_priv->ips.last_count2;
7911         }
7912
7913         dev_priv->ips.last_count2 = count;
7914         dev_priv->ips.last_time2 = now;
7915
7916         /* More magic constants... */
7917         diff = diff * 1181;
7918         diff = div_u64(diff, diffms * 10);
7919         dev_priv->ips.gfx_power = diff;
7920 }
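
/*
 * Worked example with illustrative numbers: if GFXEC advanced by 4000
 * counts over 50 ms, then gfx_power = (4000 * 1181) / (50 * 10) = 9448,
 * again in the driver's internal (empirically scaled) power units.
 */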
7921
7922 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
7923 {
7924         if (!IS_GEN5(dev_priv))
7925                 return;
7926
7927         spin_lock_irq(&mchdev_lock);
7928
7929         __i915_update_gfx_val(dev_priv);
7930
7931         spin_unlock_irq(&mchdev_lock);
7932 }
7933
7934 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
7935 {
7936         unsigned long t, corr, state1, corr2, state2;
7937         u32 pxvid, ext_v;
7938
7939         lockdep_assert_held(&mchdev_lock);
7940
7941         pxvid = I915_READ(PXVFREQ(dev_priv->gt_pm.rps.cur_freq));
7942         pxvid = (pxvid >> 24) & 0x7f;
7943         ext_v = pvid_to_extvid(dev_priv, pxvid);
7944
7945         state1 = ext_v;
7946
7947         t = i915_mch_val(dev_priv);
7948
7949         /* Revel in the empirically derived constants */
7950
7951         /* Correction factor in 1/100000 units */
7952         if (t > 80)
7953                 corr = ((t * 2349) + 135940);
7954         else if (t >= 50)
7955                 corr = ((t * 964) + 29317);
7956         else /* < 50 */
7957                 corr = ((t * 301) + 1004);
7958
7959         corr = corr * ((150142 * state1) / 10000 - 78642);
7960         corr /= 100000;
7961         corr2 = (corr * dev_priv->ips.corr);
7962
7963         state2 = (corr2 * state1) / 10000;
7964         state2 /= 100; /* convert to mW */
7965
7966         __i915_update_gfx_val(dev_priv);
7967
7968         return dev_priv->ips.gfx_power + state2;
7969 }
7970
7971 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
7972 {
7973         unsigned long val;
7974
7975         if (!IS_GEN5(dev_priv))
7976                 return 0;
7977
7978         spin_lock_irq(&mchdev_lock);
7979
7980         val = __i915_gfx_val(dev_priv);
7981
7982         spin_unlock_irq(&mchdev_lock);
7983
7984         return val;
7985 }
7986
7987 /**
7988  * i915_read_mch_val - return value for IPS use
7989  *
7990  * Calculate and return a value for the IPS driver to use when deciding whether
7991  * we have thermal and power headroom to increase CPU or GPU power budget.
7992  */
7993 unsigned long i915_read_mch_val(void)
7994 {
7995         struct drm_i915_private *dev_priv;
7996         unsigned long chipset_val, graphics_val, ret = 0;
7997
7998         spin_lock_irq(&mchdev_lock);
7999         if (!i915_mch_dev)
8000                 goto out_unlock;
8001         dev_priv = i915_mch_dev;
8002
8003         chipset_val = __i915_chipset_val(dev_priv);
8004         graphics_val = __i915_gfx_val(dev_priv);
8005
8006         ret = chipset_val + graphics_val;
8007
8008 out_unlock:
8009         spin_unlock_irq(&mchdev_lock);
8010
8011         return ret;
8012 }
8013 EXPORT_SYMBOL_GPL(i915_read_mch_val);
8014
8015 /**
8016  * i915_gpu_raise - raise GPU frequency limit
8017  *
8018  * Raise the limit; IPS indicates we have thermal headroom.
8019  */
8020 bool i915_gpu_raise(void)
8021 {
8022         struct drm_i915_private *dev_priv;
8023         bool ret = true;
8024
8025         spin_lock_irq(&mchdev_lock);
8026         if (!i915_mch_dev) {
8027                 ret = false;
8028                 goto out_unlock;
8029         }
8030         dev_priv = i915_mch_dev;
8031
8032         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
8033                 dev_priv->ips.max_delay--;
8034
8035 out_unlock:
8036         spin_unlock_irq(&mchdev_lock);
8037
8038         return ret;
8039 }
8040 EXPORT_SYMBOL_GPL(i915_gpu_raise);
8041
8042 /**
8043  * i915_gpu_lower - lower GPU frequency limit
8044  *
8045  * IPS indicates we're close to a thermal limit, so throttle back the GPU
8046  * frequency maximum.
8047  */
8048 bool i915_gpu_lower(void)
8049 {
8050         struct drm_i915_private *dev_priv;
8051         bool ret = true;
8052
8053         spin_lock_irq(&mchdev_lock);
8054         if (!i915_mch_dev) {
8055                 ret = false;
8056                 goto out_unlock;
8057         }
8058         dev_priv = i915_mch_dev;
8059
8060         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
8061                 dev_priv->ips.max_delay++;
8062
8063 out_unlock:
8064         spin_unlock_irq(&mchdev_lock);
8065
8066         return ret;
8067 }
8068 EXPORT_SYMBOL_GPL(i915_gpu_lower);
8069
8070 /**
8071  * i915_gpu_busy - indicate GPU busyness to IPS
8072  *
8073  * Tell the IPS driver whether or not the GPU is busy.
8074  */
8075 bool i915_gpu_busy(void)
8076 {
8077         bool ret = false;
8078
8079         spin_lock_irq(&mchdev_lock);
8080         if (i915_mch_dev)
8081                 ret = i915_mch_dev->gt.awake;
8082         spin_unlock_irq(&mchdev_lock);
8083
8084         return ret;
8085 }
8086 EXPORT_SYMBOL_GPL(i915_gpu_busy);
8087
8088 /**
8089  * i915_gpu_turbo_disable - disable graphics turbo
8090  *
8091  * Disable graphics turbo by resetting the max frequency and setting the
8092  * current frequency to the default.
8093  */
8094 bool i915_gpu_turbo_disable(void)
8095 {
8096         struct drm_i915_private *dev_priv;
8097         bool ret = true;
8098
8099         spin_lock_irq(&mchdev_lock);
8100         if (!i915_mch_dev) {
8101                 ret = false;
8102                 goto out_unlock;
8103         }
8104         dev_priv = i915_mch_dev;
8105
8106         dev_priv->ips.max_delay = dev_priv->ips.fstart;
8107
8108         if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart))
8109                 ret = false;
8110
8111 out_unlock:
8112         spin_unlock_irq(&mchdev_lock);
8113
8114         return ret;
8115 }
8116 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
8117
8118 /**
8119  * Tells the intel_ips driver that the i915 driver is now loaded, if
8120  * IPS got loaded first.
8121  *
8122  * This awkward dance is so that neither module has to depend on the
8123  * other in order for IPS to do the appropriate communication of
8124  * GPU turbo limits to i915.
8125  */
8126 static void
8127 ips_ping_for_i915_load(void)
8128 {
8129         void (*link)(void);
8130
8131         link = symbol_get(ips_link_to_i915_driver);
8132         if (link) {
8133                 link();
8134                 symbol_put(ips_link_to_i915_driver);
8135         }
8136 }
8137
8138 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
8139 {
8140         /* We only register the i915 ips part with intel-ips once everything is
8141          * set up, to avoid intel-ips sneaking in and reading bogus values. */
8142         spin_lock_irq(&mchdev_lock);
8143         i915_mch_dev = dev_priv;
8144         spin_unlock_irq(&mchdev_lock);
8145
8146         ips_ping_for_i915_load();
8147 }
8148
8149 void intel_gpu_ips_teardown(void)
8150 {
8151         spin_lock_irq(&mchdev_lock);
8152         i915_mch_dev = NULL;
8153         spin_unlock_irq(&mchdev_lock);
8154 }
8155
8156 static void intel_init_emon(struct drm_i915_private *dev_priv)
8157 {
8158         u32 lcfuse;
8159         u8 pxw[16];
8160         int i;
8161
8162         /* Disable PMON while we program the event weights */
8163         I915_WRITE(ECR, 0);
8164         POSTING_READ(ECR);
8165
8166         /* Program energy weights for various events */
8167         I915_WRITE(SDEW, 0x15040d00);
8168         I915_WRITE(CSIEW0, 0x007f0000);
8169         I915_WRITE(CSIEW1, 0x1e220004);
8170         I915_WRITE(CSIEW2, 0x04000004);
8171
8172         for (i = 0; i < 5; i++)
8173                 I915_WRITE(PEW(i), 0);
8174         for (i = 0; i < 3; i++)
8175                 I915_WRITE(DEW(i), 0);
8176
8177         /* Program P-state weights to account for frequency power adjustment */
8178         for (i = 0; i < 16; i++) {
8179                 u32 pxvidfreq = I915_READ(PXVFREQ(i));
8180                 unsigned long freq = intel_pxfreq(pxvidfreq);
8181                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
8182                         PXVFREQ_PX_SHIFT;
8183                 unsigned long val;
8184
8185                 val = vid * vid;
8186                 val *= (freq / 1000);
8187                 val *= 255;
8188                 val /= (127*127*900);
8189                 if (val > 0xff)
8190                         DRM_ERROR("bad pxval: %ld\n", val);
8191                 pxw[i] = val;
8192         }
8193         /* Render standby states get 0 weight */
8194         pxw[14] = 0;
8195         pxw[15] = 0;
8196
8197         for (i = 0; i < 4; i++) {
8198                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
8199                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
8200                 I915_WRITE(PXW(i), val);
8201         }
8202
8203         /* Adjust magic regs to magic values (more experimental results) */
8204         I915_WRITE(OGW0, 0);
8205         I915_WRITE(OGW1, 0);
8206         I915_WRITE(EG0, 0x00007f00);
8207         I915_WRITE(EG1, 0x0000000e);
8208         I915_WRITE(EG2, 0x000e0000);
8209         I915_WRITE(EG3, 0x68000300);
8210         I915_WRITE(EG4, 0x42000000);
8211         I915_WRITE(EG5, 0x00140031);
8212         I915_WRITE(EG6, 0);
8213         I915_WRITE(EG7, 0);
8214
8215         for (i = 0; i < 8; i++)
8216                 I915_WRITE(PXWL(i), 0);
8217
8218         /* Enable PMON + select events */
8219         I915_WRITE(ECR, 0x80000019);
8220
8221         lcfuse = I915_READ(LCFUSE02);
8222
8223         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
8224 }
8225
8226 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
8227 {
8228         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8229
8230         /*
8231          * RPM depends on RC6 to save/restore the GT HW context, so make RC6 a
8232          * requirement.
8233          */
8234         if (!sanitize_rc6(dev_priv)) {
8235                 DRM_INFO("RC6 disabled, disabling runtime PM support\n");
8236                 pm_runtime_get(&dev_priv->drm.pdev->dev);
8237         }
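             /*
              * Note: the runtime PM reference taken here is balanced by the
              * pm_runtime_put() in intel_cleanup_gt_powersave() below, which
              * runs once RC6 is known to be unusable (!HAS_RC6()).
              */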
8238
8239         mutex_lock(&dev_priv->pcu_lock);
8240
8241         /* Initialize RPS limits (for userspace) */
8242         if (IS_CHERRYVIEW(dev_priv))
8243                 cherryview_init_gt_powersave(dev_priv);
8244         else if (IS_VALLEYVIEW(dev_priv))
8245                 valleyview_init_gt_powersave(dev_priv);
8246         else if (INTEL_GEN(dev_priv) >= 6)
8247                 gen6_init_rps_frequencies(dev_priv);
8248
8249         /* Derive initial user preferences/limits from the hardware limits */
8250         rps->idle_freq = rps->min_freq;
8251         rps->cur_freq = rps->idle_freq;
8252
8253         rps->max_freq_softlimit = rps->max_freq;
8254         rps->min_freq_softlimit = rps->min_freq;
8255
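             /*
              * intel_freq_opcode() translates MHz into the platform's RPS
              * encoding (50 MHz units on hsw/bdw), so the clamp below keeps
              * the user-visible minimum at roughly 450 MHz, or at the
              * efficient frequency (RPe) if that is higher.
              */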
8256         if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
8257                 rps->min_freq_softlimit =
8258                         max_t(int,
8259                               rps->efficient_freq,
8260                               intel_freq_opcode(dev_priv, 450));
8261
8262         /* After setting max-softlimit, find the overclock max freq */
8263         if (IS_GEN6(dev_priv) ||
8264             IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
8265                 u32 params = 0;
8266
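                     /*
                      * Illustrative decode: params == 0x8000001a would mean
                      * bit 31 (OC supported) is set and the OC limit is
                      * 0x1a * 50 = 1300 MHz, so rps->max_freq is bumped to
                      * 0x1a below.
                      */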
8267                 sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
8268                 if (params & BIT(31)) { /* OC supported */
8269                         DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
8270                                          (rps->max_freq & 0xff) * 50,
8271                                          (params & 0xff) * 50);
8272                         rps->max_freq = params & 0xff;
8273                 }
8274         }
8275
8276         /* Finally allow us to boost to max by default */
8277         rps->boost_freq = rps->max_freq;
8278
8279         mutex_unlock(&dev_priv->pcu_lock);
8280 }
8281
8282 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
8283 {
8284         if (IS_VALLEYVIEW(dev_priv))
8285                 valleyview_cleanup_gt_powersave(dev_priv);
8286
8287         if (!HAS_RC6(dev_priv))
8288                 pm_runtime_put(&dev_priv->drm.pdev->dev);
8289 }
8290
8291 /**
8292  * intel_suspend_gt_powersave - suspend PM work and helper threads
8293  * @dev_priv: i915 device
8294  *
8295  * We don't want to disable RC6 or other features here, we just want
8296  * to make sure any work we've queued has finished and won't bother
8297  * us while we're suspended.
8298  */
8299 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
8300 {
8301         if (INTEL_GEN(dev_priv) < 6)
8302                 return;
8303
8304         /* gen6_rps_idle() will be called later to disable interrupts */
8305 }
8306
8307 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
8308 {
8309         dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */
8310         dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
8311         intel_disable_gt_powersave(dev_priv);
8312
8313         if (INTEL_GEN(dev_priv) >= 11)
8314                 gen11_reset_rps_interrupts(dev_priv);
8315         else if (INTEL_GEN(dev_priv) >= 6)
8316                 gen6_reset_rps_interrupts(dev_priv);
8317 }
8318
8319 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
8320 {
8321         lockdep_assert_held(&i915->pcu_lock);
8322
8323         if (!i915->gt_pm.llc_pstate.enabled)
8324                 return;
8325
8326         /* Currently there is no HW configuration to be done to disable. */
8327
8328         i915->gt_pm.llc_pstate.enabled = false;
8329 }
8330
8331 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
8332 {
8333         lockdep_assert_held(&dev_priv->pcu_lock);
8334
8335         if (!dev_priv->gt_pm.rc6.enabled)
8336                 return;
8337
8338         if (INTEL_GEN(dev_priv) >= 9)
8339                 gen9_disable_rc6(dev_priv);
8340         else if (IS_CHERRYVIEW(dev_priv))
8341                 cherryview_disable_rc6(dev_priv);
8342         else if (IS_VALLEYVIEW(dev_priv))
8343                 valleyview_disable_rc6(dev_priv);
8344         else if (INTEL_GEN(dev_priv) >= 6)
8345                 gen6_disable_rc6(dev_priv);
8346
8347         dev_priv->gt_pm.rc6.enabled = false;
8348 }
8349
8350 static void intel_disable_rps(struct drm_i915_private *dev_priv)
8351 {
8352         lockdep_assert_held(&dev_priv->pcu_lock);
8353
8354         if (!dev_priv->gt_pm.rps.enabled)
8355                 return;
8356
8357         if (INTEL_GEN(dev_priv) >= 9)
8358                 gen9_disable_rps(dev_priv);
8359         else if (IS_CHERRYVIEW(dev_priv))
8360                 cherryview_disable_rps(dev_priv);
8361         else if (IS_VALLEYVIEW(dev_priv))
8362                 valleyview_disable_rps(dev_priv);
8363         else if (INTEL_GEN(dev_priv) >= 6)
8364                 gen6_disable_rps(dev_priv);
8365         else if (IS_IRONLAKE_M(dev_priv))
8366                 ironlake_disable_drps(dev_priv);
8367
8368         dev_priv->gt_pm.rps.enabled = false;
8369 }
8370
8371 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
8372 {
8373         mutex_lock(&dev_priv->pcu_lock);
8374
8375         intel_disable_rc6(dev_priv);
8376         intel_disable_rps(dev_priv);
8377         if (HAS_LLC(dev_priv))
8378                 intel_disable_llc_pstate(dev_priv);
8379
8380         mutex_unlock(&dev_priv->pcu_lock);
8381 }
8382
8383 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
8384 {
8385         lockdep_assert_held(&i915->pcu_lock);
8386
8387         if (i915->gt_pm.llc_pstate.enabled)
8388                 return;
8389
8390         gen6_update_ring_freq(i915);
8391
8392         i915->gt_pm.llc_pstate.enabled = true;
8393 }
8394
8395 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
8396 {
8397         lockdep_assert_held(&dev_priv->pcu_lock);
8398
8399         if (dev_priv->gt_pm.rc6.enabled)
8400                 return;
8401
8402         if (IS_CHERRYVIEW(dev_priv))
8403                 cherryview_enable_rc6(dev_priv);
8404         else if (IS_VALLEYVIEW(dev_priv))
8405                 valleyview_enable_rc6(dev_priv);
8406         else if (INTEL_GEN(dev_priv) >= 9)
8407                 gen9_enable_rc6(dev_priv);
8408         else if (IS_BROADWELL(dev_priv))
8409                 gen8_enable_rc6(dev_priv);
8410         else if (INTEL_GEN(dev_priv) >= 6)
8411                 gen6_enable_rc6(dev_priv);
8412
8413         dev_priv->gt_pm.rc6.enabled = true;
8414 }
8415
8416 static void intel_enable_rps(struct drm_i915_private *dev_priv)
8417 {
8418         struct intel_rps *rps = &dev_priv->gt_pm.rps;
8419
8420         lockdep_assert_held(&dev_priv->pcu_lock);
8421
8422         if (rps->enabled)
8423                 return;
8424
8425         if (IS_CHERRYVIEW(dev_priv)) {
8426                 cherryview_enable_rps(dev_priv);
8427         } else if (IS_VALLEYVIEW(dev_priv)) {
8428                 valleyview_enable_rps(dev_priv);
8429         } else if (INTEL_GEN(dev_priv) >= 9) {
8430                 gen9_enable_rps(dev_priv);
8431         } else if (IS_BROADWELL(dev_priv)) {
8432                 gen8_enable_rps(dev_priv);
8433         } else if (INTEL_GEN(dev_priv) >= 6) {
8434                 gen6_enable_rps(dev_priv);
8435         } else if (IS_IRONLAKE_M(dev_priv)) {
8436                 ironlake_enable_drps(dev_priv);
8437                 intel_init_emon(dev_priv);
8438         }
8439
8440         WARN_ON(rps->max_freq < rps->min_freq);
8441         WARN_ON(rps->idle_freq > rps->max_freq);
8442
8443         WARN_ON(rps->efficient_freq < rps->min_freq);
8444         WARN_ON(rps->efficient_freq > rps->max_freq);
8445
8446         rps->enabled = true;
8447 }
8448
8449 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
8450 {
8451         /* Powersaving is controlled by the host when inside a VM */
8452         if (intel_vgpu_active(dev_priv))
8453                 return;
8454
8455         mutex_lock(&dev_priv->pcu_lock);
8456
8457         if (HAS_RC6(dev_priv))
8458                 intel_enable_rc6(dev_priv);
8459         intel_enable_rps(dev_priv);
8460         if (HAS_LLC(dev_priv))
8461                 intel_enable_llc_pstate(dev_priv);
8462
8463         mutex_unlock(&dev_priv->pcu_lock);
8464 }
8465
8466 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
8467 {
8468         /*
8469          * On Ibex Peak and Cougar Point, we need to disable clock
8470          * gating for the panel power sequencer or it will fail to
8471          * start up when no ports are active.
8472          */
8473         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
8474 }
8475
8476 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
8477 {
8478         enum pipe pipe;
8479
8480         for_each_pipe(dev_priv, pipe) {
8481                 I915_WRITE(DSPCNTR(pipe),
8482                            I915_READ(DSPCNTR(pipe)) |
8483                            DISPPLANE_TRICKLE_FEED_DISABLE);
8484
8485                 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
8486                 POSTING_READ(DSPSURF(pipe));
8487         }
8488 }
8489
8490 static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
8491 {
8492         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8493
8494         /*
8495          * Required for FBC
8496          * WaFbcDisableDpfcClockGating:ilk
8497          */
8498         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
8499                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
8500                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
8501
8502         I915_WRITE(PCH_3DCGDIS0,
8503                    MARIUNIT_CLOCK_GATE_DISABLE |
8504                    SVSMUNIT_CLOCK_GATE_DISABLE);
8505         I915_WRITE(PCH_3DCGDIS1,
8506                    VFMUNIT_CLOCK_GATE_DISABLE);
8507
8508         /*
8509          * According to the spec the following bits should be set in
8510          * order to enable memory self-refresh:
8511          * The bit 22/21 of 0x42004
8512          * The bit 5 of 0x42020
8513          * The bit 15 of 0x45000
8514          */
8515         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8516                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
8517                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
8518         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
8519         I915_WRITE(DISP_ARB_CTL,
8520                    (I915_READ(DISP_ARB_CTL) |
8521                     DISP_FBC_WM_DIS));
8522
8523         /*
8524          * Based on the document from hardware guys the following bits
8525          * should be set unconditionally in order to enable FBC.
8526          * The bit 22 of 0x42000
8527          * The bit 22 of 0x42004
8528          * The bit 7,8,9 of 0x42020.
8529          */
8530         if (IS_IRONLAKE_M(dev_priv)) {
8531                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
8532                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
8533                            I915_READ(ILK_DISPLAY_CHICKEN1) |
8534                            ILK_FBCQ_DIS);
8535                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
8536                            I915_READ(ILK_DISPLAY_CHICKEN2) |
8537                            ILK_DPARB_GATE);
8538         }
8539
8540         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8541
8542         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8543                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8544                    ILK_ELPIN_409_SELECT);
8545         I915_WRITE(_3D_CHICKEN2,
8546                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
8547                    _3D_CHICKEN2_WM_READ_PIPELINED);
8548
8549         /* WaDisableRenderCachePipelinedFlush:ilk */
8550         I915_WRITE(CACHE_MODE_0,
8551                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
8552
8553         /* WaDisable_RenderCache_OperationalFlush:ilk */
8554         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8555
8556         g4x_disable_trickle_feed(dev_priv);
8557
8558         ibx_init_clock_gating(dev_priv);
8559 }
8560
8561 static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
8562 {
8563         int pipe;
8564         uint32_t val;
8565
8566         /*
8567          * On Ibex Peak and Cougar Point, we need to disable clock
8568          * gating for the panel power sequencer or it will fail to
8569          * start up when no ports are active.
8570          */
8571         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
8572                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
8573                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
8574         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
8575                    DPLS_EDP_PPS_FIX_DIS);
8576         /* The below fixes the weird display corruption, a few pixels shifted
8577          * downward, on (only) LVDS of some HP laptops with IVY.
8578          */
8579         for_each_pipe(dev_priv, pipe) {
8580                 val = I915_READ(TRANS_CHICKEN2(pipe));
8581                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
8582                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8583                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
8584                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
8585                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
8586                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
8587                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
8588                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
8589         }
8590         /* WADP0ClockGatingDisable */
8591         for_each_pipe(dev_priv, pipe) {
8592                 I915_WRITE(TRANS_CHICKEN1(pipe),
8593                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8594         }
8595 }
8596
8597 static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
8598 {
8599         uint32_t tmp;
8600
8601         tmp = I915_READ(MCH_SSKPD);
8602         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
8603                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x. This can cause underruns.\n",
8604                               tmp);
8605 }
8606
8607 static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
8608 {
8609         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
8610
8611         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
8612
8613         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8614                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8615                    ILK_ELPIN_409_SELECT);
8616
8617         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
8618         I915_WRITE(_3D_CHICKEN,
8619                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
8620
8621         /* WaDisable_RenderCache_OperationalFlush:snb */
8622         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8623
8624         /*
8625          * BSpec recommends 8x4 when MSAA is used,
8626          * however in practice 16x4 seems fastest.
8627          *
8628          * Note that PS/WM thread counts depend on the WIZ hashing
8629          * disable bit, which we don't touch here, but it's good
8630          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8631          */
8632         I915_WRITE(GEN6_GT_MODE,
8633                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8634
8635         I915_WRITE(CACHE_MODE_0,
8636                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
8637
8638         I915_WRITE(GEN6_UCGCTL1,
8639                    I915_READ(GEN6_UCGCTL1) |
8640                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
8641                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
8642
8643         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
8644          * gating disable must be set.  Failure to set it results in
8645          * flickering pixels due to Z write ordering failures after
8646          * some amount of runtime in the Mesa "fire" demo, and Unigine
8647          * Sanctuary and Tropics, and apparently anything else with
8648          * alpha test or pixel discard.
8649          *
8650          * According to the spec, bit 11 (RCCUNIT) must also be set,
8651          * but we didn't debug actual testcases to find it out.
8652          *
8653          * WaDisableRCCUnitClockGating:snb
8654          * WaDisableRCPBUnitClockGating:snb
8655          */
8656         I915_WRITE(GEN6_UCGCTL2,
8657                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
8658                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
8659
8660         /* WaStripsFansDisableFastClipPerformanceFix:snb */
8661         I915_WRITE(_3D_CHICKEN3,
8662                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
8663
8664         /*
8665          * Bspec says:
8666          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
8667          * 3DSTATE_SF number of SF output attributes is more than 16."
8668          */
8669         I915_WRITE(_3D_CHICKEN3,
8670                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
8671
8672         /*
8673          * According to the spec the following bits should be
8674          * set in order to enable memory self-refresh and fbc:
8675          * The bit21 and bit22 of 0x42000
8676          * The bit21 and bit22 of 0x42004
8677          * The bit5 and bit7 of 0x42020
8678          * The bit14 of 0x70180
8679          * The bit14 of 0x71180
8680          *
8681          * WaFbcAsynchFlipDisableFbcQueue:snb
8682          */
8683         I915_WRITE(ILK_DISPLAY_CHICKEN1,
8684                    I915_READ(ILK_DISPLAY_CHICKEN1) |
8685                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
8686         I915_WRITE(ILK_DISPLAY_CHICKEN2,
8687                    I915_READ(ILK_DISPLAY_CHICKEN2) |
8688                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
8689         I915_WRITE(ILK_DSPCLK_GATE_D,
8690                    I915_READ(ILK_DSPCLK_GATE_D) |
8691                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
8692                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
8693
8694         g4x_disable_trickle_feed(dev_priv);
8695
8696         cpt_init_clock_gating(dev_priv);
8697
8698         gen6_check_mch_setup(dev_priv);
8699 }
8700
8701 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
8702 {
8703         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
8704
8705         /*
8706          * WaVSThreadDispatchOverride:ivb,vlv
8707          *
8708          * This actually overrides the dispatch
8709          * mode for all thread types.
8710          */
8711         reg &= ~GEN7_FF_SCHED_MASK;
8712         reg |= GEN7_FF_TS_SCHED_HW;
8713         reg |= GEN7_FF_VS_SCHED_HW;
8714         reg |= GEN7_FF_DS_SCHED_HW;
8715
8716         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
8717 }
8718
8719 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
8720 {
8721         /*
8722          * TODO: this bit should only be enabled when really needed, then
8723          * disabled when not needed anymore in order to save power.
8724          */
8725         if (HAS_PCH_LPT_LP(dev_priv))
8726                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
8727                            I915_READ(SOUTH_DSPCLK_GATE_D) |
8728                            PCH_LP_PARTITION_LEVEL_DISABLE);
8729
8730         /* WADPOClockGatingDisable:hsw */
8731         I915_WRITE(TRANS_CHICKEN1(PIPE_A),
8732                    I915_READ(TRANS_CHICKEN1(PIPE_A)) |
8733                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
8734 }
8735
8736 static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
8737 {
8738         if (HAS_PCH_LPT_LP(dev_priv)) {
8739                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
8740
8741                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
8742                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
8743         }
8744 }
8745
8746 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
8747                                    int general_prio_credits,
8748                                    int high_prio_credits)
8749 {
8750         u32 misccpctl;
8751         u32 val;
8752
8753         /* WaTempDisableDOPClkGating:bdw */
8754         misccpctl = I915_READ(GEN7_MISCCPCTL);
8755         I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
8756
8757         val = I915_READ(GEN8_L3SQCREG1);
8758         val &= ~L3_PRIO_CREDITS_MASK;
8759         val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
8760         val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
8761         I915_WRITE(GEN8_L3SQCREG1, val);
8762
8763         /*
8764          * Wait at least 100 clocks before re-enabling clock gating.
8765          * See the definition of L3SQCREG1 in BSpec.
8766          */
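             /* A 1 usec delay covers >= 100 clocks for any clock of 100 MHz or faster. */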
8767         POSTING_READ(GEN8_L3SQCREG1);
8768         udelay(1);
8769         I915_WRITE(GEN7_MISCCPCTL, misccpctl);
8770 }
8771
8772 static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
8773 {
8774         /* This is not a Wa. Enable to reduce Sampler power */
8775         I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
8776                    I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
8777 }
8778
8779 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
8780 {
8781         if (!HAS_PCH_CNP(dev_priv))
8782                 return;
8783
8784         /* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
8785         I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) |
8786                    CNP_PWM_CGE_GATING_DISABLE);
8787 }
8788
8789 static void cnl_init_clock_gating(struct drm_i915_private *dev_priv)
8790 {
8791         u32 val;
8792         cnp_init_clock_gating(dev_priv);
8793
8794         /* This is not a Wa. Enable for better image quality */
8795         I915_WRITE(_3D_CHICKEN3,
8796                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
8797
8798         /* WaEnableChickenDCPR:cnl */
8799         I915_WRITE(GEN8_CHICKEN_DCPR_1,
8800                    I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
8801
8802         /* WaFbcWakeMemOn:cnl */
8803         I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
8804                    DISP_FBC_MEMORY_WAKE);
8805
8806         val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE);
8807         /* ReadHitWriteOnlyDisable:cnl */
8808         val |= RCCUNIT_CLKGATE_DIS;
8809         /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
8810         if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
8811                 val |= SARBUNIT_CLKGATE_DIS;
8812         I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val);
8813
8814         /* Wa_2201832410:cnl */
8815         val = I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE);
8816         val |= GWUNIT_CLKGATE_DIS;
8817         I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, val);
8818
8819         /* WaDisableVFclkgate:cnl */
8820         /* WaVFUnitClockGatingDisable:cnl */
8821         val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE);
8822         val |= VFUNIT_CLKGATE_DIS;
8823         I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val);
8824 }
8825
8826 static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
8827 {
8828         cnp_init_clock_gating(dev_priv);
8829         gen9_init_clock_gating(dev_priv);
8830
8831         /* WaFbcNukeOnHostModify:cfl */
8832         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8833                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8834 }
8835
8836 static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
8837 {
8838         gen9_init_clock_gating(dev_priv);
8839
8840         /* WaDisableSDEUnitClockGating:kbl */
8841         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8842                 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8843                            GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8844
8845         /* WaDisableGamClockGating:kbl */
8846         if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
8847                 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
8848                            GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
8849
8850         /* WaFbcNukeOnHostModify:kbl */
8851         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8852                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8853 }
8854
8855 static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
8856 {
8857         gen9_init_clock_gating(dev_priv);
8858
8859         /* WAC6entrylatency:skl */
8860         I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
8861                    FBC_LLC_FULLY_OPEN);
8862
8863         /* WaFbcNukeOnHostModify:skl */
8864         I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
8865                    ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
8866 }
8867
8868 static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
8869 {
8870         /* The GTT cache must be disabled if the system is using 2M pages. */
8871         bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
8872                                                  I915_GTT_PAGE_SIZE_2M);
8873         enum pipe pipe;
8874
8875         /* WaSwitchSolVfFArbitrationPriority:bdw */
8876         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8877
8878         /* WaPsrDPAMaskVBlankInSRD:bdw */
8879         I915_WRITE(CHICKEN_PAR1_1,
8880                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
8881
8882         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
8883         for_each_pipe(dev_priv, pipe) {
8884                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
8885                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
8886                            BDW_DPRS_MASK_VBLANK_SRD);
8887         }
8888
8889         /* WaVSRefCountFullforceMissDisable:bdw */
8890         /* WaDSRefCountFullforceMissDisable:bdw */
8891         I915_WRITE(GEN7_FF_THREAD_MODE,
8892                    I915_READ(GEN7_FF_THREAD_MODE) &
8893                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
8894
8895         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
8896                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
8897
8898         /* WaDisableSDEUnitClockGating:bdw */
8899         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
8900                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
8901
8902         /* WaProgramL3SqcReg1Default:bdw */
8903         gen8_set_l3sqc_credits(dev_priv, 30, 2);
8904
8905         /* WaGttCachingOffByDefault:bdw */
8906         I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
8907
8908         /* WaKVMNotificationOnConfigChange:bdw */
8909         I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
8910                    | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
8911
8912         lpt_init_clock_gating(dev_priv);
8913
8914         /* WaDisableDopClockGating:bdw
8915          *
8916          * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
8917          * clock gating.
8918          */
8919         I915_WRITE(GEN6_UCGCTL1,
8920                    I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
8921 }
8922
8923 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
8924 {
8925         /* L3 caching of data atomics doesn't work -- disable it. */
8926         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
8927         I915_WRITE(HSW_ROW_CHICKEN3,
8928                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
8929
8930         /* This is required by WaCatErrorRejectionIssue:hsw */
8931         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
8932                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
8933                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
8934
8935         /* WaVSRefCountFullforceMissDisable:hsw */
8936         I915_WRITE(GEN7_FF_THREAD_MODE,
8937                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
8938
8939         /* WaDisable_RenderCache_OperationalFlush:hsw */
8940         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8941
8942         /* enable HiZ Raw Stall Optimization */
8943         I915_WRITE(CACHE_MODE_0_GEN7,
8944                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
8945
8946         /* WaDisable4x2SubspanOptimization:hsw */
8947         I915_WRITE(CACHE_MODE_1,
8948                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
8949
8950         /*
8951          * BSpec recommends 8x4 when MSAA is used,
8952          * however in practice 16x4 seems fastest.
8953          *
8954          * Note that PS/WM thread counts depend on the WIZ hashing
8955          * disable bit, which we don't touch here, but it's good
8956          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
8957          */
8958         I915_WRITE(GEN7_GT_MODE,
8959                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
8960
8961         /* WaSampleCChickenBitEnable:hsw */
8962         I915_WRITE(HALF_SLICE_CHICKEN3,
8963                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
8964
8965         /* WaSwitchSolVfFArbitrationPriority:hsw */
8966         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
8967
8968         lpt_init_clock_gating(dev_priv);
8969 }
8970
8971 static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
8972 {
8973         uint32_t snpcr;
8974
8975         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
8976
8977         /* WaDisableEarlyCull:ivb */
8978         I915_WRITE(_3D_CHICKEN3,
8979                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
8980
8981         /* WaDisableBackToBackFlipFix:ivb */
8982         I915_WRITE(IVB_CHICKEN3,
8983                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
8984                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
8985
8986         /* WaDisablePSDDualDispatchEnable:ivb */
8987         if (IS_IVB_GT1(dev_priv))
8988                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
8989                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
8990
8991         /* WaDisable_RenderCache_OperationalFlush:ivb */
8992         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
8993
8994         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
8995         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
8996                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
8997
8998         /* WaApplyL3ControlAndL3ChickenMode:ivb */
8999         I915_WRITE(GEN7_L3CNTLREG1,
9000                         GEN7_WA_FOR_GEN7_L3_CONTROL);
9001         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
9002                    GEN7_WA_L3_CHICKEN_MODE);
9003         if (IS_IVB_GT1(dev_priv))
9004                 I915_WRITE(GEN7_ROW_CHICKEN2,
9005                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9006         else {
9007                 /* must write both registers */
9008                 I915_WRITE(GEN7_ROW_CHICKEN2,
9009                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9010                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
9011                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9012         }
9013
9014         /* WaForceL3Serialization:ivb */
9015         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9016                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9017
9018         /*
9019          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9020          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
9021          */
9022         I915_WRITE(GEN6_UCGCTL2,
9023                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9024
9025         /* This is required by WaCatErrorRejectionIssue:ivb */
9026         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9027                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9028                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9029
9030         g4x_disable_trickle_feed(dev_priv);
9031
9032         gen7_setup_fixed_func_scheduler(dev_priv);
9033
9034         if (0) { /* causes HiZ corruption on ivb:gt1 */
9035                 /* enable HiZ Raw Stall Optimization */
9036                 I915_WRITE(CACHE_MODE_0_GEN7,
9037                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
9038         }
9039
9040         /* WaDisable4x2SubspanOptimization:ivb */
9041         I915_WRITE(CACHE_MODE_1,
9042                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9043
9044         /*
9045          * BSpec recommends 8x4 when MSAA is used,
9046          * however in practice 16x4 seems fastest.
9047          *
9048          * Note that PS/WM thread counts depend on the WIZ hashing
9049          * disable bit, which we don't touch here, but it's good
9050          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9051          */
9052         I915_WRITE(GEN7_GT_MODE,
9053                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9054
9055         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
9056         snpcr &= ~GEN6_MBC_SNPCR_MASK;
9057         snpcr |= GEN6_MBC_SNPCR_MED;
9058         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
9059
9060         if (!HAS_PCH_NOP(dev_priv))
9061                 cpt_init_clock_gating(dev_priv);
9062
9063         gen6_check_mch_setup(dev_priv);
9064 }
9065
9066 static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
9067 {
9068         /* WaDisableEarlyCull:vlv */
9069         I915_WRITE(_3D_CHICKEN3,
9070                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
9071
9072         /* WaDisableBackToBackFlipFix:vlv */
9073         I915_WRITE(IVB_CHICKEN3,
9074                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
9075                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
9076
9077         /* WaPsdDispatchEnable:vlv */
9078         /* WaDisablePSDDualDispatchEnable:vlv */
9079         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
9080                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
9081                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
9082
9083         /* WaDisable_RenderCache_OperationalFlush:vlv */
9084         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9085
9086         /* WaForceL3Serialization:vlv */
9087         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
9088                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
9089
9090         /* WaDisableDopClockGating:vlv */
9091         I915_WRITE(GEN7_ROW_CHICKEN2,
9092                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
9093
9094         /* This is required by WaCatErrorRejectionIssue:vlv */
9095         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
9096                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
9097                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
9098
9099         gen7_setup_fixed_func_scheduler(dev_priv);
9100
9101         /*
9102          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
9103          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
9104          */
9105         I915_WRITE(GEN6_UCGCTL2,
9106                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
9107
9108         /* WaDisableL3Bank2xClockGate:vlv
9109          * Disabling L3 clock gating - MMIO 940c[25] = 1
9110          * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
9111         I915_WRITE(GEN7_UCGCTL4,
9112                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
9113
9114         /*
9115          * BSpec says this must be set, even though
9116          * WaDisable4x2SubspanOptimization isn't listed for VLV.
9117          */
9118         I915_WRITE(CACHE_MODE_1,
9119                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
9120
9121         /*
9122          * BSpec recommends 8x4 when MSAA is used,
9123          * however in practice 16x4 seems fastest.
9124          *
9125          * Note that PS/WM thread counts depend on the WIZ hashing
9126          * disable bit, which we don't touch here, but it's good
9127          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
9128          */
9129         I915_WRITE(GEN7_GT_MODE,
9130                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
9131
9132         /*
9133          * WaIncreaseL3CreditsForVLVB0:vlv
9134          * This is the hardware default actually.
9135          */
9136         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
9137
9138         /*
9139          * WaDisableVLVClockGating_VBIIssue:vlv
9140          * Disable clock gating on the GCFG unit to prevent a delay
9141          * in the reporting of vblank events.
9142          */
9143         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
9144 }
9145
9146 static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
9147 {
9148         /* WaVSRefCountFullforceMissDisable:chv */
9149         /* WaDSRefCountFullforceMissDisable:chv */
9150         I915_WRITE(GEN7_FF_THREAD_MODE,
9151                    I915_READ(GEN7_FF_THREAD_MODE) &
9152                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
9153
9154         /* WaDisableSemaphoreAndSyncFlipWait:chv */
9155         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
9156                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
9157
9158         /* WaDisableCSUnitClockGating:chv */
9159         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
9160                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
9161
9162         /* WaDisableSDEUnitClockGating:chv */
9163         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
9164                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
9165
9166         /*
9167          * WaProgramL3SqcReg1Default:chv
9168          * See gfxspecs/Related Documents/Performance Guide/
9169          * LSQC Setting Recommendations.
9170          */
9171         gen8_set_l3sqc_credits(dev_priv, 38, 2);
9172
9173         /*
9174          * GTT cache may not work with big pages, so if those
9175          * are ever enabled GTT cache may need to be disabled.
9176          */
9177         I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
9178 }
9179
9180 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
9181 {
9182         uint32_t dspclk_gate;
9183
9184         I915_WRITE(RENCLK_GATE_D1, 0);
9185         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
9186                    GS_UNIT_CLOCK_GATE_DISABLE |
9187                    CL_UNIT_CLOCK_GATE_DISABLE);
9188         I915_WRITE(RAMCLK_GATE_D, 0);
9189         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
9190                 OVRUNIT_CLOCK_GATE_DISABLE |
9191                 OVCUNIT_CLOCK_GATE_DISABLE;
9192         if (IS_GM45(dev_priv))
9193                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
9194         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
9195
9196         /* WaDisableRenderCachePipelinedFlush */
9197         I915_WRITE(CACHE_MODE_0,
9198                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
9199
9200         /* WaDisable_RenderCache_OperationalFlush:g4x */
9201         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9202
9203         g4x_disable_trickle_feed(dev_priv);
9204 }
9205
9206 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
9207 {
9208         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
9209         I915_WRITE(RENCLK_GATE_D2, 0);
9210         I915_WRITE(DSPCLK_GATE_D, 0);
9211         I915_WRITE(RAMCLK_GATE_D, 0);
9212         I915_WRITE16(DEUC, 0);
9213         I915_WRITE(MI_ARB_STATE,
9214                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9215
9216         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9217         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9218 }
9219
9220 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
9221 {
9222         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
9223                    I965_RCC_CLOCK_GATE_DISABLE |
9224                    I965_RCPB_CLOCK_GATE_DISABLE |
9225                    I965_ISC_CLOCK_GATE_DISABLE |
9226                    I965_FBC_CLOCK_GATE_DISABLE);
9227         I915_WRITE(RENCLK_GATE_D2, 0);
9228         I915_WRITE(MI_ARB_STATE,
9229                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9230
9231         /* WaDisable_RenderCache_OperationalFlush:gen4 */
9232         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
9233 }
9234
9235 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv)
9236 {
9237         u32 dstate = I915_READ(D_STATE);
9238
9239         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
9240                 DSTATE_DOT_CLOCK_GATING;
9241         I915_WRITE(D_STATE, dstate);
9242
9243         if (IS_PINEVIEW(dev_priv))
9244                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
9245
9246         /* IIR "flip pending" means done if this bit is set */
9247         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
9248
9249         /* interrupts should cause a wake up from C3 */
9250         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
9251
9252         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
9253         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
9254
9255         I915_WRITE(MI_ARB_STATE,
9256                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
9257 }
9258
9259 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv)
9260 {
9261         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
9262
9263         /* interrupts should cause a wake up from C3 */
9264         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
9265                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
9266
9267         I915_WRITE(MEM_MODE,
9268                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
9269 }
9270
9271 static void i830_init_clock_gating(struct drm_i915_private *dev_priv)
9272 {
9273         I915_WRITE(MEM_MODE,
9274                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
9275                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
9276 }
9277
9278 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
9279 {
9280         dev_priv->display.init_clock_gating(dev_priv);
9281 }
9282
9283 void intel_suspend_hw(struct drm_i915_private *dev_priv)
9284 {
9285         if (HAS_PCH_LPT(dev_priv))
9286                 lpt_suspend_hw(dev_priv);
9287 }
9288
9289 static void nop_init_clock_gating(struct drm_i915_private *dev_priv)
9290 {
9291         DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n");
9292 }
9293
9294 /**
9295  * intel_init_clock_gating_hooks - setup the clock gating hooks
9296  * @dev_priv: device private
9297  *
9298  * Setup the hooks that configure which clocks of a given platform can be
9299  * gated and also apply various GT and display specific workarounds for these
9300  * platforms. Note that some GT specific workarounds are applied separately
9301  * when GPU contexts or batchbuffers start their execution.
9302  */
9303 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
9304 {
9305         if (IS_ICELAKE(dev_priv))
9306                 dev_priv->display.init_clock_gating = icl_init_clock_gating;
9307         else if (IS_CANNONLAKE(dev_priv))
9308                 dev_priv->display.init_clock_gating = cnl_init_clock_gating;
9309         else if (IS_COFFEELAKE(dev_priv))
9310                 dev_priv->display.init_clock_gating = cfl_init_clock_gating;
9311         else if (IS_SKYLAKE(dev_priv))
9312                 dev_priv->display.init_clock_gating = skl_init_clock_gating;
9313         else if (IS_KABYLAKE(dev_priv))
9314                 dev_priv->display.init_clock_gating = kbl_init_clock_gating;
9315         else if (IS_BROXTON(dev_priv))
9316                 dev_priv->display.init_clock_gating = bxt_init_clock_gating;
9317         else if (IS_GEMINILAKE(dev_priv))
9318                 dev_priv->display.init_clock_gating = glk_init_clock_gating;
9319         else if (IS_BROADWELL(dev_priv))
9320                 dev_priv->display.init_clock_gating = bdw_init_clock_gating;
9321         else if (IS_CHERRYVIEW(dev_priv))
9322                 dev_priv->display.init_clock_gating = chv_init_clock_gating;
9323         else if (IS_HASWELL(dev_priv))
9324                 dev_priv->display.init_clock_gating = hsw_init_clock_gating;
9325         else if (IS_IVYBRIDGE(dev_priv))
9326                 dev_priv->display.init_clock_gating = ivb_init_clock_gating;
9327         else if (IS_VALLEYVIEW(dev_priv))
9328                 dev_priv->display.init_clock_gating = vlv_init_clock_gating;
9329         else if (IS_GEN6(dev_priv))
9330                 dev_priv->display.init_clock_gating = gen6_init_clock_gating;
9331         else if (IS_GEN5(dev_priv))
9332                 dev_priv->display.init_clock_gating = ilk_init_clock_gating;
9333         else if (IS_G4X(dev_priv))
9334                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
9335         else if (IS_I965GM(dev_priv))
9336                 dev_priv->display.init_clock_gating = i965gm_init_clock_gating;
9337         else if (IS_I965G(dev_priv))
9338                 dev_priv->display.init_clock_gating = i965g_init_clock_gating;
9339         else if (IS_GEN3(dev_priv))
9340                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
9341         else if (IS_I85X(dev_priv) || IS_I865G(dev_priv))
9342                 dev_priv->display.init_clock_gating = i85x_init_clock_gating;
9343         else if (IS_GEN2(dev_priv))
9344                 dev_priv->display.init_clock_gating = i830_init_clock_gating;
9345         else {
9346                 MISSING_CASE(INTEL_DEVID(dev_priv));
9347                 dev_priv->display.init_clock_gating = nop_init_clock_gating;
9348         }
9349 }
9350
9351 /* Set up chip specific power management-related functions */
9352 void intel_init_pm(struct drm_i915_private *dev_priv)
9353 {
9354         intel_fbc_init(dev_priv);
9355
9356         /* For cxsr */
9357         if (IS_PINEVIEW(dev_priv))
9358                 i915_pineview_get_mem_freq(dev_priv);
9359         else if (IS_GEN5(dev_priv))
9360                 i915_ironlake_get_mem_freq(dev_priv);
9361
9362         /* For FIFO watermark updates */
9363         if (INTEL_GEN(dev_priv) >= 9) {
9364                 skl_setup_wm_latency(dev_priv);
9365                 dev_priv->display.initial_watermarks = skl_initial_wm;
9366                 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm;
9367                 dev_priv->display.compute_global_watermarks = skl_compute_wm;
9368         } else if (HAS_PCH_SPLIT(dev_priv)) {
9369                 ilk_setup_wm_latency(dev_priv);
9370
9371                 if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] &&
9372                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
9373                     (!IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[0] &&
9374                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
9375                         dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
9376                         dev_priv->display.compute_intermediate_wm =
9377                                 ilk_compute_intermediate_wm;
9378                         dev_priv->display.initial_watermarks =
9379                                 ilk_initial_watermarks;
9380                         dev_priv->display.optimize_watermarks =
9381                                 ilk_optimize_watermarks;
9382                 } else {
9383                         DRM_DEBUG_KMS("Failed to read display plane latency. "
9384                                       "Disabling CxSR\n");
9385                 }
9386         } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9387                 vlv_setup_wm_latency(dev_priv);
9388                 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm;
9389                 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm;
9390                 dev_priv->display.initial_watermarks = vlv_initial_watermarks;
9391                 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks;
9392                 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo;
9393         } else if (IS_G4X(dev_priv)) {
9394                 g4x_setup_wm_latency(dev_priv);
9395                 dev_priv->display.compute_pipe_wm = g4x_compute_pipe_wm;
9396                 dev_priv->display.compute_intermediate_wm = g4x_compute_intermediate_wm;
9397                 dev_priv->display.initial_watermarks = g4x_initial_watermarks;
9398                 dev_priv->display.optimize_watermarks = g4x_optimize_watermarks;
9399         } else if (IS_PINEVIEW(dev_priv)) {
9400                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
9401                                             dev_priv->is_ddr3,
9402                                             dev_priv->fsb_freq,
9403                                             dev_priv->mem_freq)) {
9404                         DRM_INFO("failed to find known CxSR latency "
9405                                  "(found ddr%s fsb freq %d, mem freq %d), "
9406                                  "disabling CxSR\n",
9407                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
9408                                  dev_priv->fsb_freq, dev_priv->mem_freq);
9409                         /* Disable CxSR and never update its watermark again */
9410                         intel_set_memory_cxsr(dev_priv, false);
9411                         dev_priv->display.update_wm = NULL;
9412                 } else
9413                         dev_priv->display.update_wm = pineview_update_wm;
9414         } else if (IS_GEN4(dev_priv)) {
9415                 dev_priv->display.update_wm = i965_update_wm;
9416         } else if (IS_GEN3(dev_priv)) {
9417                 dev_priv->display.update_wm = i9xx_update_wm;
9418                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
9419         } else if (IS_GEN2(dev_priv)) {
9420                 if (INTEL_INFO(dev_priv)->num_pipes == 1) {
9421                         dev_priv->display.update_wm = i845_update_wm;
9422                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
9423                 } else {
9424                         dev_priv->display.update_wm = i9xx_update_wm;
9425                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
9426                 }
9427         } else {
9428                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
9429         }
9430 }
9431
9432 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
9433 {
9434         uint32_t flags =
9435                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9436
9437         switch (flags) {
9438         case GEN6_PCODE_SUCCESS:
9439                 return 0;
9440         case GEN6_PCODE_UNIMPLEMENTED_CMD:
9441                 return -ENODEV;
9442         case GEN6_PCODE_ILLEGAL_CMD:
9443                 return -ENXIO;
9444         case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9445         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9446                 return -EOVERFLOW;
9447         case GEN6_PCODE_TIMEOUT:
9448                 return -ETIMEDOUT;
9449         default:
9450                 MISSING_CASE(flags);
9451                 return 0;
9452         }
9453 }
9454
9455 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
9456 {
9457         uint32_t flags =
9458                 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
9459
9460         switch (flags) {
9461         case GEN6_PCODE_SUCCESS:
9462                 return 0;
9463         case GEN6_PCODE_ILLEGAL_CMD:
9464                 return -ENXIO;
9465         case GEN7_PCODE_TIMEOUT:
9466                 return -ETIMEDOUT;
9467         case GEN7_PCODE_ILLEGAL_DATA:
9468                 return -EINVAL;
9469         case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
9470                 return -EOVERFLOW;
9471         default:
9472                 MISSING_CASE(flags);
9473                 return 0;
9474         }
9475 }
9476
9477 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
9478 {
9479         int status;
9480
9481         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9482
9483         /* GEN6_PCODE_* are outside of the forcewake domain, we can
9484          * use the fw I915_READ variants to reduce the amount of work
9485          * required when reading/writing.
9486          */
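             /*
              * Handshake as implemented below: bail out with -EAGAIN if a
              * previous request is still pending (READY set), write the
              * payload to DATA/DATA1, kick the request by writing
              * READY | mbox to MAILBOX, wait for READY to clear, then read
              * the reply from DATA and decode the error bits.
              */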
9487
9488         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9489                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
9490                                  mbox, __builtin_return_address(0));
9491                 return -EAGAIN;
9492         }
9493
9494         I915_WRITE_FW(GEN6_PCODE_DATA, *val);
9495         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9496         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9497
9498         if (__intel_wait_for_register_fw(dev_priv,
9499                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9500                                          500, 0, NULL)) {
9501                 DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
9502                           mbox, __builtin_return_address(0));
9503                 return -ETIMEDOUT;
9504         }
9505
9506         *val = I915_READ_FW(GEN6_PCODE_DATA);
9507         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9508
9509         if (INTEL_GEN(dev_priv) > 6)
9510                 status = gen7_check_mailbox_status(dev_priv);
9511         else
9512                 status = gen6_check_mailbox_status(dev_priv);
9513
9514         if (status) {
9515                 DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
9516                                  mbox, __builtin_return_address(0), status);
9517                 return status;
9518         }
9519
9520         return 0;
9521 }
9522
9523 int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
9524                                     u32 mbox, u32 val,
9525                                     int fast_timeout_us, int slow_timeout_ms)
9526 {
9527         int status;
9528
9529         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9530
9531         /* GEN6_PCODE_* are outside of the forcewake domain, we can
9532          * use the fw I915_READ variants to reduce the amount of work
9533          * required when reading/writing.
9534          */
9535
9536         if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
9537                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
9538                                  val, mbox, __builtin_return_address(0));
9539                 return -EAGAIN;
9540         }
9541
9542         I915_WRITE_FW(GEN6_PCODE_DATA, val);
9543         I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
9544         I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
9545
9546         if (__intel_wait_for_register_fw(dev_priv,
9547                                          GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
9548                                          fast_timeout_us, slow_timeout_ms,
9549                                          NULL)) {
9550                 DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
9551                           val, mbox, __builtin_return_address(0));
9552                 return -ETIMEDOUT;
9553         }
9554
9555         I915_WRITE_FW(GEN6_PCODE_DATA, 0);
9556
9557         if (INTEL_GEN(dev_priv) > 6)
9558                 status = gen7_check_mailbox_status(dev_priv);
9559         else
9560                 status = gen6_check_mailbox_status(dev_priv);
9561
9562         if (status) {
9563                 DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
9564                                  val, mbox, __builtin_return_address(0), status);
9565                 return status;
9566         }
9567
9568         return 0;
9569 }
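
/*
 * Illustrative usage sketch (not part of this file's flow): callers hold
 * pcu_lock around the mailbox helpers. Assuming the GEN6_PCODE_READ_RC6VIDS /
 * GEN6_PCODE_WRITE_RC6VIDS mailbox IDs and the sandybridge_pcode_write()
 * convenience wrapper from i915_drv.h, a round trip looks roughly like:
 *
 *	u32 rc6vids = 0;
 *	int err;
 *
 *	mutex_lock(&dev_priv->pcu_lock);
 *	err = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
 *				     &rc6vids);
 *	if (!err)
 *		err = sandybridge_pcode_write(dev_priv,
 *					      GEN6_PCODE_WRITE_RC6VIDS,
 *					      rc6vids);
 *	mutex_unlock(&dev_priv->pcu_lock);
 *	if (err)
 *		DRM_DEBUG_DRIVER("RC6 VIDs round trip failed: %d\n", err);
 */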
9570
9571 static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
9572                                   u32 request, u32 reply_mask, u32 reply,
9573                                   u32 *status)
9574 {
9575         u32 val = request;
9576
9577         *status = sandybridge_pcode_read(dev_priv, mbox, &val);
9578
9579         return *status || ((val & reply_mask) == reply);
9580 }
9581
9582 /**
9583  * skl_pcode_request - send PCODE request until acknowledgment
9584  * @dev_priv: device private
9585  * @mbox: PCODE mailbox ID the request is targeted for
9586  * @request: request ID
9587  * @reply_mask: mask used to check for request acknowledgment
9588  * @reply: value used to check for request acknowledgment
9589  * @timeout_base_ms: timeout for polling with preemption enabled
9590  *
9591  * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
9592  * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
9593  * The request is acknowledged once the PCODE reply dword equals @reply after
9594  * applying @reply_mask. Polling is first attempted with preemption enabled
9595  * for @timeout_base_ms; if this times out, polling is retried for another
9596  * 50 ms with preemption disabled.
9597  *
9598  * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
9599  * other error as reported by PCODE.
9600  */
9601 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
9602                       u32 reply_mask, u32 reply, int timeout_base_ms)
9603 {
9604         u32 status;
9605         int ret;
9606
9607         WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
9608
9609 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
9610                                    &status)
9611
9612         /*
9613          * Prime the PCODE by doing a request first. Normally it guarantees
9614          * that a subsequent request, at most @timeout_base_ms later, succeeds.
9615          * _wait_for() doesn't guarantee when the condition passed to it is
9616          * evaluated for the first time, so send the first request explicitly.
9617          */
9618         if (COND) {
9619                 ret = 0;
9620                 goto out;
9621         }
9622         ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
9623         if (!ret)
9624                 goto out;
9625
9626         /*
9627          * The above can time out if the number of requests was low (2 in the
9628          * worst case) _and_ PCODE was busy for some reason even after a
9629          * (queued) request and @timeout_base_ms delay. As a workaround retry
9630          * the poll with preemption disabled to maximize the number of
9631          * requests. Increase the timeout from @timeout_base_ms to 50ms to
9632          * account for interrupts that could reduce the number of these
9633          * requests, and for any quirks of the PCODE firmware that delays
9634          * the request completion.
9635          */
9636         DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
9637         WARN_ON_ONCE(timeout_base_ms > 3);
9638         preempt_disable();
9639         ret = wait_for_atomic(COND, 50);
9640         preempt_enable();
9641
9642 out:
9643         return ret ? ret : status;
9644 #undef COND
9645 }
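
/*
 * Illustrative caller sketch: the display code uses this helper to ask PCODE
 * to prepare for a CDCLK change and to poll for the acknowledgment. Assuming
 * the SKL_PCODE_CDCLK_CONTROL mailbox and the SKL_CDCLK_PREPARE_FOR_CHANGE /
 * SKL_CDCLK_READY_FOR_CHANGE values from i915_reg.h, such a call looks
 * roughly like:
 *
 *	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 *				SKL_CDCLK_PREPARE_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE,
 *				SKL_CDCLK_READY_FOR_CHANGE, 3);
 *
 * i.e. resend the "prepare for change" request until the reply, masked with
 * SKL_CDCLK_READY_FOR_CHANGE, reads back as "ready", with a 3 ms base timeout.
 */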
9646
9647 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
9648 {
9649         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9650
9651         /*
9652          * N = val - 0xb7
9653          * Slow = Fast = GPLL ref * N
9654          */
9655         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
9656 }
9657
9658 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
9659 {
9660         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9661
9662         return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
9663 }
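
/*
 * Worked example for the Valleyview conversion above, with an assumed (not
 * hardware-derived) rps->gpll_ref_freq of 5000: an opcode of 0xc5 gives
 * N = 0xc5 - 0xb7 = 14, so byt_gpu_freq() returns
 * DIV_ROUND_CLOSEST(5000 * 14, 1000) = 70 (MHz), and byt_freq_opcode(70)
 * maps back to DIV_ROUND_CLOSEST(1000 * 70, 5000) + 0xb7 = 14 + 0xb7 = 0xc5.
 */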
9664
9665 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
9666 {
9667         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9668
9669         /*
9670          * N = val / 2
9671          * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
9672          */
9673         return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
9674 }
9675
9676 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
9677 {
9678         struct intel_rps *rps = &dev_priv->gt_pm.rps;
9679
9680         /* CHV needs even values */
9681         return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
9682 }
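
/*
 * Worked example for the Cherryview conversion above, again with an assumed
 * rps->gpll_ref_freq of 5000: opcode 44 gives
 * chv_gpu_freq() = DIV_ROUND_CLOSEST(5000 * 44, 4000) = 55 (MHz), and
 * chv_freq_opcode(55) = DIV_ROUND_CLOSEST(2000 * 55, 5000) * 2 = 22 * 2 = 44,
 * the trailing "* 2" keeping the opcode even as the hardware requires.
 */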
9683
9684 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
9685 {
9686         if (INTEL_GEN(dev_priv) >= 9)
9687                 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
9688                                          GEN9_FREQ_SCALER);
9689         else if (IS_CHERRYVIEW(dev_priv))
9690                 return chv_gpu_freq(dev_priv, val);
9691         else if (IS_VALLEYVIEW(dev_priv))
9692                 return byt_gpu_freq(dev_priv, val);
9693         else
9694                 return val * GT_FREQUENCY_MULTIPLIER;
9695 }
9696
9697 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
9698 {
9699         if (INTEL_GEN(dev_priv) >= 9)
9700                 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
9701                                          GT_FREQUENCY_MULTIPLIER);
9702         else if (IS_CHERRYVIEW(dev_priv))
9703                 return chv_freq_opcode(dev_priv, val);
9704         else if (IS_VALLEYVIEW(dev_priv))
9705                 return byt_freq_opcode(dev_priv, val);
9706         else
9707                 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
9708 }
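
/*
 * Worked example for the generic helpers above, assuming the driver's
 * GT_FREQUENCY_MULTIPLIER of 50 and GEN9_FREQ_SCALER of 3: on gen9+ the
 * hardware unit is 50/3 MHz, so intel_gpu_freq(dev_priv, 18) returns
 * DIV_ROUND_CLOSEST(18 * 50, 3) = 300 (MHz) and intel_freq_opcode(dev_priv,
 * 300) returns DIV_ROUND_CLOSEST(300 * 3, 50) = 18. On older non-VLV/CHV
 * parts the unit is simply 50 MHz.
 */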
9709
9710 void intel_pm_setup(struct drm_i915_private *dev_priv)
9711 {
9712         mutex_init(&dev_priv->pcu_lock);
9713         mutex_init(&dev_priv->gt_pm.rps.power.mutex);
9714
9715         atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
9716
9717         dev_priv->runtime_pm.suspended = false;
9718         atomic_set(&dev_priv->runtime_pm.wakeref_count, 0);
9719 }
9720
9721 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
9722                              const i915_reg_t reg)
9723 {
9724         u32 lower, upper, tmp;
9725         int loop = 2;
9726
9727         /*
9728          * The registers accessed do not need forcewake. We borrow the
9729          * uncore lock to prevent concurrent access to this register range.
9730          */
9731         lockdep_assert_held(&dev_priv->uncore.lock);
9732
9733         /*
9734          * vlv and chv residency counters are 40 bits in width.
9735          * With a control bit, we can choose between the upper and lower
9736          * 32-bit windows into this counter.
9737          *
9738          * Although we always use the counter in high-range mode elsewhere,
9739          * userspace may attempt to read the value before rc6 is initialised,
9740          * before we have set the default VLV_COUNTER_CONTROL value. So always
9741          * set the high bit to be safe.
9742          */
9743         I915_WRITE_FW(VLV_COUNTER_CONTROL,
9744                       _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9745         upper = I915_READ_FW(reg);
9746         do {
9747                 tmp = upper;
9748
9749                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9750                               _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
9751                 lower = I915_READ_FW(reg);
9752
9753                 I915_WRITE_FW(VLV_COUNTER_CONTROL,
9754                               _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
9755                 upper = I915_READ_FW(reg);
9756         } while (upper != tmp && --loop);
9757
9758         /*
9759          * Everywhere else we always use VLV_COUNTER_CONTROL with the
9760          * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
9761          * now.
9762          */
9763
9764         return lower | (u64)upper << 8;
9765 }
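
/*
 * Illustrative reconstruction, assuming the high-range window exposes bits
 * [39:8] of the 40-bit counter and the low-range window bits [31:0], which is
 * what the "lower | upper << 8" above relies on: for a counter value of
 * 0x123456789a the upper read returns 0x12345678 and the lower read
 * 0x3456789a, so (0x12345678ULL << 8) | 0x3456789a == 0x123456789a. The
 * retry loop guards against the counter rolling between the two window reads.
 */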
9766
9767 u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
9768                            const i915_reg_t reg)
9769 {
9770         u64 time_hw, prev_hw, overflow_hw;
9771         unsigned int fw_domains;
9772         unsigned long flags;
9773         unsigned int i;
9774         u32 mul, div;
9775
9776         if (!HAS_RC6(dev_priv))
9777                 return 0;
9778
9779         /*
9780          * Store previous hw counter values for counter wrap-around handling.
9781          *
9782          * There are only four interesting registers and they live next to each
9783          * other so we can use the relative address, compared to the smallest
9784          * one as the index into driver storage.
9785          */
9786         i = (i915_mmio_reg_offset(reg) -
9787              i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
9788         if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency)))
9789                 return 0;
9790
9791         fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
9792
9793         spin_lock_irqsave(&dev_priv->uncore.lock, flags);
9794         intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
9795
9796         /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
9797         if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
9798                 mul = 1000000;
9799                 div = dev_priv->czclk_freq;
9800                 overflow_hw = BIT_ULL(40);
9801                 time_hw = vlv_residency_raw(dev_priv, reg);
9802         } else {
9803                 /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
9804                 if (IS_GEN9_LP(dev_priv)) {
9805                         mul = 10000;
9806                         div = 12;
9807                 } else {
9808                         mul = 1280;
9809                         div = 1;
9810                 }
9811
9812                 overflow_hw = BIT_ULL(32);
9813                 time_hw = I915_READ_FW(reg);
9814         }
9815
9816         /*
9817          * Counter wrap handling.
9818          *
9819          * This relies on a sufficient frequency of queries; otherwise the
9820          * counters can still wrap.
9821          */
9822         prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i];
9823         dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw;
9824
9825         /* RC6 delta from last sample. */
9826         if (time_hw >= prev_hw)
9827                 time_hw -= prev_hw;
9828         else
9829                 time_hw += overflow_hw - prev_hw;
9830
9831         /* Add delta to RC6 extended raw driver copy. */
9832         time_hw += dev_priv->gt_pm.rc6.cur_residency[i];
9833         dev_priv->gt_pm.rc6.cur_residency[i] = time_hw;
9834
9835         intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
9836         spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
9837
9838         return mul_u64_u32_div(time_hw, mul, div);
9839 }
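
/*
 * Worked example of the unit conversion above: on a non-LP gen9 part the
 * counter ticks every 1.28us, so a delta of 1,000,000 ticks yields
 * mul_u64_u32_div(1000000, 1280, 1) = 1,280,000,000 ns, i.e. 1.28 s of RC6.
 * On Gen9LP the same delta would be scaled by 10000/12 (~833.33 ns per tick),
 * and on VLV/CHV by 1000000 / czclk_freq, which, assuming czclk_freq is
 * stored in kHz, gives nanoseconds per CZ clock tick.
 */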
9840
9841 u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
9842 {
9843         u32 cagf;
9844
9845         if (INTEL_GEN(dev_priv) >= 9)
9846                 cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
9847         else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
9848                 cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
9849         else
9850                 cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
9851
9852         return cagf;
9853 }
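
/*
 * Illustrative usage: debugfs/sysfs report the current actual GPU frequency
 * by extracting the CAGF field from RPSTAT1 and converting it with
 * intel_gpu_freq(), roughly (assuming the GEN6_RPSTAT1 register definition):
 *
 *	u32 freq_mhz = intel_gpu_freq(dev_priv,
 *				      intel_get_cagf(dev_priv,
 *						     I915_READ(GEN6_RPSTAT1)));
 */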