]> Git Repo - J-linux.git/blob - drivers/idle/intel_idle.c
intel_idle: Fix false positive RCU splats due to incorrect hardirqs state
[J-linux.git] / drivers / idle / intel_idle.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_idle.c - native hardware idle loop for modern Intel processors
4  *
5  * Copyright (c) 2013 - 2020, Intel Corporation.
6  * Len Brown <[email protected]>
7  * Rafael J. Wysocki <[email protected]>
8  */
9
10 /*
11  * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT
12  * in lieu of the legacy ACPI processor_idle driver.  The intent is to
13  * make Linux more efficient on these processors, as intel_idle knows
14  * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs.
15  */
16
17 /*
18  * Design Assumptions
19  *
20  * All CPUs have same idle states as boot CPU
21  *
22  * Chipset BM_STS (bus master status) bit is a NOP
23  *      for preventing entry into deep C-states
24  *
25  * CPU will flush caches as needed when entering a C-state via MWAIT
26  *      (in contrast to entering ACPI C3, in which case the WBINVD
27  *      instruction needs to be executed to flush the caches)
28  */
29
30 /*
31  * Known limitations
32  *
33  * ACPI has a .suspend hack to turn off deep C-states during suspend
34  * to avoid complications with the lapic timer workaround.
35  * Have not seen issues with suspend, but may need same workaround here.
36  *
37  */
38
39 /* un-comment DEBUG to enable pr_debug() statements */
40 /* #define DEBUG */
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/acpi.h>
45 #include <linux/kernel.h>
46 #include <linux/cpuidle.h>
47 #include <linux/tick.h>
48 #include <trace/events/power.h>
49 #include <linux/sched.h>
50 #include <linux/sched/smt.h>
51 #include <linux/notifier.h>
52 #include <linux/cpu.h>
53 #include <linux/moduleparam.h>
54 #include <asm/cpu_device_id.h>
55 #include <asm/intel-family.h>
56 #include <asm/nospec-branch.h>
57 #include <asm/mwait.h>
58 #include <asm/msr.h>
59
60 #define INTEL_IDLE_VERSION "0.5.1"
61
62 static struct cpuidle_driver intel_idle_driver = {
63         .name = "intel_idle",
64         .owner = THIS_MODULE,
65 };
66 /* intel_idle.max_cstate=0 disables driver */
67 static int max_cstate = CPUIDLE_STATE_MAX - 1;
68 static unsigned int disabled_states_mask;
69 static unsigned int preferred_states_mask;
70
71 static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
72
73 static unsigned long auto_demotion_disable_flags;
74
75 static enum {
76         C1E_PROMOTION_PRESERVE,
77         C1E_PROMOTION_ENABLE,
78         C1E_PROMOTION_DISABLE
79 } c1e_promotion = C1E_PROMOTION_PRESERVE;
80
81 struct idle_cpu {
82         struct cpuidle_state *state_table;
83
84         /*
85          * Hardware C-state auto-demotion may not always be optimal.
86          * Indicate which enable bits to clear here.
87          */
88         unsigned long auto_demotion_disable_flags;
89         bool byt_auto_demotion_disable_flag;
90         bool disable_promotion_to_c1e;
91         bool use_acpi;
92 };
93
94 static const struct idle_cpu *icpu __initdata;
95 static struct cpuidle_state *cpuidle_state_table __initdata;
96
97 static unsigned int mwait_substates __initdata;
98
/*
 * State flag defined by this driver: turn interrupts on ahead of entering
 * the C-state.  On some platforms and for some C-states this may measurably
 * decrease interrupt latency.
 */
#define CPUIDLE_FLAG_IRQ_ENABLE		BIT(14)

/*
 * State flag defined by this driver: keep the state enabled by default even
 * when the ACPI _CST does not list it.
 */
#define CPUIDLE_FLAG_ALWAYS_ENABLE	BIT(15)

/*
 * State flag defined by this driver: disable IBRS across idle (when
 * KERNEL_IBRS).  Exclusive vs CPUIDLE_FLAG_IRQ_ENABLE.
 */
#define CPUIDLE_FLAG_IBRS		BIT(16)
115
/*
 * MWAIT takes an 8-bit "hint" in EAX "suggesting"
 * the C-state (top nibble) and sub-state (bottom nibble)
 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
 *
 * We store the hint at the top of our "flags" for each state:
 * MWAIT2flg() places the low 8 bits of its argument in bits 31:24 and
 * flg2MWAIT() extracts them again.
 *
 * Each macro argument is parenthesized so that an expression argument
 * (for example "a | b") is masked as a whole rather than being split by
 * operator precedence.
 */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) (((eax) & 0xFF) << 24)
125
126 static __always_inline int __intel_idle(struct cpuidle_device *dev,
127                                         struct cpuidle_driver *drv, int index)
128 {
129         struct cpuidle_state *state = &drv->states[index];
130         unsigned long eax = flg2MWAIT(state->flags);
131         unsigned long ecx = 1; /* break on interrupt flag */
132
133         mwait_idle_with_hints(eax, ecx);
134
135         return index;
136 }
137
138 /**
139  * intel_idle - Ask the processor to enter the given idle state.
140  * @dev: cpuidle device of the target CPU.
141  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
142  * @index: Target idle state index.
143  *
144  * Use the MWAIT instruction to notify the processor that the CPU represented by
145  * @dev is idle and it can try to enter the idle state corresponding to @index.
146  *
147  * If the local APIC timer is not known to be reliable in the target idle state,
148  * enable one-shot tick broadcasting for the target CPU before executing MWAIT.
149  *
150  * Must be called under local_irq_disable().
151  */
152 static __cpuidle int intel_idle(struct cpuidle_device *dev,
153                                 struct cpuidle_driver *drv, int index)
154 {
155         return __intel_idle(dev, drv, index);
156 }
157
158 static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
159                                     struct cpuidle_driver *drv, int index)
160 {
161         int ret;
162
163         raw_local_irq_enable();
164         ret = __intel_idle(dev, drv, index);
165
166         /*
167          * The lockdep hardirqs state may be changed to 'on' with timer
168          * tick interrupt followed by __do_softirq(). Use local_irq_disable()
169          * to keep the hardirqs state correct.
170          */
171         local_irq_disable();
172
173         return ret;
174 }
175
176 static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
177                                      struct cpuidle_driver *drv, int index)
178 {
179         bool smt_active = sched_smt_active();
180         u64 spec_ctrl = spec_ctrl_current();
181         int ret;
182
183         if (smt_active)
184                 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
185
186         ret = __intel_idle(dev, drv, index);
187
188         if (smt_active)
189                 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
190
191         return ret;
192 }
193
194 /**
195  * intel_idle_s2idle - Ask the processor to enter the given idle state.
196  * @dev: cpuidle device of the target CPU.
197  * @drv: cpuidle driver (assumed to point to intel_idle_driver).
198  * @index: Target idle state index.
199  *
200  * Use the MWAIT instruction to notify the processor that the CPU represented by
201  * @dev is idle and it can try to enter the idle state corresponding to @index.
202  *
203  * Invoked as a suspend-to-idle callback routine with frozen user space, frozen
204  * scheduler tick and suspended scheduler clock on the target CPU.
205  */
206 static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
207                                        struct cpuidle_driver *drv, int index)
208 {
209         unsigned long eax = flg2MWAIT(drv->states[index].flags);
210         unsigned long ecx = 1; /* break on interrupt flag */
211
212         mwait_idle_with_hints(eax, ecx);
213
214         return 0;
215 }
216
217 /*
218  * States are indexed by the cstate number,
219  * which is also the index into the MWAIT hint array.
220  * Thus C0 is a dummy.
221  */
222 static struct cpuidle_state nehalem_cstates[] __initdata = {
223         {
224                 .name = "C1",
225                 .desc = "MWAIT 0x00",
226                 .flags = MWAIT2flg(0x00),
227                 .exit_latency = 3,
228                 .target_residency = 6,
229                 .enter = &intel_idle,
230                 .enter_s2idle = intel_idle_s2idle, },
231         {
232                 .name = "C1E",
233                 .desc = "MWAIT 0x01",
234                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
235                 .exit_latency = 10,
236                 .target_residency = 20,
237                 .enter = &intel_idle,
238                 .enter_s2idle = intel_idle_s2idle, },
239         {
240                 .name = "C3",
241                 .desc = "MWAIT 0x10",
242                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
243                 .exit_latency = 20,
244                 .target_residency = 80,
245                 .enter = &intel_idle,
246                 .enter_s2idle = intel_idle_s2idle, },
247         {
248                 .name = "C6",
249                 .desc = "MWAIT 0x20",
250                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
251                 .exit_latency = 200,
252                 .target_residency = 800,
253                 .enter = &intel_idle,
254                 .enter_s2idle = intel_idle_s2idle, },
255         {
256                 .enter = NULL }
257 };
258
259 static struct cpuidle_state snb_cstates[] __initdata = {
260         {
261                 .name = "C1",
262                 .desc = "MWAIT 0x00",
263                 .flags = MWAIT2flg(0x00),
264                 .exit_latency = 2,
265                 .target_residency = 2,
266                 .enter = &intel_idle,
267                 .enter_s2idle = intel_idle_s2idle, },
268         {
269                 .name = "C1E",
270                 .desc = "MWAIT 0x01",
271                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
272                 .exit_latency = 10,
273                 .target_residency = 20,
274                 .enter = &intel_idle,
275                 .enter_s2idle = intel_idle_s2idle, },
276         {
277                 .name = "C3",
278                 .desc = "MWAIT 0x10",
279                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
280                 .exit_latency = 80,
281                 .target_residency = 211,
282                 .enter = &intel_idle,
283                 .enter_s2idle = intel_idle_s2idle, },
284         {
285                 .name = "C6",
286                 .desc = "MWAIT 0x20",
287                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
288                 .exit_latency = 104,
289                 .target_residency = 345,
290                 .enter = &intel_idle,
291                 .enter_s2idle = intel_idle_s2idle, },
292         {
293                 .name = "C7",
294                 .desc = "MWAIT 0x30",
295                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
296                 .exit_latency = 109,
297                 .target_residency = 345,
298                 .enter = &intel_idle,
299                 .enter_s2idle = intel_idle_s2idle, },
300         {
301                 .enter = NULL }
302 };
303
304 static struct cpuidle_state byt_cstates[] __initdata = {
305         {
306                 .name = "C1",
307                 .desc = "MWAIT 0x00",
308                 .flags = MWAIT2flg(0x00),
309                 .exit_latency = 1,
310                 .target_residency = 1,
311                 .enter = &intel_idle,
312                 .enter_s2idle = intel_idle_s2idle, },
313         {
314                 .name = "C6N",
315                 .desc = "MWAIT 0x58",
316                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
317                 .exit_latency = 300,
318                 .target_residency = 275,
319                 .enter = &intel_idle,
320                 .enter_s2idle = intel_idle_s2idle, },
321         {
322                 .name = "C6S",
323                 .desc = "MWAIT 0x52",
324                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
325                 .exit_latency = 500,
326                 .target_residency = 560,
327                 .enter = &intel_idle,
328                 .enter_s2idle = intel_idle_s2idle, },
329         {
330                 .name = "C7",
331                 .desc = "MWAIT 0x60",
332                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
333                 .exit_latency = 1200,
334                 .target_residency = 4000,
335                 .enter = &intel_idle,
336                 .enter_s2idle = intel_idle_s2idle, },
337         {
338                 .name = "C7S",
339                 .desc = "MWAIT 0x64",
340                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
341                 .exit_latency = 10000,
342                 .target_residency = 20000,
343                 .enter = &intel_idle,
344                 .enter_s2idle = intel_idle_s2idle, },
345         {
346                 .enter = NULL }
347 };
348
349 static struct cpuidle_state cht_cstates[] __initdata = {
350         {
351                 .name = "C1",
352                 .desc = "MWAIT 0x00",
353                 .flags = MWAIT2flg(0x00),
354                 .exit_latency = 1,
355                 .target_residency = 1,
356                 .enter = &intel_idle,
357                 .enter_s2idle = intel_idle_s2idle, },
358         {
359                 .name = "C6N",
360                 .desc = "MWAIT 0x58",
361                 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
362                 .exit_latency = 80,
363                 .target_residency = 275,
364                 .enter = &intel_idle,
365                 .enter_s2idle = intel_idle_s2idle, },
366         {
367                 .name = "C6S",
368                 .desc = "MWAIT 0x52",
369                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
370                 .exit_latency = 200,
371                 .target_residency = 560,
372                 .enter = &intel_idle,
373                 .enter_s2idle = intel_idle_s2idle, },
374         {
375                 .name = "C7",
376                 .desc = "MWAIT 0x60",
377                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
378                 .exit_latency = 1200,
379                 .target_residency = 4000,
380                 .enter = &intel_idle,
381                 .enter_s2idle = intel_idle_s2idle, },
382         {
383                 .name = "C7S",
384                 .desc = "MWAIT 0x64",
385                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
386                 .exit_latency = 10000,
387                 .target_residency = 20000,
388                 .enter = &intel_idle,
389                 .enter_s2idle = intel_idle_s2idle, },
390         {
391                 .enter = NULL }
392 };
393
394 static struct cpuidle_state ivb_cstates[] __initdata = {
395         {
396                 .name = "C1",
397                 .desc = "MWAIT 0x00",
398                 .flags = MWAIT2flg(0x00),
399                 .exit_latency = 1,
400                 .target_residency = 1,
401                 .enter = &intel_idle,
402                 .enter_s2idle = intel_idle_s2idle, },
403         {
404                 .name = "C1E",
405                 .desc = "MWAIT 0x01",
406                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
407                 .exit_latency = 10,
408                 .target_residency = 20,
409                 .enter = &intel_idle,
410                 .enter_s2idle = intel_idle_s2idle, },
411         {
412                 .name = "C3",
413                 .desc = "MWAIT 0x10",
414                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
415                 .exit_latency = 59,
416                 .target_residency = 156,
417                 .enter = &intel_idle,
418                 .enter_s2idle = intel_idle_s2idle, },
419         {
420                 .name = "C6",
421                 .desc = "MWAIT 0x20",
422                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
423                 .exit_latency = 80,
424                 .target_residency = 300,
425                 .enter = &intel_idle,
426                 .enter_s2idle = intel_idle_s2idle, },
427         {
428                 .name = "C7",
429                 .desc = "MWAIT 0x30",
430                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
431                 .exit_latency = 87,
432                 .target_residency = 300,
433                 .enter = &intel_idle,
434                 .enter_s2idle = intel_idle_s2idle, },
435         {
436                 .enter = NULL }
437 };
438
439 static struct cpuidle_state ivt_cstates[] __initdata = {
440         {
441                 .name = "C1",
442                 .desc = "MWAIT 0x00",
443                 .flags = MWAIT2flg(0x00),
444                 .exit_latency = 1,
445                 .target_residency = 1,
446                 .enter = &intel_idle,
447                 .enter_s2idle = intel_idle_s2idle, },
448         {
449                 .name = "C1E",
450                 .desc = "MWAIT 0x01",
451                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
452                 .exit_latency = 10,
453                 .target_residency = 80,
454                 .enter = &intel_idle,
455                 .enter_s2idle = intel_idle_s2idle, },
456         {
457                 .name = "C3",
458                 .desc = "MWAIT 0x10",
459                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
460                 .exit_latency = 59,
461                 .target_residency = 156,
462                 .enter = &intel_idle,
463                 .enter_s2idle = intel_idle_s2idle, },
464         {
465                 .name = "C6",
466                 .desc = "MWAIT 0x20",
467                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
468                 .exit_latency = 82,
469                 .target_residency = 300,
470                 .enter = &intel_idle,
471                 .enter_s2idle = intel_idle_s2idle, },
472         {
473                 .enter = NULL }
474 };
475
476 static struct cpuidle_state ivt_cstates_4s[] __initdata = {
477         {
478                 .name = "C1",
479                 .desc = "MWAIT 0x00",
480                 .flags = MWAIT2flg(0x00),
481                 .exit_latency = 1,
482                 .target_residency = 1,
483                 .enter = &intel_idle,
484                 .enter_s2idle = intel_idle_s2idle, },
485         {
486                 .name = "C1E",
487                 .desc = "MWAIT 0x01",
488                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
489                 .exit_latency = 10,
490                 .target_residency = 250,
491                 .enter = &intel_idle,
492                 .enter_s2idle = intel_idle_s2idle, },
493         {
494                 .name = "C3",
495                 .desc = "MWAIT 0x10",
496                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
497                 .exit_latency = 59,
498                 .target_residency = 300,
499                 .enter = &intel_idle,
500                 .enter_s2idle = intel_idle_s2idle, },
501         {
502                 .name = "C6",
503                 .desc = "MWAIT 0x20",
504                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
505                 .exit_latency = 84,
506                 .target_residency = 400,
507                 .enter = &intel_idle,
508                 .enter_s2idle = intel_idle_s2idle, },
509         {
510                 .enter = NULL }
511 };
512
513 static struct cpuidle_state ivt_cstates_8s[] __initdata = {
514         {
515                 .name = "C1",
516                 .desc = "MWAIT 0x00",
517                 .flags = MWAIT2flg(0x00),
518                 .exit_latency = 1,
519                 .target_residency = 1,
520                 .enter = &intel_idle,
521                 .enter_s2idle = intel_idle_s2idle, },
522         {
523                 .name = "C1E",
524                 .desc = "MWAIT 0x01",
525                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
526                 .exit_latency = 10,
527                 .target_residency = 500,
528                 .enter = &intel_idle,
529                 .enter_s2idle = intel_idle_s2idle, },
530         {
531                 .name = "C3",
532                 .desc = "MWAIT 0x10",
533                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
534                 .exit_latency = 59,
535                 .target_residency = 600,
536                 .enter = &intel_idle,
537                 .enter_s2idle = intel_idle_s2idle, },
538         {
539                 .name = "C6",
540                 .desc = "MWAIT 0x20",
541                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
542                 .exit_latency = 88,
543                 .target_residency = 700,
544                 .enter = &intel_idle,
545                 .enter_s2idle = intel_idle_s2idle, },
546         {
547                 .enter = NULL }
548 };
549
550 static struct cpuidle_state hsw_cstates[] __initdata = {
551         {
552                 .name = "C1",
553                 .desc = "MWAIT 0x00",
554                 .flags = MWAIT2flg(0x00),
555                 .exit_latency = 2,
556                 .target_residency = 2,
557                 .enter = &intel_idle,
558                 .enter_s2idle = intel_idle_s2idle, },
559         {
560                 .name = "C1E",
561                 .desc = "MWAIT 0x01",
562                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
563                 .exit_latency = 10,
564                 .target_residency = 20,
565                 .enter = &intel_idle,
566                 .enter_s2idle = intel_idle_s2idle, },
567         {
568                 .name = "C3",
569                 .desc = "MWAIT 0x10",
570                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
571                 .exit_latency = 33,
572                 .target_residency = 100,
573                 .enter = &intel_idle,
574                 .enter_s2idle = intel_idle_s2idle, },
575         {
576                 .name = "C6",
577                 .desc = "MWAIT 0x20",
578                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
579                 .exit_latency = 133,
580                 .target_residency = 400,
581                 .enter = &intel_idle,
582                 .enter_s2idle = intel_idle_s2idle, },
583         {
584                 .name = "C7s",
585                 .desc = "MWAIT 0x32",
586                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
587                 .exit_latency = 166,
588                 .target_residency = 500,
589                 .enter = &intel_idle,
590                 .enter_s2idle = intel_idle_s2idle, },
591         {
592                 .name = "C8",
593                 .desc = "MWAIT 0x40",
594                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
595                 .exit_latency = 300,
596                 .target_residency = 900,
597                 .enter = &intel_idle,
598                 .enter_s2idle = intel_idle_s2idle, },
599         {
600                 .name = "C9",
601                 .desc = "MWAIT 0x50",
602                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
603                 .exit_latency = 600,
604                 .target_residency = 1800,
605                 .enter = &intel_idle,
606                 .enter_s2idle = intel_idle_s2idle, },
607         {
608                 .name = "C10",
609                 .desc = "MWAIT 0x60",
610                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
611                 .exit_latency = 2600,
612                 .target_residency = 7700,
613                 .enter = &intel_idle,
614                 .enter_s2idle = intel_idle_s2idle, },
615         {
616                 .enter = NULL }
617 };
618 static struct cpuidle_state bdw_cstates[] __initdata = {
619         {
620                 .name = "C1",
621                 .desc = "MWAIT 0x00",
622                 .flags = MWAIT2flg(0x00),
623                 .exit_latency = 2,
624                 .target_residency = 2,
625                 .enter = &intel_idle,
626                 .enter_s2idle = intel_idle_s2idle, },
627         {
628                 .name = "C1E",
629                 .desc = "MWAIT 0x01",
630                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
631                 .exit_latency = 10,
632                 .target_residency = 20,
633                 .enter = &intel_idle,
634                 .enter_s2idle = intel_idle_s2idle, },
635         {
636                 .name = "C3",
637                 .desc = "MWAIT 0x10",
638                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
639                 .exit_latency = 40,
640                 .target_residency = 100,
641                 .enter = &intel_idle,
642                 .enter_s2idle = intel_idle_s2idle, },
643         {
644                 .name = "C6",
645                 .desc = "MWAIT 0x20",
646                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
647                 .exit_latency = 133,
648                 .target_residency = 400,
649                 .enter = &intel_idle,
650                 .enter_s2idle = intel_idle_s2idle, },
651         {
652                 .name = "C7s",
653                 .desc = "MWAIT 0x32",
654                 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
655                 .exit_latency = 166,
656                 .target_residency = 500,
657                 .enter = &intel_idle,
658                 .enter_s2idle = intel_idle_s2idle, },
659         {
660                 .name = "C8",
661                 .desc = "MWAIT 0x40",
662                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
663                 .exit_latency = 300,
664                 .target_residency = 900,
665                 .enter = &intel_idle,
666                 .enter_s2idle = intel_idle_s2idle, },
667         {
668                 .name = "C9",
669                 .desc = "MWAIT 0x50",
670                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
671                 .exit_latency = 600,
672                 .target_residency = 1800,
673                 .enter = &intel_idle,
674                 .enter_s2idle = intel_idle_s2idle, },
675         {
676                 .name = "C10",
677                 .desc = "MWAIT 0x60",
678                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
679                 .exit_latency = 2600,
680                 .target_residency = 7700,
681                 .enter = &intel_idle,
682                 .enter_s2idle = intel_idle_s2idle, },
683         {
684                 .enter = NULL }
685 };
686
687 static struct cpuidle_state skl_cstates[] __initdata = {
688         {
689                 .name = "C1",
690                 .desc = "MWAIT 0x00",
691                 .flags = MWAIT2flg(0x00),
692                 .exit_latency = 2,
693                 .target_residency = 2,
694                 .enter = &intel_idle,
695                 .enter_s2idle = intel_idle_s2idle, },
696         {
697                 .name = "C1E",
698                 .desc = "MWAIT 0x01",
699                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
700                 .exit_latency = 10,
701                 .target_residency = 20,
702                 .enter = &intel_idle,
703                 .enter_s2idle = intel_idle_s2idle, },
704         {
705                 .name = "C3",
706                 .desc = "MWAIT 0x10",
707                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
708                 .exit_latency = 70,
709                 .target_residency = 100,
710                 .enter = &intel_idle,
711                 .enter_s2idle = intel_idle_s2idle, },
712         {
713                 .name = "C6",
714                 .desc = "MWAIT 0x20",
715                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
716                 .exit_latency = 85,
717                 .target_residency = 200,
718                 .enter = &intel_idle,
719                 .enter_s2idle = intel_idle_s2idle, },
720         {
721                 .name = "C7s",
722                 .desc = "MWAIT 0x33",
723                 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
724                 .exit_latency = 124,
725                 .target_residency = 800,
726                 .enter = &intel_idle,
727                 .enter_s2idle = intel_idle_s2idle, },
728         {
729                 .name = "C8",
730                 .desc = "MWAIT 0x40",
731                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
732                 .exit_latency = 200,
733                 .target_residency = 800,
734                 .enter = &intel_idle,
735                 .enter_s2idle = intel_idle_s2idle, },
736         {
737                 .name = "C9",
738                 .desc = "MWAIT 0x50",
739                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
740                 .exit_latency = 480,
741                 .target_residency = 5000,
742                 .enter = &intel_idle,
743                 .enter_s2idle = intel_idle_s2idle, },
744         {
745                 .name = "C10",
746                 .desc = "MWAIT 0x60",
747                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
748                 .exit_latency = 890,
749                 .target_residency = 5000,
750                 .enter = &intel_idle,
751                 .enter_s2idle = intel_idle_s2idle, },
752         {
753                 .enter = NULL }
754 };
755
756 static struct cpuidle_state skx_cstates[] __initdata = {
757         {
758                 .name = "C1",
759                 .desc = "MWAIT 0x00",
760                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
761                 .exit_latency = 2,
762                 .target_residency = 2,
763                 .enter = &intel_idle,
764                 .enter_s2idle = intel_idle_s2idle, },
765         {
766                 .name = "C1E",
767                 .desc = "MWAIT 0x01",
768                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
769                 .exit_latency = 10,
770                 .target_residency = 20,
771                 .enter = &intel_idle,
772                 .enter_s2idle = intel_idle_s2idle, },
773         {
774                 .name = "C6",
775                 .desc = "MWAIT 0x20",
776                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
777                 .exit_latency = 133,
778                 .target_residency = 600,
779                 .enter = &intel_idle,
780                 .enter_s2idle = intel_idle_s2idle, },
781         {
782                 .enter = NULL }
783 };
784
785 static struct cpuidle_state icx_cstates[] __initdata = {
786         {
787                 .name = "C1",
788                 .desc = "MWAIT 0x00",
789                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
790                 .exit_latency = 1,
791                 .target_residency = 1,
792                 .enter = &intel_idle,
793                 .enter_s2idle = intel_idle_s2idle, },
794         {
795                 .name = "C1E",
796                 .desc = "MWAIT 0x01",
797                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
798                 .exit_latency = 4,
799                 .target_residency = 4,
800                 .enter = &intel_idle,
801                 .enter_s2idle = intel_idle_s2idle, },
802         {
803                 .name = "C6",
804                 .desc = "MWAIT 0x20",
805                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
806                 .exit_latency = 170,
807                 .target_residency = 600,
808                 .enter = &intel_idle,
809                 .enter_s2idle = intel_idle_s2idle, },
810         {
811                 .enter = NULL }
812 };
813
814 /*
815  * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
816  * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
817  * But in this case there is effectively no C1, because C1 requests are
818  * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
819  * and C1E requests end up with C1, so there is effectively no C1E.
820  *
821  * By default we enable C1E and disable C1 by marking it with
822  * 'CPUIDLE_FLAG_UNUSABLE'.
823  */
824 static struct cpuidle_state adl_cstates[] __initdata = {
825         {
826                 .name = "C1",
827                 .desc = "MWAIT 0x00",
828                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
829                 .exit_latency = 1,
830                 .target_residency = 1,
831                 .enter = &intel_idle,
832                 .enter_s2idle = intel_idle_s2idle, },
833         {
834                 .name = "C1E",
835                 .desc = "MWAIT 0x01",
836                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
837                 .exit_latency = 2,
838                 .target_residency = 4,
839                 .enter = &intel_idle,
840                 .enter_s2idle = intel_idle_s2idle, },
841         {
842                 .name = "C6",
843                 .desc = "MWAIT 0x20",
844                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
845                 .exit_latency = 220,
846                 .target_residency = 600,
847                 .enter = &intel_idle,
848                 .enter_s2idle = intel_idle_s2idle, },
849         {
850                 .name = "C8",
851                 .desc = "MWAIT 0x40",
852                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
853                 .exit_latency = 280,
854                 .target_residency = 800,
855                 .enter = &intel_idle,
856                 .enter_s2idle = intel_idle_s2idle, },
857         {
858                 .name = "C10",
859                 .desc = "MWAIT 0x60",
860                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
861                 .exit_latency = 680,
862                 .target_residency = 2000,
863                 .enter = &intel_idle,
864                 .enter_s2idle = intel_idle_s2idle, },
865         {
866                 .enter = NULL }
867 };
868
869 static struct cpuidle_state adl_l_cstates[] __initdata = {
870         {
871                 .name = "C1",
872                 .desc = "MWAIT 0x00",
873                 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
874                 .exit_latency = 1,
875                 .target_residency = 1,
876                 .enter = &intel_idle,
877                 .enter_s2idle = intel_idle_s2idle, },
878         {
879                 .name = "C1E",
880                 .desc = "MWAIT 0x01",
881                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
882                 .exit_latency = 2,
883                 .target_residency = 4,
884                 .enter = &intel_idle,
885                 .enter_s2idle = intel_idle_s2idle, },
886         {
887                 .name = "C6",
888                 .desc = "MWAIT 0x20",
889                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
890                 .exit_latency = 170,
891                 .target_residency = 500,
892                 .enter = &intel_idle,
893                 .enter_s2idle = intel_idle_s2idle, },
894         {
895                 .name = "C8",
896                 .desc = "MWAIT 0x40",
897                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
898                 .exit_latency = 200,
899                 .target_residency = 600,
900                 .enter = &intel_idle,
901                 .enter_s2idle = intel_idle_s2idle, },
902         {
903                 .name = "C10",
904                 .desc = "MWAIT 0x60",
905                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
906                 .exit_latency = 230,
907                 .target_residency = 700,
908                 .enter = &intel_idle,
909                 .enter_s2idle = intel_idle_s2idle, },
910         {
911                 .enter = NULL }
912 };
913
914 /*
915  * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
916  * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
917  * MSR_IA32_POWER_CTL. But in this case there is effectively no C1, because C1
918  * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
919  * both C1 and C1E requests end up with C1, so there is effectively no C1E.
920  *
921  * By default we enable C1 and disable C1E by marking it with
922  * 'CPUIDLE_FLAG_UNUSABLE'.
923  */
924 static struct cpuidle_state spr_cstates[] __initdata = {
925         {
926                 .name = "C1",
927                 .desc = "MWAIT 0x00",
928                 .flags = MWAIT2flg(0x00),
929                 .exit_latency = 1,
930                 .target_residency = 1,
931                 .enter = &intel_idle,
932                 .enter_s2idle = intel_idle_s2idle, },
933         {
934                 .name = "C1E",
935                 .desc = "MWAIT 0x01",
936                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
937                                            CPUIDLE_FLAG_UNUSABLE,
938                 .exit_latency = 2,
939                 .target_residency = 4,
940                 .enter = &intel_idle,
941                 .enter_s2idle = intel_idle_s2idle, },
942         {
943                 .name = "C6",
944                 .desc = "MWAIT 0x20",
945                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
946                 .exit_latency = 290,
947                 .target_residency = 800,
948                 .enter = &intel_idle,
949                 .enter_s2idle = intel_idle_s2idle, },
950         {
951                 .enter = NULL }
952 };
953
954 static struct cpuidle_state atom_cstates[] __initdata = {
955         {
956                 .name = "C1E",
957                 .desc = "MWAIT 0x00",
958                 .flags = MWAIT2flg(0x00),
959                 .exit_latency = 10,
960                 .target_residency = 20,
961                 .enter = &intel_idle,
962                 .enter_s2idle = intel_idle_s2idle, },
963         {
964                 .name = "C2",
965                 .desc = "MWAIT 0x10",
966                 .flags = MWAIT2flg(0x10),
967                 .exit_latency = 20,
968                 .target_residency = 80,
969                 .enter = &intel_idle,
970                 .enter_s2idle = intel_idle_s2idle, },
971         {
972                 .name = "C4",
973                 .desc = "MWAIT 0x30",
974                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
975                 .exit_latency = 100,
976                 .target_residency = 400,
977                 .enter = &intel_idle,
978                 .enter_s2idle = intel_idle_s2idle, },
979         {
980                 .name = "C6",
981                 .desc = "MWAIT 0x52",
982                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
983                 .exit_latency = 140,
984                 .target_residency = 560,
985                 .enter = &intel_idle,
986                 .enter_s2idle = intel_idle_s2idle, },
987         {
988                 .enter = NULL }
989 };
990 static struct cpuidle_state tangier_cstates[] __initdata = {
991         {
992                 .name = "C1",
993                 .desc = "MWAIT 0x00",
994                 .flags = MWAIT2flg(0x00),
995                 .exit_latency = 1,
996                 .target_residency = 4,
997                 .enter = &intel_idle,
998                 .enter_s2idle = intel_idle_s2idle, },
999         {
1000                 .name = "C4",
1001                 .desc = "MWAIT 0x30",
1002                 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
1003                 .exit_latency = 100,
1004                 .target_residency = 400,
1005                 .enter = &intel_idle,
1006                 .enter_s2idle = intel_idle_s2idle, },
1007         {
1008                 .name = "C6",
1009                 .desc = "MWAIT 0x52",
1010                 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
1011                 .exit_latency = 140,
1012                 .target_residency = 560,
1013                 .enter = &intel_idle,
1014                 .enter_s2idle = intel_idle_s2idle, },
1015         {
1016                 .name = "C7",
1017                 .desc = "MWAIT 0x60",
1018                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1019                 .exit_latency = 1200,
1020                 .target_residency = 4000,
1021                 .enter = &intel_idle,
1022                 .enter_s2idle = intel_idle_s2idle, },
1023         {
1024                 .name = "C9",
1025                 .desc = "MWAIT 0x64",
1026                 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
1027                 .exit_latency = 10000,
1028                 .target_residency = 20000,
1029                 .enter = &intel_idle,
1030                 .enter_s2idle = intel_idle_s2idle, },
1031         {
1032                 .enter = NULL }
1033 };
1034 static struct cpuidle_state avn_cstates[] __initdata = {
1035         {
1036                 .name = "C1",
1037                 .desc = "MWAIT 0x00",
1038                 .flags = MWAIT2flg(0x00),
1039                 .exit_latency = 2,
1040                 .target_residency = 2,
1041                 .enter = &intel_idle,
1042                 .enter_s2idle = intel_idle_s2idle, },
1043         {
1044                 .name = "C6",
1045                 .desc = "MWAIT 0x51",
1046                 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
1047                 .exit_latency = 15,
1048                 .target_residency = 45,
1049                 .enter = &intel_idle,
1050                 .enter_s2idle = intel_idle_s2idle, },
1051         {
1052                 .enter = NULL }
1053 };
1054 static struct cpuidle_state knl_cstates[] __initdata = {
1055         {
1056                 .name = "C1",
1057                 .desc = "MWAIT 0x00",
1058                 .flags = MWAIT2flg(0x00),
1059                 .exit_latency = 1,
1060                 .target_residency = 2,
1061                 .enter = &intel_idle,
1062                 .enter_s2idle = intel_idle_s2idle },
1063         {
1064                 .name = "C6",
1065                 .desc = "MWAIT 0x10",
1066                 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
1067                 .exit_latency = 120,
1068                 .target_residency = 500,
1069                 .enter = &intel_idle,
1070                 .enter_s2idle = intel_idle_s2idle },
1071         {
1072                 .enter = NULL }
1073 };
1074
1075 static struct cpuidle_state bxt_cstates[] __initdata = {
1076         {
1077                 .name = "C1",
1078                 .desc = "MWAIT 0x00",
1079                 .flags = MWAIT2flg(0x00),
1080                 .exit_latency = 2,
1081                 .target_residency = 2,
1082                 .enter = &intel_idle,
1083                 .enter_s2idle = intel_idle_s2idle, },
1084         {
1085                 .name = "C1E",
1086                 .desc = "MWAIT 0x01",
1087                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1088                 .exit_latency = 10,
1089                 .target_residency = 20,
1090                 .enter = &intel_idle,
1091                 .enter_s2idle = intel_idle_s2idle, },
1092         {
1093                 .name = "C6",
1094                 .desc = "MWAIT 0x20",
1095                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1096                 .exit_latency = 133,
1097                 .target_residency = 133,
1098                 .enter = &intel_idle,
1099                 .enter_s2idle = intel_idle_s2idle, },
1100         {
1101                 .name = "C7s",
1102                 .desc = "MWAIT 0x31",
1103                 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
1104                 .exit_latency = 155,
1105                 .target_residency = 155,
1106                 .enter = &intel_idle,
1107                 .enter_s2idle = intel_idle_s2idle, },
1108         {
1109                 .name = "C8",
1110                 .desc = "MWAIT 0x40",
1111                 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
1112                 .exit_latency = 1000,
1113                 .target_residency = 1000,
1114                 .enter = &intel_idle,
1115                 .enter_s2idle = intel_idle_s2idle, },
1116         {
1117                 .name = "C9",
1118                 .desc = "MWAIT 0x50",
1119                 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
1120                 .exit_latency = 2000,
1121                 .target_residency = 2000,
1122                 .enter = &intel_idle,
1123                 .enter_s2idle = intel_idle_s2idle, },
1124         {
1125                 .name = "C10",
1126                 .desc = "MWAIT 0x60",
1127                 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
1128                 .exit_latency = 10000,
1129                 .target_residency = 10000,
1130                 .enter = &intel_idle,
1131                 .enter_s2idle = intel_idle_s2idle, },
1132         {
1133                 .enter = NULL }
1134 };
1135
1136 static struct cpuidle_state dnv_cstates[] __initdata = {
1137         {
1138                 .name = "C1",
1139                 .desc = "MWAIT 0x00",
1140                 .flags = MWAIT2flg(0x00),
1141                 .exit_latency = 2,
1142                 .target_residency = 2,
1143                 .enter = &intel_idle,
1144                 .enter_s2idle = intel_idle_s2idle, },
1145         {
1146                 .name = "C1E",
1147                 .desc = "MWAIT 0x01",
1148                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1149                 .exit_latency = 10,
1150                 .target_residency = 20,
1151                 .enter = &intel_idle,
1152                 .enter_s2idle = intel_idle_s2idle, },
1153         {
1154                 .name = "C6",
1155                 .desc = "MWAIT 0x20",
1156                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1157                 .exit_latency = 50,
1158                 .target_residency = 500,
1159                 .enter = &intel_idle,
1160                 .enter_s2idle = intel_idle_s2idle, },
1161         {
1162                 .enter = NULL }
1163 };
1164
1165 /*
1166  * Note, depending on HW and FW revision, SnowRidge SoC may or may not support
1167  * C6, and this is indicated in the CPUID mwait leaf.
1168  */
1169 static struct cpuidle_state snr_cstates[] __initdata = {
1170         {
1171                 .name = "C1",
1172                 .desc = "MWAIT 0x00",
1173                 .flags = MWAIT2flg(0x00),
1174                 .exit_latency = 2,
1175                 .target_residency = 2,
1176                 .enter = &intel_idle,
1177                 .enter_s2idle = intel_idle_s2idle, },
1178         {
1179                 .name = "C1E",
1180                 .desc = "MWAIT 0x01",
1181                 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
1182                 .exit_latency = 15,
1183                 .target_residency = 25,
1184                 .enter = &intel_idle,
1185                 .enter_s2idle = intel_idle_s2idle, },
1186         {
1187                 .name = "C6",
1188                 .desc = "MWAIT 0x20",
1189                 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
1190                 .exit_latency = 130,
1191                 .target_residency = 500,
1192                 .enter = &intel_idle,
1193                 .enter_s2idle = intel_idle_s2idle, },
1194         {
1195                 .enter = NULL }
1196 };
1197
1198 static const struct idle_cpu idle_cpu_nehalem __initconst = {
1199         .state_table = nehalem_cstates,
1200         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1201         .disable_promotion_to_c1e = true,
1202 };
1203
1204 static const struct idle_cpu idle_cpu_nhx __initconst = {
1205         .state_table = nehalem_cstates,
1206         .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
1207         .disable_promotion_to_c1e = true,
1208         .use_acpi = true,
1209 };
1210
1211 static const struct idle_cpu idle_cpu_atom __initconst = {
1212         .state_table = atom_cstates,
1213 };
1214
1215 static const struct idle_cpu idle_cpu_tangier __initconst = {
1216         .state_table = tangier_cstates,
1217 };
1218
1219 static const struct idle_cpu idle_cpu_lincroft __initconst = {
1220         .state_table = atom_cstates,
1221         .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE,
1222 };
1223
1224 static const struct idle_cpu idle_cpu_snb __initconst = {
1225         .state_table = snb_cstates,
1226         .disable_promotion_to_c1e = true,
1227 };
1228
1229 static const struct idle_cpu idle_cpu_snx __initconst = {
1230         .state_table = snb_cstates,
1231         .disable_promotion_to_c1e = true,
1232         .use_acpi = true,
1233 };
1234
1235 static const struct idle_cpu idle_cpu_byt __initconst = {
1236         .state_table = byt_cstates,
1237         .disable_promotion_to_c1e = true,
1238         .byt_auto_demotion_disable_flag = true,
1239 };
1240
1241 static const struct idle_cpu idle_cpu_cht __initconst = {
1242         .state_table = cht_cstates,
1243         .disable_promotion_to_c1e = true,
1244         .byt_auto_demotion_disable_flag = true,
1245 };
1246
1247 static const struct idle_cpu idle_cpu_ivb __initconst = {
1248         .state_table = ivb_cstates,
1249         .disable_promotion_to_c1e = true,
1250 };
1251
1252 static const struct idle_cpu idle_cpu_ivt __initconst = {
1253         .state_table = ivt_cstates,
1254         .disable_promotion_to_c1e = true,
1255         .use_acpi = true,
1256 };
1257
1258 static const struct idle_cpu idle_cpu_hsw __initconst = {
1259         .state_table = hsw_cstates,
1260         .disable_promotion_to_c1e = true,
1261 };
1262
1263 static const struct idle_cpu idle_cpu_hsx __initconst = {
1264         .state_table = hsw_cstates,
1265         .disable_promotion_to_c1e = true,
1266         .use_acpi = true,
1267 };
1268
1269 static const struct idle_cpu idle_cpu_bdw __initconst = {
1270         .state_table = bdw_cstates,
1271         .disable_promotion_to_c1e = true,
1272 };
1273
1274 static const struct idle_cpu idle_cpu_bdx __initconst = {
1275         .state_table = bdw_cstates,
1276         .disable_promotion_to_c1e = true,
1277         .use_acpi = true,
1278 };
1279
1280 static const struct idle_cpu idle_cpu_skl __initconst = {
1281         .state_table = skl_cstates,
1282         .disable_promotion_to_c1e = true,
1283 };
1284
1285 static const struct idle_cpu idle_cpu_skx __initconst = {
1286         .state_table = skx_cstates,
1287         .disable_promotion_to_c1e = true,
1288         .use_acpi = true,
1289 };
1290
1291 static const struct idle_cpu idle_cpu_icx __initconst = {
1292         .state_table = icx_cstates,
1293         .disable_promotion_to_c1e = true,
1294         .use_acpi = true,
1295 };
1296
1297 static const struct idle_cpu idle_cpu_adl __initconst = {
1298         .state_table = adl_cstates,
1299 };
1300
1301 static const struct idle_cpu idle_cpu_adl_l __initconst = {
1302         .state_table = adl_l_cstates,
1303 };
1304
1305 static const struct idle_cpu idle_cpu_spr __initconst = {
1306         .state_table = spr_cstates,
1307         .disable_promotion_to_c1e = true,
1308         .use_acpi = true,
1309 };
1310
1311 static const struct idle_cpu idle_cpu_avn __initconst = {
1312         .state_table = avn_cstates,
1313         .disable_promotion_to_c1e = true,
1314         .use_acpi = true,
1315 };
1316
1317 static const struct idle_cpu idle_cpu_knl __initconst = {
1318         .state_table = knl_cstates,
1319         .use_acpi = true,
1320 };
1321
1322 static const struct idle_cpu idle_cpu_bxt __initconst = {
1323         .state_table = bxt_cstates,
1324         .disable_promotion_to_c1e = true,
1325 };
1326
1327 static const struct idle_cpu idle_cpu_dnv __initconst = {
1328         .state_table = dnv_cstates,
1329         .disable_promotion_to_c1e = true,
1330         .use_acpi = true,
1331 };
1332
1333 static const struct idle_cpu idle_cpu_snr __initconst = {
1334         .state_table = snr_cstates,
1335         .disable_promotion_to_c1e = true,
1336         .use_acpi = true,
1337 };
1338
1339 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
1340         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &idle_cpu_nhx),
1341         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &idle_cpu_nehalem),
1342         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,           &idle_cpu_nehalem),
1343         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &idle_cpu_nehalem),
1344         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &idle_cpu_nhx),
1345         X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &idle_cpu_nhx),
1346         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL,        &idle_cpu_atom),
1347         X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID,    &idle_cpu_lincroft),
1348         X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &idle_cpu_nhx),
1349         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &idle_cpu_snb),
1350         X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &idle_cpu_snx),
1351         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL,       &idle_cpu_atom),
1352         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,     &idle_cpu_byt),
1353         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier),
1354         X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,        &idle_cpu_cht),
1355         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &idle_cpu_ivb),
1356         X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &idle_cpu_ivt),
1357         X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &idle_cpu_hsw),
1358         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &idle_cpu_hsx),
1359         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &idle_cpu_hsw),
1360         X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &idle_cpu_hsw),
1361         X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,   &idle_cpu_avn),
1362         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &idle_cpu_bdw),
1363         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &idle_cpu_bdw),
1364         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &idle_cpu_bdx),
1365         X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &idle_cpu_bdx),
1366         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &idle_cpu_skl),
1367         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &idle_cpu_skl),
1368         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &idle_cpu_skl),
1369         X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &idle_cpu_skl),
1370         X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &idle_cpu_skx),
1371         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &idle_cpu_icx),
1372         X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
1373         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &idle_cpu_adl),
1374         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &idle_cpu_adl_l),
1375         X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
1376         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
1377         X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &idle_cpu_knl),
1378         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,       &idle_cpu_bxt),
1379         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,  &idle_cpu_bxt),
1380         X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,     &idle_cpu_dnv),
1381         X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &idle_cpu_snr),
1382         {}
1383 };
1384
1385 static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
1386         X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
1387         {}
1388 };
1389
1390 static bool __init intel_idle_max_cstate_reached(int cstate)
1391 {
1392         if (cstate + 1 > max_cstate) {
1393                 pr_info("max_cstate %d reached\n", max_cstate);
1394                 return true;
1395         }
1396         return false;
1397 }
1398
1399 static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
1400 {
1401         unsigned long eax = flg2MWAIT(state->flags);
1402
1403         if (boot_cpu_has(X86_FEATURE_ARAT))
1404                 return false;
1405
1406         /*
1407          * Switch over to one-shot tick broadcast if the target C-state
1408          * is deeper than C1.
1409          */
1410         return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
1411 }
1412
1413 #ifdef CONFIG_ACPI_PROCESSOR_CSTATE
1414 #include <acpi/processor.h>
1415
1416 static bool no_acpi __read_mostly;
1417 module_param(no_acpi, bool, 0444);
1418 MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");
1419
1420 static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */
1421 module_param_named(use_acpi, force_use_acpi, bool, 0444);
1422 MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list");
1423
1424 static struct acpi_processor_power acpi_state_table __initdata;
1425
1426 /**
1427  * intel_idle_cst_usable - Check if the _CST information can be used.
1428  *
1429  * Check if all of the C-states listed by _CST in the max_cstate range are
1430  * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT.
1431  */
1432 static bool __init intel_idle_cst_usable(void)
1433 {
1434         int cstate, limit;
1435
1436         limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1),
1437                       acpi_state_table.count);
1438
1439         for (cstate = 1; cstate < limit; cstate++) {
1440                 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate];
1441
1442                 if (cx->entry_method != ACPI_CSTATE_FFH)
1443                         return false;
1444         }
1445
1446         return true;
1447 }
1448
1449 static bool __init intel_idle_acpi_cst_extract(void)
1450 {
1451         unsigned int cpu;
1452
1453         if (no_acpi) {
1454                 pr_debug("Not allowed to use ACPI _CST\n");
1455                 return false;
1456         }
1457
1458         for_each_possible_cpu(cpu) {
1459                 struct acpi_processor *pr = per_cpu(processors, cpu);
1460
1461                 if (!pr)
1462                         continue;
1463
1464                 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table))
1465                         continue;
1466
1467                 acpi_state_table.count++;
1468
1469                 if (!intel_idle_cst_usable())
1470                         continue;
1471
1472                 if (!acpi_processor_claim_cst_control())
1473                         break;
1474
1475                 return true;
1476         }
1477
1478         acpi_state_table.count = 0;
1479         pr_debug("ACPI _CST not found or not usable\n");
1480         return false;
1481 }
1482
1483 static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
1484 {
1485         int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1486
1487         /*
1488          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1489          * the interesting states are ACPI_CSTATE_FFH.
1490          */
1491         for (cstate = 1; cstate < limit; cstate++) {
1492                 struct acpi_processor_cx *cx;
1493                 struct cpuidle_state *state;
1494
1495                 if (intel_idle_max_cstate_reached(cstate - 1))
1496                         break;
1497
1498                 cx = &acpi_state_table.states[cstate];
1499
1500                 state = &drv->states[drv->state_count++];
1501
1502                 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate);
1503                 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN);
1504                 state->exit_latency = cx->latency;
1505                 /*
1506                  * For C1-type C-states use the same number for both the exit
1507                  * latency and target residency, because that is the case for
1508                  * C1 in the majority of the static C-states tables above.
1509                  * For the other types of C-states, however, set the target
1510                  * residency to 3 times the exit latency which should lead to
1511                  * a reasonable balance between energy-efficiency and
1512                  * performance in the majority of interesting cases.
1513                  */
1514                 state->target_residency = cx->latency;
1515                 if (cx->type > ACPI_STATE_C1)
1516                         state->target_residency *= 3;
1517
1518                 state->flags = MWAIT2flg(cx->address);
1519                 if (cx->type > ACPI_STATE_C2)
1520                         state->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
1521
1522                 if (disabled_states_mask & BIT(cstate))
1523                         state->flags |= CPUIDLE_FLAG_OFF;
1524
1525                 if (intel_idle_state_needs_timer_stop(state))
1526                         state->flags |= CPUIDLE_FLAG_TIMER_STOP;
1527
1528                 state->enter = intel_idle;
1529                 state->enter_s2idle = intel_idle_s2idle;
1530         }
1531 }
1532
1533 static bool __init intel_idle_off_by_default(u32 mwait_hint)
1534 {
1535         int cstate, limit;
1536
1537         /*
1538          * If there are no _CST C-states, do not disable any C-states by
1539          * default.
1540          */
1541         if (!acpi_state_table.count)
1542                 return false;
1543
1544         limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count);
1545         /*
1546          * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of
1547          * the interesting states are ACPI_CSTATE_FFH.
1548          */
1549         for (cstate = 1; cstate < limit; cstate++) {
1550                 if (acpi_state_table.states[cstate].address == mwait_hint)
1551                         return false;
1552         }
1553         return true;
1554 }
1555 #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1556 #define force_use_acpi  (false)
1557
1558 static inline bool intel_idle_acpi_cst_extract(void) { return false; }
1559 static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
1560 static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
1561 #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
1562
1563 /**
1564  * ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
1565  *
1566  * Tune IVT multi-socket targets.
1567  * Assumption: num_sockets == (max_package_num + 1).
1568  */
1569 static void __init ivt_idle_state_table_update(void)
1570 {
1571         /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
1572         int cpu, package_num, num_sockets = 1;
1573
1574         for_each_online_cpu(cpu) {
1575                 package_num = topology_physical_package_id(cpu);
1576                 if (package_num + 1 > num_sockets) {
1577                         num_sockets = package_num + 1;
1578
1579                         if (num_sockets > 4) {
1580                                 cpuidle_state_table = ivt_cstates_8s;
1581                                 return;
1582                         }
1583                 }
1584         }
1585
1586         if (num_sockets > 2)
1587                 cpuidle_state_table = ivt_cstates_4s;
1588
1589         /* else, 1 and 2 socket systems use default ivt_cstates */
1590 }
1591
1592 /**
1593  * irtl_2_usec - IRTL to microseconds conversion.
1594  * @irtl: IRTL MSR value.
1595  *
1596  * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds.
1597  */
1598 static unsigned long long __init irtl_2_usec(unsigned long long irtl)
1599 {
1600         static const unsigned int irtl_ns_units[] __initconst = {
1601                 1, 32, 1024, 32768, 1048576, 33554432, 0, 0
1602         };
1603         unsigned long long ns;
1604
1605         if (!irtl)
1606                 return 0;
1607
1608         ns = irtl_ns_units[(irtl >> 10) & 0x7];
1609
1610         return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC);
1611 }
1612
1613 /**
1614  * bxt_idle_state_table_update - Fix up the Broxton idle states table.
1615  *
1616  * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the
1617  * definitive maximum latency and use the same value for target_residency.
1618  */
1619 static void __init bxt_idle_state_table_update(void)
1620 {
1621         unsigned long long msr;
1622         unsigned int usec;
1623
1624         rdmsrl(MSR_PKGC6_IRTL, msr);
1625         usec = irtl_2_usec(msr);
1626         if (usec) {
1627                 bxt_cstates[2].exit_latency = usec;
1628                 bxt_cstates[2].target_residency = usec;
1629         }
1630
1631         rdmsrl(MSR_PKGC7_IRTL, msr);
1632         usec = irtl_2_usec(msr);
1633         if (usec) {
1634                 bxt_cstates[3].exit_latency = usec;
1635                 bxt_cstates[3].target_residency = usec;
1636         }
1637
1638         rdmsrl(MSR_PKGC8_IRTL, msr);
1639         usec = irtl_2_usec(msr);
1640         if (usec) {
1641                 bxt_cstates[4].exit_latency = usec;
1642                 bxt_cstates[4].target_residency = usec;
1643         }
1644
1645         rdmsrl(MSR_PKGC9_IRTL, msr);
1646         usec = irtl_2_usec(msr);
1647         if (usec) {
1648                 bxt_cstates[5].exit_latency = usec;
1649                 bxt_cstates[5].target_residency = usec;
1650         }
1651
1652         rdmsrl(MSR_PKGC10_IRTL, msr);
1653         usec = irtl_2_usec(msr);
1654         if (usec) {
1655                 bxt_cstates[6].exit_latency = usec;
1656                 bxt_cstates[6].target_residency = usec;
1657         }
1658
1659 }
1660
1661 /**
1662  * sklh_idle_state_table_update - Fix up the Sky Lake idle states table.
1663  *
1664  * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled.
1665  */
1666 static void __init sklh_idle_state_table_update(void)
1667 {
1668         unsigned long long msr;
1669         unsigned int eax, ebx, ecx, edx;
1670
1671
1672         /* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
1673         if (max_cstate <= 7)
1674                 return;
1675
1676         /* if PC10 not present in CPUID.MWAIT.EDX */
1677         if ((mwait_substates & (0xF << 28)) == 0)
1678                 return;
1679
1680         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1681
1682         /* PC10 is not enabled in PKG C-state limit */
1683         if ((msr & 0xF) != 8)
1684                 return;
1685
1686         ecx = 0;
1687         cpuid(7, &eax, &ebx, &ecx, &edx);
1688
1689         /* if SGX is present */
1690         if (ebx & (1 << 2)) {
1691
1692                 rdmsrl(MSR_IA32_FEAT_CTL, msr);
1693
1694                 /* if SGX is enabled */
1695                 if (msr & (1 << 18))
1696                         return;
1697         }
1698
1699         skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C8-SKL */
1700         skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;  /* C9-SKL */
1701 }
1702
1703 /**
1704  * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
1705  * idle states table.
1706  */
1707 static void __init skx_idle_state_table_update(void)
1708 {
1709         unsigned long long msr;
1710
1711         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1712
1713         /*
1714          * 000b: C0/C1 (no package C-state support)
1715          * 001b: C2
1716          * 010b: C6 (non-retention)
1717          * 011b: C6 (retention)
1718          * 111b: No Package C state limits.
1719          */
1720         if ((msr & 0x7) < 2) {
1721                 /*
1722                  * Uses the CC6 + PC0 latency and 3 times of
1723                  * latency for target_residency if the PC6
1724                  * is disabled in BIOS. This is consistent
1725                  * with how intel_idle driver uses _CST
1726                  * to set the target_residency.
1727                  */
1728                 skx_cstates[2].exit_latency = 92;
1729                 skx_cstates[2].target_residency = 276;
1730         }
1731 }
1732
1733 /**
1734  * adl_idle_state_table_update - Adjust AlderLake idle states table.
1735  */
1736 static void __init adl_idle_state_table_update(void)
1737 {
1738         /* Check if user prefers C1 over C1E. */
1739         if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
1740                 cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1741                 cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
1742
1743                 /* Disable C1E by clearing the "C1E promotion" bit. */
1744                 c1e_promotion = C1E_PROMOTION_DISABLE;
1745                 return;
1746         }
1747
1748         /* Make sure C1E is enabled by default */
1749         c1e_promotion = C1E_PROMOTION_ENABLE;
1750 }
1751
1752 /**
1753  * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
1754  */
1755 static void __init spr_idle_state_table_update(void)
1756 {
1757         unsigned long long msr;
1758
1759         /* Check if user prefers C1E over C1. */
1760         if ((preferred_states_mask & BIT(2)) &&
1761             !(preferred_states_mask & BIT(1))) {
1762                 /* Disable C1 and enable C1E. */
1763                 spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
1764                 spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
1765
1766                 /* Enable C1E using the "C1E promotion" bit. */
1767                 c1e_promotion = C1E_PROMOTION_ENABLE;
1768         }
1769
1770         /*
1771          * By default, the C6 state assumes the worst-case scenario of package
1772          * C6. However, if PC6 is disabled, we update the numbers to match
1773          * core C6.
1774          */
1775         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
1776
1777         /* Limit value 2 and above allow for PC6. */
1778         if ((msr & 0x7) < 2) {
1779                 spr_cstates[2].exit_latency = 190;
1780                 spr_cstates[2].target_residency = 600;
1781         }
1782 }
1783
1784 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
1785 {
1786         unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
1787         unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
1788                                         MWAIT_SUBSTATE_MASK;
1789
1790         /* Ignore the C-state if there are NO sub-states in CPUID for it. */
1791         if (num_substates == 0)
1792                 return false;
1793
1794         if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1795                 mark_tsc_unstable("TSC halts in idle states deeper than C2");
1796
1797         return true;
1798 }
1799
1800 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
1801 {
1802         int cstate;
1803
1804         switch (boot_cpu_data.x86_model) {
1805         case INTEL_FAM6_IVYBRIDGE_X:
1806                 ivt_idle_state_table_update();
1807                 break;
1808         case INTEL_FAM6_ATOM_GOLDMONT:
1809         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1810                 bxt_idle_state_table_update();
1811                 break;
1812         case INTEL_FAM6_SKYLAKE:
1813                 sklh_idle_state_table_update();
1814                 break;
1815         case INTEL_FAM6_SKYLAKE_X:
1816                 skx_idle_state_table_update();
1817                 break;
1818         case INTEL_FAM6_SAPPHIRERAPIDS_X:
1819                 spr_idle_state_table_update();
1820                 break;
1821         case INTEL_FAM6_ALDERLAKE:
1822         case INTEL_FAM6_ALDERLAKE_L:
1823                 adl_idle_state_table_update();
1824                 break;
1825         }
1826
1827         for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
1828                 unsigned int mwait_hint;
1829
1830                 if (intel_idle_max_cstate_reached(cstate))
1831                         break;
1832
1833                 if (!cpuidle_state_table[cstate].enter &&
1834                     !cpuidle_state_table[cstate].enter_s2idle)
1835                         break;
1836
1837                 /* If marked as unusable, skip this state. */
1838                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
1839                         pr_debug("state %s is disabled\n",
1840                                  cpuidle_state_table[cstate].name);
1841                         continue;
1842                 }
1843
1844                 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
1845                 if (!intel_idle_verify_cstate(mwait_hint))
1846                         continue;
1847
1848                 /* Structure copy. */
1849                 drv->states[drv->state_count] = cpuidle_state_table[cstate];
1850
1851                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
1852                         drv->states[drv->state_count].enter = intel_idle_irq;
1853
1854                 if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1855                     cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1856                         WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
1857                         drv->states[drv->state_count].enter = intel_idle_ibrs;
1858                 }
1859
1860                 if ((disabled_states_mask & BIT(drv->state_count)) ||
1861                     ((icpu->use_acpi || force_use_acpi) &&
1862                      intel_idle_off_by_default(mwait_hint) &&
1863                      !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
1864                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;
1865
1866                 if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
1867                         drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;
1868
1869                 drv->state_count++;
1870         }
1871
1872         if (icpu->byt_auto_demotion_disable_flag) {
1873                 wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
1874                 wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
1875         }
1876 }
1877
1878 /**
1879  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
1880  * @drv: cpuidle driver structure to initialize.
1881  */
1882 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
1883 {
1884         cpuidle_poll_state_init(drv);
1885
1886         if (disabled_states_mask & BIT(0))
1887                 drv->states[0].flags |= CPUIDLE_FLAG_OFF;
1888
1889         drv->state_count = 1;
1890
1891         if (icpu)
1892                 intel_idle_init_cstates_icpu(drv);
1893         else
1894                 intel_idle_init_cstates_acpi(drv);
1895 }
1896
1897 static void auto_demotion_disable(void)
1898 {
1899         unsigned long long msr_bits;
1900
1901         rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1902         msr_bits &= ~auto_demotion_disable_flags;
1903         wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
1904 }
1905
1906 static void c1e_promotion_enable(void)
1907 {
1908         unsigned long long msr_bits;
1909
1910         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1911         msr_bits |= 0x2;
1912         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1913 }
1914
1915 static void c1e_promotion_disable(void)
1916 {
1917         unsigned long long msr_bits;
1918
1919         rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
1920         msr_bits &= ~0x2;
1921         wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
1922 }
1923
1924 /**
1925  * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
1926  * @cpu: CPU to initialize.
1927  *
1928  * Register a cpuidle device object for @cpu and update its MSRs in accordance
1929  * with the processor model flags.
1930  */
1931 static int intel_idle_cpu_init(unsigned int cpu)
1932 {
1933         struct cpuidle_device *dev;
1934
1935         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1936         dev->cpu = cpu;
1937
1938         if (cpuidle_register_device(dev)) {
1939                 pr_debug("cpuidle_register_device %d failed!\n", cpu);
1940                 return -EIO;
1941         }
1942
1943         if (auto_demotion_disable_flags)
1944                 auto_demotion_disable();
1945
1946         if (c1e_promotion == C1E_PROMOTION_ENABLE)
1947                 c1e_promotion_enable();
1948         else if (c1e_promotion == C1E_PROMOTION_DISABLE)
1949                 c1e_promotion_disable();
1950
1951         return 0;
1952 }
1953
1954 static int intel_idle_cpu_online(unsigned int cpu)
1955 {
1956         struct cpuidle_device *dev;
1957
1958         if (!boot_cpu_has(X86_FEATURE_ARAT))
1959                 tick_broadcast_enable();
1960
1961         /*
1962          * Some systems can hotplug a cpu at runtime after
1963          * the kernel has booted, we have to initialize the
1964          * driver in this case
1965          */
1966         dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
1967         if (!dev->registered)
1968                 return intel_idle_cpu_init(cpu);
1969
1970         return 0;
1971 }
1972
1973 /**
1974  * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
1975  */
1976 static void __init intel_idle_cpuidle_devices_uninit(void)
1977 {
1978         int i;
1979
1980         for_each_online_cpu(i)
1981                 cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
1982 }
1983
1984 static int __init intel_idle_init(void)
1985 {
1986         const struct x86_cpu_id *id;
1987         unsigned int eax, ebx, ecx;
1988         int retval;
1989
1990         /* Do not load intel_idle at all for now if idle= is passed */
1991         if (boot_option_idle_override != IDLE_NO_OVERRIDE)
1992                 return -ENODEV;
1993
1994         if (max_cstate == 0) {
1995                 pr_debug("disabled\n");
1996                 return -EPERM;
1997         }
1998
1999         id = x86_match_cpu(intel_idle_ids);
2000         if (id) {
2001                 if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
2002                         pr_debug("Please enable MWAIT in BIOS SETUP\n");
2003                         return -ENODEV;
2004                 }
2005         } else {
2006                 id = x86_match_cpu(intel_mwait_ids);
2007                 if (!id)
2008                         return -ENODEV;
2009         }
2010
2011         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
2012                 return -ENODEV;
2013
2014         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);
2015
2016         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
2017             !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
2018             !mwait_substates)
2019                         return -ENODEV;
2020
2021         pr_debug("MWAIT substates: 0x%x\n", mwait_substates);
2022
2023         icpu = (const struct idle_cpu *)id->driver_data;
2024         if (icpu) {
2025                 cpuidle_state_table = icpu->state_table;
2026                 auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
2027                 if (icpu->disable_promotion_to_c1e)
2028                         c1e_promotion = C1E_PROMOTION_DISABLE;
2029                 if (icpu->use_acpi || force_use_acpi)
2030                         intel_idle_acpi_cst_extract();
2031         } else if (!intel_idle_acpi_cst_extract()) {
2032                 return -ENODEV;
2033         }
2034
2035         pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
2036                  boot_cpu_data.x86_model);
2037
2038         intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
2039         if (!intel_idle_cpuidle_devices)
2040                 return -ENOMEM;
2041
2042         intel_idle_cpuidle_driver_init(&intel_idle_driver);
2043
2044         retval = cpuidle_register_driver(&intel_idle_driver);
2045         if (retval) {
2046                 struct cpuidle_driver *drv = cpuidle_get_driver();
2047                 printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
2048                        drv ? drv->name : "none");
2049                 goto init_driver_fail;
2050         }
2051
2052         retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
2053                                    intel_idle_cpu_online, NULL);
2054         if (retval < 0)
2055                 goto hp_setup_fail;
2056
2057         pr_debug("Local APIC timer is reliable in %s\n",
2058                  boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");
2059
2060         return 0;
2061
2062 hp_setup_fail:
2063         intel_idle_cpuidle_devices_uninit();
2064         cpuidle_unregister_driver(&intel_idle_driver);
2065 init_driver_fail:
2066         free_percpu(intel_idle_cpuidle_devices);
2067         return retval;
2068
2069 }
/* Run intel_idle_init() as a device-level initcall. */
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 *
 * Exposed as the "states_off" parameter.
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");
/*
 * Some platforms come with mutually exclusive C-states, so that if one is
 * enabled, the other C-states must not be used. Example: C1 and C1E on
 * Sapphire Rapids platform. This parameter allows for selecting the
 * preferred C-states among the groups of mutually exclusive C-states - the
 * selected C-states will be registered, the other C-states from the mutually
 * exclusive group won't be registered. If the platform has no mutually
 * exclusive C-states, this parameter has no effect.
 *
 * Exposed as the "preferred_cstates" parameter. The Alder Lake and Sapphire
 * Rapids table fixups in this file test bit 1 of the mask for C1 and bit 2
 * for C1E.
 */
module_param_named(preferred_cstates, preferred_states_mask, uint, 0444);
MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states");
This page took 0.158143 seconds and 4 git commands to generate.