[linux.git] drivers/thermal/intel/intel_powerclamp.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * intel_powerclamp.c - package c-state idle injection
4  *
5  * Copyright (c) 2012-2023, Intel Corporation.
6  *
7  * Authors:
8  *     Arjan van de Ven <[email protected]>
9  *     Jacob Pan <[email protected]>
10  *
11  *      TODO:
12  *           1. Better handle wakeups from external interrupts. Currently a
13  *              fixed compensation is added to the clamping duration when an
14  *              excessive number of wakeups is observed during idle time. The
15  *              reason is that for external interrupts which need no ack,
16  *              clamping down the CPU in non-IRQ context does not reduce the
17  *              IRQ rate. In the majority of cases clamping down the CPU does
18  *              help reduce IRQs as well, so we should be able to differentiate
19  *              the two cases and give a quantitative solution for the IRQs we
20  *              can control, perhaps based on get_cpu_iowait_time_us().
21  *
22  *           2. Synchronization with other HW blocks.
23  */
24
25 #define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
26
27 #include <linux/module.h>
28 #include <linux/kernel.h>
29 #include <linux/delay.h>
30 #include <linux/cpu.h>
31 #include <linux/thermal.h>
32 #include <linux/debugfs.h>
33 #include <linux/seq_file.h>
34 #include <linux/idle_inject.h>
35
36 #include <asm/msr.h>
37 #include <asm/mwait.h>
38 #include <asm/cpu_device_id.h>
39
40 #define MAX_TARGET_RATIO (100U)
41 /* For each undisturbed clamping period (no extra wake ups during idle time),
42  * we increment the confidence counter for the given target ratio.
43  * CONFIDENCE_OK defines the level where runtime calibration results are
44  * valid.
45  */
46 #define CONFIDENCE_OK (3)
47 /* Default idle injection duration; the driver adjusts the sleep time to
48  * meet the target idle ratio. Similar to frequency modulation.
49  */
50 #define DEFAULT_DURATION_JIFFIES (6)
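/*
 * The default is converted to microseconds with jiffies_to_usecs() in
 * powerclamp_init(): for example, 6 jiffies is 6 ms at HZ=1000 and
 * 24 ms at HZ=250.
 */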
51
52 static unsigned int target_mwait;
53 static struct dentry *debug_dir;
54 static bool poll_pkg_cstate_enable;
55
56 /* Idle ratio observed using package C-state counters */
57 static unsigned int current_ratio;
58
59 /* Skip idle injection for the current cycle when this is set to true */
60 static bool should_skip;
61
62 struct powerclamp_data {
63         unsigned int cpu;
64         unsigned int count;
65         unsigned int guard;
66         unsigned int window_size_now;
67         unsigned int target_ratio;
68         bool clamping;
69 };
70
71 static struct powerclamp_data powerclamp_data;
72
73 static struct thermal_cooling_device *cooling_dev;
74
75 static DEFINE_MUTEX(powerclamp_lock);
76
77 /* This duration is in microseconds */
78 static unsigned int duration;
79 static unsigned int pkg_cstate_ratio_cur;
80 static unsigned int window_size;
81
82 static int duration_set(const char *arg, const struct kernel_param *kp)
83 {
84         int ret = 0;
85         unsigned long new_duration;
86
87         ret = kstrtoul(arg, 10, &new_duration);
88         if (ret)
89                 goto exit;
90         if (new_duration > 25 || new_duration < 6) {
91                 pr_err("Out of recommended range %lu, between 6-25ms\n",
92                         new_duration);
93                 ret = -EINVAL;
94                 goto exit;
95         }
96
97         mutex_lock(&powerclamp_lock);
98         duration = clamp(new_duration, 6ul, 25ul) * 1000;
99         mutex_unlock(&powerclamp_lock);
100 exit:
101
102         return ret;
103 }
104
105 static int duration_get(char *buf, const struct kernel_param *kp)
106 {
107         int ret;
108
109         mutex_lock(&powerclamp_lock);
110         ret = sysfs_emit(buf, "%d\n", duration / 1000);
111         mutex_unlock(&powerclamp_lock);
112
113         return ret;
114 }
115
116 static const struct kernel_param_ops duration_ops = {
117         .set = duration_set,
118         .get = duration_get,
119 };
120
121 module_param_cb(duration, &duration_ops, NULL, 0644);
122 MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
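/*
 * Illustrative usage (standard module parameter path, assumed): writing
 * "10" to /sys/module/intel_powerclamp/parameters/duration requests 10 ms
 * of forced idle per injection; duration_set() stores it as 10000 us.
 */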
123
124 #define DEFAULT_MAX_IDLE        50
125 #define MAX_ALL_CPU_IDLE        75
126
127 static u8 max_idle = DEFAULT_MAX_IDLE;
128
129 static cpumask_var_t idle_injection_cpu_mask;
130
131 static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask)
132 {
133         if (cpumask_available(idle_injection_cpu_mask))
134                 goto copy_mask;
135
136         /* This mask is allocated only one time and freed during module exit */
137         if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL))
138                 return -ENOMEM;
139
140 copy_mask:
141         cpumask_copy(idle_injection_cpu_mask, copy_mask);
142
143         return 0;
144 }
145
146 /* Return true if the cpumask and idle percent combination is invalid */
147 static bool check_invalid(cpumask_var_t mask, u8 idle)
148 {
149         if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE)
150                 return true;
151
152         return false;
153 }
154
155 static int cpumask_set(const char *arg, const struct kernel_param *kp)
156 {
157         cpumask_var_t new_mask;
158         int ret;
159
160         mutex_lock(&powerclamp_lock);
161
162         /* Can't set mask when cooling device is in use */
163         if (powerclamp_data.clamping) {
164                 ret = -EAGAIN;
165                 goto skip_cpumask_set;
166         }
167
168         ret = alloc_cpumask_var(&new_mask, GFP_KERNEL);
169         if (!ret)
170                 goto skip_cpumask_set;
171
172         ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask),
173                            nr_cpumask_bits);
174         if (ret)
175                 goto free_cpumask_set;
176
177         if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) {
178                 ret = -EINVAL;
179                 goto free_cpumask_set;
180         }
181
182         /*
183          * When module parameters are passed from kernel command line
184          * during insmod, the module parameter callback is called
185          * before powerclamp_init(), so we can't assume that some
186          * cpumask can be allocated and copied before here. Also
187          * in this case this cpumask is used as the default mask.
188          */
189         ret = allocate_copy_idle_injection_mask(new_mask);
190
191 free_cpumask_set:
192         free_cpumask_var(new_mask);
193 skip_cpumask_set:
194         mutex_unlock(&powerclamp_lock);
195
196         return ret;
197 }
198
199 static int cpumask_get(char *buf, const struct kernel_param *kp)
200 {
201         if (!cpumask_available(idle_injection_cpu_mask))
202                 return -ENODEV;
203
204         return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask),
205                                        nr_cpumask_bits);
206 }
207
208 static const struct kernel_param_ops cpumask_ops = {
209         .set = cpumask_set,
210         .get = cpumask_get,
211 };
212
213 module_param_cb(cpumask, &cpumask_ops, NULL, 0644);
214 MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection.");
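/*
 * Illustrative usage: the mask is parsed by bitmap_parse() as a hex CPU
 * bitmap, so e.g. writing "f" to
 * /sys/module/intel_powerclamp/parameters/cpumask (standard module
 * parameter path, assumed) selects CPUs 0-3. The write fails with
 * -EAGAIN while clamping is active.
 */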
215
216 static int max_idle_set(const char *arg, const struct kernel_param *kp)
217 {
218         u8 new_max_idle;
219         int ret = 0;
220
221         mutex_lock(&powerclamp_lock);
222
223         /* Can't set the limit while the cooling device is in use */
224         if (powerclamp_data.clamping) {
225                 ret = -EAGAIN;
226                 goto skip_limit_set;
227         }
228
229         ret = kstrtou8(arg, 10, &new_max_idle);
230         if (ret)
231                 goto skip_limit_set;
232
233         if (new_max_idle > MAX_TARGET_RATIO) {
234                 ret = -EINVAL;
235                 goto skip_limit_set;
236         }
237
238         if (!cpumask_available(idle_injection_cpu_mask)) {
239                 ret = allocate_copy_idle_injection_mask(cpu_present_mask);
240                 if (ret)
241                         goto skip_limit_set;
242         }
243
244         if (check_invalid(idle_injection_cpu_mask, new_max_idle)) {
245                 ret = -EINVAL;
246                 goto skip_limit_set;
247         }
248
249         max_idle = new_max_idle;
250
251 skip_limit_set:
252         mutex_unlock(&powerclamp_lock);
253
254         return ret;
255 }
256
257 static const struct kernel_param_ops max_idle_ops = {
258         .set = max_idle_set,
259         .get = param_get_byte,
260 };
261
262 module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644);
263 MODULE_PARM_DESC(max_idle, "maximum ratio of injected idle time to total CPU time, in percent (range: 1-100)");
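/*
 * Note (derived from check_invalid() above): when the idle injection
 * cpumask covers every present CPU, max_idle values above
 * MAX_ALL_CPU_IDLE (75) are rejected.
 */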
264
265 struct powerclamp_calibration_data {
266         unsigned long confidence;  /* used for calibration, basically a counter
267                                     * that gets incremented each time a clamping
268                                     * period is completed without extra wakeups;
269                                     * once that counter reaches a given level,
270                                     * the compensation is deemed usable.
271                                     */
272         unsigned long steady_comp; /* steady state compensation used when
273                                     * no extra wakeups occurred.
274                                     */
275         unsigned long dynamic_comp; /* compensate excessive wakeup from idle
276                                      * mostly from external interrupts.
277                                      */
278 };
279
280 static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
281
282 static int window_size_set(const char *arg, const struct kernel_param *kp)
283 {
284         int ret = 0;
285         unsigned long new_window_size;
286
287         ret = kstrtoul(arg, 10, &new_window_size);
288         if (ret)
289                 goto exit_win;
290         if (new_window_size > 10 || new_window_size < 2) {
291                 pr_err("Out of recommended window size %lu, between 2-10\n",
292                         new_window_size);
293                 ret = -EINVAL;
                    goto exit_win;
294         }
295
296         window_size = clamp(new_window_size, 2ul, 10ul);
297         smp_mb();
298
299 exit_win:
300
301         return ret;
302 }
303
304 static const struct kernel_param_ops window_size_ops = {
305         .set = window_size_set,
306         .get = param_get_int,
307 };
308
309 module_param_cb(window_size, &window_size_ops, &window_size, 0644);
310 MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
311         "\tpowerclamp controls the idle ratio within this window. A larger\n"
312         "\twindow size results in a slower response time but smoother\n"
313         "\tclamping results. Defaults to 2.");
314
315 static void find_target_mwait(void)
316 {
317         unsigned int eax, ebx, ecx, edx;
318         unsigned int highest_cstate = 0;
319         unsigned int highest_subcstate = 0;
320         int i;
321
322         if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
323                 return;
324
325         cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
326
327         if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
328             !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
329                 return;
330
331         edx >>= MWAIT_SUBSTATE_SIZE;
332         for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
333                 if (edx & MWAIT_SUBSTATE_MASK) {
334                         highest_cstate = i;
335                         highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
336                 }
337         }
338         target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
339                 (highest_subcstate - 1);
340
341 }
342
343 struct pkg_cstate_info {
344         bool skip;
345         int msr_index;
346         int cstate_id;
347 };
348
349 #define PKG_CSTATE_INIT(id) {                           \
350                 .msr_index = MSR_PKG_C##id##_RESIDENCY, \
351                 .cstate_id = id                         \
352                         }
353
354 static struct pkg_cstate_info pkg_cstates[] = {
355         PKG_CSTATE_INIT(2),
356         PKG_CSTATE_INIT(3),
357         PKG_CSTATE_INIT(6),
358         PKG_CSTATE_INIT(7),
359         PKG_CSTATE_INIT(8),
360         PKG_CSTATE_INIT(9),
361         PKG_CSTATE_INIT(10),
362         {NULL},
363 };
364
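/*
 * Both helpers below use rdmsrl_safe() so that probing a residency MSR
 * which does not exist on this CPU simply fails instead of faulting;
 * pkg_state_counter() additionally marks such counters with info->skip
 * so they are not read again.
 */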
365 static bool has_pkg_state_counter(void)
366 {
367         u64 val;
368         struct pkg_cstate_info *info = pkg_cstates;
369
370         /* check if any one of the counter msrs exists */
371         while (info->msr_index) {
372                 if (!rdmsrl_safe(info->msr_index, &val))
373                         return true;
374                 info++;
375         }
376
377         return false;
378 }
379
380 static u64 pkg_state_counter(void)
381 {
382         u64 val;
383         u64 count = 0;
384         struct pkg_cstate_info *info = pkg_cstates;
385
386         while (info->msr_index) {
387                 if (!info->skip) {
388                         if (!rdmsrl_safe(info->msr_index, &val))
389                                 count += val;
390                         else
391                                 info->skip = true;
392                 }
393                 info++;
394         }
395
396         return count;
397 }
398
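/*
 * Summary of the calibration lookup below: the compensation for a target
 * ratio is the average steady_comp of three adjacent ratios (the ratio
 * itself plus its neighbours, shifted inward at the 1 and
 * MAX_TARGET_RATIO - 1 boundaries), and it is only used once all three
 * have reached CONFIDENCE_OK. The result is capped so that
 * ratio + compensation stays below MAX_TARGET_RATIO.
 */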
399 static unsigned int get_compensation(int ratio)
400 {
401         unsigned int comp = 0;
402
403         if (!poll_pkg_cstate_enable)
404                 return 0;
405
406         /* we only use compensation if all adjacent ones are good */
407         if (ratio == 1 &&
408                 cal_data[ratio].confidence >= CONFIDENCE_OK &&
409                 cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
410                 cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
411                 comp = (cal_data[ratio].steady_comp +
412                         cal_data[ratio + 1].steady_comp +
413                         cal_data[ratio + 2].steady_comp) / 3;
414         } else if (ratio == MAX_TARGET_RATIO - 1 &&
415                 cal_data[ratio].confidence >= CONFIDENCE_OK &&
416                 cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
417                 cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
418                 comp = (cal_data[ratio].steady_comp +
419                         cal_data[ratio - 1].steady_comp +
420                         cal_data[ratio - 2].steady_comp) / 3;
421         } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
422                 cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
423                 cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
424                 comp = (cal_data[ratio].steady_comp +
425                         cal_data[ratio - 1].steady_comp +
426                         cal_data[ratio + 1].steady_comp) / 3;
427         }
428
429         /* do not exceed limit */
430         if (comp + ratio >= MAX_TARGET_RATIO)
431                 comp = MAX_TARGET_RATIO - ratio - 1;
432
433         return comp;
434 }
435
436 static void adjust_compensation(int target_ratio, unsigned int win)
437 {
438         int delta;
439         struct powerclamp_calibration_data *d = &cal_data[target_ratio];
440
441         /*
442          * adjust compensations if confidence level has not been reached.
443          */
444         if (d->confidence >= CONFIDENCE_OK)
445                 return;
446
447         delta = powerclamp_data.target_ratio - current_ratio;
448         /* filter out bad data */
449         if (delta >= 0 && delta <= (1+target_ratio/10)) {
450                 if (d->steady_comp)
451                         d->steady_comp =
452                                 roundup(delta+d->steady_comp, 2)/2;
453                 else
454                         d->steady_comp = delta;
455                 d->confidence++;
456         }
457 }
458
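/*
 * powerclamp_adjust_controls() computes the idle ratio actually observed
 * over the last window as
 *
 *   current_ratio = 100 * delta(pkg C-state residency) / delta(TSC)
 *
 * updates the calibration data, and returns true when the observed ratio
 * already exceeds target_ratio + guard, in which case the caller skips
 * injection.
 */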
459 static bool powerclamp_adjust_controls(unsigned int target_ratio,
460                                 unsigned int guard, unsigned int win)
461 {
462         static u64 msr_last, tsc_last;
463         u64 msr_now, tsc_now;
464         u64 val64;
465
466         /* check result for the last window */
467         msr_now = pkg_state_counter();
468         tsc_now = rdtsc();
469
470         /* calculate pkg cstate vs tsc ratio */
471         if (!msr_last || !tsc_last)
472                 current_ratio = 1;
473         else if (tsc_now-tsc_last) {
474                 val64 = 100*(msr_now-msr_last);
475                 do_div(val64, (tsc_now-tsc_last));
476                 current_ratio = val64;
477         }
478
479         /* update record */
480         msr_last = msr_now;
481         tsc_last = tsc_now;
482
483         adjust_compensation(target_ratio, win);
484
485         /* if we are above target+guard, skip */
486         return powerclamp_data.target_ratio + guard <= current_ratio;
487 }
488
489 /*
490  * This function calculates runtime from the current target ratio.
491  * This function gets called under powerclamp_lock.
492  */
493 static unsigned int get_run_time(void)
494 {
495         unsigned int compensated_ratio;
496         unsigned int runtime;
497
498         /*
499          * Make sure a user-selected ratio does not take effect until
500          * the next round. Adjust target_ratio if the user has changed
501          * the target so that we can converge quickly.
502          */
503         powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20;
504         powerclamp_data.window_size_now = window_size;
505
506         /*
507          * systems may have different ability to enter package level
508          * c-states, thus we need to compensate the injected idle ratio
509          * to achieve the actual target reported by the HW.
510          */
511         compensated_ratio = powerclamp_data.target_ratio +
512                 get_compensation(powerclamp_data.target_ratio);
513         if (compensated_ratio <= 0)
514                 compensated_ratio = 1;
515
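        /*
         * Illustrative example: with duration = 24000 us and a compensated
         * ratio of 50, runtime = 24000 * 100 / 50 - 24000 = 24000 us, i.e.
         * equal run and idle time gives a 50% idle ratio.
         */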
516         runtime = duration * 100 / compensated_ratio - duration;
517
518         return runtime;
519 }
520
521 /*
522  * 1 HZ polling while clamping is active, useful for userspace
523  * to monitor actual idle ratio.
524  */
525 static void poll_pkg_cstate(struct work_struct *dummy);
526 static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
527 static void poll_pkg_cstate(struct work_struct *dummy)
528 {
529         static u64 msr_last;
530         static u64 tsc_last;
531
532         u64 msr_now;
533         u64 tsc_now;
534         u64 val64;
535
536         msr_now = pkg_state_counter();
537         tsc_now = rdtsc();
538
539         /* calculate pkg cstate vs tsc ratio */
540         if (!msr_last || !tsc_last)
541                 pkg_cstate_ratio_cur = 1;
542         else {
543                 if (tsc_now - tsc_last) {
544                         val64 = 100 * (msr_now - msr_last);
545                         do_div(val64, (tsc_now - tsc_last));
546                         pkg_cstate_ratio_cur = val64;
547                 }
548         }
549
550         /* update record */
551         msr_last = msr_now;
552         tsc_last = tsc_now;
553
554         mutex_lock(&powerclamp_lock);
555         if (powerclamp_data.clamping)
556                 schedule_delayed_work(&poll_pkg_cstate_work, HZ);
557         mutex_unlock(&powerclamp_lock);
558 }
559
560 static struct idle_inject_device *ii_dev;
561
562 /*
563  * This function is called from idle injection core on timer expiry
564  * for the run duration. This allows powerclamp to readjust or skip
565  * injecting idle for this cycle.
566  */
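/*
 * A false return value from this callback tells the idle injection core
 * not to inject idle for this cycle; the clamping controls themselves are
 * only re-evaluated once every window_size_now invocations.
 */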
567 static bool idle_inject_update(void)
568 {
569         bool update = false;
570
571         /* We can't sleep in this callback */
572         if (!mutex_trylock(&powerclamp_lock))
573                 return true;
574
575         if (!(powerclamp_data.count % powerclamp_data.window_size_now)) {
576
577                 should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio,
578                                                          powerclamp_data.guard,
579                                                          powerclamp_data.window_size_now);
580                 update = true;
581         }
582
583         if (update) {
584                 unsigned int runtime = get_run_time();
585
586                 idle_inject_set_duration(ii_dev, runtime, duration);
587         }
588
589         powerclamp_data.count++;
590
591         mutex_unlock(&powerclamp_lock);
592
593         if (should_skip)
594                 return false;
595
596         return true;
597 }
598
599 /* This function starts idle injection by calling idle_inject_start() */
600 static void trigger_idle_injection(void)
601 {
602         unsigned int runtime = get_run_time();
603
604         idle_inject_set_duration(ii_dev, runtime, duration);
605         idle_inject_start(ii_dev);
606         powerclamp_data.clamping = true;
607 }
608
609 /*
610  * This function is called from start_power_clamp() to register
611  * CPUS with powercap idle injection register and set default
612  * idle duration and latency.
613  */
614 static int powerclamp_idle_injection_register(void)
615 {
616         poll_pkg_cstate_enable = false;
617         if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) {
618                 ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update);
619                 if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
620                         poll_pkg_cstate_enable = true;
621         } else {
622                 ii_dev = idle_inject_register(idle_injection_cpu_mask);
623         }
624
625         if (!ii_dev) {
626                 pr_err("powerclamp: idle_inject_register failed\n");
627                 return -EAGAIN;
628         }
629
630         idle_inject_set_duration(ii_dev, TICK_USEC, duration);
631         idle_inject_set_latency(ii_dev, UINT_MAX);
632
633         return 0;
634 }
635
636 /*
637  * This function is called from end_power_clamp() to stop idle injection
638  * and unregister CPUs from the powercap idle injection core.
639  */
640 static void remove_idle_injection(void)
641 {
642         if (!powerclamp_data.clamping)
643                 return;
644
645         powerclamp_data.clamping = false;
646         idle_inject_stop(ii_dev);
647 }
648
649 /*
650  * This function is called when the user changes the cooling device
651  * state from zero to some other value.
652  */
653 static int start_power_clamp(void)
654 {
655         int ret;
656
657         ret = powerclamp_idle_injection_register();
658         if (!ret) {
659                 trigger_idle_injection();
660                 if (poll_pkg_cstate_enable)
661                         schedule_delayed_work(&poll_pkg_cstate_work, 0);
662         }
663
664         return ret;
665 }
666
667 /*
668  * This function is called when the user changes the cooling device
669  * state from a non-zero value to zero.
670  */
671 static void end_power_clamp(void)
672 {
673         if (powerclamp_data.clamping) {
674                 remove_idle_injection();
675                 idle_inject_unregister(ii_dev);
676         }
677 }
678
679 static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
680                                  unsigned long *state)
681 {
682         *state = MAX_TARGET_RATIO;
683
684         return 0;
685 }
686
687 static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
688                                  unsigned long *state)
689 {
690         mutex_lock(&powerclamp_lock);
691         *state = powerclamp_data.target_ratio;
692         mutex_unlock(&powerclamp_lock);
693
694         return 0;
695 }
696
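/*
 * Cooling device state semantics: state 0 means no idle injection, while
 * states 1 .. max_idle - 1 select the target idle percentage. A 0 -> N
 * transition starts clamping, N -> 0 stops it, and any other change just
 * re-computes the run/idle durations of an active injection.
 */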
697 static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
698                                  unsigned long new_target_ratio)
699 {
700         int ret = 0;
701
702         mutex_lock(&powerclamp_lock);
703
704         new_target_ratio = clamp(new_target_ratio, 0UL,
705                                 (unsigned long) (max_idle - 1));
706
707         if (powerclamp_data.target_ratio == new_target_ratio)
708                 goto exit_set;
709
710         if (!powerclamp_data.target_ratio && new_target_ratio > 0) {
711                 pr_info("Start idle injection to reduce power\n");
712                 powerclamp_data.target_ratio = new_target_ratio;
713                 ret = start_power_clamp();
714                 if (ret)
715                         powerclamp_data.target_ratio = 0;
716                 goto exit_set;
717         } else  if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) {
718                 pr_info("Stop forced idle injection\n");
719                 end_power_clamp();
720                 powerclamp_data.target_ratio = 0;
721         } else  /* adjust currently running */ {
722                 unsigned int runtime;
723
724                 powerclamp_data.target_ratio = new_target_ratio;
725                 runtime = get_run_time();
726                 idle_inject_set_duration(ii_dev, runtime, duration);
727         }
728
729 exit_set:
730         mutex_unlock(&powerclamp_lock);
731
732         return ret;
733 }
734
735 /* Bind to the generic thermal layer as a cooling device */
736 static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
737         .get_max_state = powerclamp_get_max_state,
738         .get_cur_state = powerclamp_get_cur_state,
739         .set_cur_state = powerclamp_set_cur_state,
740 };
741
742 static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
743         X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
744         {}
745 };
746 MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
747
748 static int __init powerclamp_probe(void)
749 {
750
751         if (!x86_match_cpu(intel_powerclamp_ids)) {
752                 pr_err("CPU does not support MWAIT\n");
753                 return -ENODEV;
754         }
755
756         /* Idle time must be aligned across CPUs to reach a package C-state. */
757         if (!has_pkg_state_counter()) {
758                 pr_info("No package C-state available\n");
759                 return -ENODEV;
760         }
761
762         /* find the deepest mwait value */
763         find_target_mwait();
764
765         return 0;
766 }
767
768 static int powerclamp_debug_show(struct seq_file *m, void *unused)
769 {
770         int i = 0;
771
772         seq_printf(m, "pct confidence steady dynamic (compensation)\n");
773         for (i = 0; i < MAX_TARGET_RATIO; i++) {
774                 seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
775                         i,
776                         cal_data[i].confidence,
777                         cal_data[i].steady_comp,
778                         cal_data[i].dynamic_comp);
779         }
780
781         return 0;
782 }
783
784 DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);
785
786 static inline void powerclamp_create_debug_files(void)
787 {
788         debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
789
790         debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
791                             &powerclamp_debug_fops);
792 }
793
794 static int __init powerclamp_init(void)
795 {
796         int retval;
797
798         /* probe cpu features and ids here */
799         retval = powerclamp_probe();
800         if (retval)
801                 return retval;
802
803         mutex_lock(&powerclamp_lock);
804         if (!cpumask_available(idle_injection_cpu_mask))
805                 retval = allocate_copy_idle_injection_mask(cpu_present_mask);
806         mutex_unlock(&powerclamp_lock);
807
808         if (retval)
809                 return retval;
810
811         /* set default limit, maybe adjusted during runtime based on feedback */
812         window_size = 2;
813
814         cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
815                                                       &powerclamp_cooling_ops);
816         if (IS_ERR(cooling_dev))
817                 return -ENODEV;
818
819         if (!duration)
820                 duration = jiffies_to_usecs(DEFAULT_DURATION_JIFFIES);
821
822         powerclamp_create_debug_files();
823
824         return 0;
825 }
826 module_init(powerclamp_init);
827
828 static void __exit powerclamp_exit(void)
829 {
830         mutex_lock(&powerclamp_lock);
831         end_power_clamp();
832         mutex_unlock(&powerclamp_lock);
833
834         thermal_cooling_device_unregister(cooling_dev);
835
836         cancel_delayed_work_sync(&poll_pkg_cstate_work);
837         debugfs_remove_recursive(debug_dir);
838
839         if (cpumask_available(idle_injection_cpu_mask))
840                 free_cpumask_var(idle_injection_cpu_mask);
841 }
842 module_exit(powerclamp_exit);
843
844 MODULE_IMPORT_NS(IDLE_INJECT);
845
846 MODULE_LICENSE("GPL");
847 MODULE_AUTHOR("Arjan van de Ven <[email protected]>");
848 MODULE_AUTHOR("Jacob Pan <[email protected]>");
849 MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");