]> Git Repo - linux.git/blame - drivers/powercap/intel_rapl_common.c
Merge tag 'pinctrl-v6.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw...
[linux.git] / drivers / powercap / intel_rapl_common.c
CommitLineData
f6cc69f1 1// SPDX-License-Identifier: GPL-2.0-only
2d281d81 2/*
3382388d
ZR
3 * Common code for Intel Running Average Power Limit (RAPL) support.
4 * Copyright (c) 2019, Intel Corporation.
2d281d81
JP
5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
72b8b941 8#include <linux/bitmap.h>
1aa09b93 9#include <linux/cleanup.h>
72b8b941
ZR
10#include <linux/cpu.h>
11#include <linux/delay.h>
12#include <linux/device.h>
13#include <linux/intel_rapl.h>
2d281d81 14#include <linux/kernel.h>
2d281d81 15#include <linux/list.h>
2d281d81 16#include <linux/log2.h>
72b8b941 17#include <linux/module.h>
575024a8
ZR
18#include <linux/nospec.h>
19#include <linux/perf_event.h>
72b8b941 20#include <linux/platform_device.h>
2d281d81 21#include <linux/powercap.h>
3382388d 22#include <linux/processor.h>
72b8b941
ZR
23#include <linux/slab.h>
24#include <linux/suspend.h>
25#include <linux/sysfs.h>
26#include <linux/types.h>
abcfaeb3 27
2d281d81 28#include <asm/cpu_device_id.h>
62d16733 29#include <asm/intel-family.h>
72b8b941 30#include <asm/iosf_mbi.h>
2d281d81
JP
31
32/* bitmasks for RAPL MSRs, used by primitive access functions */
33#define ENERGY_STATUS_MASK 0xffffffff
34
35#define POWER_LIMIT1_MASK 0x7FFF
36#define POWER_LIMIT1_ENABLE BIT(15)
37#define POWER_LIMIT1_CLAMP BIT(16)
38
39#define POWER_LIMIT2_MASK (0x7FFFULL<<32)
40#define POWER_LIMIT2_ENABLE BIT_ULL(47)
41#define POWER_LIMIT2_CLAMP BIT_ULL(48)
0c2ddedd
ZR
42#define POWER_HIGH_LOCK BIT_ULL(63)
43#define POWER_LOW_LOCK BIT(31)
2d281d81 44
8365a898
SP
45#define POWER_LIMIT4_MASK 0x1FFF
46
2d281d81
JP
47#define TIME_WINDOW1_MASK (0x7FULL<<17)
48#define TIME_WINDOW2_MASK (0x7FULL<<49)
49
50#define POWER_UNIT_OFFSET 0
51#define POWER_UNIT_MASK 0x0F
52
53#define ENERGY_UNIT_OFFSET 0x08
54#define ENERGY_UNIT_MASK 0x1F00
55
56#define TIME_UNIT_OFFSET 0x10
57#define TIME_UNIT_MASK 0xF0000
58
59#define POWER_INFO_MAX_MASK (0x7fffULL<<32)
60#define POWER_INFO_MIN_MASK (0x7fffULL<<16)
61#define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
62#define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
63
64#define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
65#define PP_POLICY_MASK 0x1F
66
931da6a0
ZR
67/*
68 * SPR has different layout for Psys Domain PowerLimit registers.
69 * There are 17 bits of PL1 and PL2 instead of 15 bits.
70 * The Enable bits and TimeWindow bits are also shifted as a result.
71 */
72#define PSYS_POWER_LIMIT1_MASK 0x1FFFF
73#define PSYS_POWER_LIMIT1_ENABLE BIT(17)
74
75#define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
76#define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
77
78#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
79#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
80
e12dee18
ZR
81/* bitmasks for RAPL TPMI, used by primitive access functions */
82#define TPMI_POWER_LIMIT_MASK 0x3FFFF
83#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
84#define TPMI_TIME_WINDOW_MASK (0x7FULL<<18)
85#define TPMI_INFO_SPEC_MASK 0x3FFFF
86#define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18)
87#define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36)
88#define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
89
2d281d81 90/* Non HW constants */
3382388d 91#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
2d281d81
JP
92#define RAPL_PRIMITIVE_DUMMY BIT(2)
93
2d281d81
JP
94#define TIME_WINDOW_MAX_MSEC 40000
95#define TIME_WINDOW_MIN_MSEC 250
3382388d 96#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
/* How rapl_unit_xlate() converts a raw register field. */
enum unit_type {
	ARBITRARY_UNIT = 0,	/* no translation, value is passed through */
	POWER_UNIT,		/* scaled by the domain's power_unit */
	ENERGY_UNIT,		/* scaled by the domain's energy_unit and ENERGY_UNIT_SCALE */
	TIME_UNIT,		/* converted by defaults->compute_time_window() */
};
103
2d281d81 104/* per domain data, some are optional */
2d281d81
JP
105#define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
106
2d281d81
JP
107#define DOMAIN_STATE_INACTIVE BIT(0)
108#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
2d281d81 109
9050a9cd
ZR
110static const char *pl_names[NR_POWER_LIMITS] = {
111 [POWER_LIMIT1] = "long_term",
112 [POWER_LIMIT2] = "short_term",
113 [POWER_LIMIT4] = "peak_power",
114};
115
/*
 * Per power limit properties. get_pl_prim() maps a (power limit, property)
 * pair to the matching enum rapl_primitives value.
 */
enum pl_prims {
	PL_ENABLE = 0,		/* enable bit */
	PL_CLAMP,		/* clamp bit */
	PL_LIMIT,		/* the power limit value itself */
	PL_TIME_WINDOW,		/* time window of the limit */
	PL_MAX_POWER,		/* maximum power reported for the limit */
	PL_LOCK,		/* lock bit */
};
124
125static bool is_pl_valid(struct rapl_domain *rd, int pl)
126{
127 if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4)
128 return false;
129 return rd->rpl[pl].name ? true : false;
130}
131
f442bd27
ZR
132static int get_pl_lock_prim(struct rapl_domain *rd, int pl)
133{
e12dee18
ZR
134 if (rd->rp->priv->type == RAPL_IF_TPMI) {
135 if (pl == POWER_LIMIT1)
136 return PL1_LOCK;
137 if (pl == POWER_LIMIT2)
138 return PL2_LOCK;
139 if (pl == POWER_LIMIT4)
140 return PL4_LOCK;
141 }
142
143 /* MSR/MMIO Interface doesn't have Lock bit for PL4 */
144 if (pl == POWER_LIMIT4)
145 return -EINVAL;
146
f442bd27
ZR
147 /*
148 * Power Limit register that supports two power limits has a different
149 * bit position for the Lock bit.
150 */
151 if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2))
152 return FW_HIGH_LOCK;
153 return FW_LOCK;
154}
155
156static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
9050a9cd
ZR
157{
158 switch (pl) {
159 case POWER_LIMIT1:
160 if (prim == PL_ENABLE)
161 return PL1_ENABLE;
e12dee18 162 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
9050a9cd
ZR
163 return PL1_CLAMP;
164 if (prim == PL_LIMIT)
165 return POWER_LIMIT1;
166 if (prim == PL_TIME_WINDOW)
167 return TIME_WINDOW1;
168 if (prim == PL_MAX_POWER)
169 return THERMAL_SPEC_POWER;
f442bd27
ZR
170 if (prim == PL_LOCK)
171 return get_pl_lock_prim(rd, pl);
9050a9cd
ZR
172 return -EINVAL;
173 case POWER_LIMIT2:
174 if (prim == PL_ENABLE)
175 return PL2_ENABLE;
e12dee18 176 if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
9050a9cd
ZR
177 return PL2_CLAMP;
178 if (prim == PL_LIMIT)
179 return POWER_LIMIT2;
180 if (prim == PL_TIME_WINDOW)
181 return TIME_WINDOW2;
182 if (prim == PL_MAX_POWER)
183 return MAX_POWER;
f442bd27
ZR
184 if (prim == PL_LOCK)
185 return get_pl_lock_prim(rd, pl);
9050a9cd
ZR
186 return -EINVAL;
187 case POWER_LIMIT4:
188 if (prim == PL_LIMIT)
189 return POWER_LIMIT4;
190 if (prim == PL_ENABLE)
191 return PL4_ENABLE;
192 /* PL4 would be around two times PL2, use same prim as PL2. */
193 if (prim == PL_MAX_POWER)
194 return MAX_POWER;
e12dee18
ZR
195 if (prim == PL_LOCK)
196 return get_pl_lock_prim(rd, pl);
9050a9cd
ZR
197 return -EINVAL;
198 default:
199 return -EINVAL;
200 }
201}
2d281d81 202
2d281d81
JP
203#define power_zone_to_rapl_domain(_zone) \
204 container_of(_zone, struct rapl_domain, power_zone)
205
087e9cba 206struct rapl_defaults {
51b63409 207 u8 floor_freq_reg_addr;
693c1d78 208 int (*check_unit)(struct rapl_domain *rd);
087e9cba 209 void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
cb532e72 210 u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
3382388d 211 bool to_raw);
d474a4d3 212 unsigned int dram_domain_energy_unit;
2d798d9f 213 unsigned int psys_domain_energy_unit;
931da6a0 214 bool spr_psys_bits;
087e9cba 215};
b4288ce7 216static struct rapl_defaults *defaults_msr;
e12dee18 217static const struct rapl_defaults defaults_tpmi;
087e9cba 218
e8e28c2a
ZR
219static struct rapl_defaults *get_defaults(struct rapl_package *rp)
220{
221 return rp->priv->defaults;
222}
223
3c2c0845 224/* Sideband MBI registers */
51b63409
AT
225#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
226#define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
3c2c0845 227
2d281d81
JP
228#define PACKAGE_PLN_INT_SAVED BIT(0)
229#define MAX_PRIM_NAME (32)
230
231/* per domain data. used to describe individual knobs such that access function
232 * can be consolidated into one instead of many inline functions.
233 */
234struct rapl_primitive_info {
235 const char *name;
236 u64 mask;
237 int shift;
f7c4e0c8 238 enum rapl_domain_reg_id id;
2d281d81
JP
239 enum unit_type unit;
240 u32 flag;
241};
242
243#define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \
244 .name = #p, \
245 .mask = m, \
246 .shift = s, \
247 .id = i, \
248 .unit = u, \
249 .flag = f \
250 }
251
252static void rapl_init_domains(struct rapl_package *rp);
253static int rapl_read_data_raw(struct rapl_domain *rd,
3382388d
ZR
254 enum rapl_primitives prim,
255 bool xlate, u64 *data);
2d281d81 256static int rapl_write_data_raw(struct rapl_domain *rd,
3382388d
ZR
257 enum rapl_primitives prim,
258 unsigned long long value);
9050a9cd
ZR
259static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
260 enum pl_prims pl_prim,
261 bool xlate, u64 *data);
262static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
263 enum pl_prims pl_prim,
264 unsigned long long value);
309557f5 265static u64 rapl_unit_xlate(struct rapl_domain *rd,
3382388d 266 enum unit_type type, u64 value, int to_raw);
309557f5 267static void package_power_limit_irq_save(struct rapl_package *rp);
2d281d81 268
3382388d 269static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */
2d281d81 270
3382388d 271static const char *const rapl_domain_names[] = {
2d281d81
JP
272 "package",
273 "core",
274 "uncore",
275 "dram",
3521ba1c 276 "psys",
2d281d81
JP
277};
278
3382388d
ZR
279static int get_energy_counter(struct powercap_zone *power_zone,
280 u64 *energy_raw)
2d281d81
JP
281{
282 struct rapl_domain *rd;
283 u64 energy_now;
284
285 /* prevent CPU hotplug, make sure the RAPL domain does not go
286 * away while reading the counter.
287 */
5d4c779c 288 cpus_read_lock();
2d281d81
JP
289 rd = power_zone_to_rapl_domain(power_zone);
290
291 if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
292 *energy_raw = energy_now;
5d4c779c 293 cpus_read_unlock();
2d281d81
JP
294
295 return 0;
296 }
5d4c779c 297 cpus_read_unlock();
2d281d81
JP
298
299 return -EIO;
300}
301
302static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
303{
d474a4d3
JP
304 struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev);
305
309557f5 306 *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
2d281d81
JP
307 return 0;
308}
309
310static int release_zone(struct powercap_zone *power_zone)
311{
312 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
309557f5 313 struct rapl_package *rp = rd->rp;
2d281d81
JP
314
315 /* package zone is the last zone of a package, we can free
316 * memory here since all children has been unregistered.
317 */
318 if (rd->id == RAPL_DOMAIN_PACKAGE) {
2d281d81
JP
319 kfree(rd);
320 rp->domains = NULL;
321 }
322
323 return 0;
324
325}
326
327static int find_nr_power_limit(struct rapl_domain *rd)
328{
e1399ba2 329 int i, nr_pl = 0;
2d281d81
JP
330
331 for (i = 0; i < NR_POWER_LIMITS; i++) {
9050a9cd 332 if (is_pl_valid(rd, i))
e1399ba2 333 nr_pl++;
2d281d81
JP
334 }
335
e1399ba2 336 return nr_pl;
2d281d81
JP
337}
338
339static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
340{
341 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
e8e28c2a 342 struct rapl_defaults *defaults = get_defaults(rd->rp);
9050a9cd 343 int ret;
3c2c0845 344
5d4c779c 345 cpus_read_lock();
9050a9cd
ZR
346 ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
347 if (!ret && defaults->set_floor_freq)
e8e28c2a 348 defaults->set_floor_freq(rd, mode);
5d4c779c 349 cpus_read_unlock();
2d281d81 350
9050a9cd 351 return ret;
2d281d81
JP
352}
353
354static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
355{
356 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
357 u64 val;
9050a9cd 358 int ret;
2d281d81 359
f442bd27 360 if (rd->rpl[POWER_LIMIT1].locked) {
2d281d81
JP
361 *mode = false;
362 return 0;
363 }
5d4c779c 364 cpus_read_lock();
9050a9cd
ZR
365 ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val);
366 if (!ret)
367 *mode = val;
5d4c779c 368 cpus_read_unlock();
2d281d81 369
9050a9cd 370 return ret;
2d281d81
JP
371}
372
373/* per RAPL domain ops, in the order of rapl_domain_type */
600c395b 374static const struct powercap_zone_ops zone_ops[] = {
2d281d81
JP
375 /* RAPL_DOMAIN_PACKAGE */
376 {
3382388d
ZR
377 .get_energy_uj = get_energy_counter,
378 .get_max_energy_range_uj = get_max_energy_counter,
379 .release = release_zone,
380 .set_enable = set_domain_enable,
381 .get_enable = get_domain_enable,
382 },
2d281d81
JP
383 /* RAPL_DOMAIN_PP0 */
384 {
3382388d
ZR
385 .get_energy_uj = get_energy_counter,
386 .get_max_energy_range_uj = get_max_energy_counter,
387 .release = release_zone,
388 .set_enable = set_domain_enable,
389 .get_enable = get_domain_enable,
390 },
2d281d81
JP
391 /* RAPL_DOMAIN_PP1 */
392 {
3382388d
ZR
393 .get_energy_uj = get_energy_counter,
394 .get_max_energy_range_uj = get_max_energy_counter,
395 .release = release_zone,
396 .set_enable = set_domain_enable,
397 .get_enable = get_domain_enable,
398 },
2d281d81
JP
399 /* RAPL_DOMAIN_DRAM */
400 {
3382388d
ZR
401 .get_energy_uj = get_energy_counter,
402 .get_max_energy_range_uj = get_max_energy_counter,
403 .release = release_zone,
404 .set_enable = set_domain_enable,
405 .get_enable = get_domain_enable,
406 },
3521ba1c
SP
407 /* RAPL_DOMAIN_PLATFORM */
408 {
3382388d
ZR
409 .get_energy_uj = get_energy_counter,
410 .get_max_energy_range_uj = get_max_energy_counter,
411 .release = release_zone,
412 .set_enable = set_domain_enable,
413 .get_enable = get_domain_enable,
414 },
2d281d81
JP
415};
416
e1399ba2
JP
417/*
418 * Constraint index used by powercap can be different than power limit (PL)
3382388d 419 * index in that some PLs maybe missing due to non-existent MSRs. So we
e1399ba2
JP
420 * need to convert here by finding the valid PLs only (name populated).
421 */
422static int contraint_to_pl(struct rapl_domain *rd, int cid)
423{
424 int i, j;
425
9050a9cd
ZR
426 for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) {
427 if (is_pl_valid(rd, i) && j++ == cid) {
e1399ba2
JP
428 pr_debug("%s: index %d\n", __func__, i);
429 return i;
430 }
431 }
cb43f81b 432 pr_err("Cannot find matching power limit for constraint %d\n", cid);
e1399ba2
JP
433
434 return -EINVAL;
435}
436
437static int set_power_limit(struct powercap_zone *power_zone, int cid,
3382388d 438 u64 power_limit)
2d281d81
JP
439{
440 struct rapl_domain *rd;
441 struct rapl_package *rp;
442 int ret = 0;
e1399ba2 443 int id;
2d281d81 444
5d4c779c 445 cpus_read_lock();
2d281d81 446 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2 447 id = contraint_to_pl(rd, cid);
309557f5 448 rp = rd->rp;
2d281d81 449
9050a9cd 450 ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit);
2d281d81 451 if (!ret)
309557f5 452 package_power_limit_irq_save(rp);
5d4c779c 453 cpus_read_unlock();
2d281d81
JP
454 return ret;
455}
456
e1399ba2 457static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
3382388d 458 u64 *data)
2d281d81
JP
459{
460 struct rapl_domain *rd;
461 u64 val;
2d281d81 462 int ret = 0;
e1399ba2 463 int id;
2d281d81 464
5d4c779c 465 cpus_read_lock();
2d281d81 466 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2 467 id = contraint_to_pl(rd, cid);
cb43f81b 468
9050a9cd
ZR
469 ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val);
470 if (!ret)
2d281d81
JP
471 *data = val;
472
5d4c779c 473 cpus_read_unlock();
2d281d81
JP
474
475 return ret;
476}
477
e1399ba2 478static int set_time_window(struct powercap_zone *power_zone, int cid,
3382388d 479 u64 window)
2d281d81
JP
480{
481 struct rapl_domain *rd;
482 int ret = 0;
e1399ba2 483 int id;
2d281d81 484
5d4c779c 485 cpus_read_lock();
2d281d81 486 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2
JP
487 id = contraint_to_pl(rd, cid);
488
9050a9cd 489 ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window);
cb43f81b 490
5d4c779c 491 cpus_read_unlock();
2d281d81
JP
492 return ret;
493}
494
3382388d
ZR
495static int get_time_window(struct powercap_zone *power_zone, int cid,
496 u64 *data)
2d281d81
JP
497{
498 struct rapl_domain *rd;
499 u64 val;
500 int ret = 0;
e1399ba2 501 int id;
2d281d81 502
5d4c779c 503 cpus_read_lock();
2d281d81 504 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2
JP
505 id = contraint_to_pl(rd, cid);
506
9050a9cd 507 ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val);
2d281d81
JP
508 if (!ret)
509 *data = val;
cb43f81b 510
5d4c779c 511 cpus_read_unlock();
2d281d81
JP
512
513 return ret;
514}
515
3382388d
ZR
516static const char *get_constraint_name(struct powercap_zone *power_zone,
517 int cid)
2d281d81 518{
2d281d81 519 struct rapl_domain *rd;
e1399ba2 520 int id;
2d281d81
JP
521
522 rd = power_zone_to_rapl_domain(power_zone);
e1399ba2
JP
523 id = contraint_to_pl(rd, cid);
524 if (id >= 0)
525 return rd->rpl[id].name;
2d281d81 526
e1399ba2 527 return NULL;
2d281d81
JP
528}
529
9050a9cd 530static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data)
2d281d81
JP
531{
532 struct rapl_domain *rd;
533 u64 val;
2d281d81 534 int ret = 0;
9050a9cd 535 int id;
2d281d81 536
5d4c779c 537 cpus_read_lock();
2d281d81 538 rd = power_zone_to_rapl_domain(power_zone);
9050a9cd
ZR
539 id = contraint_to_pl(rd, cid);
540
541 ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val);
542 if (!ret)
2d281d81
JP
543 *data = val;
544
8365a898 545 /* As a generalization rule, PL4 would be around two times PL2. */
9050a9cd 546 if (id == POWER_LIMIT4)
8365a898
SP
547 *data = *data * 2;
548
5d4c779c 549 cpus_read_unlock();
2d281d81
JP
550
551 return ret;
552}
553
600c395b 554static const struct powercap_zone_constraint_ops constraint_ops = {
2d281d81
JP
555 .set_power_limit_uw = set_power_limit,
556 .get_power_limit_uw = get_current_power_limit,
557 .set_time_window_us = set_time_window,
558 .get_time_window_us = get_time_window,
559 .get_max_power_uw = get_max_power,
560 .get_name = get_constraint_name,
561};
562
bf44b901
ZR
563/* Return the id used for read_raw/write_raw callback */
564static int get_rid(struct rapl_package *rp)
565{
566 return rp->lead_cpu >= 0 ? rp->lead_cpu : rp->id;
567}
568
2d281d81
JP
569/* called after domain detection and package level data are set */
570static void rapl_init_domains(struct rapl_package *rp)
571{
0c2ddedd
ZR
572 enum rapl_domain_type i;
573 enum rapl_domain_reg_id j;
2d281d81
JP
574 struct rapl_domain *rd = rp->domains;
575
576 for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
577 unsigned int mask = rp->domain_map & (1 << i);
9050a9cd 578 int t;
7fde2712 579
0c2ddedd
ZR
580 if (!mask)
581 continue;
582
583 rd->rp = rp;
f1e8d756
ZR
584
585 if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
586 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
bf44b901
ZR
587 rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) :
588 rp->id);
589 } else {
f1e8d756
ZR
590 snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
591 rapl_domain_names[i]);
bf44b901 592 }
f1e8d756 593
0c2ddedd 594 rd->id = i;
a38f300b
ZR
595
596 /* PL1 is supported by default */
597 rp->priv->limits[i] |= BIT(POWER_LIMIT1);
8365a898 598
9050a9cd
ZR
599 for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) {
600 if (rp->priv->limits[i] & BIT(t))
601 rd->rpl[t].name = pl_names[t];
8365a898
SP
602 }
603
0c2ddedd
ZR
604 for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
605 rd->regs[j] = rp->priv->regs[i][j];
606
0c2ddedd 607 rd++;
2d281d81
JP
608 }
609}
610
309557f5 611static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
3382388d 612 u64 value, int to_raw)
2d281d81 613{
3c2c0845 614 u64 units = 1;
cb532e72 615 struct rapl_defaults *defaults = get_defaults(rd->rp);
d474a4d3 616 u64 scale = 1;
2d281d81 617
2d281d81
JP
618 switch (type) {
619 case POWER_UNIT:
cb532e72 620 units = rd->power_unit;
2d281d81
JP
621 break;
622 case ENERGY_UNIT:
d474a4d3 623 scale = ENERGY_UNIT_SCALE;
cb532e72 624 units = rd->energy_unit;
2d281d81
JP
625 break;
626 case TIME_UNIT:
cb532e72 627 return defaults->compute_time_window(rd, value, to_raw);
2d281d81
JP
628 case ARBITRARY_UNIT:
629 default:
630 return value;
a8193af7 631 }
2d281d81
JP
632
633 if (to_raw)
d474a4d3 634 return div64_u64(value, units) * scale;
3c2c0845
JP
635
636 value *= units;
637
d474a4d3 638 return div64_u64(value, scale);
2d281d81
JP
639}
640
e12dee18 641/* RAPL primitives for MSR and MMIO I/F */
b4288ce7 642static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
2d281d81 643 /* name, mask, shift, msr index, unit divisor */
11edbe5c 644 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
3382388d 645 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 646 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
3382388d 647 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 648 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
8365a898 649 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
045610c3
ZR
650 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
651 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
11edbe5c 652 [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
3382388d 653 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
f442bd27
ZR
654 [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
655 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 656 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
3382388d 657 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 658 [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
3382388d 659 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 660 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
3382388d 661 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 662 [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
3382388d 663 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 664 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
3382388d 665 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
11edbe5c 666 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
3382388d 667 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
11edbe5c 668 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
3382388d 669 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
11edbe5c 670 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
3382388d 671 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
11edbe5c 672 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
3382388d 673 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
11edbe5c 674 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
3382388d 675 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
11edbe5c 676 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
3382388d 677 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
11edbe5c 678 [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
3382388d 679 RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
11edbe5c 680 [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
931da6a0 681 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 682 [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
931da6a0 683 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
11edbe5c 684 [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
931da6a0 685 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 686 [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
931da6a0 687 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
11edbe5c 688 [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
931da6a0 689 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
11edbe5c 690 [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
931da6a0 691 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
2d281d81 692 /* non-hardware */
11edbe5c 693 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
3382388d 694 RAPL_PRIMITIVE_DERIVED),
2d281d81
JP
695};
696
e12dee18
ZR
697/* RAPL primitives for TPMI I/F */
698static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
699 /* name, mask, shift, msr index, unit divisor */
700 [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
701 RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
702 [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
703 RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
704 [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
705 RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
706 [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
707 RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
708 [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63,
709 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
710 [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63,
711 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
712 [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63,
713 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
714 [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
715 RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
716 [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
717 RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
718 [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
719 RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
720 [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
721 RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
722 [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
723 RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
724 [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
725 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
726 [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
727 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
728 [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
729 RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
730 [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54,
731 RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
732 [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
733 RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
734 /* non-hardware */
735 [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0,
736 POWER_UNIT, RAPL_PRIMITIVE_DERIVED),
737};
738
98ff639a
ZR
739static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
740{
741 struct rapl_primitive_info *rpi = rp->priv->rpi;
742
743 if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi)
744 return NULL;
745
746 return &rpi[prim];
747}
748
e8e28c2a
ZR
749static int rapl_config(struct rapl_package *rp)
750{
b4288ce7
ZR
751 switch (rp->priv->type) {
752 /* MMIO I/F shares the same register layout as MSR registers */
753 case RAPL_IF_MMIO:
754 case RAPL_IF_MSR:
755 rp->priv->defaults = (void *)defaults_msr;
756 rp->priv->rpi = (void *)rpi_msr;
757 break;
e12dee18
ZR
758 case RAPL_IF_TPMI:
759 rp->priv->defaults = (void *)&defaults_tpmi;
760 rp->priv->rpi = (void *)rpi_tpmi;
761 break;
b4288ce7
ZR
762 default:
763 return -EINVAL;
764 }
2d1f5006
ZR
765
766 /* defaults_msr can be NULL on unsupported platforms */
767 if (!rp->priv->defaults || !rp->priv->rpi)
768 return -ENODEV;
769
e8e28c2a
ZR
770 return 0;
771}
772
931da6a0
ZR
773static enum rapl_primitives
774prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
775{
e8e28c2a
ZR
776 struct rapl_defaults *defaults = get_defaults(rd->rp);
777
778 if (!defaults->spr_psys_bits)
931da6a0
ZR
779 return prim;
780
781 if (rd->id != RAPL_DOMAIN_PLATFORM)
782 return prim;
783
784 switch (prim) {
785 case POWER_LIMIT1:
786 return PSYS_POWER_LIMIT1;
787 case POWER_LIMIT2:
788 return PSYS_POWER_LIMIT2;
789 case PL1_ENABLE:
790 return PSYS_PL1_ENABLE;
791 case PL2_ENABLE:
792 return PSYS_PL2_ENABLE;
793 case TIME_WINDOW1:
794 return PSYS_TIME_WINDOW1;
795 case TIME_WINDOW2:
796 return PSYS_TIME_WINDOW2;
797 default:
798 return prim;
799 }
800}
801
2d281d81
JP
802/* Read primitive data based on its related struct rapl_primitive_info.
803 * if xlate flag is set, return translated data based on data units, i.e.
804 * time, energy, and power.
805 * RAPL MSRs are non-architectual and are laid out not consistently across
806 * domains. Here we use primitive info to allow writing consolidated access
807 * functions.
808 * For a given primitive, it is processed by MSR mask and shift. Unit conversion
809 * is pre-assigned based on RAPL unit MSRs read at init time.
810 * 63-------------------------- 31--------------------------- 0
811 * | xxxxx (mask) |
812 * | |<- shift ----------------|
813 * 63-------------------------- 31--------------------------- 0
814 */
815static int rapl_read_data_raw(struct rapl_domain *rd,
3382388d 816 enum rapl_primitives prim, bool xlate, u64 *data)
2d281d81 817{
beea8df8 818 u64 value;
931da6a0 819 enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
98ff639a 820 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
beea8df8 821 struct reg_action ra;
2d281d81 822
98ff639a 823 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
2d281d81
JP
824 return -EINVAL;
825
98ff639a 826 ra.reg = rd->regs[rpi->id];
16e95a62 827 if (!ra.reg.val)
2d281d81 828 return -EINVAL;
323ee64a 829
2d281d81 830 /* non-hardware data are collected by the polling thread */
98ff639a 831 if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
2d281d81
JP
832 *data = rd->rdd.primitives[prim];
833 return 0;
834 }
835
98ff639a 836 ra.mask = rpi->mask;
beea8df8 837
bf44b901 838 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
16e95a62 839 pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
2d281d81
JP
840 return -EIO;
841 }
842
98ff639a 843 value = ra.value >> rpi->shift;
beea8df8 844
2d281d81 845 if (xlate)
98ff639a 846 *data = rapl_unit_xlate(rd, rpi->unit, value, 0);
2d281d81 847 else
beea8df8 848 *data = value;
2d281d81
JP
849
850 return 0;
851}
852
853/* Similar use of primitive info in the read counterpart */
854static int rapl_write_data_raw(struct rapl_domain *rd,
3382388d
ZR
855 enum rapl_primitives prim,
856 unsigned long long value)
2d281d81 857{
931da6a0 858 enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
98ff639a 859 struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
f14a1396 860 u64 bits;
beea8df8 861 struct reg_action ra;
f14a1396 862 int ret;
2d281d81 863
98ff639a
ZR
864 if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
865 return -EINVAL;
866
98ff639a
ZR
867 bits = rapl_unit_xlate(rd, rpi->unit, value, 1);
868 bits <<= rpi->shift;
869 bits &= rpi->mask;
edbdabc6 870
beea8df8 871 memset(&ra, 0, sizeof(ra));
f14a1396 872
98ff639a
ZR
873 ra.reg = rd->regs[rpi->id];
874 ra.mask = rpi->mask;
beea8df8 875 ra.value = bits;
f14a1396 876
bf44b901 877 ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra);
f14a1396
JP
878
879 return ret;
2d281d81
JP
880}
881
9050a9cd
ZR
882static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
883 enum pl_prims pl_prim, bool xlate, u64 *data)
884{
f442bd27 885 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim);
9050a9cd
ZR
886
887 if (!is_pl_valid(rd, pl))
888 return -EINVAL;
889
890 return rapl_read_data_raw(rd, prim, xlate, data);
891}
892
893static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
894 enum pl_prims pl_prim,
895 unsigned long long value)
896{
f442bd27 897 enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim);
9050a9cd
ZR
898
899 if (!is_pl_valid(rd, pl))
900 return -EINVAL;
901
f442bd27 902 if (rd->rpl[pl].locked) {
a60ec448 903 pr_debug("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]);
9050a9cd
ZR
904 return -EACCES;
905 }
906
907 return rapl_write_data_raw(rd, prim, value);
908}
3c2c0845
JP
909/*
910 * Raw RAPL data stored in MSRs are in certain scales. We need to
911 * convert them into standard units based on the units reported in
912 * the RAPL unit MSRs. This is specific to CPUs as the method to
913 * calculate units differ on different CPUs.
914 * We convert the units to below format based on CPUs.
915 * i.e.
d474a4d3 916 * energy unit: picoJoules : Represented in picoJoules by default
3c2c0845
JP
917 * power unit : microWatts : Represented in milliWatts by default
918 * time unit : microseconds: Represented in seconds by default
919 */
693c1d78 920static int rapl_check_unit_core(struct rapl_domain *rd)
2d281d81 921{
1193b165 922 struct reg_action ra;
2d281d81
JP
923 u32 value;
924
cb532e72 925 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
1193b165 926 ra.mask = ~0;
bf44b901
ZR
927 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
928 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
16e95a62 929 ra.reg.val, rd->rp->name, rd->name);
2d281d81
JP
930 return -ENODEV;
931 }
932
1193b165 933 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
cb532e72 934 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
2d281d81 935
1193b165 936 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
cb532e72 937 rd->power_unit = 1000000 / (1 << value);
2d281d81 938
1193b165 939 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
cb532e72 940 rd->time_unit = 1000000 / (1 << value);
2d281d81 941
cb532e72
ZR
942 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
943 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
2d281d81
JP
944
945 return 0;
946}
947
693c1d78 948static int rapl_check_unit_atom(struct rapl_domain *rd)
3c2c0845 949{
1193b165 950 struct reg_action ra;
3c2c0845
JP
951 u32 value;
952
cb532e72 953 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
1193b165 954 ra.mask = ~0;
bf44b901
ZR
955 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
956 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
16e95a62 957 ra.reg.val, rd->rp->name, rd->name);
3c2c0845
JP
958 return -ENODEV;
959 }
1193b165
ZR
960
961 value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
cb532e72 962 rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
3c2c0845 963
1193b165 964 value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
cb532e72 965 rd->power_unit = (1 << value) * 1000;
3c2c0845 966
1193b165 967 value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
cb532e72 968 rd->time_unit = 1000000 / (1 << value);
3c2c0845 969
cb532e72
ZR
970 pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
971 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
3c2c0845
JP
972
973 return 0;
974}
975
f14a1396
JP
976static void power_limit_irq_save_cpu(void *info)
977{
978 u32 l, h = 0;
979 struct rapl_package *rp = (struct rapl_package *)info;
980
981 /* save the state of PLN irq mask bit before disabling it */
982 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
983 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) {
984 rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE;
985 rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED;
986 }
987 l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
988 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
989}
990
2d281d81
JP
991/* REVISIT:
992 * When package power limit is set artificially low by RAPL, LVT
993 * thermal interrupt for package power limit should be ignored
994 * since we are not really exceeding the real limit. The intention
995 * is to avoid excessive interrupts while we are trying to save power.
996 * A useful feature might be routing the package_power_limit interrupt
997 * to userspace via eventfd. once we have a usecase, this is simple
998 * to do by adding an atomic notifier.
999 */
1000
309557f5 1001static void package_power_limit_irq_save(struct rapl_package *rp)
2d281d81 1002{
bf44b901
ZR
1003 if (rp->lead_cpu < 0)
1004 return;
1005
f14a1396
JP
1006 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
1007 return;
1008
323ee64a 1009 smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1);
f14a1396
JP
1010}
1011
58705069
TG
1012/*
1013 * Restore per package power limit interrupt enable state. Called from cpu
1014 * hotplug code on package removal.
1015 */
1016static void package_power_limit_irq_restore(struct rapl_package *rp)
f14a1396 1017{
58705069
TG
1018 u32 l, h;
1019
bf44b901
ZR
1020 if (rp->lead_cpu < 0)
1021 return;
1022
58705069
TG
1023 if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
1024 return;
1025
1026 /* irq enable state not saved, nothing to restore */
1027 if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED))
1028 return;
f14a1396
JP
1029
1030 rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
1031
1032 if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE)
1033 l |= PACKAGE_THERM_INT_PLN_ENABLE;
1034 else
1035 l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
1036
1037 wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
2d281d81
JP
1038}
1039
3c2c0845
JP
1040static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
1041{
9050a9cd 1042 int i;
3c2c0845
JP
1043
1044 /* always enable clamp such that p-state can go below OS requested
1045 * range. power capping priority over guranteed frequency.
1046 */
9050a9cd 1047 rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);
3c2c0845 1048
9050a9cd
ZR
1049 for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) {
1050 rapl_write_pl_data(rd, i, PL_ENABLE, mode);
1051 rapl_write_pl_data(rd, i, PL_CLAMP, mode);
3c2c0845
JP
1052 }
1053}
1054
1055static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
1056{
1057 static u32 power_ctrl_orig_val;
e8e28c2a 1058 struct rapl_defaults *defaults = get_defaults(rd->rp);
3c2c0845
JP
1059 u32 mdata;
1060
e8e28c2a 1061 if (!defaults->floor_freq_reg_addr) {
51b63409
AT
1062 pr_err("Invalid floor frequency config register\n");
1063 return;
1064 }
1065
3c2c0845 1066 if (!power_ctrl_orig_val)
4077a387 1067 iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
e8e28c2a 1068 defaults->floor_freq_reg_addr,
4077a387 1069 &power_ctrl_orig_val);
3c2c0845
JP
1070 mdata = power_ctrl_orig_val;
1071 if (enable) {
1072 mdata &= ~(0x7f << 8);
1073 mdata |= 1 << 8;
1074 }
4077a387 1075 iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
e8e28c2a 1076 defaults->floor_freq_reg_addr, mdata);
3c2c0845
JP
1077}
1078
cb532e72 1079static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
3382388d 1080 bool to_raw)
3c2c0845 1081{
3382388d 1082 u64 f, y; /* fraction and exp. used for time unit */
3c2c0845
JP
1083
1084 /*
1085 * Special processing based on 2^Y*(1+F/4), refer
1086 * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
1087 */
1088 if (!to_raw) {
1089 f = (value & 0x60) >> 5;
1090 y = value & 0x1f;
cb532e72 1091 value = (1 << y) * (4 + f) * rd->time_unit / 4;
3c2c0845 1092 } else {
cb532e72 1093 if (value < rd->time_unit)
2d935400
CQ
1094 return 0;
1095
cb532e72 1096 do_div(value, rd->time_unit);
3c2c0845 1097 y = ilog2(value);
cf835b00
ZR
1098
1099 /*
1100 * The target hardware field is 7 bits wide, so return all ones
1101 * if the exponent is too large.
1102 */
1103 if (y > 0x1f)
1104 return 0x7f;
1105
1106 f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);
3c2c0845
JP
1107 value = (y & 0x1f) | ((f & 0x3) << 5);
1108 }
1109 return value;
1110}
1111
cb532e72 1112static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
3382388d 1113 bool to_raw)
3c2c0845
JP
1114{
1115 /*
1116 * Atom time unit encoding is straight forward val * time_unit,
1117 * where time_unit is default to 1 sec. Never 0.
1118 */
1119 if (!to_raw)
cb532e72 1120 return (value) ? value * rd->time_unit : rd->time_unit;
3382388d 1121
cb532e72 1122 value = div64_u64(value, rd->time_unit);
3c2c0845
JP
1123
1124 return value;
1125}
1126
e12dee18
ZR
1127/* TPMI Unit register has different layout */
1128#define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET
1129#define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK
1130#define TPMI_ENERGY_UNIT_OFFSET 0x06
1131#define TPMI_ENERGY_UNIT_MASK 0x7C0
1132#define TPMI_TIME_UNIT_OFFSET 0x0C
1133#define TPMI_TIME_UNIT_MASK 0xF000
1134
1135static int rapl_check_unit_tpmi(struct rapl_domain *rd)
1136{
1137 struct reg_action ra;
1138 u32 value;
1139
1140 ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
1141 ra.mask = ~0;
1142 if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
1143 pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
16e95a62 1144 ra.reg.val, rd->rp->name, rd->name);
e12dee18
ZR
1145 return -ENODEV;
1146 }
1147
1148 value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
1149 rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
1150
1151 value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
1152 rd->power_unit = 1000000 / (1 << value);
1153
1154 value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
1155 rd->time_unit = 1000000 / (1 << value);
1156
1157 pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
1158 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
1159
1160 return 0;
1161}
1162
1163static const struct rapl_defaults defaults_tpmi = {
1164 .check_unit = rapl_check_unit_tpmi,
1165 /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
1166 .set_floor_freq = set_floor_freq_default,
1167 .compute_time_window = rapl_compute_time_window_core,
1168};
1169
087e9cba 1170static const struct rapl_defaults rapl_defaults_core = {
51b63409 1171 .floor_freq_reg_addr = 0,
3c2c0845
JP
1172 .check_unit = rapl_check_unit_core,
1173 .set_floor_freq = set_floor_freq_default,
1174 .compute_time_window = rapl_compute_time_window_core,
087e9cba
JP
1175};
1176
d474a4d3
JP
1177static const struct rapl_defaults rapl_defaults_hsw_server = {
1178 .check_unit = rapl_check_unit_core,
1179 .set_floor_freq = set_floor_freq_default,
1180 .compute_time_window = rapl_compute_time_window_core,
1181 .dram_domain_energy_unit = 15300,
1182};
1183
2d798d9f
ZR
1184static const struct rapl_defaults rapl_defaults_spr_server = {
1185 .check_unit = rapl_check_unit_core,
1186 .set_floor_freq = set_floor_freq_default,
1187 .compute_time_window = rapl_compute_time_window_core,
2d798d9f 1188 .psys_domain_energy_unit = 1000000000,
931da6a0 1189 .spr_psys_bits = true,
2d798d9f
ZR
1190};
1191
51b63409
AT
1192static const struct rapl_defaults rapl_defaults_byt = {
1193 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
1194 .check_unit = rapl_check_unit_atom,
1195 .set_floor_freq = set_floor_freq_atom,
1196 .compute_time_window = rapl_compute_time_window_atom,
1197};
1198
1199static const struct rapl_defaults rapl_defaults_tng = {
1200 .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
3c2c0845
JP
1201 .check_unit = rapl_check_unit_atom,
1202 .set_floor_freq = set_floor_freq_atom,
1203 .compute_time_window = rapl_compute_time_window_atom,
087e9cba
JP
1204};
1205
51b63409
AT
1206static const struct rapl_defaults rapl_defaults_ann = {
1207 .floor_freq_reg_addr = 0,
1208 .check_unit = rapl_check_unit_atom,
1209 .set_floor_freq = NULL,
1210 .compute_time_window = rapl_compute_time_window_atom,
1211};
1212
1213static const struct rapl_defaults rapl_defaults_cht = {
1214 .floor_freq_reg_addr = 0,
1215 .check_unit = rapl_check_unit_atom,
1216 .set_floor_freq = NULL,
1217 .compute_time_window = rapl_compute_time_window_atom,
1218};
1219
43756a29
VD
1220static const struct rapl_defaults rapl_defaults_amd = {
1221 .check_unit = rapl_check_unit_core,
1222};
1223
ea85dbca 1224static const struct x86_cpu_id rapl_ids[] __initconst = {
b9064fb8
TL
1225 X86_MATCH_VFM(INTEL_SANDYBRIDGE, &rapl_defaults_core),
1226 X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &rapl_defaults_core),
1227
1228 X86_MATCH_VFM(INTEL_IVYBRIDGE, &rapl_defaults_core),
1229 X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &rapl_defaults_core),
1230
1231 X86_MATCH_VFM(INTEL_HASWELL, &rapl_defaults_core),
1232 X86_MATCH_VFM(INTEL_HASWELL_L, &rapl_defaults_core),
1233 X86_MATCH_VFM(INTEL_HASWELL_G, &rapl_defaults_core),
1234 X86_MATCH_VFM(INTEL_HASWELL_X, &rapl_defaults_hsw_server),
1235
1236 X86_MATCH_VFM(INTEL_BROADWELL, &rapl_defaults_core),
1237 X86_MATCH_VFM(INTEL_BROADWELL_G, &rapl_defaults_core),
1238 X86_MATCH_VFM(INTEL_BROADWELL_D, &rapl_defaults_core),
1239 X86_MATCH_VFM(INTEL_BROADWELL_X, &rapl_defaults_hsw_server),
1240
1241 X86_MATCH_VFM(INTEL_SKYLAKE, &rapl_defaults_core),
1242 X86_MATCH_VFM(INTEL_SKYLAKE_L, &rapl_defaults_core),
1243 X86_MATCH_VFM(INTEL_SKYLAKE_X, &rapl_defaults_hsw_server),
1244 X86_MATCH_VFM(INTEL_KABYLAKE_L, &rapl_defaults_core),
1245 X86_MATCH_VFM(INTEL_KABYLAKE, &rapl_defaults_core),
1246 X86_MATCH_VFM(INTEL_CANNONLAKE_L, &rapl_defaults_core),
1247 X86_MATCH_VFM(INTEL_ICELAKE_L, &rapl_defaults_core),
1248 X86_MATCH_VFM(INTEL_ICELAKE, &rapl_defaults_core),
1249 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &rapl_defaults_core),
1250 X86_MATCH_VFM(INTEL_ICELAKE_X, &rapl_defaults_hsw_server),
1251 X86_MATCH_VFM(INTEL_ICELAKE_D, &rapl_defaults_hsw_server),
1252 X86_MATCH_VFM(INTEL_COMETLAKE_L, &rapl_defaults_core),
1253 X86_MATCH_VFM(INTEL_COMETLAKE, &rapl_defaults_core),
1254 X86_MATCH_VFM(INTEL_TIGERLAKE_L, &rapl_defaults_core),
1255 X86_MATCH_VFM(INTEL_TIGERLAKE, &rapl_defaults_core),
1256 X86_MATCH_VFM(INTEL_ROCKETLAKE, &rapl_defaults_core),
1257 X86_MATCH_VFM(INTEL_ALDERLAKE, &rapl_defaults_core),
1258 X86_MATCH_VFM(INTEL_ALDERLAKE_L, &rapl_defaults_core),
1259 X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &rapl_defaults_core),
1260 X86_MATCH_VFM(INTEL_RAPTORLAKE, &rapl_defaults_core),
1261 X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &rapl_defaults_core),
1262 X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &rapl_defaults_core),
1263 X86_MATCH_VFM(INTEL_METEORLAKE, &rapl_defaults_core),
1264 X86_MATCH_VFM(INTEL_METEORLAKE_L, &rapl_defaults_core),
1265 X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
1266 X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server),
1267 X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core),
1268 X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core),
1269 X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core),
1270 X86_MATCH_VFM(INTEL_LAKEFIELD, &rapl_defaults_core),
1271
1272 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt),
1273 X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht),
1274 X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng),
1275 X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &rapl_defaults_ann),
1276 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core),
1277 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
1278 X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core),
1279 X86_MATCH_VFM(INTEL_ATOM_TREMONT, &rapl_defaults_core),
1280 X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &rapl_defaults_core),
1281 X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &rapl_defaults_core),
1282
1283 X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &rapl_defaults_hsw_server),
1284 X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &rapl_defaults_hsw_server),
43756a29
VD
1285
1286 X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
8a9d881f 1287 X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
a7405612 1288 X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
2d281d81
JP
1289 {}
1290};
1291MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
1292
bed5ab63
TG
1293/* Read once for all raw primitive data for domains */
1294static void rapl_update_domain_data(struct rapl_package *rp)
2d281d81
JP
1295{
1296 int dmn, prim;
1297 u64 val;
2d281d81 1298
bed5ab63 1299 for (dmn = 0; dmn < rp->nr_domains; dmn++) {
9ea7612c 1300 pr_debug("update %s domain %s data\n", rp->name,
bed5ab63
TG
1301 rp->domains[dmn].name);
1302 /* exclude non-raw primitives */
1303 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
98ff639a
ZR
1304 struct rapl_primitive_info *rpi = get_rpi(rp, prim);
1305
bed5ab63 1306 if (!rapl_read_data_raw(&rp->domains[dmn], prim,
98ff639a 1307 rpi->unit, &val))
3382388d 1308 rp->domains[dmn].rdd.primitives[prim] = val;
2d281d81
JP
1309 }
1310 }
1311
1312}
1313
2d281d81
JP
1314static int rapl_package_register_powercap(struct rapl_package *rp)
1315{
1316 struct rapl_domain *rd;
2d281d81 1317 struct powercap_zone *power_zone = NULL;
01857cf7 1318 int nr_pl, ret;
bed5ab63
TG
1319
1320 /* Update the domain data of the new package */
1321 rapl_update_domain_data(rp);
2d281d81 1322
3382388d 1323 /* first we register package domain as the parent zone */
2d281d81
JP
1324 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1325 if (rd->id == RAPL_DOMAIN_PACKAGE) {
1326 nr_pl = find_nr_power_limit(rd);
9ea7612c 1327 pr_debug("register package domain %s\n", rp->name);
2d281d81 1328 power_zone = powercap_register_zone(&rd->power_zone,
3382388d
ZR
1329 rp->priv->control_type, rp->name,
1330 NULL, &zone_ops[rd->id], nr_pl,
1331 &constraint_ops);
2d281d81 1332 if (IS_ERR(power_zone)) {
9ea7612c 1333 pr_debug("failed to register power zone %s\n",
3382388d 1334 rp->name);
bed5ab63 1335 return PTR_ERR(power_zone);
2d281d81
JP
1336 }
1337 /* track parent zone in per package/socket data */
1338 rp->power_zone = power_zone;
1339 /* done, only one package domain per socket */
1340 break;
1341 }
1342 }
1343 if (!power_zone) {
1344 pr_err("no package domain found, unknown topology!\n");
bed5ab63 1345 return -ENODEV;
2d281d81 1346 }
3382388d 1347 /* now register domains as children of the socket/package */
2d281d81 1348 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
f1e8d756
ZR
1349 struct powercap_zone *parent = rp->power_zone;
1350
2d281d81
JP
1351 if (rd->id == RAPL_DOMAIN_PACKAGE)
1352 continue;
f1e8d756
ZR
1353 if (rd->id == RAPL_DOMAIN_PLATFORM)
1354 parent = NULL;
2d281d81
JP
1355 /* number of power limits per domain varies */
1356 nr_pl = find_nr_power_limit(rd);
1357 power_zone = powercap_register_zone(&rd->power_zone,
3382388d 1358 rp->priv->control_type,
f1e8d756 1359 rd->name, parent,
3382388d
ZR
1360 &zone_ops[rd->id], nr_pl,
1361 &constraint_ops);
2d281d81
JP
1362
1363 if (IS_ERR(power_zone)) {
9ea7612c 1364 pr_debug("failed to register power_zone, %s:%s\n",
3382388d 1365 rp->name, rd->name);
2d281d81
JP
1366 ret = PTR_ERR(power_zone);
1367 goto err_cleanup;
1368 }
1369 }
bed5ab63 1370 return 0;
2d281d81 1371
2d281d81 1372err_cleanup:
58705069
TG
1373 /*
1374 * Clean up previously initialized domains within the package if we
2d281d81
JP
1375 * failed after the first domain setup.
1376 */
1377 while (--rd >= rp->domains) {
9ea7612c 1378 pr_debug("unregister %s domain %s\n", rp->name, rd->name);
3382388d
ZR
1379 powercap_unregister_zone(rp->priv->control_type,
1380 &rd->power_zone);
2d281d81
JP
1381 }
1382
1383 return ret;
1384}
1385
693c1d78 1386static int rapl_check_domain(int domain, struct rapl_package *rp)
2d281d81 1387{
1193b165 1388 struct reg_action ra;
2d281d81
JP
1389
1390 switch (domain) {
1391 case RAPL_DOMAIN_PACKAGE:
2d281d81 1392 case RAPL_DOMAIN_PP0:
2d281d81 1393 case RAPL_DOMAIN_PP1:
2d281d81 1394 case RAPL_DOMAIN_DRAM:
f1e8d756 1395 case RAPL_DOMAIN_PLATFORM:
1193b165 1396 ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS];
2d281d81
JP
1397 break;
1398 default:
1399 pr_err("invalid domain id %d\n", domain);
1400 return -EINVAL;
1401 }
9d31c676
JP
1402 /* make sure domain counters are available and contains non-zero
1403 * values, otherwise skip it.
7b874772 1404 */
1193b165 1405
7a57e9f1 1406 ra.mask = ENERGY_STATUS_MASK;
bf44b901 1407 if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value)
9d31c676 1408 return -ENODEV;
2d281d81 1409
9d31c676 1410 return 0;
2d281d81
JP
1411}
1412
cb532e72
ZR
1413/*
1414 * Get per domain energy/power/time unit.
1415 * RAPL Interfaces without per domain unit register will use the package
1416 * scope unit register to set per domain units.
1417 */
1418static int rapl_get_domain_unit(struct rapl_domain *rd)
1419{
1420 struct rapl_defaults *defaults = get_defaults(rd->rp);
1421 int ret;
1422
16e95a62
ZR
1423 if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
1424 if (!rd->rp->priv->reg_unit.val) {
cb532e72
ZR
1425 pr_err("No valid Unit register found\n");
1426 return -ENODEV;
1427 }
1428 rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit;
1429 }
1430
1431 if (!defaults->check_unit) {
1432 pr_err("missing .check_unit() callback\n");
1433 return -ENODEV;
1434 }
1435
693c1d78 1436 ret = defaults->check_unit(rd);
cb532e72
ZR
1437 if (ret)
1438 return ret;
1439
1440 if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit)
1441 rd->energy_unit = defaults->dram_domain_energy_unit;
1442 if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit)
1443 rd->energy_unit = defaults->psys_domain_energy_unit;
1444 return 0;
1445}
1446
e1399ba2
JP
1447/*
1448 * Check if power limits are available. Two cases when they are not available:
1449 * 1. Locked by BIOS, in this case we still provide read-only access so that
1450 * users can see what limit is set by the BIOS.
1451 * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not
3382388d 1452 * exist at all. In this case, we do not show the constraints in powercap.
e1399ba2
JP
1453 *
1454 * Called after domains are detected and initialized.
1455 */
1456static void rapl_detect_powerlimit(struct rapl_domain *rd)
1457{
1458 u64 val64;
1459 int i;
1460
f442bd27
ZR
1461 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
1462 if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) {
1463 if (val64) {
1464 rd->rpl[i].locked = true;
1465 pr_info("%s:%s:%s locked by BIOS\n",
1466 rd->rp->name, rd->name, pl_names[i]);
1467 }
e1399ba2 1468 }
3382388d 1469
081690e9 1470 if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
e1399ba2
JP
1471 rd->rpl[i].name = NULL;
1472 }
1473}
1474
2d281d81
JP
1475/* Detect active and valid domains for the given CPU, caller must
1476 * ensure the CPU belongs to the targeted package and CPU hotlug is disabled.
1477 */
693c1d78 1478static int rapl_detect_domains(struct rapl_package *rp)
2d281d81 1479{
2d281d81 1480 struct rapl_domain *rd;
58705069 1481 int i;
2d281d81
JP
1482
1483 for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
1484 /* use physical package id to read counters */
693c1d78 1485 if (!rapl_check_domain(i, rp)) {
2d281d81 1486 rp->domain_map |= 1 << i;
fcdf1797
JP
1487 pr_info("Found RAPL domain %s\n", rapl_domain_names[i]);
1488 }
2d281d81 1489 }
3382388d 1490 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
2d281d81 1491 if (!rp->nr_domains) {
9ea7612c 1492 pr_debug("no valid rapl domains found in %s\n", rp->name);
58705069 1493 return -ENODEV;
2d281d81 1494 }
9ea7612c 1495 pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
2d281d81 1496
2fa00769 1497 rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain),
3382388d 1498 GFP_KERNEL);
58705069
TG
1499 if (!rp->domains)
1500 return -ENOMEM;
1501
2d281d81
JP
1502 rapl_init_domains(rp);
1503
cb532e72
ZR
1504 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1505 rapl_get_domain_unit(rd);
e1399ba2 1506 rapl_detect_powerlimit(rd);
cb532e72 1507 }
e1399ba2 1508
2d281d81
JP
1509 return 0;
1510}
1511
575024a8
ZR
1512#ifdef CONFIG_PERF_EVENTS
1513
1514/*
1515 * Support for RAPL PMU
1516 *
1517 * Register a PMU if any of the registered RAPL Packages have the requirement
1518 * of exposing its energy counters via Perf PMU.
1519 *
1520 * PMU Name:
1521 * power
1522 *
1523 * Events:
1524 * Name Event id RAPL Domain
1525 * energy_cores 0x01 RAPL_DOMAIN_PP0
1526 * energy_pkg 0x02 RAPL_DOMAIN_PACKAGE
1527 * energy_ram 0x03 RAPL_DOMAIN_DRAM
1528 * energy_gpu 0x04 RAPL_DOMAIN_PP1
1529 * energy_psys 0x05 RAPL_DOMAIN_PLATFORM
1530 *
1531 * Unit:
1532 * Joules
1533 *
1534 * Scale:
1535 * 2.3283064365386962890625e-10
1536 * The same RAPL domain in different RAPL Packages may have different
1537 * energy units. Use 2.3283064365386962890625e-10 (2^-32) Joules as
1538 * the fixed unit for all energy counters, and convert each hardware
1539 * counter increase to N times of PMU event counter increases.
1540 *
1541 * This is fully compatible with the current MSR RAPL PMU. This means that
1542 * userspace programs like turbostat can use the same code to handle RAPL Perf
1543 * PMU, no matter what RAPL Interface driver (MSR/TPMI, etc) is running
1544 * underlying on the platform.
1545 *
1546 * Note that RAPL Packages can be probed/removed dynamically, and the events
1547 * supported by each TPMI RAPL device can be different. Thus the RAPL PMU
1548 * support is done on demand, which means
1549 * 1. PMU is registered only if it is needed by a RAPL Package. PMU events for
1550 * unsupported counters are not exposed.
1551 * 2. PMU is unregistered and registered when a new RAPL Package is probed and
1552 * supports new counters that are not supported by current PMU.
1553 * 3. PMU is unregistered when all registered RAPL Packages don't need PMU.
1554 */
1555
1556struct rapl_pmu {
1557 struct pmu pmu; /* Perf PMU structure */
1558 u64 timer_ms; /* Maximum expiration time to avoid counter overflow */
1559 unsigned long domain_map; /* Events supported by current registered PMU */
1560 bool registered; /* Whether the PMU has been registered or not */
1561};
1562
1563static struct rapl_pmu rapl_pmu;
1564
1565/* PMU helpers */
1566
1567static int get_pmu_cpu(struct rapl_package *rp)
1568{
1569 int cpu;
1570
1571 if (!rp->has_pmu)
1572 return nr_cpu_ids;
1573
1574 /* Only TPMI RAPL is supported for now */
1575 if (rp->priv->type != RAPL_IF_TPMI)
1576 return nr_cpu_ids;
1577
1578 /* TPMI RAPL uses any CPU in the package for PMU */
1579 for_each_online_cpu(cpu)
1580 if (topology_physical_package_id(cpu) == rp->id)
1581 return cpu;
1582
1583 return nr_cpu_ids;
1584}
1585
1586static bool is_rp_pmu_cpu(struct rapl_package *rp, int cpu)
1587{
1588 if (!rp->has_pmu)
1589 return false;
1590
1591 /* Only TPMI RAPL is supported for now */
1592 if (rp->priv->type != RAPL_IF_TPMI)
1593 return false;
1594
1595 /* TPMI RAPL uses any CPU in the package for PMU */
1596 return topology_physical_package_id(cpu) == rp->id;
1597}
1598
1599static struct rapl_package_pmu_data *event_to_pmu_data(struct perf_event *event)
1600{
1601 struct rapl_package *rp = event->pmu_private;
1602
1603 return &rp->pmu_data;
1604}
1605
1606/* PMU event callbacks */
1607
1608static u64 event_read_counter(struct perf_event *event)
1609{
1610 struct rapl_package *rp = event->pmu_private;
1611 u64 val;
1612 int ret;
1613
1614 /* Return 0 for unsupported events */
1615 if (event->hw.idx < 0)
1616 return 0;
1617
1618 ret = rapl_read_data_raw(&rp->domains[event->hw.idx], ENERGY_COUNTER, false, &val);
1619
1620 /* Return 0 for failed read */
1621 if (ret)
1622 return 0;
1623
1624 return val;
1625}
1626
1627static void __rapl_pmu_event_start(struct perf_event *event)
1628{
1629 struct rapl_package_pmu_data *data = event_to_pmu_data(event);
1630
1631 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1632 return;
1633
1634 event->hw.state = 0;
1635
1636 list_add_tail(&event->active_entry, &data->active_list);
1637
1638 local64_set(&event->hw.prev_count, event_read_counter(event));
1639 if (++data->n_active == 1)
1640 hrtimer_start(&data->hrtimer, data->timer_interval,
1641 HRTIMER_MODE_REL_PINNED);
1642}
1643
1644static void rapl_pmu_event_start(struct perf_event *event, int mode)
1645{
1646 struct rapl_package_pmu_data *data = event_to_pmu_data(event);
1647 unsigned long flags;
1648
1649 raw_spin_lock_irqsave(&data->lock, flags);
1650 __rapl_pmu_event_start(event);
1651 raw_spin_unlock_irqrestore(&data->lock, flags);
1652}
1653
1654static u64 rapl_event_update(struct perf_event *event)
1655{
1656 struct hw_perf_event *hwc = &event->hw;
1657 struct rapl_package_pmu_data *data = event_to_pmu_data(event);
1658 u64 prev_raw_count, new_raw_count;
1659 s64 delta, sdelta;
1660
1661 /*
1662 * Follow the generic code to drain hwc->prev_count.
1663 * The loop is not expected to run for multiple times.
1664 */
1665 prev_raw_count = local64_read(&hwc->prev_count);
1666 do {
1667 new_raw_count = event_read_counter(event);
1668 } while (!local64_try_cmpxchg(&hwc->prev_count,
1669 &prev_raw_count, new_raw_count));
1670
1671
1672 /*
1673 * Now we have the new raw value and have updated the prev
1674 * timestamp already. We can now calculate the elapsed delta
1675 * (event-)time and add that to the generic event.
1676 */
1677 delta = new_raw_count - prev_raw_count;
1678
1679 /*
1680 * Scale delta to smallest unit (2^-32)
1681 * users must then scale back: count * 1/(1e9*2^32) to get Joules
1682 * or use ldexp(count, -32).
1683 * Watts = Joules/Time delta
1684 */
1685 sdelta = delta * data->scale[event->hw.flags];
1686
1687 local64_add(sdelta, &event->count);
1688
1689 return new_raw_count;
1690}
1691
1692static void rapl_pmu_event_stop(struct perf_event *event, int mode)
1693{
1694 struct rapl_package_pmu_data *data = event_to_pmu_data(event);
1695 struct hw_perf_event *hwc = &event->hw;
1696 unsigned long flags;
1697
1698 raw_spin_lock_irqsave(&data->lock, flags);
1699
1700 /* Mark event as deactivated and stopped */
1701 if (!(hwc->state & PERF_HES_STOPPED)) {
1702 WARN_ON_ONCE(data->n_active <= 0);
1703 if (--data->n_active == 0)
1704 hrtimer_cancel(&data->hrtimer);
1705
1706 list_del(&event->active_entry);
1707
1708 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1709 hwc->state |= PERF_HES_STOPPED;
1710 }
1711
1712 /* Check if update of sw counter is necessary */
1713 if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1714 /*
1715 * Drain the remaining delta count out of a event
1716 * that we are disabling:
1717 */
1718 rapl_event_update(event);
1719 hwc->state |= PERF_HES_UPTODATE;
1720 }
1721
1722 raw_spin_unlock_irqrestore(&data->lock, flags);
1723}
1724
1725static int rapl_pmu_event_add(struct perf_event *event, int mode)
1726{
1727 struct rapl_package_pmu_data *data = event_to_pmu_data(event);
1728 struct hw_perf_event *hwc = &event->hw;
1729 unsigned long flags;
1730
1731 raw_spin_lock_irqsave(&data->lock, flags);
1732
1733 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1734
1735 if (mode & PERF_EF_START)
1736 __rapl_pmu_event_start(event);
1737
1738 raw_spin_unlock_irqrestore(&data->lock, flags);
1739
1740 return 0;
1741}
1742
1743static void rapl_pmu_event_del(struct perf_event *event, int flags)
1744{
1745 rapl_pmu_event_stop(event, PERF_EF_UPDATE);
1746}
1747
1748/* RAPL PMU event ids, same as shown in sysfs */
1749enum perf_rapl_events {
1750 PERF_RAPL_PP0 = 1, /* all cores */
1751 PERF_RAPL_PKG, /* entire package */
1752 PERF_RAPL_RAM, /* DRAM */
1753 PERF_RAPL_PP1, /* gpu */
1754 PERF_RAPL_PSYS, /* psys */
1755 PERF_RAPL_MAX
1756};
1757#define RAPL_EVENT_MASK GENMASK(7, 0)
1758
1759static const int event_to_domain[PERF_RAPL_MAX] = {
1760 [PERF_RAPL_PP0] = RAPL_DOMAIN_PP0,
1761 [PERF_RAPL_PKG] = RAPL_DOMAIN_PACKAGE,
1762 [PERF_RAPL_RAM] = RAPL_DOMAIN_DRAM,
1763 [PERF_RAPL_PP1] = RAPL_DOMAIN_PP1,
1764 [PERF_RAPL_PSYS] = RAPL_DOMAIN_PLATFORM,
1765};
1766
1767static int rapl_pmu_event_init(struct perf_event *event)
1768{
1769 struct rapl_package *pos, *rp = NULL;
1770 u64 cfg = event->attr.config & RAPL_EVENT_MASK;
1771 int domain, idx;
1772
1773 /* Only look at RAPL events */
1774 if (event->attr.type != event->pmu->type)
1775 return -ENOENT;
1776
1777 /* Check for supported events only */
1778 if (!cfg || cfg >= PERF_RAPL_MAX)
1779 return -EINVAL;
1780
1781 if (event->cpu < 0)
1782 return -EINVAL;
1783
1784 /* Find out which Package the event belongs to */
1785 list_for_each_entry(pos, &rapl_packages, plist) {
1786 if (is_rp_pmu_cpu(pos, event->cpu)) {
1787 rp = pos;
1788 break;
1789 }
1790 }
1791 if (!rp)
1792 return -ENODEV;
1793
1794 /* Find out which RAPL Domain the event belongs to */
1795 domain = event_to_domain[cfg];
1796
1797 event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
1798 event->pmu_private = rp; /* Which package */
1799 event->hw.flags = domain; /* Which domain */
1800
1801 event->hw.idx = -1;
1802 /* Find out the index in rp->domains[] to get domain pointer */
1803 for (idx = 0; idx < rp->nr_domains; idx++) {
1804 if (rp->domains[idx].id == domain) {
1805 event->hw.idx = idx;
1806 break;
1807 }
1808 }
1809
1810 return 0;
1811}
1812
/* Refresh the count of @event; the raw counter value returned is not needed. */
static void rapl_pmu_event_read(struct perf_event *event)
{
	(void)rapl_event_update(event);
}
1817
1818static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
1819{
1820 struct rapl_package_pmu_data *data =
1821 container_of(hrtimer, struct rapl_package_pmu_data, hrtimer);
1822 struct perf_event *event;
1823 unsigned long flags;
1824
1825 if (!data->n_active)
1826 return HRTIMER_NORESTART;
1827
1828 raw_spin_lock_irqsave(&data->lock, flags);
1829
1830 list_for_each_entry(event, &data->active_list, active_entry)
1831 rapl_event_update(event);
1832
1833 raw_spin_unlock_irqrestore(&data->lock, flags);
1834
1835 hrtimer_forward_now(hrtimer, data->timer_interval);
1836
1837 return HRTIMER_RESTART;
1838}
1839
1840/* PMU sysfs attributes */
1841
1842/*
1843 * There are no default events, but we need to create "events" group (with
1844 * empty attrs) before updating it with detected events.
1845 */
1846static struct attribute *attrs_empty[] = {
1847 NULL,
1848};
1849
1850static struct attribute_group pmu_events_group = {
1851 .name = "events",
1852 .attrs = attrs_empty,
1853};
1854
1855static ssize_t cpumask_show(struct device *dev,
1856 struct device_attribute *attr, char *buf)
1857{
1858 struct rapl_package *rp;
1859 cpumask_var_t cpu_mask;
1860 int cpu;
1861 int ret;
1862
1863 if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1864 return -ENOMEM;
1865
1866 cpus_read_lock();
1867
1868 cpumask_clear(cpu_mask);
1869
1870 /* Choose a cpu for each RAPL Package */
1871 list_for_each_entry(rp, &rapl_packages, plist) {
1872 cpu = get_pmu_cpu(rp);
1873 if (cpu < nr_cpu_ids)
1874 cpumask_set_cpu(cpu, cpu_mask);
1875 }
1876 cpus_read_unlock();
1877
1878 ret = cpumap_print_to_pagebuf(true, buf, cpu_mask);
1879
1880 free_cpumask_var(cpu_mask);
1881
1882 return ret;
1883}
1884
1885static DEVICE_ATTR_RO(cpumask);
1886
1887static struct attribute *pmu_cpumask_attrs[] = {
1888 &dev_attr_cpumask.attr,
1889 NULL
1890};
1891
1892static struct attribute_group pmu_cpumask_group = {
1893 .attrs = pmu_cpumask_attrs,
1894};
1895
1896PMU_FORMAT_ATTR(event, "config:0-7");
1897static struct attribute *pmu_format_attr[] = {
1898 &format_attr_event.attr,
1899 NULL
1900};
1901
1902static struct attribute_group pmu_format_group = {
1903 .name = "format",
1904 .attrs = pmu_format_attr,
1905};
1906
1907static const struct attribute_group *pmu_attr_groups[] = {
1908 &pmu_events_group,
1909 &pmu_cpumask_group,
1910 &pmu_format_group,
1911 NULL
1912};
1913
1914#define RAPL_EVENT_ATTR_STR(_name, v, str) \
1915static struct perf_pmu_events_attr event_attr_##v = { \
1916 .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
1917 .event_str = str, \
1918}
1919
1920RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
1921RAPL_EVENT_ATTR_STR(energy-pkg, rapl_pkg, "event=0x02");
1922RAPL_EVENT_ATTR_STR(energy-ram, rapl_ram, "event=0x03");
1923RAPL_EVENT_ATTR_STR(energy-gpu, rapl_gpu, "event=0x04");
1924RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
1925
1926RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_unit_cores, "Joules");
1927RAPL_EVENT_ATTR_STR(energy-pkg.unit, rapl_unit_pkg, "Joules");
1928RAPL_EVENT_ATTR_STR(energy-ram.unit, rapl_unit_ram, "Joules");
1929RAPL_EVENT_ATTR_STR(energy-gpu.unit, rapl_unit_gpu, "Joules");
1930RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_unit_psys, "Joules");
1931
1932RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_scale_cores, "2.3283064365386962890625e-10");
1933RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_scale_pkg, "2.3283064365386962890625e-10");
1934RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_scale_ram, "2.3283064365386962890625e-10");
1935RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_scale_gpu, "2.3283064365386962890625e-10");
1936RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_scale_psys, "2.3283064365386962890625e-10");
1937
1938#define RAPL_EVENT_GROUP(_name, domain) \
1939static struct attribute *pmu_attr_##_name[] = { \
1940 &event_attr_rapl_##_name.attr.attr, \
1941 &event_attr_rapl_unit_##_name.attr.attr, \
1942 &event_attr_rapl_scale_##_name.attr.attr, \
1943 NULL \
1944}; \
1945static umode_t is_visible_##_name(struct kobject *kobj, struct attribute *attr, int event) \
1946{ \
1947 return rapl_pmu.domain_map & BIT(domain) ? attr->mode : 0; \
1948} \
1949static struct attribute_group pmu_group_##_name = { \
1950 .name = "events", \
1951 .attrs = pmu_attr_##_name, \
1952 .is_visible = is_visible_##_name, \
1953}
1954
1955RAPL_EVENT_GROUP(cores, RAPL_DOMAIN_PP0);
1956RAPL_EVENT_GROUP(pkg, RAPL_DOMAIN_PACKAGE);
1957RAPL_EVENT_GROUP(ram, RAPL_DOMAIN_DRAM);
1958RAPL_EVENT_GROUP(gpu, RAPL_DOMAIN_PP1);
1959RAPL_EVENT_GROUP(psys, RAPL_DOMAIN_PLATFORM);
1960
1961static const struct attribute_group *pmu_attr_update[] = {
1962 &pmu_group_cores,
1963 &pmu_group_pkg,
1964 &pmu_group_ram,
1965 &pmu_group_gpu,
1966 &pmu_group_psys,
1967 NULL
1968};
1969
1970static int rapl_pmu_update(struct rapl_package *rp)
1971{
1972 int ret = 0;
1973
1974 /* Return if PMU already covers all events supported by current RAPL Package */
1975 if (rapl_pmu.registered && !(rp->domain_map & (~rapl_pmu.domain_map)))
1976 goto end;
1977
1978 /* Unregister previous registered PMU */
1979 if (rapl_pmu.registered)
1980 perf_pmu_unregister(&rapl_pmu.pmu);
1981
1982 rapl_pmu.registered = false;
1983 rapl_pmu.domain_map |= rp->domain_map;
1984
1985 memset(&rapl_pmu.pmu, 0, sizeof(struct pmu));
1986 rapl_pmu.pmu.attr_groups = pmu_attr_groups;
1987 rapl_pmu.pmu.attr_update = pmu_attr_update;
1988 rapl_pmu.pmu.task_ctx_nr = perf_invalid_context;
1989 rapl_pmu.pmu.event_init = rapl_pmu_event_init;
1990 rapl_pmu.pmu.add = rapl_pmu_event_add;
1991 rapl_pmu.pmu.del = rapl_pmu_event_del;
1992 rapl_pmu.pmu.start = rapl_pmu_event_start;
1993 rapl_pmu.pmu.stop = rapl_pmu_event_stop;
1994 rapl_pmu.pmu.read = rapl_pmu_event_read;
1995 rapl_pmu.pmu.module = THIS_MODULE;
1996 rapl_pmu.pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT;
1997 ret = perf_pmu_register(&rapl_pmu.pmu, "power", -1);
1998 if (ret) {
1999 pr_info("Failed to register PMU\n");
2000 return ret;
2001 }
2002
2003 rapl_pmu.registered = true;
2004end:
2005 rp->has_pmu = true;
2006 return ret;
2007}
2008
2009int rapl_package_add_pmu(struct rapl_package *rp)
2010{
2011 struct rapl_package_pmu_data *data = &rp->pmu_data;
2012 int idx;
2013
2014 if (rp->has_pmu)
2015 return -EEXIST;
2016
2017 guard(cpus_read_lock)();
2018
2019 for (idx = 0; idx < rp->nr_domains; idx++) {
2020 struct rapl_domain *rd = &rp->domains[idx];
2021 int domain = rd->id;
2022 u64 val;
2023
2024 if (!test_bit(domain, &rp->domain_map))
2025 continue;
2026
2027 /*
2028 * The RAPL PMU granularity is 2^-32 Joules
2029 * data->scale[]: times of 2^-32 Joules for each ENERGY COUNTER increase
2030 */
2031 val = rd->energy_unit * (1ULL << 32);
2032 do_div(val, ENERGY_UNIT_SCALE * 1000000);
2033 data->scale[domain] = val;
2034
2035 if (!rapl_pmu.timer_ms) {
2036 struct rapl_primitive_info *rpi = get_rpi(rp, ENERGY_COUNTER);
2037
2038 /*
2039 * Calculate the timer rate:
2040 * Use reference of 200W for scaling the timeout to avoid counter
2041 * overflows.
2042 *
2043 * max_count = rpi->mask >> rpi->shift + 1
2044 * max_energy_pj = max_count * rd->energy_unit
2045 * max_time_sec = (max_energy_pj / 1000000000) / 200w
2046 *
2047 * rapl_pmu.timer_ms = max_time_sec * 1000 / 2
2048 */
2049 val = (rpi->mask >> rpi->shift) + 1;
2050 val *= rd->energy_unit;
2051 do_div(val, 1000000 * 200 * 2);
2052 rapl_pmu.timer_ms = val;
2053
2054 pr_debug("%llu ms overflow timer\n", rapl_pmu.timer_ms);
2055 }
2056
2057 pr_debug("Domain %s: hw unit %lld * 2^-32 Joules\n", rd->name, data->scale[domain]);
2058 }
2059
2060 /* Initialize per package PMU data */
2061 raw_spin_lock_init(&data->lock);
2062 INIT_LIST_HEAD(&data->active_list);
2063 data->timer_interval = ms_to_ktime(rapl_pmu.timer_ms);
2064 hrtimer_init(&data->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2065 data->hrtimer.function = rapl_hrtimer_handle;
2066
2067 return rapl_pmu_update(rp);
2068}
2069EXPORT_SYMBOL_GPL(rapl_package_add_pmu);
2070
2071void rapl_package_remove_pmu(struct rapl_package *rp)
2072{
2073 struct rapl_package *pos;
2074
2075 if (!rp->has_pmu)
2076 return;
2077
2078 guard(cpus_read_lock)();
2079
2080 list_for_each_entry(pos, &rapl_packages, plist) {
2081 /* PMU is still needed */
2082 if (pos->has_pmu && pos != rp)
2083 return;
2084 }
2085
2086 perf_pmu_unregister(&rapl_pmu.pmu);
2087 memset(&rapl_pmu, 0, sizeof(struct rapl_pmu));
2088}
2089EXPORT_SYMBOL_GPL(rapl_package_remove_pmu);
2090#endif
2091
2d281d81 2092/* called from CPU hotplug notifier, hotplug lock held */
1aa09b93 2093void rapl_remove_package_cpuslocked(struct rapl_package *rp)
2d281d81
JP
2094{
2095 struct rapl_domain *rd, *rd_package = NULL;
2096
58705069
TG
2097 package_power_limit_irq_restore(rp);
2098
2d281d81 2099 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
9050a9cd
ZR
2100 int i;
2101
2102 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
2103 rapl_write_pl_data(rd, i, PL_ENABLE, 0);
2104 rapl_write_pl_data(rd, i, PL_CLAMP, 0);
58705069 2105 }
9050a9cd 2106
2d281d81
JP
2107 if (rd->id == RAPL_DOMAIN_PACKAGE) {
2108 rd_package = rd;
2109 continue;
2110 }
9ea7612c
ZR
2111 pr_debug("remove package, undo power limit on %s: %s\n",
2112 rp->name, rd->name);
3382388d
ZR
2113 powercap_unregister_zone(rp->priv->control_type,
2114 &rd->power_zone);
2d281d81
JP
2115 }
2116 /* do parent zone last */
3382388d
ZR
2117 powercap_unregister_zone(rp->priv->control_type,
2118 &rd_package->power_zone);
2d281d81
JP
2119 list_del(&rp->plist);
2120 kfree(rp);
2121}
1aa09b93
ZR
2122EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked);
2123
2124void rapl_remove_package(struct rapl_package *rp)
2125{
2126 guard(cpus_read_lock)();
2127 rapl_remove_package_cpuslocked(rp);
2128}
3382388d
ZR
2129EXPORT_SYMBOL_GPL(rapl_remove_package);
2130
2131/* caller to ensure CPU hotplug lock is held */
1aa09b93
ZR
2132struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
2133 bool id_is_cpu)
3382388d 2134{
3382388d 2135 struct rapl_package *rp;
bf44b901
ZR
2136 int uid;
2137
2138 if (id_is_cpu)
2139 uid = topology_logical_die_id(id);
2140 else
2141 uid = id;
3382388d
ZR
2142
2143 list_for_each_entry(rp, &rapl_packages, plist) {
bf44b901 2144 if (rp->id == uid
3382388d
ZR
2145 && rp->priv->control_type == priv->control_type)
2146 return rp;
2147 }
2148
2149 return NULL;
2150}
1aa09b93
ZR
2151EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked);
2152
2153struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
2154{
2155 guard(cpus_read_lock)();
2156 return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu);
2157}
3382388d 2158EXPORT_SYMBOL_GPL(rapl_find_package_domain);
2d281d81
JP
2159
2160/* called from CPU hotplug notifier, hotplug lock held */
1aa09b93 2161struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu)
2d281d81 2162{
2d281d81 2163 struct rapl_package *rp;
b4005e92 2164 int ret;
2d281d81 2165
2d281d81
JP
2166 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
2167 if (!rp)
b4005e92 2168 return ERR_PTR(-ENOMEM);
2d281d81 2169
bf44b901
ZR
2170 if (id_is_cpu) {
2171 rp->id = topology_logical_die_id(id);
2172 rp->lead_cpu = id;
bd745d1c 2173 if (topology_max_dies_per_package() > 1)
bf44b901
ZR
2174 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d",
2175 topology_physical_package_id(id), topology_die_id(id));
2176 else
2177 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
2178 topology_physical_package_id(id));
2179 } else {
2180 rp->id = id;
2181 rp->lead_cpu = -1;
2182 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id);
2183 }
323ee64a 2184
bf44b901 2185 rp->priv = priv;
e8e28c2a
ZR
2186 ret = rapl_config(rp);
2187 if (ret)
2188 goto err_free_package;
2189
2d281d81 2190 /* check if the package contains valid domains */
693c1d78 2191 if (rapl_detect_domains(rp)) {
2d281d81
JP
2192 ret = -ENODEV;
2193 goto err_free_package;
2194 }
a74f4367
TG
2195 ret = rapl_package_register_powercap(rp);
2196 if (!ret) {
2d281d81
JP
2197 INIT_LIST_HEAD(&rp->plist);
2198 list_add(&rp->plist, &rapl_packages);
b4005e92 2199 return rp;
2d281d81
JP
2200 }
2201
2202err_free_package:
2203 kfree(rp->domains);
2204 kfree(rp);
b4005e92 2205 return ERR_PTR(ret);
2d281d81 2206}
1aa09b93
ZR
2207EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked);
2208
2209struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
2210{
2211 guard(cpus_read_lock)();
2212 return rapl_add_package_cpuslocked(id, priv, id_is_cpu);
2213}
3382388d 2214EXPORT_SYMBOL_GPL(rapl_add_package);
2d281d81 2215
52b3672c
ZH
2216static void power_limit_state_save(void)
2217{
2218 struct rapl_package *rp;
2219 struct rapl_domain *rd;
9050a9cd 2220 int ret, i;
52b3672c 2221
5d4c779c 2222 cpus_read_lock();
52b3672c
ZH
2223 list_for_each_entry(rp, &rapl_packages, plist) {
2224 if (!rp->power_zone)
2225 continue;
2226 rd = power_zone_to_rapl_domain(rp->power_zone);
9050a9cd
ZR
2227 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
2228 ret = rapl_read_pl_data(rd, i, PL_LIMIT, true,
3382388d 2229 &rd->rpl[i].last_power_limit);
9050a9cd
ZR
2230 if (ret)
2231 rd->rpl[i].last_power_limit = 0;
52b3672c
ZH
2232 }
2233 }
5d4c779c 2234 cpus_read_unlock();
52b3672c
ZH
2235}
2236
2237static void power_limit_state_restore(void)
2238{
2239 struct rapl_package *rp;
2240 struct rapl_domain *rd;
9050a9cd 2241 int i;
52b3672c 2242
5d4c779c 2243 cpus_read_lock();
52b3672c
ZH
2244 list_for_each_entry(rp, &rapl_packages, plist) {
2245 if (!rp->power_zone)
2246 continue;
2247 rd = power_zone_to_rapl_domain(rp->power_zone);
9050a9cd
ZR
2248 for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++)
2249 if (rd->rpl[i].last_power_limit)
2250 rapl_write_pl_data(rd, i, PL_LIMIT,
2251 rd->rpl[i].last_power_limit);
52b3672c 2252 }
5d4c779c 2253 cpus_read_unlock();
52b3672c
ZH
2254}
2255
2256static int rapl_pm_callback(struct notifier_block *nb,
3382388d 2257 unsigned long mode, void *_unused)
52b3672c
ZH
2258{
2259 switch (mode) {
2260 case PM_SUSPEND_PREPARE:
2261 power_limit_state_save();
2262 break;
2263 case PM_POST_SUSPEND:
2264 power_limit_state_restore();
2265 break;
2266 }
2267 return NOTIFY_OK;
2268}
2269
2270static struct notifier_block rapl_pm_notifier = {
2271 .notifier_call = rapl_pm_callback,
2272};
2273
abcfaeb3
ZR
2274static struct platform_device *rapl_msr_platdev;
2275
2276static int __init rapl_init(void)
2d281d81 2277{
087e9cba 2278 const struct x86_cpu_id *id;
58705069 2279 int ret;
2d281d81 2280
087e9cba 2281 id = x86_match_cpu(rapl_ids);
1488ac99 2282 if (id) {
b4288ce7 2283 defaults_msr = (struct rapl_defaults *)id->driver_data;
2d281d81 2284
1488ac99
ZR
2285 rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
2286 if (!rapl_msr_platdev)
2287 return -ENOMEM;
52b3672c 2288
1488ac99
ZR
2289 ret = platform_device_add(rapl_msr_platdev);
2290 if (ret) {
2291 platform_device_put(rapl_msr_platdev);
2292 return ret;
2293 }
abcfaeb3
ZR
2294 }
2295
1488ac99
ZR
2296 ret = register_pm_notifier(&rapl_pm_notifier);
2297 if (ret && rapl_msr_platdev) {
2298 platform_device_del(rapl_msr_platdev);
abcfaeb3 2299 platform_device_put(rapl_msr_platdev);
1488ac99 2300 }
abcfaeb3
ZR
2301
2302 return ret;
2d281d81
JP
2303}
2304
abcfaeb3 2305static void __exit rapl_exit(void)
2d281d81 2306{
abcfaeb3 2307 platform_device_unregister(rapl_msr_platdev);
52b3672c 2308 unregister_pm_notifier(&rapl_pm_notifier);
2d281d81
JP
2309}
2310
f76cb066 2311fs_initcall(rapl_init);
abcfaeb3
ZR
2312module_exit(rapl_exit);
2313
3382388d 2314MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
2d281d81
JP
2315MODULE_AUTHOR("Jacob Pan <[email protected]>");
2316MODULE_LICENSE("GPL v2");
This page took 0.958912 seconds and 4 git commands to generate.