]>
Commit | Line | Data |
---|---|---|
f6cc69f1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
2d281d81 | 2 | /* |
3382388d ZR |
3 | * Common code for Intel Running Average Power Limit (RAPL) support. |
4 | * Copyright (c) 2019, Intel Corporation. | |
2d281d81 JP |
5 | */ |
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
7 | ||
72b8b941 | 8 | #include <linux/bitmap.h> |
1aa09b93 | 9 | #include <linux/cleanup.h> |
72b8b941 ZR |
10 | #include <linux/cpu.h> |
11 | #include <linux/delay.h> | |
12 | #include <linux/device.h> | |
13 | #include <linux/intel_rapl.h> | |
2d281d81 | 14 | #include <linux/kernel.h> |
2d281d81 | 15 | #include <linux/list.h> |
2d281d81 | 16 | #include <linux/log2.h> |
72b8b941 | 17 | #include <linux/module.h> |
575024a8 ZR |
18 | #include <linux/nospec.h> |
19 | #include <linux/perf_event.h> | |
72b8b941 | 20 | #include <linux/platform_device.h> |
2d281d81 | 21 | #include <linux/powercap.h> |
3382388d | 22 | #include <linux/processor.h> |
72b8b941 ZR |
23 | #include <linux/slab.h> |
24 | #include <linux/suspend.h> | |
25 | #include <linux/sysfs.h> | |
26 | #include <linux/types.h> | |
abcfaeb3 | 27 | |
2d281d81 | 28 | #include <asm/cpu_device_id.h> |
62d16733 | 29 | #include <asm/intel-family.h> |
72b8b941 | 30 | #include <asm/iosf_mbi.h> |
2d281d81 JP |
31 | |
32 | /* bitmasks for RAPL MSRs, used by primitive access functions */ | |
33 | #define ENERGY_STATUS_MASK 0xffffffff | |
34 | ||
35 | #define POWER_LIMIT1_MASK 0x7FFF | |
36 | #define POWER_LIMIT1_ENABLE BIT(15) | |
37 | #define POWER_LIMIT1_CLAMP BIT(16) | |
38 | ||
39 | #define POWER_LIMIT2_MASK (0x7FFFULL<<32) | |
40 | #define POWER_LIMIT2_ENABLE BIT_ULL(47) | |
41 | #define POWER_LIMIT2_CLAMP BIT_ULL(48) | |
0c2ddedd ZR |
42 | #define POWER_HIGH_LOCK BIT_ULL(63) |
43 | #define POWER_LOW_LOCK BIT(31) | |
2d281d81 | 44 | |
8365a898 SP |
45 | #define POWER_LIMIT4_MASK 0x1FFF |
46 | ||
2d281d81 JP |
47 | #define TIME_WINDOW1_MASK (0x7FULL<<17) |
48 | #define TIME_WINDOW2_MASK (0x7FULL<<49) | |
49 | ||
50 | #define POWER_UNIT_OFFSET 0 | |
51 | #define POWER_UNIT_MASK 0x0F | |
52 | ||
53 | #define ENERGY_UNIT_OFFSET 0x08 | |
54 | #define ENERGY_UNIT_MASK 0x1F00 | |
55 | ||
56 | #define TIME_UNIT_OFFSET 0x10 | |
57 | #define TIME_UNIT_MASK 0xF0000 | |
58 | ||
59 | #define POWER_INFO_MAX_MASK (0x7fffULL<<32) | |
60 | #define POWER_INFO_MIN_MASK (0x7fffULL<<16) | |
61 | #define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48) | |
62 | #define POWER_INFO_THERMAL_SPEC_MASK 0x7fff | |
63 | ||
64 | #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff | |
65 | #define PP_POLICY_MASK 0x1F | |
66 | ||
931da6a0 ZR |
67 | /* |
68 | * SPR has different layout for Psys Domain PowerLimit registers. | |
69 | * There are 17 bits of PL1 and PL2 instead of 15 bits. | |
70 | * The Enable bits and TimeWindow bits are also shifted as a result. | |
71 | */ | |
72 | #define PSYS_POWER_LIMIT1_MASK 0x1FFFF | |
73 | #define PSYS_POWER_LIMIT1_ENABLE BIT(17) | |
74 | ||
75 | #define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32) | |
76 | #define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49) | |
77 | ||
78 | #define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19) | |
79 | #define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51) | |
80 | ||
e12dee18 ZR |
81 | /* bitmasks for RAPL TPMI, used by primitive access functions */ |
82 | #define TPMI_POWER_LIMIT_MASK 0x3FFFF | |
83 | #define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62) | |
84 | #define TPMI_TIME_WINDOW_MASK (0x7FULL<<18) | |
85 | #define TPMI_INFO_SPEC_MASK 0x3FFFF | |
86 | #define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18) | |
87 | #define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36) | |
88 | #define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54) | |
89 | ||
2d281d81 | 90 | /* Non HW constants */ |
3382388d | 91 | #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ |
2d281d81 JP |
92 | #define RAPL_PRIMITIVE_DUMMY BIT(2) |
93 | ||
2d281d81 JP |
94 | #define TIME_WINDOW_MAX_MSEC 40000 |
95 | #define TIME_WINDOW_MIN_MSEC 250 | |
3382388d | 96 | #define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */ |
2d281d81 | 97 | enum unit_type { |
3382388d | 98 | ARBITRARY_UNIT, /* no translation */ |
2d281d81 JP |
99 | POWER_UNIT, |
100 | ENERGY_UNIT, | |
101 | TIME_UNIT, | |
102 | }; | |
103 | ||
2d281d81 | 104 | /* per domain data, some are optional */ |
2d281d81 JP |
105 | #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2) |
106 | ||
2d281d81 JP |
107 | #define DOMAIN_STATE_INACTIVE BIT(0) |
108 | #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1) | |
2d281d81 | 109 | |
9050a9cd ZR |
110 | static const char *pl_names[NR_POWER_LIMITS] = { |
111 | [POWER_LIMIT1] = "long_term", | |
112 | [POWER_LIMIT2] = "short_term", | |
113 | [POWER_LIMIT4] = "peak_power", | |
114 | }; | |
115 | ||
116 | enum pl_prims { | |
117 | PL_ENABLE, | |
118 | PL_CLAMP, | |
119 | PL_LIMIT, | |
120 | PL_TIME_WINDOW, | |
121 | PL_MAX_POWER, | |
f442bd27 | 122 | PL_LOCK, |
9050a9cd ZR |
123 | }; |
124 | ||
125 | static bool is_pl_valid(struct rapl_domain *rd, int pl) | |
126 | { | |
127 | if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4) | |
128 | return false; | |
129 | return rd->rpl[pl].name ? true : false; | |
130 | } | |
131 | ||
f442bd27 ZR |
132 | static int get_pl_lock_prim(struct rapl_domain *rd, int pl) |
133 | { | |
e12dee18 ZR |
134 | if (rd->rp->priv->type == RAPL_IF_TPMI) { |
135 | if (pl == POWER_LIMIT1) | |
136 | return PL1_LOCK; | |
137 | if (pl == POWER_LIMIT2) | |
138 | return PL2_LOCK; | |
139 | if (pl == POWER_LIMIT4) | |
140 | return PL4_LOCK; | |
141 | } | |
142 | ||
143 | /* MSR/MMIO Interface doesn't have Lock bit for PL4 */ | |
144 | if (pl == POWER_LIMIT4) | |
145 | return -EINVAL; | |
146 | ||
f442bd27 ZR |
147 | /* |
148 | * Power Limit register that supports two power limits has a different | |
149 | * bit position for the Lock bit. | |
150 | */ | |
151 | if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2)) | |
152 | return FW_HIGH_LOCK; | |
153 | return FW_LOCK; | |
154 | } | |
155 | ||
156 | static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim) | |
9050a9cd ZR |
157 | { |
158 | switch (pl) { | |
159 | case POWER_LIMIT1: | |
160 | if (prim == PL_ENABLE) | |
161 | return PL1_ENABLE; | |
e12dee18 | 162 | if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) |
9050a9cd ZR |
163 | return PL1_CLAMP; |
164 | if (prim == PL_LIMIT) | |
165 | return POWER_LIMIT1; | |
166 | if (prim == PL_TIME_WINDOW) | |
167 | return TIME_WINDOW1; | |
168 | if (prim == PL_MAX_POWER) | |
169 | return THERMAL_SPEC_POWER; | |
f442bd27 ZR |
170 | if (prim == PL_LOCK) |
171 | return get_pl_lock_prim(rd, pl); | |
9050a9cd ZR |
172 | return -EINVAL; |
173 | case POWER_LIMIT2: | |
174 | if (prim == PL_ENABLE) | |
175 | return PL2_ENABLE; | |
e12dee18 | 176 | if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI) |
9050a9cd ZR |
177 | return PL2_CLAMP; |
178 | if (prim == PL_LIMIT) | |
179 | return POWER_LIMIT2; | |
180 | if (prim == PL_TIME_WINDOW) | |
181 | return TIME_WINDOW2; | |
182 | if (prim == PL_MAX_POWER) | |
183 | return MAX_POWER; | |
f442bd27 ZR |
184 | if (prim == PL_LOCK) |
185 | return get_pl_lock_prim(rd, pl); | |
9050a9cd ZR |
186 | return -EINVAL; |
187 | case POWER_LIMIT4: | |
188 | if (prim == PL_LIMIT) | |
189 | return POWER_LIMIT4; | |
190 | if (prim == PL_ENABLE) | |
191 | return PL4_ENABLE; | |
192 | /* PL4 would be around two times PL2, use same prim as PL2. */ | |
193 | if (prim == PL_MAX_POWER) | |
194 | return MAX_POWER; | |
e12dee18 ZR |
195 | if (prim == PL_LOCK) |
196 | return get_pl_lock_prim(rd, pl); | |
9050a9cd ZR |
197 | return -EINVAL; |
198 | default: | |
199 | return -EINVAL; | |
200 | } | |
201 | } | |
2d281d81 | 202 | |
2d281d81 JP |
203 | #define power_zone_to_rapl_domain(_zone) \ |
204 | container_of(_zone, struct rapl_domain, power_zone) | |
205 | ||
087e9cba | 206 | struct rapl_defaults { |
51b63409 | 207 | u8 floor_freq_reg_addr; |
693c1d78 | 208 | int (*check_unit)(struct rapl_domain *rd); |
087e9cba | 209 | void (*set_floor_freq)(struct rapl_domain *rd, bool mode); |
cb532e72 | 210 | u64 (*compute_time_window)(struct rapl_domain *rd, u64 val, |
3382388d | 211 | bool to_raw); |
d474a4d3 | 212 | unsigned int dram_domain_energy_unit; |
2d798d9f | 213 | unsigned int psys_domain_energy_unit; |
931da6a0 | 214 | bool spr_psys_bits; |
087e9cba | 215 | }; |
b4288ce7 | 216 | static struct rapl_defaults *defaults_msr; |
e12dee18 | 217 | static const struct rapl_defaults defaults_tpmi; |
087e9cba | 218 | |
e8e28c2a ZR |
219 | static struct rapl_defaults *get_defaults(struct rapl_package *rp) |
220 | { | |
221 | return rp->priv->defaults; | |
222 | } | |
223 | ||
3c2c0845 | 224 | /* Sideband MBI registers */ |
51b63409 AT |
225 | #define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2) |
226 | #define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf) | |
3c2c0845 | 227 | |
2d281d81 JP |
228 | #define PACKAGE_PLN_INT_SAVED BIT(0) |
229 | #define MAX_PRIM_NAME (32) | |
230 | ||
231 | /* per domain data. used to describe individual knobs such that access function | |
232 | * can be consolidated into one instead of many inline functions. | |
233 | */ | |
234 | struct rapl_primitive_info { | |
235 | const char *name; | |
236 | u64 mask; | |
237 | int shift; | |
f7c4e0c8 | 238 | enum rapl_domain_reg_id id; |
2d281d81 JP |
239 | enum unit_type unit; |
240 | u32 flag; | |
241 | }; | |
242 | ||
243 | #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f) { \ | |
244 | .name = #p, \ | |
245 | .mask = m, \ | |
246 | .shift = s, \ | |
247 | .id = i, \ | |
248 | .unit = u, \ | |
249 | .flag = f \ | |
250 | } | |
251 | ||
252 | static void rapl_init_domains(struct rapl_package *rp); | |
253 | static int rapl_read_data_raw(struct rapl_domain *rd, | |
3382388d ZR |
254 | enum rapl_primitives prim, |
255 | bool xlate, u64 *data); | |
2d281d81 | 256 | static int rapl_write_data_raw(struct rapl_domain *rd, |
3382388d ZR |
257 | enum rapl_primitives prim, |
258 | unsigned long long value); | |
9050a9cd ZR |
259 | static int rapl_read_pl_data(struct rapl_domain *rd, int pl, |
260 | enum pl_prims pl_prim, | |
261 | bool xlate, u64 *data); | |
262 | static int rapl_write_pl_data(struct rapl_domain *rd, int pl, | |
263 | enum pl_prims pl_prim, | |
264 | unsigned long long value); | |
309557f5 | 265 | static u64 rapl_unit_xlate(struct rapl_domain *rd, |
3382388d | 266 | enum unit_type type, u64 value, int to_raw); |
309557f5 | 267 | static void package_power_limit_irq_save(struct rapl_package *rp); |
2d281d81 | 268 | |
3382388d | 269 | static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */ |
2d281d81 | 270 | |
3382388d | 271 | static const char *const rapl_domain_names[] = { |
2d281d81 JP |
272 | "package", |
273 | "core", | |
274 | "uncore", | |
275 | "dram", | |
3521ba1c | 276 | "psys", |
2d281d81 JP |
277 | }; |
278 | ||
3382388d ZR |
279 | static int get_energy_counter(struct powercap_zone *power_zone, |
280 | u64 *energy_raw) | |
2d281d81 JP |
281 | { |
282 | struct rapl_domain *rd; | |
283 | u64 energy_now; | |
284 | ||
285 | /* prevent CPU hotplug, make sure the RAPL domain does not go | |
286 | * away while reading the counter. | |
287 | */ | |
5d4c779c | 288 | cpus_read_lock(); |
2d281d81 JP |
289 | rd = power_zone_to_rapl_domain(power_zone); |
290 | ||
291 | if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) { | |
292 | *energy_raw = energy_now; | |
5d4c779c | 293 | cpus_read_unlock(); |
2d281d81 JP |
294 | |
295 | return 0; | |
296 | } | |
5d4c779c | 297 | cpus_read_unlock(); |
2d281d81 JP |
298 | |
299 | return -EIO; | |
300 | } | |
301 | ||
302 | static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy) | |
303 | { | |
d474a4d3 JP |
304 | struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev); |
305 | ||
309557f5 | 306 | *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); |
2d281d81 JP |
307 | return 0; |
308 | } | |
309 | ||
310 | static int release_zone(struct powercap_zone *power_zone) | |
311 | { | |
312 | struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); | |
309557f5 | 313 | struct rapl_package *rp = rd->rp; |
2d281d81 JP |
314 | |
315 | /* package zone is the last zone of a package, we can free | |
316 | * memory here since all children has been unregistered. | |
317 | */ | |
318 | if (rd->id == RAPL_DOMAIN_PACKAGE) { | |
2d281d81 JP |
319 | kfree(rd); |
320 | rp->domains = NULL; | |
321 | } | |
322 | ||
323 | return 0; | |
324 | ||
325 | } | |
326 | ||
327 | static int find_nr_power_limit(struct rapl_domain *rd) | |
328 | { | |
e1399ba2 | 329 | int i, nr_pl = 0; |
2d281d81 JP |
330 | |
331 | for (i = 0; i < NR_POWER_LIMITS; i++) { | |
9050a9cd | 332 | if (is_pl_valid(rd, i)) |
e1399ba2 | 333 | nr_pl++; |
2d281d81 JP |
334 | } |
335 | ||
e1399ba2 | 336 | return nr_pl; |
2d281d81 JP |
337 | } |
338 | ||
339 | static int set_domain_enable(struct powercap_zone *power_zone, bool mode) | |
340 | { | |
341 | struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); | |
e8e28c2a | 342 | struct rapl_defaults *defaults = get_defaults(rd->rp); |
9050a9cd | 343 | int ret; |
3c2c0845 | 344 | |
5d4c779c | 345 | cpus_read_lock(); |
9050a9cd ZR |
346 | ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode); |
347 | if (!ret && defaults->set_floor_freq) | |
e8e28c2a | 348 | defaults->set_floor_freq(rd, mode); |
5d4c779c | 349 | cpus_read_unlock(); |
2d281d81 | 350 | |
9050a9cd | 351 | return ret; |
2d281d81 JP |
352 | } |
353 | ||
354 | static int get_domain_enable(struct powercap_zone *power_zone, bool *mode) | |
355 | { | |
356 | struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); | |
357 | u64 val; | |
9050a9cd | 358 | int ret; |
2d281d81 | 359 | |
f442bd27 | 360 | if (rd->rpl[POWER_LIMIT1].locked) { |
2d281d81 JP |
361 | *mode = false; |
362 | return 0; | |
363 | } | |
5d4c779c | 364 | cpus_read_lock(); |
9050a9cd ZR |
365 | ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val); |
366 | if (!ret) | |
367 | *mode = val; | |
5d4c779c | 368 | cpus_read_unlock(); |
2d281d81 | 369 | |
9050a9cd | 370 | return ret; |
2d281d81 JP |
371 | } |
372 | ||
373 | /* per RAPL domain ops, in the order of rapl_domain_type */ | |
600c395b | 374 | static const struct powercap_zone_ops zone_ops[] = { |
2d281d81 JP |
375 | /* RAPL_DOMAIN_PACKAGE */ |
376 | { | |
3382388d ZR |
377 | .get_energy_uj = get_energy_counter, |
378 | .get_max_energy_range_uj = get_max_energy_counter, | |
379 | .release = release_zone, | |
380 | .set_enable = set_domain_enable, | |
381 | .get_enable = get_domain_enable, | |
382 | }, | |
2d281d81 JP |
383 | /* RAPL_DOMAIN_PP0 */ |
384 | { | |
3382388d ZR |
385 | .get_energy_uj = get_energy_counter, |
386 | .get_max_energy_range_uj = get_max_energy_counter, | |
387 | .release = release_zone, | |
388 | .set_enable = set_domain_enable, | |
389 | .get_enable = get_domain_enable, | |
390 | }, | |
2d281d81 JP |
391 | /* RAPL_DOMAIN_PP1 */ |
392 | { | |
3382388d ZR |
393 | .get_energy_uj = get_energy_counter, |
394 | .get_max_energy_range_uj = get_max_energy_counter, | |
395 | .release = release_zone, | |
396 | .set_enable = set_domain_enable, | |
397 | .get_enable = get_domain_enable, | |
398 | }, | |
2d281d81 JP |
399 | /* RAPL_DOMAIN_DRAM */ |
400 | { | |
3382388d ZR |
401 | .get_energy_uj = get_energy_counter, |
402 | .get_max_energy_range_uj = get_max_energy_counter, | |
403 | .release = release_zone, | |
404 | .set_enable = set_domain_enable, | |
405 | .get_enable = get_domain_enable, | |
406 | }, | |
3521ba1c SP |
407 | /* RAPL_DOMAIN_PLATFORM */ |
408 | { | |
3382388d ZR |
409 | .get_energy_uj = get_energy_counter, |
410 | .get_max_energy_range_uj = get_max_energy_counter, | |
411 | .release = release_zone, | |
412 | .set_enable = set_domain_enable, | |
413 | .get_enable = get_domain_enable, | |
414 | }, | |
2d281d81 JP |
415 | }; |
416 | ||
e1399ba2 JP |
417 | /* |
418 | * Constraint index used by powercap can be different than power limit (PL) | |
3382388d | 419 | * index in that some PLs maybe missing due to non-existent MSRs. So we |
e1399ba2 JP |
420 | * need to convert here by finding the valid PLs only (name populated). |
421 | */ | |
422 | static int contraint_to_pl(struct rapl_domain *rd, int cid) | |
423 | { | |
424 | int i, j; | |
425 | ||
9050a9cd ZR |
426 | for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) { |
427 | if (is_pl_valid(rd, i) && j++ == cid) { | |
e1399ba2 JP |
428 | pr_debug("%s: index %d\n", __func__, i); |
429 | return i; | |
430 | } | |
431 | } | |
cb43f81b | 432 | pr_err("Cannot find matching power limit for constraint %d\n", cid); |
e1399ba2 JP |
433 | |
434 | return -EINVAL; | |
435 | } | |
436 | ||
437 | static int set_power_limit(struct powercap_zone *power_zone, int cid, | |
3382388d | 438 | u64 power_limit) |
2d281d81 JP |
439 | { |
440 | struct rapl_domain *rd; | |
441 | struct rapl_package *rp; | |
442 | int ret = 0; | |
e1399ba2 | 443 | int id; |
2d281d81 | 444 | |
5d4c779c | 445 | cpus_read_lock(); |
2d281d81 | 446 | rd = power_zone_to_rapl_domain(power_zone); |
e1399ba2 | 447 | id = contraint_to_pl(rd, cid); |
309557f5 | 448 | rp = rd->rp; |
2d281d81 | 449 | |
9050a9cd | 450 | ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit); |
2d281d81 | 451 | if (!ret) |
309557f5 | 452 | package_power_limit_irq_save(rp); |
5d4c779c | 453 | cpus_read_unlock(); |
2d281d81 JP |
454 | return ret; |
455 | } | |
456 | ||
e1399ba2 | 457 | static int get_current_power_limit(struct powercap_zone *power_zone, int cid, |
3382388d | 458 | u64 *data) |
2d281d81 JP |
459 | { |
460 | struct rapl_domain *rd; | |
461 | u64 val; | |
2d281d81 | 462 | int ret = 0; |
e1399ba2 | 463 | int id; |
2d281d81 | 464 | |
5d4c779c | 465 | cpus_read_lock(); |
2d281d81 | 466 | rd = power_zone_to_rapl_domain(power_zone); |
e1399ba2 | 467 | id = contraint_to_pl(rd, cid); |
cb43f81b | 468 | |
9050a9cd ZR |
469 | ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val); |
470 | if (!ret) | |
2d281d81 JP |
471 | *data = val; |
472 | ||
5d4c779c | 473 | cpus_read_unlock(); |
2d281d81 JP |
474 | |
475 | return ret; | |
476 | } | |
477 | ||
e1399ba2 | 478 | static int set_time_window(struct powercap_zone *power_zone, int cid, |
3382388d | 479 | u64 window) |
2d281d81 JP |
480 | { |
481 | struct rapl_domain *rd; | |
482 | int ret = 0; | |
e1399ba2 | 483 | int id; |
2d281d81 | 484 | |
5d4c779c | 485 | cpus_read_lock(); |
2d281d81 | 486 | rd = power_zone_to_rapl_domain(power_zone); |
e1399ba2 JP |
487 | id = contraint_to_pl(rd, cid); |
488 | ||
9050a9cd | 489 | ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window); |
cb43f81b | 490 | |
5d4c779c | 491 | cpus_read_unlock(); |
2d281d81 JP |
492 | return ret; |
493 | } | |
494 | ||
3382388d ZR |
495 | static int get_time_window(struct powercap_zone *power_zone, int cid, |
496 | u64 *data) | |
2d281d81 JP |
497 | { |
498 | struct rapl_domain *rd; | |
499 | u64 val; | |
500 | int ret = 0; | |
e1399ba2 | 501 | int id; |
2d281d81 | 502 | |
5d4c779c | 503 | cpus_read_lock(); |
2d281d81 | 504 | rd = power_zone_to_rapl_domain(power_zone); |
e1399ba2 JP |
505 | id = contraint_to_pl(rd, cid); |
506 | ||
9050a9cd | 507 | ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val); |
2d281d81 JP |
508 | if (!ret) |
509 | *data = val; | |
cb43f81b | 510 | |
5d4c779c | 511 | cpus_read_unlock(); |
2d281d81 JP |
512 | |
513 | return ret; | |
514 | } | |
515 | ||
3382388d ZR |
516 | static const char *get_constraint_name(struct powercap_zone *power_zone, |
517 | int cid) | |
2d281d81 | 518 | { |
2d281d81 | 519 | struct rapl_domain *rd; |
e1399ba2 | 520 | int id; |
2d281d81 JP |
521 | |
522 | rd = power_zone_to_rapl_domain(power_zone); | |
e1399ba2 JP |
523 | id = contraint_to_pl(rd, cid); |
524 | if (id >= 0) | |
525 | return rd->rpl[id].name; | |
2d281d81 | 526 | |
e1399ba2 | 527 | return NULL; |
2d281d81 JP |
528 | } |
529 | ||
9050a9cd | 530 | static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data) |
2d281d81 JP |
531 | { |
532 | struct rapl_domain *rd; | |
533 | u64 val; | |
2d281d81 | 534 | int ret = 0; |
9050a9cd | 535 | int id; |
2d281d81 | 536 | |
5d4c779c | 537 | cpus_read_lock(); |
2d281d81 | 538 | rd = power_zone_to_rapl_domain(power_zone); |
9050a9cd ZR |
539 | id = contraint_to_pl(rd, cid); |
540 | ||
541 | ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val); | |
542 | if (!ret) | |
2d281d81 JP |
543 | *data = val; |
544 | ||
8365a898 | 545 | /* As a generalization rule, PL4 would be around two times PL2. */ |
9050a9cd | 546 | if (id == POWER_LIMIT4) |
8365a898 SP |
547 | *data = *data * 2; |
548 | ||
5d4c779c | 549 | cpus_read_unlock(); |
2d281d81 JP |
550 | |
551 | return ret; | |
552 | } | |
553 | ||
600c395b | 554 | static const struct powercap_zone_constraint_ops constraint_ops = { |
2d281d81 JP |
555 | .set_power_limit_uw = set_power_limit, |
556 | .get_power_limit_uw = get_current_power_limit, | |
557 | .set_time_window_us = set_time_window, | |
558 | .get_time_window_us = get_time_window, | |
559 | .get_max_power_uw = get_max_power, | |
560 | .get_name = get_constraint_name, | |
561 | }; | |
562 | ||
bf44b901 ZR |
563 | /* Return the id used for read_raw/write_raw callback */ |
564 | static int get_rid(struct rapl_package *rp) | |
565 | { | |
566 | return rp->lead_cpu >= 0 ? rp->lead_cpu : rp->id; | |
567 | } | |
568 | ||
2d281d81 JP |
569 | /* called after domain detection and package level data are set */ |
570 | static void rapl_init_domains(struct rapl_package *rp) | |
571 | { | |
0c2ddedd ZR |
572 | enum rapl_domain_type i; |
573 | enum rapl_domain_reg_id j; | |
2d281d81 JP |
574 | struct rapl_domain *rd = rp->domains; |
575 | ||
576 | for (i = 0; i < RAPL_DOMAIN_MAX; i++) { | |
577 | unsigned int mask = rp->domain_map & (1 << i); | |
9050a9cd | 578 | int t; |
7fde2712 | 579 | |
0c2ddedd ZR |
580 | if (!mask) |
581 | continue; | |
582 | ||
583 | rd->rp = rp; | |
f1e8d756 ZR |
584 | |
585 | if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) { | |
586 | snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d", | |
bf44b901 ZR |
587 | rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) : |
588 | rp->id); | |
589 | } else { | |
f1e8d756 ZR |
590 | snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s", |
591 | rapl_domain_names[i]); | |
bf44b901 | 592 | } |
f1e8d756 | 593 | |
0c2ddedd | 594 | rd->id = i; |
a38f300b ZR |
595 | |
596 | /* PL1 is supported by default */ | |
597 | rp->priv->limits[i] |= BIT(POWER_LIMIT1); | |
8365a898 | 598 | |
9050a9cd ZR |
599 | for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) { |
600 | if (rp->priv->limits[i] & BIT(t)) | |
601 | rd->rpl[t].name = pl_names[t]; | |
8365a898 SP |
602 | } |
603 | ||
0c2ddedd ZR |
604 | for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++) |
605 | rd->regs[j] = rp->priv->regs[i][j]; | |
606 | ||
0c2ddedd | 607 | rd++; |
2d281d81 JP |
608 | } |
609 | } | |
610 | ||
309557f5 | 611 | static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type, |
3382388d | 612 | u64 value, int to_raw) |
2d281d81 | 613 | { |
3c2c0845 | 614 | u64 units = 1; |
cb532e72 | 615 | struct rapl_defaults *defaults = get_defaults(rd->rp); |
d474a4d3 | 616 | u64 scale = 1; |
2d281d81 | 617 | |
2d281d81 JP |
618 | switch (type) { |
619 | case POWER_UNIT: | |
cb532e72 | 620 | units = rd->power_unit; |
2d281d81 JP |
621 | break; |
622 | case ENERGY_UNIT: | |
d474a4d3 | 623 | scale = ENERGY_UNIT_SCALE; |
cb532e72 | 624 | units = rd->energy_unit; |
2d281d81 JP |
625 | break; |
626 | case TIME_UNIT: | |
cb532e72 | 627 | return defaults->compute_time_window(rd, value, to_raw); |
2d281d81 JP |
628 | case ARBITRARY_UNIT: |
629 | default: | |
630 | return value; | |
a8193af7 | 631 | } |
2d281d81 JP |
632 | |
633 | if (to_raw) | |
d474a4d3 | 634 | return div64_u64(value, units) * scale; |
3c2c0845 JP |
635 | |
636 | value *= units; | |
637 | ||
d474a4d3 | 638 | return div64_u64(value, scale); |
2d281d81 JP |
639 | } |
640 | ||
e12dee18 | 641 | /* RAPL primitives for MSR and MMIO I/F */ |
b4288ce7 | 642 | static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = { |
2d281d81 | 643 | /* name, mask, shift, msr index, unit divisor */ |
11edbe5c | 644 | [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0, |
3382388d | 645 | RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), |
11edbe5c | 646 | [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32, |
3382388d | 647 | RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), |
11edbe5c | 648 | [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0, |
8365a898 | 649 | RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), |
045610c3 ZR |
650 | [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, |
651 | RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), | |
11edbe5c | 652 | [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31, |
3382388d | 653 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), |
f442bd27 ZR |
654 | [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63, |
655 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), | |
11edbe5c | 656 | [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15, |
3382388d | 657 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), |
11edbe5c | 658 | [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16, |
3382388d | 659 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), |
11edbe5c | 660 | [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47, |
3382388d | 661 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), |
11edbe5c | 662 | [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48, |
3382388d | 663 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), |
11edbe5c | 664 | [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17, |
3382388d | 665 | RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), |
11edbe5c | 666 | [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49, |
3382388d | 667 | RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), |
11edbe5c | 668 | [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK, |
3382388d | 669 | 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), |
11edbe5c | 670 | [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32, |
3382388d | 671 | RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), |
11edbe5c | 672 | [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16, |
3382388d | 673 | RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), |
11edbe5c | 674 | [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48, |
3382388d | 675 | RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), |
11edbe5c | 676 | [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, |
3382388d | 677 | RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), |
11edbe5c | 678 | [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0, |
3382388d | 679 | RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0), |
11edbe5c | 680 | [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0, |
931da6a0 | 681 | RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), |
11edbe5c | 682 | [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32, |
931da6a0 | 683 | RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), |
11edbe5c | 684 | [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17, |
931da6a0 | 685 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), |
11edbe5c | 686 | [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49, |
931da6a0 | 687 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), |
11edbe5c | 688 | [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19, |
931da6a0 | 689 | RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), |
11edbe5c | 690 | [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51, |
931da6a0 | 691 | RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), |
2d281d81 | 692 | /* non-hardware */ |
11edbe5c | 693 | [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT, |
3382388d | 694 | RAPL_PRIMITIVE_DERIVED), |
2d281d81 JP |
695 | }; |
696 | ||
e12dee18 ZR |
697 | /* RAPL primitives for TPMI I/F */ |
698 | static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = { | |
699 | /* name, mask, shift, msr index, unit divisor */ | |
700 | [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0, | |
701 | RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0), | |
702 | [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0, | |
703 | RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0), | |
704 | [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0, | |
705 | RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0), | |
706 | [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0, | |
707 | RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0), | |
708 | [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63, | |
709 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), | |
710 | [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63, | |
711 | RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0), | |
712 | [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63, | |
713 | RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), | |
714 | [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, | |
715 | RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0), | |
716 | [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, | |
717 | RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0), | |
718 | [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62, | |
719 | RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0), | |
720 | [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18, | |
721 | RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0), | |
722 | [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18, | |
723 | RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0), | |
724 | [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0, | |
725 | RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), | |
726 | [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36, | |
727 | RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), | |
728 | [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18, | |
729 | RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0), | |
730 | [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54, | |
731 | RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0), | |
732 | [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0, | |
733 | RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0), | |
734 | /* non-hardware */ | |
735 | [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, | |
736 | POWER_UNIT, RAPL_PRIMITIVE_DERIVED), | |
737 | }; | |
738 | ||
98ff639a ZR |
739 | static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim) |
740 | { | |
741 | struct rapl_primitive_info *rpi = rp->priv->rpi; | |
742 | ||
743 | if (prim < 0 || prim > NR_RAPL_PRIMITIVES || !rpi) | |
744 | return NULL; | |
745 | ||
746 | return &rpi[prim]; | |
747 | } | |
748 | ||
e8e28c2a ZR |
749 | static int rapl_config(struct rapl_package *rp) |
750 | { | |
b4288ce7 ZR |
751 | switch (rp->priv->type) { |
752 | /* MMIO I/F shares the same register layout as MSR registers */ | |
753 | case RAPL_IF_MMIO: | |
754 | case RAPL_IF_MSR: | |
755 | rp->priv->defaults = (void *)defaults_msr; | |
756 | rp->priv->rpi = (void *)rpi_msr; | |
757 | break; | |
e12dee18 ZR |
758 | case RAPL_IF_TPMI: |
759 | rp->priv->defaults = (void *)&defaults_tpmi; | |
760 | rp->priv->rpi = (void *)rpi_tpmi; | |
761 | break; | |
b4288ce7 ZR |
762 | default: |
763 | return -EINVAL; | |
764 | } | |
2d1f5006 ZR |
765 | |
766 | /* defaults_msr can be NULL on unsupported platforms */ | |
767 | if (!rp->priv->defaults || !rp->priv->rpi) | |
768 | return -ENODEV; | |
769 | ||
e8e28c2a ZR |
770 | return 0; |
771 | } | |
772 | ||
931da6a0 ZR |
773 | static enum rapl_primitives |
774 | prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim) | |
775 | { | |
e8e28c2a ZR |
776 | struct rapl_defaults *defaults = get_defaults(rd->rp); |
777 | ||
778 | if (!defaults->spr_psys_bits) | |
931da6a0 ZR |
779 | return prim; |
780 | ||
781 | if (rd->id != RAPL_DOMAIN_PLATFORM) | |
782 | return prim; | |
783 | ||
784 | switch (prim) { | |
785 | case POWER_LIMIT1: | |
786 | return PSYS_POWER_LIMIT1; | |
787 | case POWER_LIMIT2: | |
788 | return PSYS_POWER_LIMIT2; | |
789 | case PL1_ENABLE: | |
790 | return PSYS_PL1_ENABLE; | |
791 | case PL2_ENABLE: | |
792 | return PSYS_PL2_ENABLE; | |
793 | case TIME_WINDOW1: | |
794 | return PSYS_TIME_WINDOW1; | |
795 | case TIME_WINDOW2: | |
796 | return PSYS_TIME_WINDOW2; | |
797 | default: | |
798 | return prim; | |
799 | } | |
800 | } | |
801 | ||
2d281d81 JP |
802 | /* Read primitive data based on its related struct rapl_primitive_info. |
803 | * if xlate flag is set, return translated data based on data units, i.e. | |
804 | * time, energy, and power. | |
805 | * RAPL MSRs are non-architectual and are laid out not consistently across | |
806 | * domains. Here we use primitive info to allow writing consolidated access | |
807 | * functions. | |
808 | * For a given primitive, it is processed by MSR mask and shift. Unit conversion | |
809 | * is pre-assigned based on RAPL unit MSRs read at init time. | |
810 | * 63-------------------------- 31--------------------------- 0 | |
811 | * | xxxxx (mask) | | |
812 | * | |<- shift ----------------| | |
813 | * 63-------------------------- 31--------------------------- 0 | |
814 | */ | |
815 | static int rapl_read_data_raw(struct rapl_domain *rd, | |
3382388d | 816 | enum rapl_primitives prim, bool xlate, u64 *data) |
2d281d81 | 817 | { |
beea8df8 | 818 | u64 value; |
931da6a0 | 819 | enum rapl_primitives prim_fixed = prim_fixups(rd, prim); |
98ff639a | 820 | struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); |
beea8df8 | 821 | struct reg_action ra; |
2d281d81 | 822 | |
98ff639a | 823 | if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) |
2d281d81 JP |
824 | return -EINVAL; |
825 | ||
98ff639a | 826 | ra.reg = rd->regs[rpi->id]; |
16e95a62 | 827 | if (!ra.reg.val) |
2d281d81 | 828 | return -EINVAL; |
323ee64a | 829 | |
2d281d81 | 830 | /* non-hardware data are collected by the polling thread */ |
98ff639a | 831 | if (rpi->flag & RAPL_PRIMITIVE_DERIVED) { |
2d281d81 JP |
832 | *data = rd->rdd.primitives[prim]; |
833 | return 0; | |
834 | } | |
835 | ||
98ff639a | 836 | ra.mask = rpi->mask; |
beea8df8 | 837 | |
bf44b901 | 838 | if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { |
16e95a62 | 839 | pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name); |
2d281d81 JP |
840 | return -EIO; |
841 | } | |
842 | ||
98ff639a | 843 | value = ra.value >> rpi->shift; |
beea8df8 | 844 | |
2d281d81 | 845 | if (xlate) |
98ff639a | 846 | *data = rapl_unit_xlate(rd, rpi->unit, value, 0); |
2d281d81 | 847 | else |
beea8df8 | 848 | *data = value; |
2d281d81 JP |
849 | |
850 | return 0; | |
851 | } | |
852 | ||
853 | /* Similar use of primitive info in the read counterpart */ | |
854 | static int rapl_write_data_raw(struct rapl_domain *rd, | |
3382388d ZR |
855 | enum rapl_primitives prim, |
856 | unsigned long long value) | |
2d281d81 | 857 | { |
931da6a0 | 858 | enum rapl_primitives prim_fixed = prim_fixups(rd, prim); |
98ff639a | 859 | struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed); |
f14a1396 | 860 | u64 bits; |
beea8df8 | 861 | struct reg_action ra; |
f14a1396 | 862 | int ret; |
2d281d81 | 863 | |
98ff639a ZR |
864 | if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY) |
865 | return -EINVAL; | |
866 | ||
98ff639a ZR |
867 | bits = rapl_unit_xlate(rd, rpi->unit, value, 1); |
868 | bits <<= rpi->shift; | |
869 | bits &= rpi->mask; | |
edbdabc6 | 870 | |
beea8df8 | 871 | memset(&ra, 0, sizeof(ra)); |
f14a1396 | 872 | |
98ff639a ZR |
873 | ra.reg = rd->regs[rpi->id]; |
874 | ra.mask = rpi->mask; | |
beea8df8 | 875 | ra.value = bits; |
f14a1396 | 876 | |
bf44b901 | 877 | ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra); |
f14a1396 JP |
878 | |
879 | return ret; | |
2d281d81 JP |
880 | } |
881 | ||
9050a9cd ZR |
882 | static int rapl_read_pl_data(struct rapl_domain *rd, int pl, |
883 | enum pl_prims pl_prim, bool xlate, u64 *data) | |
884 | { | |
f442bd27 | 885 | enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); |
9050a9cd ZR |
886 | |
887 | if (!is_pl_valid(rd, pl)) | |
888 | return -EINVAL; | |
889 | ||
890 | return rapl_read_data_raw(rd, prim, xlate, data); | |
891 | } | |
892 | ||
893 | static int rapl_write_pl_data(struct rapl_domain *rd, int pl, | |
894 | enum pl_prims pl_prim, | |
895 | unsigned long long value) | |
896 | { | |
f442bd27 | 897 | enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim); |
9050a9cd ZR |
898 | |
899 | if (!is_pl_valid(rd, pl)) | |
900 | return -EINVAL; | |
901 | ||
f442bd27 | 902 | if (rd->rpl[pl].locked) { |
a60ec448 | 903 | pr_debug("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]); |
9050a9cd ZR |
904 | return -EACCES; |
905 | } | |
906 | ||
907 | return rapl_write_data_raw(rd, prim, value); | |
908 | } | |
3c2c0845 JP |
909 | /* |
910 | * Raw RAPL data stored in MSRs are in certain scales. We need to | |
911 | * convert them into standard units based on the units reported in | |
912 | * the RAPL unit MSRs. This is specific to CPUs as the method to | |
913 | * calculate units differ on different CPUs. | |
914 | * We convert the units to below format based on CPUs. | |
915 | * i.e. | |
d474a4d3 | 916 | * energy unit: picoJoules : Represented in picoJoules by default |
3c2c0845 JP |
917 | * power unit : microWatts : Represented in milliWatts by default |
918 | * time unit : microseconds: Represented in seconds by default | |
919 | */ | |
693c1d78 | 920 | static int rapl_check_unit_core(struct rapl_domain *rd) |
2d281d81 | 921 | { |
1193b165 | 922 | struct reg_action ra; |
2d281d81 JP |
923 | u32 value; |
924 | ||
cb532e72 | 925 | ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; |
1193b165 | 926 | ra.mask = ~0; |
bf44b901 ZR |
927 | if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { |
928 | pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", | |
16e95a62 | 929 | ra.reg.val, rd->rp->name, rd->name); |
2d281d81 JP |
930 | return -ENODEV; |
931 | } | |
932 | ||
1193b165 | 933 | value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; |
cb532e72 | 934 | rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); |
2d281d81 | 935 | |
1193b165 | 936 | value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; |
cb532e72 | 937 | rd->power_unit = 1000000 / (1 << value); |
2d281d81 | 938 | |
1193b165 | 939 | value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; |
cb532e72 | 940 | rd->time_unit = 1000000 / (1 << value); |
2d281d81 | 941 | |
cb532e72 ZR |
942 | pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", |
943 | rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); | |
2d281d81 JP |
944 | |
945 | return 0; | |
946 | } | |
947 | ||
693c1d78 | 948 | static int rapl_check_unit_atom(struct rapl_domain *rd) |
3c2c0845 | 949 | { |
1193b165 | 950 | struct reg_action ra; |
3c2c0845 JP |
951 | u32 value; |
952 | ||
cb532e72 | 953 | ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; |
1193b165 | 954 | ra.mask = ~0; |
bf44b901 ZR |
955 | if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { |
956 | pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", | |
16e95a62 | 957 | ra.reg.val, rd->rp->name, rd->name); |
3c2c0845 JP |
958 | return -ENODEV; |
959 | } | |
1193b165 ZR |
960 | |
961 | value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; | |
cb532e72 | 962 | rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value; |
3c2c0845 | 963 | |
1193b165 | 964 | value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; |
cb532e72 | 965 | rd->power_unit = (1 << value) * 1000; |
3c2c0845 | 966 | |
1193b165 | 967 | value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; |
cb532e72 | 968 | rd->time_unit = 1000000 / (1 << value); |
3c2c0845 | 969 | |
cb532e72 ZR |
970 | pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n", |
971 | rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); | |
3c2c0845 JP |
972 | |
973 | return 0; | |
974 | } | |
975 | ||
f14a1396 JP |
976 | static void power_limit_irq_save_cpu(void *info) |
977 | { | |
978 | u32 l, h = 0; | |
979 | struct rapl_package *rp = (struct rapl_package *)info; | |
980 | ||
981 | /* save the state of PLN irq mask bit before disabling it */ | |
982 | rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); | |
983 | if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) { | |
984 | rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE; | |
985 | rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED; | |
986 | } | |
987 | l &= ~PACKAGE_THERM_INT_PLN_ENABLE; | |
988 | wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | |
989 | } | |
990 | ||
2d281d81 JP |
991 | /* REVISIT: |
992 | * When package power limit is set artificially low by RAPL, LVT | |
993 | * thermal interrupt for package power limit should be ignored | |
994 | * since we are not really exceeding the real limit. The intention | |
995 | * is to avoid excessive interrupts while we are trying to save power. | |
996 | * A useful feature might be routing the package_power_limit interrupt | |
997 | * to userspace via eventfd. once we have a usecase, this is simple | |
998 | * to do by adding an atomic notifier. | |
999 | */ | |
1000 | ||
309557f5 | 1001 | static void package_power_limit_irq_save(struct rapl_package *rp) |
2d281d81 | 1002 | { |
bf44b901 ZR |
1003 | if (rp->lead_cpu < 0) |
1004 | return; | |
1005 | ||
f14a1396 JP |
1006 | if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) |
1007 | return; | |
1008 | ||
323ee64a | 1009 | smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1); |
f14a1396 JP |
1010 | } |
1011 | ||
58705069 TG |
1012 | /* |
1013 | * Restore per package power limit interrupt enable state. Called from cpu | |
1014 | * hotplug code on package removal. | |
1015 | */ | |
1016 | static void package_power_limit_irq_restore(struct rapl_package *rp) | |
f14a1396 | 1017 | { |
58705069 TG |
1018 | u32 l, h; |
1019 | ||
bf44b901 ZR |
1020 | if (rp->lead_cpu < 0) |
1021 | return; | |
1022 | ||
58705069 TG |
1023 | if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN)) |
1024 | return; | |
1025 | ||
1026 | /* irq enable state not saved, nothing to restore */ | |
1027 | if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) | |
1028 | return; | |
f14a1396 JP |
1029 | |
1030 | rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h); | |
1031 | ||
1032 | if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE) | |
1033 | l |= PACKAGE_THERM_INT_PLN_ENABLE; | |
1034 | else | |
1035 | l &= ~PACKAGE_THERM_INT_PLN_ENABLE; | |
1036 | ||
1037 | wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | |
2d281d81 JP |
1038 | } |
1039 | ||
3c2c0845 JP |
1040 | static void set_floor_freq_default(struct rapl_domain *rd, bool mode) |
1041 | { | |
9050a9cd | 1042 | int i; |
3c2c0845 JP |
1043 | |
1044 | /* always enable clamp such that p-state can go below OS requested | |
1045 | * range. power capping priority over guranteed frequency. | |
1046 | */ | |
9050a9cd | 1047 | rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode); |
3c2c0845 | 1048 | |
9050a9cd ZR |
1049 | for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) { |
1050 | rapl_write_pl_data(rd, i, PL_ENABLE, mode); | |
1051 | rapl_write_pl_data(rd, i, PL_CLAMP, mode); | |
3c2c0845 JP |
1052 | } |
1053 | } | |
1054 | ||
1055 | static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) | |
1056 | { | |
1057 | static u32 power_ctrl_orig_val; | |
e8e28c2a | 1058 | struct rapl_defaults *defaults = get_defaults(rd->rp); |
3c2c0845 JP |
1059 | u32 mdata; |
1060 | ||
e8e28c2a | 1061 | if (!defaults->floor_freq_reg_addr) { |
51b63409 AT |
1062 | pr_err("Invalid floor frequency config register\n"); |
1063 | return; | |
1064 | } | |
1065 | ||
3c2c0845 | 1066 | if (!power_ctrl_orig_val) |
4077a387 | 1067 | iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ, |
e8e28c2a | 1068 | defaults->floor_freq_reg_addr, |
4077a387 | 1069 | &power_ctrl_orig_val); |
3c2c0845 JP |
1070 | mdata = power_ctrl_orig_val; |
1071 | if (enable) { | |
1072 | mdata &= ~(0x7f << 8); | |
1073 | mdata |= 1 << 8; | |
1074 | } | |
4077a387 | 1075 | iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE, |
e8e28c2a | 1076 | defaults->floor_freq_reg_addr, mdata); |
3c2c0845 JP |
1077 | } |
1078 | ||
cb532e72 | 1079 | static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value, |
3382388d | 1080 | bool to_raw) |
3c2c0845 | 1081 | { |
3382388d | 1082 | u64 f, y; /* fraction and exp. used for time unit */ |
3c2c0845 JP |
1083 | |
1084 | /* | |
1085 | * Special processing based on 2^Y*(1+F/4), refer | |
1086 | * to Intel Software Developer's manual Vol.3B: CH 14.9.3. | |
1087 | */ | |
1088 | if (!to_raw) { | |
1089 | f = (value & 0x60) >> 5; | |
1090 | y = value & 0x1f; | |
cb532e72 | 1091 | value = (1 << y) * (4 + f) * rd->time_unit / 4; |
3c2c0845 | 1092 | } else { |
cb532e72 | 1093 | if (value < rd->time_unit) |
2d935400 CQ |
1094 | return 0; |
1095 | ||
cb532e72 | 1096 | do_div(value, rd->time_unit); |
3c2c0845 | 1097 | y = ilog2(value); |
cf835b00 ZR |
1098 | |
1099 | /* | |
1100 | * The target hardware field is 7 bits wide, so return all ones | |
1101 | * if the exponent is too large. | |
1102 | */ | |
1103 | if (y > 0x1f) | |
1104 | return 0x7f; | |
1105 | ||
1106 | f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y); | |
3c2c0845 JP |
1107 | value = (y & 0x1f) | ((f & 0x3) << 5); |
1108 | } | |
1109 | return value; | |
1110 | } | |
1111 | ||
cb532e72 | 1112 | static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value, |
3382388d | 1113 | bool to_raw) |
3c2c0845 JP |
1114 | { |
1115 | /* | |
1116 | * Atom time unit encoding is straight forward val * time_unit, | |
1117 | * where time_unit is default to 1 sec. Never 0. | |
1118 | */ | |
1119 | if (!to_raw) | |
cb532e72 | 1120 | return (value) ? value * rd->time_unit : rd->time_unit; |
3382388d | 1121 | |
cb532e72 | 1122 | value = div64_u64(value, rd->time_unit); |
3c2c0845 JP |
1123 | |
1124 | return value; | |
1125 | } | |
1126 | ||
e12dee18 ZR |
1127 | /* TPMI Unit register has different layout */ |
1128 | #define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET | |
1129 | #define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK | |
1130 | #define TPMI_ENERGY_UNIT_OFFSET 0x06 | |
1131 | #define TPMI_ENERGY_UNIT_MASK 0x7C0 | |
1132 | #define TPMI_TIME_UNIT_OFFSET 0x0C | |
1133 | #define TPMI_TIME_UNIT_MASK 0xF000 | |
1134 | ||
1135 | static int rapl_check_unit_tpmi(struct rapl_domain *rd) | |
1136 | { | |
1137 | struct reg_action ra; | |
1138 | u32 value; | |
1139 | ||
1140 | ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT]; | |
1141 | ra.mask = ~0; | |
1142 | if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) { | |
1143 | pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n", | |
16e95a62 | 1144 | ra.reg.val, rd->rp->name, rd->name); |
e12dee18 ZR |
1145 | return -ENODEV; |
1146 | } | |
1147 | ||
1148 | value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET; | |
1149 | rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); | |
1150 | ||
1151 | value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET; | |
1152 | rd->power_unit = 1000000 / (1 << value); | |
1153 | ||
1154 | value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET; | |
1155 | rd->time_unit = 1000000 / (1 << value); | |
1156 | ||
1157 | pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n", | |
1158 | rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit); | |
1159 | ||
1160 | return 0; | |
1161 | } | |
1162 | ||
1163 | static const struct rapl_defaults defaults_tpmi = { | |
1164 | .check_unit = rapl_check_unit_tpmi, | |
1165 | /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */ | |
1166 | .set_floor_freq = set_floor_freq_default, | |
1167 | .compute_time_window = rapl_compute_time_window_core, | |
1168 | }; | |
1169 | ||
087e9cba | 1170 | static const struct rapl_defaults rapl_defaults_core = { |
51b63409 | 1171 | .floor_freq_reg_addr = 0, |
3c2c0845 JP |
1172 | .check_unit = rapl_check_unit_core, |
1173 | .set_floor_freq = set_floor_freq_default, | |
1174 | .compute_time_window = rapl_compute_time_window_core, | |
087e9cba JP |
1175 | }; |
1176 | ||
d474a4d3 JP |
1177 | static const struct rapl_defaults rapl_defaults_hsw_server = { |
1178 | .check_unit = rapl_check_unit_core, | |
1179 | .set_floor_freq = set_floor_freq_default, | |
1180 | .compute_time_window = rapl_compute_time_window_core, | |
1181 | .dram_domain_energy_unit = 15300, | |
1182 | }; | |
1183 | ||
2d798d9f ZR |
1184 | static const struct rapl_defaults rapl_defaults_spr_server = { |
1185 | .check_unit = rapl_check_unit_core, | |
1186 | .set_floor_freq = set_floor_freq_default, | |
1187 | .compute_time_window = rapl_compute_time_window_core, | |
2d798d9f | 1188 | .psys_domain_energy_unit = 1000000000, |
931da6a0 | 1189 | .spr_psys_bits = true, |
2d798d9f ZR |
1190 | }; |
1191 | ||
51b63409 AT |
1192 | static const struct rapl_defaults rapl_defaults_byt = { |
1193 | .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT, | |
1194 | .check_unit = rapl_check_unit_atom, | |
1195 | .set_floor_freq = set_floor_freq_atom, | |
1196 | .compute_time_window = rapl_compute_time_window_atom, | |
1197 | }; | |
1198 | ||
1199 | static const struct rapl_defaults rapl_defaults_tng = { | |
1200 | .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG, | |
3c2c0845 JP |
1201 | .check_unit = rapl_check_unit_atom, |
1202 | .set_floor_freq = set_floor_freq_atom, | |
1203 | .compute_time_window = rapl_compute_time_window_atom, | |
087e9cba JP |
1204 | }; |
1205 | ||
51b63409 AT |
1206 | static const struct rapl_defaults rapl_defaults_ann = { |
1207 | .floor_freq_reg_addr = 0, | |
1208 | .check_unit = rapl_check_unit_atom, | |
1209 | .set_floor_freq = NULL, | |
1210 | .compute_time_window = rapl_compute_time_window_atom, | |
1211 | }; | |
1212 | ||
1213 | static const struct rapl_defaults rapl_defaults_cht = { | |
1214 | .floor_freq_reg_addr = 0, | |
1215 | .check_unit = rapl_check_unit_atom, | |
1216 | .set_floor_freq = NULL, | |
1217 | .compute_time_window = rapl_compute_time_window_atom, | |
1218 | }; | |
1219 | ||
43756a29 VD |
1220 | static const struct rapl_defaults rapl_defaults_amd = { |
1221 | .check_unit = rapl_check_unit_core, | |
1222 | }; | |
1223 | ||
ea85dbca | 1224 | static const struct x86_cpu_id rapl_ids[] __initconst = { |
b9064fb8 TL |
1225 | X86_MATCH_VFM(INTEL_SANDYBRIDGE, &rapl_defaults_core), |
1226 | X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &rapl_defaults_core), | |
1227 | ||
1228 | X86_MATCH_VFM(INTEL_IVYBRIDGE, &rapl_defaults_core), | |
1229 | X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &rapl_defaults_core), | |
1230 | ||
1231 | X86_MATCH_VFM(INTEL_HASWELL, &rapl_defaults_core), | |
1232 | X86_MATCH_VFM(INTEL_HASWELL_L, &rapl_defaults_core), | |
1233 | X86_MATCH_VFM(INTEL_HASWELL_G, &rapl_defaults_core), | |
1234 | X86_MATCH_VFM(INTEL_HASWELL_X, &rapl_defaults_hsw_server), | |
1235 | ||
1236 | X86_MATCH_VFM(INTEL_BROADWELL, &rapl_defaults_core), | |
1237 | X86_MATCH_VFM(INTEL_BROADWELL_G, &rapl_defaults_core), | |
1238 | X86_MATCH_VFM(INTEL_BROADWELL_D, &rapl_defaults_core), | |
1239 | X86_MATCH_VFM(INTEL_BROADWELL_X, &rapl_defaults_hsw_server), | |
1240 | ||
1241 | X86_MATCH_VFM(INTEL_SKYLAKE, &rapl_defaults_core), | |
1242 | X86_MATCH_VFM(INTEL_SKYLAKE_L, &rapl_defaults_core), | |
1243 | X86_MATCH_VFM(INTEL_SKYLAKE_X, &rapl_defaults_hsw_server), | |
1244 | X86_MATCH_VFM(INTEL_KABYLAKE_L, &rapl_defaults_core), | |
1245 | X86_MATCH_VFM(INTEL_KABYLAKE, &rapl_defaults_core), | |
1246 | X86_MATCH_VFM(INTEL_CANNONLAKE_L, &rapl_defaults_core), | |
1247 | X86_MATCH_VFM(INTEL_ICELAKE_L, &rapl_defaults_core), | |
1248 | X86_MATCH_VFM(INTEL_ICELAKE, &rapl_defaults_core), | |
1249 | X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &rapl_defaults_core), | |
1250 | X86_MATCH_VFM(INTEL_ICELAKE_X, &rapl_defaults_hsw_server), | |
1251 | X86_MATCH_VFM(INTEL_ICELAKE_D, &rapl_defaults_hsw_server), | |
1252 | X86_MATCH_VFM(INTEL_COMETLAKE_L, &rapl_defaults_core), | |
1253 | X86_MATCH_VFM(INTEL_COMETLAKE, &rapl_defaults_core), | |
1254 | X86_MATCH_VFM(INTEL_TIGERLAKE_L, &rapl_defaults_core), | |
1255 | X86_MATCH_VFM(INTEL_TIGERLAKE, &rapl_defaults_core), | |
1256 | X86_MATCH_VFM(INTEL_ROCKETLAKE, &rapl_defaults_core), | |
1257 | X86_MATCH_VFM(INTEL_ALDERLAKE, &rapl_defaults_core), | |
1258 | X86_MATCH_VFM(INTEL_ALDERLAKE_L, &rapl_defaults_core), | |
1259 | X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &rapl_defaults_core), | |
1260 | X86_MATCH_VFM(INTEL_RAPTORLAKE, &rapl_defaults_core), | |
1261 | X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &rapl_defaults_core), | |
1262 | X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &rapl_defaults_core), | |
1263 | X86_MATCH_VFM(INTEL_METEORLAKE, &rapl_defaults_core), | |
1264 | X86_MATCH_VFM(INTEL_METEORLAKE_L, &rapl_defaults_core), | |
1265 | X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), | |
1266 | X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server), | |
1267 | X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core), | |
1268 | X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core), | |
1269 | X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core), | |
1270 | X86_MATCH_VFM(INTEL_LAKEFIELD, &rapl_defaults_core), | |
1271 | ||
1272 | X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt), | |
1273 | X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht), | |
1274 | X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng), | |
1275 | X86_MATCH_VFM(INTEL_ATOM_AIRMONT_MID, &rapl_defaults_ann), | |
1276 | X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core), | |
1277 | X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core), | |
1278 | X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core), | |
1279 | X86_MATCH_VFM(INTEL_ATOM_TREMONT, &rapl_defaults_core), | |
1280 | X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &rapl_defaults_core), | |
1281 | X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &rapl_defaults_core), | |
1282 | ||
1283 | X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &rapl_defaults_hsw_server), | |
1284 | X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &rapl_defaults_hsw_server), | |
43756a29 VD |
1285 | |
1286 | X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd), | |
8a9d881f | 1287 | X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd), |
a7405612 | 1288 | X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd), |
2d281d81 JP |
1289 | {} |
1290 | }; | |
1291 | MODULE_DEVICE_TABLE(x86cpu, rapl_ids); | |
1292 | ||
bed5ab63 TG |
1293 | /* Read once for all raw primitive data for domains */ |
1294 | static void rapl_update_domain_data(struct rapl_package *rp) | |
2d281d81 JP |
1295 | { |
1296 | int dmn, prim; | |
1297 | u64 val; | |
2d281d81 | 1298 | |
bed5ab63 | 1299 | for (dmn = 0; dmn < rp->nr_domains; dmn++) { |
9ea7612c | 1300 | pr_debug("update %s domain %s data\n", rp->name, |
bed5ab63 TG |
1301 | rp->domains[dmn].name); |
1302 | /* exclude non-raw primitives */ | |
1303 | for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) { | |
98ff639a ZR |
1304 | struct rapl_primitive_info *rpi = get_rpi(rp, prim); |
1305 | ||
bed5ab63 | 1306 | if (!rapl_read_data_raw(&rp->domains[dmn], prim, |
98ff639a | 1307 | rpi->unit, &val)) |
3382388d | 1308 | rp->domains[dmn].rdd.primitives[prim] = val; |
2d281d81 JP |
1309 | } |
1310 | } | |
1311 | ||
1312 | } | |
1313 | ||
2d281d81 JP |
1314 | static int rapl_package_register_powercap(struct rapl_package *rp) |
1315 | { | |
1316 | struct rapl_domain *rd; | |
2d281d81 | 1317 | struct powercap_zone *power_zone = NULL; |
01857cf7 | 1318 | int nr_pl, ret; |
bed5ab63 TG |
1319 | |
1320 | /* Update the domain data of the new package */ | |
1321 | rapl_update_domain_data(rp); | |
2d281d81 | 1322 | |
3382388d | 1323 | /* first we register package domain as the parent zone */ |
2d281d81 JP |
1324 | for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { |
1325 | if (rd->id == RAPL_DOMAIN_PACKAGE) { | |
1326 | nr_pl = find_nr_power_limit(rd); | |
9ea7612c | 1327 | pr_debug("register package domain %s\n", rp->name); |
2d281d81 | 1328 | power_zone = powercap_register_zone(&rd->power_zone, |
3382388d ZR |
1329 | rp->priv->control_type, rp->name, |
1330 | NULL, &zone_ops[rd->id], nr_pl, | |
1331 | &constraint_ops); | |
2d281d81 | 1332 | if (IS_ERR(power_zone)) { |
9ea7612c | 1333 | pr_debug("failed to register power zone %s\n", |
3382388d | 1334 | rp->name); |
bed5ab63 | 1335 | return PTR_ERR(power_zone); |
2d281d81 JP |
1336 | } |
1337 | /* track parent zone in per package/socket data */ | |
1338 | rp->power_zone = power_zone; | |
1339 | /* done, only one package domain per socket */ | |
1340 | break; | |
1341 | } | |
1342 | } | |
1343 | if (!power_zone) { | |
1344 | pr_err("no package domain found, unknown topology!\n"); | |
bed5ab63 | 1345 | return -ENODEV; |
2d281d81 | 1346 | } |
3382388d | 1347 | /* now register domains as children of the socket/package */ |
2d281d81 | 1348 | for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { |
f1e8d756 ZR |
1349 | struct powercap_zone *parent = rp->power_zone; |
1350 | ||
2d281d81 JP |
1351 | if (rd->id == RAPL_DOMAIN_PACKAGE) |
1352 | continue; | |
f1e8d756 ZR |
1353 | if (rd->id == RAPL_DOMAIN_PLATFORM) |
1354 | parent = NULL; | |
2d281d81 JP |
1355 | /* number of power limits per domain varies */ |
1356 | nr_pl = find_nr_power_limit(rd); | |
1357 | power_zone = powercap_register_zone(&rd->power_zone, | |
3382388d | 1358 | rp->priv->control_type, |
f1e8d756 | 1359 | rd->name, parent, |
3382388d ZR |
1360 | &zone_ops[rd->id], nr_pl, |
1361 | &constraint_ops); | |
2d281d81 JP |
1362 | |
1363 | if (IS_ERR(power_zone)) { | |
9ea7612c | 1364 | pr_debug("failed to register power_zone, %s:%s\n", |
3382388d | 1365 | rp->name, rd->name); |
2d281d81 JP |
1366 | ret = PTR_ERR(power_zone); |
1367 | goto err_cleanup; | |
1368 | } | |
1369 | } | |
bed5ab63 | 1370 | return 0; |
2d281d81 | 1371 | |
2d281d81 | 1372 | err_cleanup: |
58705069 TG |
1373 | /* |
1374 | * Clean up previously initialized domains within the package if we | |
2d281d81 JP |
1375 | * failed after the first domain setup. |
1376 | */ | |
1377 | while (--rd >= rp->domains) { | |
9ea7612c | 1378 | pr_debug("unregister %s domain %s\n", rp->name, rd->name); |
3382388d ZR |
1379 | powercap_unregister_zone(rp->priv->control_type, |
1380 | &rd->power_zone); | |
2d281d81 JP |
1381 | } |
1382 | ||
1383 | return ret; | |
1384 | } | |
1385 | ||
693c1d78 | 1386 | static int rapl_check_domain(int domain, struct rapl_package *rp) |
2d281d81 | 1387 | { |
1193b165 | 1388 | struct reg_action ra; |
2d281d81 JP |
1389 | |
1390 | switch (domain) { | |
1391 | case RAPL_DOMAIN_PACKAGE: | |
2d281d81 | 1392 | case RAPL_DOMAIN_PP0: |
2d281d81 | 1393 | case RAPL_DOMAIN_PP1: |
2d281d81 | 1394 | case RAPL_DOMAIN_DRAM: |
f1e8d756 | 1395 | case RAPL_DOMAIN_PLATFORM: |
1193b165 | 1396 | ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS]; |
2d281d81 JP |
1397 | break; |
1398 | default: | |
1399 | pr_err("invalid domain id %d\n", domain); | |
1400 | return -EINVAL; | |
1401 | } | |
9d31c676 JP |
1402 | /* make sure domain counters are available and contains non-zero |
1403 | * values, otherwise skip it. | |
7b874772 | 1404 | */ |
1193b165 | 1405 | |
7a57e9f1 | 1406 | ra.mask = ENERGY_STATUS_MASK; |
bf44b901 | 1407 | if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value) |
9d31c676 | 1408 | return -ENODEV; |
2d281d81 | 1409 | |
9d31c676 | 1410 | return 0; |
2d281d81 JP |
1411 | } |
1412 | ||
cb532e72 ZR |
1413 | /* |
1414 | * Get per domain energy/power/time unit. | |
1415 | * RAPL Interfaces without per domain unit register will use the package | |
1416 | * scope unit register to set per domain units. | |
1417 | */ | |
1418 | static int rapl_get_domain_unit(struct rapl_domain *rd) | |
1419 | { | |
1420 | struct rapl_defaults *defaults = get_defaults(rd->rp); | |
1421 | int ret; | |
1422 | ||
16e95a62 ZR |
1423 | if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) { |
1424 | if (!rd->rp->priv->reg_unit.val) { | |
cb532e72 ZR |
1425 | pr_err("No valid Unit register found\n"); |
1426 | return -ENODEV; | |
1427 | } | |
1428 | rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit; | |
1429 | } | |
1430 | ||
1431 | if (!defaults->check_unit) { | |
1432 | pr_err("missing .check_unit() callback\n"); | |
1433 | return -ENODEV; | |
1434 | } | |
1435 | ||
693c1d78 | 1436 | ret = defaults->check_unit(rd); |
cb532e72 ZR |
1437 | if (ret) |
1438 | return ret; | |
1439 | ||
1440 | if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit) | |
1441 | rd->energy_unit = defaults->dram_domain_energy_unit; | |
1442 | if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit) | |
1443 | rd->energy_unit = defaults->psys_domain_energy_unit; | |
1444 | return 0; | |
1445 | } | |
1446 | ||
e1399ba2 JP |
1447 | /* |
1448 | * Check if power limits are available. Two cases when they are not available: | |
1449 | * 1. Locked by BIOS, in this case we still provide read-only access so that | |
1450 | * users can see what limit is set by the BIOS. | |
1451 | * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not | |
3382388d | 1452 | * exist at all. In this case, we do not show the constraints in powercap. |
e1399ba2 JP |
1453 | * |
1454 | * Called after domains are detected and initialized. | |
1455 | */ | |
1456 | static void rapl_detect_powerlimit(struct rapl_domain *rd) | |
1457 | { | |
1458 | u64 val64; | |
1459 | int i; | |
1460 | ||
f442bd27 ZR |
1461 | for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { |
1462 | if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) { | |
1463 | if (val64) { | |
1464 | rd->rpl[i].locked = true; | |
1465 | pr_info("%s:%s:%s locked by BIOS\n", | |
1466 | rd->rp->name, rd->name, pl_names[i]); | |
1467 | } | |
e1399ba2 | 1468 | } |
3382388d | 1469 | |
081690e9 | 1470 | if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64)) |
e1399ba2 JP |
1471 | rd->rpl[i].name = NULL; |
1472 | } | |
1473 | } | |
1474 | ||
2d281d81 JP |
1475 | /* Detect active and valid domains for the given CPU, caller must |
1476 | * ensure the CPU belongs to the targeted package and CPU hotlug is disabled. | |
1477 | */ | |
693c1d78 | 1478 | static int rapl_detect_domains(struct rapl_package *rp) |
2d281d81 | 1479 | { |
2d281d81 | 1480 | struct rapl_domain *rd; |
58705069 | 1481 | int i; |
2d281d81 JP |
1482 | |
1483 | for (i = 0; i < RAPL_DOMAIN_MAX; i++) { | |
1484 | /* use physical package id to read counters */ | |
693c1d78 | 1485 | if (!rapl_check_domain(i, rp)) { |
2d281d81 | 1486 | rp->domain_map |= 1 << i; |
fcdf1797 JP |
1487 | pr_info("Found RAPL domain %s\n", rapl_domain_names[i]); |
1488 | } | |
2d281d81 | 1489 | } |
3382388d | 1490 | rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); |
2d281d81 | 1491 | if (!rp->nr_domains) { |
9ea7612c | 1492 | pr_debug("no valid rapl domains found in %s\n", rp->name); |
58705069 | 1493 | return -ENODEV; |
2d281d81 | 1494 | } |
9ea7612c | 1495 | pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name); |
2d281d81 | 1496 | |
2fa00769 | 1497 | rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain), |
3382388d | 1498 | GFP_KERNEL); |
58705069 TG |
1499 | if (!rp->domains) |
1500 | return -ENOMEM; | |
1501 | ||
2d281d81 JP |
1502 | rapl_init_domains(rp); |
1503 | ||
cb532e72 ZR |
1504 | for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { |
1505 | rapl_get_domain_unit(rd); | |
e1399ba2 | 1506 | rapl_detect_powerlimit(rd); |
cb532e72 | 1507 | } |
e1399ba2 | 1508 | |
2d281d81 JP |
1509 | return 0; |
1510 | } | |
1511 | ||
575024a8 ZR |
1512 | #ifdef CONFIG_PERF_EVENTS |
1513 | ||
1514 | /* | |
1515 | * Support for RAPL PMU | |
1516 | * | |
1517 | * Register a PMU if any of the registered RAPL Packages have the requirement | |
1518 | * of exposing its energy counters via Perf PMU. | |
1519 | * | |
1520 | * PMU Name: | |
1521 | * power | |
1522 | * | |
1523 | * Events: | |
1524 | * Name Event id RAPL Domain | |
1525 | * energy_cores 0x01 RAPL_DOMAIN_PP0 | |
1526 | * energy_pkg 0x02 RAPL_DOMAIN_PACKAGE | |
1527 | * energy_ram 0x03 RAPL_DOMAIN_DRAM | |
1528 | * energy_gpu 0x04 RAPL_DOMAIN_PP1 | |
1529 | * energy_psys 0x05 RAPL_DOMAIN_PLATFORM | |
1530 | * | |
1531 | * Unit: | |
1532 | * Joules | |
1533 | * | |
1534 | * Scale: | |
1535 | * 2.3283064365386962890625e-10 | |
1536 | * The same RAPL domain in different RAPL Packages may have different | |
1537 | * energy units. Use 2.3283064365386962890625e-10 (2^-32) Joules as | |
1538 | * the fixed unit for all energy counters, and convert each hardware | |
1539 | * counter increase to N times of PMU event counter increases. | |
1540 | * | |
1541 | * This is fully compatible with the current MSR RAPL PMU. This means that | |
1542 | * userspace programs like turbostat can use the same code to handle RAPL Perf | |
1543 | * PMU, no matter what RAPL Interface driver (MSR/TPMI, etc) is running | |
1544 | * underlying on the platform. | |
1545 | * | |
1546 | * Note that RAPL Packages can be probed/removed dynamically, and the events | |
1547 | * supported by each TPMI RAPL device can be different. Thus the RAPL PMU | |
1548 | * support is done on demand, which means | |
1549 | * 1. PMU is registered only if it is needed by a RAPL Package. PMU events for | |
1550 | * unsupported counters are not exposed. | |
1551 | * 2. PMU is unregistered and registered when a new RAPL Package is probed and | |
1552 | * supports new counters that are not supported by current PMU. | |
1553 | * 3. PMU is unregistered when all registered RAPL Packages don't need PMU. | |
1554 | */ | |
1555 | ||
1556 | struct rapl_pmu { | |
1557 | struct pmu pmu; /* Perf PMU structure */ | |
1558 | u64 timer_ms; /* Maximum expiration time to avoid counter overflow */ | |
1559 | unsigned long domain_map; /* Events supported by current registered PMU */ | |
1560 | bool registered; /* Whether the PMU has been registered or not */ | |
1561 | }; | |
1562 | ||
1563 | static struct rapl_pmu rapl_pmu; | |
1564 | ||
1565 | /* PMU helpers */ | |
1566 | ||
1567 | static int get_pmu_cpu(struct rapl_package *rp) | |
1568 | { | |
1569 | int cpu; | |
1570 | ||
1571 | if (!rp->has_pmu) | |
1572 | return nr_cpu_ids; | |
1573 | ||
1574 | /* Only TPMI RAPL is supported for now */ | |
1575 | if (rp->priv->type != RAPL_IF_TPMI) | |
1576 | return nr_cpu_ids; | |
1577 | ||
1578 | /* TPMI RAPL uses any CPU in the package for PMU */ | |
1579 | for_each_online_cpu(cpu) | |
1580 | if (topology_physical_package_id(cpu) == rp->id) | |
1581 | return cpu; | |
1582 | ||
1583 | return nr_cpu_ids; | |
1584 | } | |
1585 | ||
1586 | static bool is_rp_pmu_cpu(struct rapl_package *rp, int cpu) | |
1587 | { | |
1588 | if (!rp->has_pmu) | |
1589 | return false; | |
1590 | ||
1591 | /* Only TPMI RAPL is supported for now */ | |
1592 | if (rp->priv->type != RAPL_IF_TPMI) | |
1593 | return false; | |
1594 | ||
1595 | /* TPMI RAPL uses any CPU in the package for PMU */ | |
1596 | return topology_physical_package_id(cpu) == rp->id; | |
1597 | } | |
1598 | ||
1599 | static struct rapl_package_pmu_data *event_to_pmu_data(struct perf_event *event) | |
1600 | { | |
1601 | struct rapl_package *rp = event->pmu_private; | |
1602 | ||
1603 | return &rp->pmu_data; | |
1604 | } | |
1605 | ||
1606 | /* PMU event callbacks */ | |
1607 | ||
1608 | static u64 event_read_counter(struct perf_event *event) | |
1609 | { | |
1610 | struct rapl_package *rp = event->pmu_private; | |
1611 | u64 val; | |
1612 | int ret; | |
1613 | ||
1614 | /* Return 0 for unsupported events */ | |
1615 | if (event->hw.idx < 0) | |
1616 | return 0; | |
1617 | ||
1618 | ret = rapl_read_data_raw(&rp->domains[event->hw.idx], ENERGY_COUNTER, false, &val); | |
1619 | ||
1620 | /* Return 0 for failed read */ | |
1621 | if (ret) | |
1622 | return 0; | |
1623 | ||
1624 | return val; | |
1625 | } | |
1626 | ||
1627 | static void __rapl_pmu_event_start(struct perf_event *event) | |
1628 | { | |
1629 | struct rapl_package_pmu_data *data = event_to_pmu_data(event); | |
1630 | ||
1631 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | |
1632 | return; | |
1633 | ||
1634 | event->hw.state = 0; | |
1635 | ||
1636 | list_add_tail(&event->active_entry, &data->active_list); | |
1637 | ||
1638 | local64_set(&event->hw.prev_count, event_read_counter(event)); | |
1639 | if (++data->n_active == 1) | |
1640 | hrtimer_start(&data->hrtimer, data->timer_interval, | |
1641 | HRTIMER_MODE_REL_PINNED); | |
1642 | } | |
1643 | ||
1644 | static void rapl_pmu_event_start(struct perf_event *event, int mode) | |
1645 | { | |
1646 | struct rapl_package_pmu_data *data = event_to_pmu_data(event); | |
1647 | unsigned long flags; | |
1648 | ||
1649 | raw_spin_lock_irqsave(&data->lock, flags); | |
1650 | __rapl_pmu_event_start(event); | |
1651 | raw_spin_unlock_irqrestore(&data->lock, flags); | |
1652 | } | |
1653 | ||
1654 | static u64 rapl_event_update(struct perf_event *event) | |
1655 | { | |
1656 | struct hw_perf_event *hwc = &event->hw; | |
1657 | struct rapl_package_pmu_data *data = event_to_pmu_data(event); | |
1658 | u64 prev_raw_count, new_raw_count; | |
1659 | s64 delta, sdelta; | |
1660 | ||
1661 | /* | |
1662 | * Follow the generic code to drain hwc->prev_count. | |
1663 | * The loop is not expected to run for multiple times. | |
1664 | */ | |
1665 | prev_raw_count = local64_read(&hwc->prev_count); | |
1666 | do { | |
1667 | new_raw_count = event_read_counter(event); | |
1668 | } while (!local64_try_cmpxchg(&hwc->prev_count, | |
1669 | &prev_raw_count, new_raw_count)); | |
1670 | ||
1671 | ||
1672 | /* | |
1673 | * Now we have the new raw value and have updated the prev | |
1674 | * timestamp already. We can now calculate the elapsed delta | |
1675 | * (event-)time and add that to the generic event. | |
1676 | */ | |
1677 | delta = new_raw_count - prev_raw_count; | |
1678 | ||
1679 | /* | |
1680 | * Scale delta to smallest unit (2^-32) | |
1681 | * users must then scale back: count * 1/(1e9*2^32) to get Joules | |
1682 | * or use ldexp(count, -32). | |
1683 | * Watts = Joules/Time delta | |
1684 | */ | |
1685 | sdelta = delta * data->scale[event->hw.flags]; | |
1686 | ||
1687 | local64_add(sdelta, &event->count); | |
1688 | ||
1689 | return new_raw_count; | |
1690 | } | |
1691 | ||
1692 | static void rapl_pmu_event_stop(struct perf_event *event, int mode) | |
1693 | { | |
1694 | struct rapl_package_pmu_data *data = event_to_pmu_data(event); | |
1695 | struct hw_perf_event *hwc = &event->hw; | |
1696 | unsigned long flags; | |
1697 | ||
1698 | raw_spin_lock_irqsave(&data->lock, flags); | |
1699 | ||
1700 | /* Mark event as deactivated and stopped */ | |
1701 | if (!(hwc->state & PERF_HES_STOPPED)) { | |
1702 | WARN_ON_ONCE(data->n_active <= 0); | |
1703 | if (--data->n_active == 0) | |
1704 | hrtimer_cancel(&data->hrtimer); | |
1705 | ||
1706 | list_del(&event->active_entry); | |
1707 | ||
1708 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | |
1709 | hwc->state |= PERF_HES_STOPPED; | |
1710 | } | |
1711 | ||
1712 | /* Check if update of sw counter is necessary */ | |
1713 | if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | |
1714 | /* | |
1715 | * Drain the remaining delta count out of a event | |
1716 | * that we are disabling: | |
1717 | */ | |
1718 | rapl_event_update(event); | |
1719 | hwc->state |= PERF_HES_UPTODATE; | |
1720 | } | |
1721 | ||
1722 | raw_spin_unlock_irqrestore(&data->lock, flags); | |
1723 | } | |
1724 | ||
1725 | static int rapl_pmu_event_add(struct perf_event *event, int mode) | |
1726 | { | |
1727 | struct rapl_package_pmu_data *data = event_to_pmu_data(event); | |
1728 | struct hw_perf_event *hwc = &event->hw; | |
1729 | unsigned long flags; | |
1730 | ||
1731 | raw_spin_lock_irqsave(&data->lock, flags); | |
1732 | ||
1733 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | |
1734 | ||
1735 | if (mode & PERF_EF_START) | |
1736 | __rapl_pmu_event_start(event); | |
1737 | ||
1738 | raw_spin_unlock_irqrestore(&data->lock, flags); | |
1739 | ||
1740 | return 0; | |
1741 | } | |
1742 | ||
1743 | static void rapl_pmu_event_del(struct perf_event *event, int flags) | |
1744 | { | |
1745 | rapl_pmu_event_stop(event, PERF_EF_UPDATE); | |
1746 | } | |
1747 | ||
1748 | /* RAPL PMU event ids, same as shown in sysfs */ | |
1749 | enum perf_rapl_events { | |
1750 | PERF_RAPL_PP0 = 1, /* all cores */ | |
1751 | PERF_RAPL_PKG, /* entire package */ | |
1752 | PERF_RAPL_RAM, /* DRAM */ | |
1753 | PERF_RAPL_PP1, /* gpu */ | |
1754 | PERF_RAPL_PSYS, /* psys */ | |
1755 | PERF_RAPL_MAX | |
1756 | }; | |
1757 | #define RAPL_EVENT_MASK GENMASK(7, 0) | |
1758 | ||
1759 | static const int event_to_domain[PERF_RAPL_MAX] = { | |
1760 | [PERF_RAPL_PP0] = RAPL_DOMAIN_PP0, | |
1761 | [PERF_RAPL_PKG] = RAPL_DOMAIN_PACKAGE, | |
1762 | [PERF_RAPL_RAM] = RAPL_DOMAIN_DRAM, | |
1763 | [PERF_RAPL_PP1] = RAPL_DOMAIN_PP1, | |
1764 | [PERF_RAPL_PSYS] = RAPL_DOMAIN_PLATFORM, | |
1765 | }; | |
1766 | ||
1767 | static int rapl_pmu_event_init(struct perf_event *event) | |
1768 | { | |
1769 | struct rapl_package *pos, *rp = NULL; | |
1770 | u64 cfg = event->attr.config & RAPL_EVENT_MASK; | |
1771 | int domain, idx; | |
1772 | ||
1773 | /* Only look at RAPL events */ | |
1774 | if (event->attr.type != event->pmu->type) | |
1775 | return -ENOENT; | |
1776 | ||
1777 | /* Check for supported events only */ | |
1778 | if (!cfg || cfg >= PERF_RAPL_MAX) | |
1779 | return -EINVAL; | |
1780 | ||
1781 | if (event->cpu < 0) | |
1782 | return -EINVAL; | |
1783 | ||
1784 | /* Find out which Package the event belongs to */ | |
1785 | list_for_each_entry(pos, &rapl_packages, plist) { | |
1786 | if (is_rp_pmu_cpu(pos, event->cpu)) { | |
1787 | rp = pos; | |
1788 | break; | |
1789 | } | |
1790 | } | |
1791 | if (!rp) | |
1792 | return -ENODEV; | |
1793 | ||
1794 | /* Find out which RAPL Domain the event belongs to */ | |
1795 | domain = event_to_domain[cfg]; | |
1796 | ||
1797 | event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; | |
1798 | event->pmu_private = rp; /* Which package */ | |
1799 | event->hw.flags = domain; /* Which domain */ | |
1800 | ||
1801 | event->hw.idx = -1; | |
1802 | /* Find out the index in rp->domains[] to get domain pointer */ | |
1803 | for (idx = 0; idx < rp->nr_domains; idx++) { | |
1804 | if (rp->domains[idx].id == domain) { | |
1805 | event->hw.idx = idx; | |
1806 | break; | |
1807 | } | |
1808 | } | |
1809 | ||
1810 | return 0; | |
1811 | } | |
1812 | ||
/* pmu::read callback: bring event->count up to date; the raw value is not needed. */
static void rapl_pmu_event_read(struct perf_event *event)
{
	(void)rapl_event_update(event);
}
1817 | ||
1818 | static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) | |
1819 | { | |
1820 | struct rapl_package_pmu_data *data = | |
1821 | container_of(hrtimer, struct rapl_package_pmu_data, hrtimer); | |
1822 | struct perf_event *event; | |
1823 | unsigned long flags; | |
1824 | ||
1825 | if (!data->n_active) | |
1826 | return HRTIMER_NORESTART; | |
1827 | ||
1828 | raw_spin_lock_irqsave(&data->lock, flags); | |
1829 | ||
1830 | list_for_each_entry(event, &data->active_list, active_entry) | |
1831 | rapl_event_update(event); | |
1832 | ||
1833 | raw_spin_unlock_irqrestore(&data->lock, flags); | |
1834 | ||
1835 | hrtimer_forward_now(hrtimer, data->timer_interval); | |
1836 | ||
1837 | return HRTIMER_RESTART; | |
1838 | } | |
1839 | ||
1840 | /* PMU sysfs attributes */ | |
1841 | ||
1842 | /* | |
1843 | * There are no default events, but we need to create "events" group (with | |
1844 | * empty attrs) before updating it with detected events. | |
1845 | */ | |
1846 | static struct attribute *attrs_empty[] = { | |
1847 | NULL, | |
1848 | }; | |
1849 | ||
1850 | static struct attribute_group pmu_events_group = { | |
1851 | .name = "events", | |
1852 | .attrs = attrs_empty, | |
1853 | }; | |
1854 | ||
1855 | static ssize_t cpumask_show(struct device *dev, | |
1856 | struct device_attribute *attr, char *buf) | |
1857 | { | |
1858 | struct rapl_package *rp; | |
1859 | cpumask_var_t cpu_mask; | |
1860 | int cpu; | |
1861 | int ret; | |
1862 | ||
1863 | if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL)) | |
1864 | return -ENOMEM; | |
1865 | ||
1866 | cpus_read_lock(); | |
1867 | ||
1868 | cpumask_clear(cpu_mask); | |
1869 | ||
1870 | /* Choose a cpu for each RAPL Package */ | |
1871 | list_for_each_entry(rp, &rapl_packages, plist) { | |
1872 | cpu = get_pmu_cpu(rp); | |
1873 | if (cpu < nr_cpu_ids) | |
1874 | cpumask_set_cpu(cpu, cpu_mask); | |
1875 | } | |
1876 | cpus_read_unlock(); | |
1877 | ||
1878 | ret = cpumap_print_to_pagebuf(true, buf, cpu_mask); | |
1879 | ||
1880 | free_cpumask_var(cpu_mask); | |
1881 | ||
1882 | return ret; | |
1883 | } | |
1884 | ||
1885 | static DEVICE_ATTR_RO(cpumask); | |
1886 | ||
1887 | static struct attribute *pmu_cpumask_attrs[] = { | |
1888 | &dev_attr_cpumask.attr, | |
1889 | NULL | |
1890 | }; | |
1891 | ||
1892 | static struct attribute_group pmu_cpumask_group = { | |
1893 | .attrs = pmu_cpumask_attrs, | |
1894 | }; | |
1895 | ||
1896 | PMU_FORMAT_ATTR(event, "config:0-7"); | |
1897 | static struct attribute *pmu_format_attr[] = { | |
1898 | &format_attr_event.attr, | |
1899 | NULL | |
1900 | }; | |
1901 | ||
1902 | static struct attribute_group pmu_format_group = { | |
1903 | .name = "format", | |
1904 | .attrs = pmu_format_attr, | |
1905 | }; | |
1906 | ||
1907 | static const struct attribute_group *pmu_attr_groups[] = { | |
1908 | &pmu_events_group, | |
1909 | &pmu_cpumask_group, | |
1910 | &pmu_format_group, | |
1911 | NULL | |
1912 | }; | |
1913 | ||
1914 | #define RAPL_EVENT_ATTR_STR(_name, v, str) \ | |
1915 | static struct perf_pmu_events_attr event_attr_##v = { \ | |
1916 | .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ | |
1917 | .event_str = str, \ | |
1918 | } | |
1919 | ||
1920 | RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); | |
1921 | RAPL_EVENT_ATTR_STR(energy-pkg, rapl_pkg, "event=0x02"); | |
1922 | RAPL_EVENT_ATTR_STR(energy-ram, rapl_ram, "event=0x03"); | |
1923 | RAPL_EVENT_ATTR_STR(energy-gpu, rapl_gpu, "event=0x04"); | |
1924 | RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); | |
1925 | ||
1926 | RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_unit_cores, "Joules"); | |
1927 | RAPL_EVENT_ATTR_STR(energy-pkg.unit, rapl_unit_pkg, "Joules"); | |
1928 | RAPL_EVENT_ATTR_STR(energy-ram.unit, rapl_unit_ram, "Joules"); | |
1929 | RAPL_EVENT_ATTR_STR(energy-gpu.unit, rapl_unit_gpu, "Joules"); | |
1930 | RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_unit_psys, "Joules"); | |
1931 | ||
1932 | RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_scale_cores, "2.3283064365386962890625e-10"); | |
1933 | RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_scale_pkg, "2.3283064365386962890625e-10"); | |
1934 | RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_scale_ram, "2.3283064365386962890625e-10"); | |
1935 | RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_scale_gpu, "2.3283064365386962890625e-10"); | |
1936 | RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_scale_psys, "2.3283064365386962890625e-10"); | |
1937 | ||
1938 | #define RAPL_EVENT_GROUP(_name, domain) \ | |
1939 | static struct attribute *pmu_attr_##_name[] = { \ | |
1940 | &event_attr_rapl_##_name.attr.attr, \ | |
1941 | &event_attr_rapl_unit_##_name.attr.attr, \ | |
1942 | &event_attr_rapl_scale_##_name.attr.attr, \ | |
1943 | NULL \ | |
1944 | }; \ | |
1945 | static umode_t is_visible_##_name(struct kobject *kobj, struct attribute *attr, int event) \ | |
1946 | { \ | |
1947 | return rapl_pmu.domain_map & BIT(domain) ? attr->mode : 0; \ | |
1948 | } \ | |
1949 | static struct attribute_group pmu_group_##_name = { \ | |
1950 | .name = "events", \ | |
1951 | .attrs = pmu_attr_##_name, \ | |
1952 | .is_visible = is_visible_##_name, \ | |
1953 | } | |
1954 | ||
1955 | RAPL_EVENT_GROUP(cores, RAPL_DOMAIN_PP0); | |
1956 | RAPL_EVENT_GROUP(pkg, RAPL_DOMAIN_PACKAGE); | |
1957 | RAPL_EVENT_GROUP(ram, RAPL_DOMAIN_DRAM); | |
1958 | RAPL_EVENT_GROUP(gpu, RAPL_DOMAIN_PP1); | |
1959 | RAPL_EVENT_GROUP(psys, RAPL_DOMAIN_PLATFORM); | |
1960 | ||
1961 | static const struct attribute_group *pmu_attr_update[] = { | |
1962 | &pmu_group_cores, | |
1963 | &pmu_group_pkg, | |
1964 | &pmu_group_ram, | |
1965 | &pmu_group_gpu, | |
1966 | &pmu_group_psys, | |
1967 | NULL | |
1968 | }; | |
1969 | ||
1970 | static int rapl_pmu_update(struct rapl_package *rp) | |
1971 | { | |
1972 | int ret = 0; | |
1973 | ||
1974 | /* Return if PMU already covers all events supported by current RAPL Package */ | |
1975 | if (rapl_pmu.registered && !(rp->domain_map & (~rapl_pmu.domain_map))) | |
1976 | goto end; | |
1977 | ||
1978 | /* Unregister previous registered PMU */ | |
1979 | if (rapl_pmu.registered) | |
1980 | perf_pmu_unregister(&rapl_pmu.pmu); | |
1981 | ||
1982 | rapl_pmu.registered = false; | |
1983 | rapl_pmu.domain_map |= rp->domain_map; | |
1984 | ||
1985 | memset(&rapl_pmu.pmu, 0, sizeof(struct pmu)); | |
1986 | rapl_pmu.pmu.attr_groups = pmu_attr_groups; | |
1987 | rapl_pmu.pmu.attr_update = pmu_attr_update; | |
1988 | rapl_pmu.pmu.task_ctx_nr = perf_invalid_context; | |
1989 | rapl_pmu.pmu.event_init = rapl_pmu_event_init; | |
1990 | rapl_pmu.pmu.add = rapl_pmu_event_add; | |
1991 | rapl_pmu.pmu.del = rapl_pmu_event_del; | |
1992 | rapl_pmu.pmu.start = rapl_pmu_event_start; | |
1993 | rapl_pmu.pmu.stop = rapl_pmu_event_stop; | |
1994 | rapl_pmu.pmu.read = rapl_pmu_event_read; | |
1995 | rapl_pmu.pmu.module = THIS_MODULE; | |
1996 | rapl_pmu.pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT; | |
1997 | ret = perf_pmu_register(&rapl_pmu.pmu, "power", -1); | |
1998 | if (ret) { | |
1999 | pr_info("Failed to register PMU\n"); | |
2000 | return ret; | |
2001 | } | |
2002 | ||
2003 | rapl_pmu.registered = true; | |
2004 | end: | |
2005 | rp->has_pmu = true; | |
2006 | return ret; | |
2007 | } | |
2008 | ||
2009 | int rapl_package_add_pmu(struct rapl_package *rp) | |
2010 | { | |
2011 | struct rapl_package_pmu_data *data = &rp->pmu_data; | |
2012 | int idx; | |
2013 | ||
2014 | if (rp->has_pmu) | |
2015 | return -EEXIST; | |
2016 | ||
2017 | guard(cpus_read_lock)(); | |
2018 | ||
2019 | for (idx = 0; idx < rp->nr_domains; idx++) { | |
2020 | struct rapl_domain *rd = &rp->domains[idx]; | |
2021 | int domain = rd->id; | |
2022 | u64 val; | |
2023 | ||
2024 | if (!test_bit(domain, &rp->domain_map)) | |
2025 | continue; | |
2026 | ||
2027 | /* | |
2028 | * The RAPL PMU granularity is 2^-32 Joules | |
2029 | * data->scale[]: times of 2^-32 Joules for each ENERGY COUNTER increase | |
2030 | */ | |
2031 | val = rd->energy_unit * (1ULL << 32); | |
2032 | do_div(val, ENERGY_UNIT_SCALE * 1000000); | |
2033 | data->scale[domain] = val; | |
2034 | ||
2035 | if (!rapl_pmu.timer_ms) { | |
2036 | struct rapl_primitive_info *rpi = get_rpi(rp, ENERGY_COUNTER); | |
2037 | ||
2038 | /* | |
2039 | * Calculate the timer rate: | |
2040 | * Use reference of 200W for scaling the timeout to avoid counter | |
2041 | * overflows. | |
2042 | * | |
2043 | * max_count = rpi->mask >> rpi->shift + 1 | |
2044 | * max_energy_pj = max_count * rd->energy_unit | |
2045 | * max_time_sec = (max_energy_pj / 1000000000) / 200w | |
2046 | * | |
2047 | * rapl_pmu.timer_ms = max_time_sec * 1000 / 2 | |
2048 | */ | |
2049 | val = (rpi->mask >> rpi->shift) + 1; | |
2050 | val *= rd->energy_unit; | |
2051 | do_div(val, 1000000 * 200 * 2); | |
2052 | rapl_pmu.timer_ms = val; | |
2053 | ||
2054 | pr_debug("%llu ms overflow timer\n", rapl_pmu.timer_ms); | |
2055 | } | |
2056 | ||
2057 | pr_debug("Domain %s: hw unit %lld * 2^-32 Joules\n", rd->name, data->scale[domain]); | |
2058 | } | |
2059 | ||
2060 | /* Initialize per package PMU data */ | |
2061 | raw_spin_lock_init(&data->lock); | |
2062 | INIT_LIST_HEAD(&data->active_list); | |
2063 | data->timer_interval = ms_to_ktime(rapl_pmu.timer_ms); | |
2064 | hrtimer_init(&data->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
2065 | data->hrtimer.function = rapl_hrtimer_handle; | |
2066 | ||
2067 | return rapl_pmu_update(rp); | |
2068 | } | |
2069 | EXPORT_SYMBOL_GPL(rapl_package_add_pmu); | |
2070 | ||
2071 | void rapl_package_remove_pmu(struct rapl_package *rp) | |
2072 | { | |
2073 | struct rapl_package *pos; | |
2074 | ||
2075 | if (!rp->has_pmu) | |
2076 | return; | |
2077 | ||
2078 | guard(cpus_read_lock)(); | |
2079 | ||
2080 | list_for_each_entry(pos, &rapl_packages, plist) { | |
2081 | /* PMU is still needed */ | |
2082 | if (pos->has_pmu && pos != rp) | |
2083 | return; | |
2084 | } | |
2085 | ||
2086 | perf_pmu_unregister(&rapl_pmu.pmu); | |
2087 | memset(&rapl_pmu, 0, sizeof(struct rapl_pmu)); | |
2088 | } | |
2089 | EXPORT_SYMBOL_GPL(rapl_package_remove_pmu); | |
2090 | #endif | |
2091 | ||
2d281d81 | 2092 | /* called from CPU hotplug notifier, hotplug lock held */ |
1aa09b93 | 2093 | void rapl_remove_package_cpuslocked(struct rapl_package *rp) |
2d281d81 JP |
2094 | { |
2095 | struct rapl_domain *rd, *rd_package = NULL; | |
2096 | ||
58705069 TG |
2097 | package_power_limit_irq_restore(rp); |
2098 | ||
2d281d81 | 2099 | for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { |
9050a9cd ZR |
2100 | int i; |
2101 | ||
2102 | for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { | |
2103 | rapl_write_pl_data(rd, i, PL_ENABLE, 0); | |
2104 | rapl_write_pl_data(rd, i, PL_CLAMP, 0); | |
58705069 | 2105 | } |
9050a9cd | 2106 | |
2d281d81 JP |
2107 | if (rd->id == RAPL_DOMAIN_PACKAGE) { |
2108 | rd_package = rd; | |
2109 | continue; | |
2110 | } | |
9ea7612c ZR |
2111 | pr_debug("remove package, undo power limit on %s: %s\n", |
2112 | rp->name, rd->name); | |
3382388d ZR |
2113 | powercap_unregister_zone(rp->priv->control_type, |
2114 | &rd->power_zone); | |
2d281d81 JP |
2115 | } |
2116 | /* do parent zone last */ | |
3382388d ZR |
2117 | powercap_unregister_zone(rp->priv->control_type, |
2118 | &rd_package->power_zone); | |
2d281d81 JP |
2119 | list_del(&rp->plist); |
2120 | kfree(rp); | |
2121 | } | |
1aa09b93 ZR |
2122 | EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked); |
2123 | ||
/* Same as rapl_remove_package_cpuslocked(), but takes the CPU hotplug read lock itself. */
void rapl_remove_package(struct rapl_package *rp)
{
	cpus_read_lock();
	rapl_remove_package_cpuslocked(rp);
	cpus_read_unlock();
}
3382388d ZR |
2129 | EXPORT_SYMBOL_GPL(rapl_remove_package); |
2130 | ||
2131 | /* caller to ensure CPU hotplug lock is held */ | |
1aa09b93 ZR |
2132 | struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, |
2133 | bool id_is_cpu) | |
3382388d | 2134 | { |
3382388d | 2135 | struct rapl_package *rp; |
bf44b901 ZR |
2136 | int uid; |
2137 | ||
2138 | if (id_is_cpu) | |
2139 | uid = topology_logical_die_id(id); | |
2140 | else | |
2141 | uid = id; | |
3382388d ZR |
2142 | |
2143 | list_for_each_entry(rp, &rapl_packages, plist) { | |
bf44b901 | 2144 | if (rp->id == uid |
3382388d ZR |
2145 | && rp->priv->control_type == priv->control_type) |
2146 | return rp; | |
2147 | } | |
2148 | ||
2149 | return NULL; | |
2150 | } | |
1aa09b93 ZR |
2151 | EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked); |
2152 | ||
2153 | struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) | |
2154 | { | |
2155 | guard(cpus_read_lock)(); | |
2156 | return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu); | |
2157 | } | |
3382388d | 2158 | EXPORT_SYMBOL_GPL(rapl_find_package_domain); |
2d281d81 JP |
2159 | |
2160 | /* called from CPU hotplug notifier, hotplug lock held */ | |
1aa09b93 | 2161 | struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu) |
2d281d81 | 2162 | { |
2d281d81 | 2163 | struct rapl_package *rp; |
b4005e92 | 2164 | int ret; |
2d281d81 | 2165 | |
2d281d81 JP |
2166 | rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); |
2167 | if (!rp) | |
b4005e92 | 2168 | return ERR_PTR(-ENOMEM); |
2d281d81 | 2169 | |
bf44b901 ZR |
2170 | if (id_is_cpu) { |
2171 | rp->id = topology_logical_die_id(id); | |
2172 | rp->lead_cpu = id; | |
bd745d1c | 2173 | if (topology_max_dies_per_package() > 1) |
bf44b901 ZR |
2174 | snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", |
2175 | topology_physical_package_id(id), topology_die_id(id)); | |
2176 | else | |
2177 | snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", | |
2178 | topology_physical_package_id(id)); | |
2179 | } else { | |
2180 | rp->id = id; | |
2181 | rp->lead_cpu = -1; | |
2182 | snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id); | |
2183 | } | |
323ee64a | 2184 | |
bf44b901 | 2185 | rp->priv = priv; |
e8e28c2a ZR |
2186 | ret = rapl_config(rp); |
2187 | if (ret) | |
2188 | goto err_free_package; | |
2189 | ||
2d281d81 | 2190 | /* check if the package contains valid domains */ |
693c1d78 | 2191 | if (rapl_detect_domains(rp)) { |
2d281d81 JP |
2192 | ret = -ENODEV; |
2193 | goto err_free_package; | |
2194 | } | |
a74f4367 TG |
2195 | ret = rapl_package_register_powercap(rp); |
2196 | if (!ret) { | |
2d281d81 JP |
2197 | INIT_LIST_HEAD(&rp->plist); |
2198 | list_add(&rp->plist, &rapl_packages); | |
b4005e92 | 2199 | return rp; |
2d281d81 JP |
2200 | } |
2201 | ||
2202 | err_free_package: | |
2203 | kfree(rp->domains); | |
2204 | kfree(rp); | |
b4005e92 | 2205 | return ERR_PTR(ret); |
2d281d81 | 2206 | } |
1aa09b93 ZR |
2207 | EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked); |
2208 | ||
2209 | struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) | |
2210 | { | |
2211 | guard(cpus_read_lock)(); | |
2212 | return rapl_add_package_cpuslocked(id, priv, id_is_cpu); | |
2213 | } | |
3382388d | 2214 | EXPORT_SYMBOL_GPL(rapl_add_package); |
2d281d81 | 2215 | |
52b3672c ZH |
2216 | static void power_limit_state_save(void) |
2217 | { | |
2218 | struct rapl_package *rp; | |
2219 | struct rapl_domain *rd; | |
9050a9cd | 2220 | int ret, i; |
52b3672c | 2221 | |
5d4c779c | 2222 | cpus_read_lock(); |
52b3672c ZH |
2223 | list_for_each_entry(rp, &rapl_packages, plist) { |
2224 | if (!rp->power_zone) | |
2225 | continue; | |
2226 | rd = power_zone_to_rapl_domain(rp->power_zone); | |
9050a9cd ZR |
2227 | for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) { |
2228 | ret = rapl_read_pl_data(rd, i, PL_LIMIT, true, | |
3382388d | 2229 | &rd->rpl[i].last_power_limit); |
9050a9cd ZR |
2230 | if (ret) |
2231 | rd->rpl[i].last_power_limit = 0; | |
52b3672c ZH |
2232 | } |
2233 | } | |
5d4c779c | 2234 | cpus_read_unlock(); |
52b3672c ZH |
2235 | } |
2236 | ||
2237 | static void power_limit_state_restore(void) | |
2238 | { | |
2239 | struct rapl_package *rp; | |
2240 | struct rapl_domain *rd; | |
9050a9cd | 2241 | int i; |
52b3672c | 2242 | |
5d4c779c | 2243 | cpus_read_lock(); |
52b3672c ZH |
2244 | list_for_each_entry(rp, &rapl_packages, plist) { |
2245 | if (!rp->power_zone) | |
2246 | continue; | |
2247 | rd = power_zone_to_rapl_domain(rp->power_zone); | |
9050a9cd ZR |
2248 | for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) |
2249 | if (rd->rpl[i].last_power_limit) | |
2250 | rapl_write_pl_data(rd, i, PL_LIMIT, | |
2251 | rd->rpl[i].last_power_limit); | |
52b3672c | 2252 | } |
5d4c779c | 2253 | cpus_read_unlock(); |
52b3672c ZH |
2254 | } |
2255 | ||
2256 | static int rapl_pm_callback(struct notifier_block *nb, | |
3382388d | 2257 | unsigned long mode, void *_unused) |
52b3672c ZH |
2258 | { |
2259 | switch (mode) { | |
2260 | case PM_SUSPEND_PREPARE: | |
2261 | power_limit_state_save(); | |
2262 | break; | |
2263 | case PM_POST_SUSPEND: | |
2264 | power_limit_state_restore(); | |
2265 | break; | |
2266 | } | |
2267 | return NOTIFY_OK; | |
2268 | } | |
2269 | ||
2270 | static struct notifier_block rapl_pm_notifier = { | |
2271 | .notifier_call = rapl_pm_callback, | |
2272 | }; | |
2273 | ||
abcfaeb3 ZR |
2274 | static struct platform_device *rapl_msr_platdev; |
2275 | ||
2276 | static int __init rapl_init(void) | |
2d281d81 | 2277 | { |
087e9cba | 2278 | const struct x86_cpu_id *id; |
58705069 | 2279 | int ret; |
2d281d81 | 2280 | |
087e9cba | 2281 | id = x86_match_cpu(rapl_ids); |
1488ac99 | 2282 | if (id) { |
b4288ce7 | 2283 | defaults_msr = (struct rapl_defaults *)id->driver_data; |
2d281d81 | 2284 | |
1488ac99 ZR |
2285 | rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0); |
2286 | if (!rapl_msr_platdev) | |
2287 | return -ENOMEM; | |
52b3672c | 2288 | |
1488ac99 ZR |
2289 | ret = platform_device_add(rapl_msr_platdev); |
2290 | if (ret) { | |
2291 | platform_device_put(rapl_msr_platdev); | |
2292 | return ret; | |
2293 | } | |
abcfaeb3 ZR |
2294 | } |
2295 | ||
1488ac99 ZR |
2296 | ret = register_pm_notifier(&rapl_pm_notifier); |
2297 | if (ret && rapl_msr_platdev) { | |
2298 | platform_device_del(rapl_msr_platdev); | |
abcfaeb3 | 2299 | platform_device_put(rapl_msr_platdev); |
1488ac99 | 2300 | } |
abcfaeb3 ZR |
2301 | |
2302 | return ret; | |
2d281d81 JP |
2303 | } |
2304 | ||
abcfaeb3 | 2305 | static void __exit rapl_exit(void) |
2d281d81 | 2306 | { |
abcfaeb3 | 2307 | platform_device_unregister(rapl_msr_platdev); |
52b3672c | 2308 | unregister_pm_notifier(&rapl_pm_notifier); |
2d281d81 JP |
2309 | } |
2310 | ||
f76cb066 | 2311 | fs_initcall(rapl_init); |
abcfaeb3 ZR |
2312 | module_exit(rapl_exit); |
2313 | ||
3382388d | 2314 | MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code"); |
2d281d81 JP |
2315 | MODULE_AUTHOR("Jacob Pan <[email protected]>"); |
2316 | MODULE_LICENSE("GPL v2"); |