1 // SPDX-License-Identifier: GPL-2.0-only
3 * Hardware Feedback Interface Driver
5 * Copyright (c) 2021, Intel Corporation.
11 * The Hardware Feedback Interface provides performance and energy efficiency
12 * capability information for each CPU in the system. Depending on the processor
13 * model, hardware may periodically update these capabilities as a result of
14 * changes in the operating conditions (e.g., power limits or thermal
15 * constraints). On other processor models, there is a single HFI update
18 * This file provides functionality to process HFI updates and relay these
19 * updates to userspace.
22 #define pr_fmt(fmt) "intel-hfi: " fmt
24 #include <linux/bitops.h>
25 #include <linux/cpufeature.h>
26 #include <linux/cpumask.h>
27 #include <linux/gfp.h>
29 #include <linux/kernel.h>
30 #include <linux/math.h>
31 #include <linux/mutex.h>
32 #include <linux/percpu-defs.h>
33 #include <linux/printk.h>
34 #include <linux/processor.h>
35 #include <linux/slab.h>
36 #include <linux/spinlock.h>
37 #include <linux/string.h>
38 #include <linux/topology.h>
39 #include <linux/workqueue.h>
43 #include "intel_hfi.h"
44 #include "thermal_interrupt.h"
46 #include "../thermal_netlink.h"
48 /* Hardware Feedback Interface MSR configuration bits */
49 #define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
50 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0)
52 /* CPUID detection and enumeration definitions for HFI */
54 #define CPUID_HFI_LEAF 6
/*
 * Capability bits used when parsing the HFI CPUID leaf.
 * NOTE(review): only fragments of these declarations are visible in this
 * listing; the remaining members and the closing braces are not shown.
 */
56 union hfi_capabilities {
/* One-bit field; presumably flags energy-efficiency reporting support. */
59 u8 energy_efficiency:1;
/* Member of an enclosing declaration whose opening line is not visible here. */
67 union hfi_capabilities capabilities;
76 * struct hfi_cpu_data - HFI capabilities per CPU
77 * @perf_cap: Performance capability
78 * @ee_cap: Energy efficiency capability
80 * Capabilities of a logical processor in the HFI table. These capabilities are
89 * struct hfi_hdr - Header of the HFI table
90 * @perf_updated: Hardware updated performance capabilities
91 * @ee_updated: Hardware updated energy efficiency capabilities
93 * Properties of the data in an HFI table.
101 * struct hfi_instance - Representation of an HFI instance (i.e., a table)
102 * @local_table: Base of the local copy of the HFI table
103 * @timestamp: Timestamp of the last update of the local table.
104 * Located at the base of the local table.
105 * @hdr: Base address of the header of the local table
106 * @data: Base address of the data of the local table
107 * @cpus: CPUs represented in this HFI table instance
108 * @hw_table: Pointer to the HFI table of this instance
109 * @update_work: Delayed work to process HFI updates
110 * @table_lock: Lock to protect accesses to the table of this instance
111 * @event_lock: Lock to process HFI interrupts
113 * A set of parameters to parse and navigate a specific HFI table.
/*
 * Per-table HFI state.
 * NOTE(review): only the work item and the two locks are visible in this
 * listing; the table pointers and cpumask referenced elsewhere in the file
 * are not shown here.
 */
115 struct hfi_instance {
/* Initialized with hfi_update_work_fn() and queued from intel_hfi_process_event(). */
124 struct delayed_work update_work;
/* Held while the local table is overwritten (event path) and while it is read (get_hfi_caps()). */
125 raw_spinlock_t table_lock;
/* Taken with trylock in intel_hfi_process_event() so that only one CPU handles an update. */
126 raw_spinlock_t event_lock;
130 * struct hfi_features - Supported HFI features
131 * @nr_table_pages: Size of the HFI table in 4KB pages
132 * @cpu_stride: Stride size to locate the capability data of a logical
133 * processor within the table (i.e., row stride)
134 * @hdr_size: Size of the table header
136 * Parameters and supported features that are common to all HFI instances
/* Table geometry shared by all instances; filled in once by hfi_parse_features(). */
138 struct hfi_features {
/* Page count: set to the CPUID-reported table_pages field plus one. */
139 size_t nr_table_pages;
/* Row stride: number of capabilities rounded up to a multiple of 8. */
140 unsigned int cpu_stride;
/* Header size: computed with the same rounding as cpu_stride. */
141 unsigned int hdr_size;
145 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
146 * @index: Row of this CPU in its HFI table
147 * @hfi_instance: Attributes of the HFI table to which this CPU belongs
149 * Parameters to link a logical processor to an HFI table and a row within it.
/*
 * Per-CPU link to an HFI table.
 * NOTE(review): the declaration of the index member is not visible in this
 * listing.
 */
151 struct hfi_cpu_info {
/* Set in intel_hfi_online(); read by the event handler and by intel_hfi_offline(). */
153 struct hfi_instance *hfi_instance;
/* Per-CPU HFI info; index starts at -1, which init_hfi_cpu_index() treats as "not read yet". */
156 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
/* Number of entries in hfi_instances; computed in intel_hfi_init(). */
158 static int max_hfi_instances;
/* Array of instances allocated in intel_hfi_init(); reset to NULL on its error path. */
159 static struct hfi_instance *hfi_instances;
/* Table geometry parsed from CPUID by hfi_parse_features(). */
161 static struct hfi_features hfi_features;
/* Held around changes to an instance's cpumask and for the duration of update_capabilities(). */
162 static DEFINE_MUTEX(hfi_instance_lock);
/* Single-threaded workqueue ("hfi-updates") on which update_work items run. */
164 static struct workqueue_struct *hfi_updates_wq;
/* Delay passed to queue_delayed_work() when an update is queued. */
165 #define HFI_UPDATE_INTERVAL HZ
/* Largest number of per-CPU entries passed to one thermal_genl_cpu_capability_event() call. */
166 #define HFI_MAX_THERM_NOTIFY_COUNT 16
/*
 * get_hfi_caps() - Fill @cpu_caps with the capabilities of each CPU in
 * @hfi_instance->cpus, read from the instance's local table data.
 *
 * The whole walk runs under table_lock taken with raw_spin_lock_irq(), the
 * same lock intel_hfi_process_event() holds while overwriting the local table.
 *
 * NOTE(review): the declarations of cpu, i and index and the advance of i are
 * not visible in this listing; the caller sizes @cpu_caps from the cpumask
 * weight, so one entry per CPU is presumably written — confirm.
 */
168 static void get_hfi_caps(struct hfi_instance *hfi_instance,
169 struct thermal_genl_cpu_caps *cpu_caps)
173 raw_spin_lock_irq(&hfi_instance->table_lock);
174 for_each_cpu(cpu, hfi_instance->cpus) {
175 struct hfi_cpu_data *caps;
/* Row lookup: data base plus this CPU's row index times the row stride. */
178 index = per_cpu(hfi_cpu_info, cpu).index;
179 caps = hfi_instance->data + index * hfi_features.cpu_stride;
180 cpu_caps[i].cpu = cpu;
183 * Scale performance and energy efficiency to
184 * the [0, 1023] interval that thermal netlink uses.
/* Left shift by 2 multiplies the raw capability value by 4. */
186 cpu_caps[i].performance = caps->perf_cap << 2;
187 cpu_caps[i].efficiency = caps->ee_cap << 2;
191 raw_spin_unlock_irq(&hfi_instance->table_lock);
195 * Call update_capabilities() when there are changes in the HFI table.
/*
 * update_capabilities() - Report the capabilities of the CPUs of
 * @hfi_instance through thermal_genl_cpu_capability_event().
 *
 * Runs with hfi_instance_lock held. A buffer sized by the weight of the
 * instance's cpumask is allocated and filled by get_hfi_caps(), then sent in
 * chunks of HFI_MAX_THERM_NOTIFY_COUNT entries followed by any remainder.
 *
 * NOTE(review): the early-exit branches, the loop header, the labels and the
 * release of cpu_caps are not visible in this listing.
 */
197 static void update_capabilities(struct hfi_instance *hfi_instance)
199 struct thermal_genl_cpu_caps *cpu_caps;
200 int i = 0, cpu_count;
202 /* CPUs may come online/offline while processing an HFI update. */
203 mutex_lock(&hfi_instance_lock);
205 cpu_count = cpumask_weight(hfi_instance->cpus);
207 /* No CPUs to report in this hfi_instance. */
/* GFP_KERNEL allocation; this function is reached from the workqueue handler hfi_update_work_fn(). */
211 cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
215 get_hfi_caps(hfi_instance, cpu_caps);
/* Fewer entries than one full chunk: the chunked loop below has nothing to send. */
217 if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
220 /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
222 (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
223 i += HFI_MAX_THERM_NOTIFY_COUNT)
224 thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
/* Entries left after the full chunks; i indexes the first entry not yet sent. */
227 cpu_count = cpu_count - i;
230 /* Process the remaining capabilities if any. */
232 thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
236 mutex_unlock(&hfi_instance_lock);
/*
 * hfi_update_work_fn() - Work handler for an instance's update_work.
 * @work: work_struct embedded in the delayed work of an hfi_instance
 *
 * Recovers the owning hfi_instance from @work and calls
 * update_capabilities() on it.
 */
239 static void hfi_update_work_fn(struct work_struct *work)
241 struct hfi_instance *hfi_instance;
/* @work is embedded in a delayed_work, which in turn is embedded in the instance. */
243 hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
246 update_capabilities(hfi_instance);
/*
 * intel_hfi_process_event() - Handle an HFI update notification on the
 * current CPU.
 * @pkg_therm_status_msr_val: in the visible code this value is only tested
 *                            for being non-zero
 *
 * Visible flow: look up this CPU's HFI instance, try to take event_lock,
 * re-read MSR_IA32_PACKAGE_THERM_STATUS for the HFI-updated bit, compare the
 * hardware table's timestamp with the local one, copy the hardware table into
 * the local copy under table_lock, clear the HFI-updated status, and queue
 * update_work on hfi_updates_wq with a delay of HFI_UPDATE_INTERVAL.
 *
 * NOTE(review): the early-return statements and one condition line are not
 * visible in this listing.
 */
249 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
251 struct hfi_instance *hfi_instance;
252 int cpu = smp_processor_id();
253 struct hfi_cpu_info *info;
254 u64 new_timestamp, msr, hfi;
256 if (!pkg_therm_status_msr_val)
259 info = &per_cpu(hfi_cpu_info, cpu);
264 * A CPU is linked to its HFI instance before the thermal vector in the
265 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
266 * when receiving an HFI event.
268 hfi_instance = info->hfi_instance;
269 if (unlikely(!hfi_instance)) {
270 pr_debug("Received event on CPU %d but instance was null", cpu);
275 * On most systems, all CPUs in the package receive a package-level
276 * thermal interrupt when there is an HFI update. It is sufficient to
277 * let a single CPU to acknowledge the update and queue work to
278 * process it. The remaining CPUs can resume their work.
/* Non-blocking attempt: a CPU that fails to get the lock does not wait for it. */
280 if (!raw_spin_trylock(&hfi_instance->event_lock))
/* Re-read the status MSR now that the lock is held and isolate the HFI-updated bit. */
283 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
284 hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
286 raw_spin_unlock(&hfi_instance->event_lock);
291 * Ack duplicate update. Since there is an active HFI
292 * status from HW, it must be a new event, not a case
293 * where a lagging CPU entered the locked region.
/* The first 64 bits of the hardware table are read as its timestamp. */
295 new_timestamp = *(u64 *)hfi_instance->hw_table;
296 if (*hfi_instance->timestamp == new_timestamp) {
297 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
298 raw_spin_unlock(&hfi_instance->event_lock);
/* table_lock nests inside event_lock; get_hfi_caps() takes the same lock to read the local copy. */
302 raw_spin_lock(&hfi_instance->table_lock);
305 * Copy the updated table into our local copy. This includes the new
/* Copy size in bytes: the page count shifted left by PAGE_SHIFT. */
308 memcpy(hfi_instance->local_table, hfi_instance->hw_table,
309 hfi_features.nr_table_pages << PAGE_SHIFT);
312 * Let hardware know that we are done reading the HFI table and it is
313 * free to update it again.
315 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
317 raw_spin_unlock(&hfi_instance->table_lock);
318 raw_spin_unlock(&hfi_instance->event_lock);
/* Both locks are released before the work item is queued. */
320 queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
321 HFI_UPDATE_INTERVAL);
/*
 * init_hfi_cpu_index() - Record the HFI table row of a CPU in @info.
 * @info: per-CPU HFI info whose index is filled from CPUID leaf
 *        CPUID_HFI_LEAF, register EDX
 *
 * NOTE(review): cpuid_edx() presumably executes on the calling CPU, so this
 * should run on the CPU that @info describes — confirm against the callers.
 */
324 static void init_hfi_cpu_index(struct hfi_cpu_info *info)
326 union cpuid6_edx edx;
328 /* Do not re-read @cpu's index if it has already been initialized. */
/* The per-CPU initializer sets index to -1, so any value above -1 means it was already read. */
329 if (info->index > -1)
332 edx.full = cpuid_edx(CPUID_HFI_LEAF);
333 info->index = edx.split.index;
337 * The format of the HFI table depends on the number of capabilities that the
338 * hardware supports. Keep a data structure to navigate the table.
/*
 * init_hfi_instance() - Derive the header and data pointers of the local
 * table copy of @hfi_instance.
 *
 * Layout implied by the arithmetic below: timestamp at the base of
 * local_table, then the header, then hfi_features.hdr_size bytes later the
 * per-CPU data. local_table must already be allocated.
 */
340 static void init_hfi_instance(struct hfi_instance *hfi_instance)
342 /* The HFI header is below the time-stamp. */
343 hfi_instance->hdr = hfi_instance->local_table +
344 sizeof(*hfi_instance->timestamp);
346 /* The HFI data starts below the header. */
347 hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
351 * intel_hfi_online() - Enable HFI on @cpu
352 * @cpu: CPU in which the HFI will be enabled
354 * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
355 * level. The first CPU in the die/package to come online does the full HFI
356 * initialization. Subsequent CPUs will just link themselves to the HFI
357 * instance of their die/package.
359 * This function is called before enabling the thermal vector in the local APIC
360 * in order to ensure that @cpu has an associated HFI instance when it receives
/*
 * Visible flow of intel_hfi_online(): link @cpu to the instance selected by
 * its logical die id, read its table row index, and then, under
 * hfi_instance_lock, either add @cpu to the cpumask of an already
 * initialized instance or allocate the hardware table and its local copy,
 * program MSR_IA32_HW_FEEDBACK_PTR, initialize the work item and locks, and
 * set the enable bit in MSR_IA32_HW_FEEDBACK_CONFIG.
 *
 * NOTE(review): several declarations, conditions, gotos and labels are not
 * visible in this listing.
 */
363 void intel_hfi_online(unsigned int cpu)
365 struct hfi_instance *hfi_instance;
366 struct hfi_cpu_info *info;
367 phys_addr_t hw_table_pa;
371 /* Nothing to do if hfi_instances are missing. */
376 * Link @cpu to the HFI instance of its package/die. It does not
377 * matter whether the instance has been initialized.
379 info = &per_cpu(hfi_cpu_info, cpu);
380 die_id = topology_logical_die_id(cpu);
381 hfi_instance = info->hfi_instance;
/* die_id indexes hfi_instances[], which holds max_hfi_instances entries. */
383 if (die_id >= max_hfi_instances)
386 hfi_instance = &hfi_instances[die_id];
387 info->hfi_instance = hfi_instance;
390 init_hfi_cpu_index(info);
393 * Now check if the HFI instance of the package/die of @cpu has been
394 * initialized (by checking its header). In such case, all we have to
395 * do is to add @cpu to this instance's cpumask.
397 mutex_lock(&hfi_instance_lock);
398 if (hfi_instance->hdr) {
399 cpumask_set_cpu(cpu, hfi_instance->cpus);
404 * Hardware is programmed with the physical address of the first page
405 * frame of the table. Hence, the allocated memory must be page-aligned.
/*
 * NOTE(review): alloc_pages_exact() takes a size in bytes, but a page count
 * is passed here, whereas the local copy below is sized with
 * nr_table_pages << PAGE_SHIFT. Confirm the intended size when the table
 * needs more than one page.
 */
407 hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
408 GFP_KERNEL | __GFP_ZERO);
409 if (!hfi_instance->hw_table)
412 hw_table_pa = virt_to_phys(hfi_instance->hw_table);
415 * Allocate memory to keep a local copy of the table that
416 * hardware generates.
418 hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
420 if (!hfi_instance->local_table)
424 * Program the address of the feedback table of this die/package. On
425 * some processors, hardware remembers the old address of the HFI table
426 * even after having been reprogrammed and re-enabled. Thus, do not free
427 * the pages allocated for the table or reprogram the hardware with a
428 * new base address. Namely, program the hardware only once.
/* The valid bit (bit 0) is OR-ed into the physical address of the table. */
430 msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
431 wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
/* Sets hdr, the field the "already initialized" check above tests. */
433 init_hfi_instance(hfi_instance);
435 INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
436 raw_spin_lock_init(&hfi_instance->table_lock);
437 raw_spin_lock_init(&hfi_instance->event_lock);
439 cpumask_set_cpu(cpu, hfi_instance->cpus);
442 * Enable the hardware feedback interface and never disable it. See
443 * comment on programming the address of the table.
/* Read-modify-write so the other bits of the config MSR are preserved. */
445 rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
446 msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
447 wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
450 mutex_unlock(&hfi_instance_lock);
/* NOTE(review): same units question as the allocation — free_pages_exact() also takes a size in bytes. */
454 free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
459 * intel_hfi_offline() - Disable HFI on @cpu
460 * @cpu: CPU in which the HFI will be disabled
462 * Remove @cpu from those covered by its HFI instance.
464 * On some processors, hardware remembers previous programming settings even
465 * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
466 * die/package of @cpu are offline. See note in intel_hfi_online().
/*
 * Visible flow of intel_hfi_offline(): fetch the instance linked to @cpu,
 * check that its header pointer is set, and clear @cpu from the instance's
 * cpumask under hfi_instance_lock. No MSR is written in the visible code.
 *
 * NOTE(review): the NULL check on the instance and the return statements are
 * not visible in this listing.
 */
468 void intel_hfi_offline(unsigned int cpu)
470 struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
471 struct hfi_instance *hfi_instance;
474 * Check if @cpu as an associated, initialized (i.e., with a non-NULL
475 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
478 hfi_instance = info->hfi_instance;
/* hdr is set by init_hfi_instance(), so a NULL hdr means the instance was never initialized. */
482 if (!hfi_instance->hdr)
/* Same mutex that update_capabilities() holds while it walks the cpumask. */
485 mutex_lock(&hfi_instance_lock);
486 cpumask_clear_cpu(cpu, hfi_instance->cpus);
487 mutex_unlock(&hfi_instance_lock);
/*
 * hfi_parse_features() - Fill hfi_features from CPUID leaf CPUID_HFI_LEAF.
 *
 * Requires X86_FEATURE_HFI on the boot CPU and the performance capability
 * bit; then derives the table page count, header size and row stride.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * intel_hfi_init() treats a non-zero return as "do not use HFI".
 */
490 static __init int hfi_parse_features(void)
492 unsigned int nr_capabilities;
493 union cpuid6_edx edx;
495 if (!boot_cpu_has(X86_FEATURE_HFI))
499 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
500 * supported capabilities and the size of the HFI table.
502 edx.full = cpuid_edx(CPUID_HFI_LEAF);
504 if (!edx.split.capabilities.split.performance) {
505 pr_debug("Performance reporting not supported! Not using HFI\n");
510 * The number of supported capabilities determines the number of
511 * columns in the HFI table. Exclude the reserved bits.
/* Clear the reserved bits so the population count only sees real capability bits. */
513 edx.split.capabilities.split.__reserved = 0;
514 nr_capabilities = hweight8(edx.split.capabilities.bits);
516 /* The number of 4KB pages required by the table */
/* The CPUID field is incremented by one to obtain the page count. */
517 hfi_features.nr_table_pages = edx.split.table_pages + 1;
520 * The header contains change indications for each supported feature.
521 * The size of the table header is rounded up to be a multiple of 8
524 hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
527 * Data of each logical processor is also rounded up to be a multiple
/* Same rounding as hdr_size: one unit per capability, padded to a multiple of 8. */
530 hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
/*
 * intel_hfi_init() - Boot-time setup of the HFI bookkeeping.
 *
 * Visible flow: parse the HFI features, size and allocate the hfi_instances
 * array, allocate a zeroed cpumask for each instance, and create the
 * "hfi-updates" single-threaded workqueue. The tail of the function frees
 * the cpumasks allocated so far and the array, and resets hfi_instances to
 * NULL.
 *
 * NOTE(review): the declarations of i and j, the failure checks, the gotos
 * and the labels are not visible in this listing.
 */
535 void __init intel_hfi_init(void)
537 struct hfi_instance *hfi_instance;
/* A non-zero result from the parser means HFI is not used. */
540 if (hfi_parse_features())
543 /* There is one HFI instance per die/package. */
544 max_hfi_instances = topology_max_packages() *
545 topology_max_die_per_package();
548 * This allocation may fail. CPU hotplug callbacks must check
549 * for a null pointer.
551 hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
556 for (i = 0; i < max_hfi_instances; i++) {
557 hfi_instance = &hfi_instances[i];
558 if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
562 hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
/* Unwind: only the first i instances have had a cpumask allocated. */
569 for (j = 0; j < i; ++j) {
570 hfi_instance = &hfi_instances[j];
571 free_cpumask_var(hfi_instance->cpus);
574 kfree(hfi_instances);
/* NULL marks the instances array as missing for the hotplug callbacks. */
575 hfi_instances = NULL;