1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _ASM_X86_RESCTRL_INTERNAL_H
3 #define _ASM_X86_RESCTRL_INTERNAL_H
5 #include <linux/resctrl.h>
6 #include <linux/sched.h>
7 #include <linux/kernfs.h>
8 #include <linux/fs_context.h>
9 #include <linux/jump_label.h>
10 #include <linux/tick.h>
12 #include <asm/resctrl.h>
14 #define L3_QOS_CDP_ENABLE 0x01ULL
16 #define L2_QOS_CDP_ENABLE 0x01ULL
18 #define CQM_LIMBOCHECK_INTERVAL 1000
20 #define MBM_CNTR_WIDTH_BASE 24
21 #define MBM_OVERFLOW_INTERVAL 1000
22 #define MAX_MBA_BW 100u
23 #define MBA_IS_LINEAR 0x4
24 #define MBM_CNTR_WIDTH_OFFSET_AMD 20
26 #define RMID_VAL_ERROR BIT_ULL(63)
27 #define RMID_VAL_UNAVAIL BIT_ULL(62)
/*
 * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
 * data to be returned. The counter width is discovered from the hardware
 * as an offset from MBM_CNTR_WIDTH_BASE.
 */
33 #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
35 /* Reads to Local DRAM Memory */
36 #define READS_TO_LOCAL_MEM BIT(0)
38 /* Reads to Remote DRAM Memory */
39 #define READS_TO_REMOTE_MEM BIT(1)
41 /* Non-Temporal Writes to Local Memory */
42 #define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2)
44 /* Non-Temporal Writes to Remote Memory */
45 #define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3)
47 /* Reads to Local Memory the system identifies as "Slow Memory" */
48 #define READS_TO_LOCAL_S_MEM BIT(4)
50 /* Reads to Remote Memory the system identifies as "Slow Memory" */
51 #define READS_TO_REMOTE_S_MEM BIT(5)
53 /* Dirty Victims to All Types of Memory */
54 #define DIRTY_VICTIMS_TO_ALL_MEM BIT(6)
56 /* Max event bits supported */
57 #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
60 * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
61 * aren't marked nohz_full
62 * @mask: The mask to pick a CPU from.
63 * @exclude_cpu:The CPU to avoid picking.
65 * Returns a CPU from @mask, but not @exclude_cpu. If there are housekeeping
66 * CPUs that don't use nohz_full, these are preferred. Pass
67 * RESCTRL_PICK_ANY_CPU to avoid excluding any CPUs.
69 * When a CPU is excluded, returns >= nr_cpu_ids if no CPUs are available.
71 static inline unsigned int
72 cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
74 unsigned int cpu, hk_cpu;
76 if (exclude_cpu == RESCTRL_PICK_ANY_CPU)
77 cpu = cpumask_any(mask);
79 cpu = cpumask_any_but(mask, exclude_cpu);
81 /* Only continue if tick_nohz_full_mask has been initialized. */
82 if (!tick_nohz_full_enabled())
85 /* If the CPU picked isn't marked nohz_full nothing more needs doing. */
86 if (cpu < nr_cpu_ids && !tick_nohz_full_cpu(cpu))
89 /* Try to find a CPU that isn't nohz_full to use in preference */
90 hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
91 if (hk_cpu == exclude_cpu)
92 hk_cpu = cpumask_nth_andnot(1, mask, tick_nohz_full_mask);
94 if (hk_cpu < nr_cpu_ids)
100 struct rdt_fs_context {
101 struct kernfs_fs_context kfc;
104 bool enable_mba_mbps;
108 static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
110 struct kernfs_fs_context *kfc = fc->fs_private;
112 return container_of(kfc, struct rdt_fs_context, kfc);
116 * struct mon_evt - Entry in the event list of a resource
118 * @name: name of the event
119 * @configurable: true if the event is configurable
120 * @list: entry in &rdt_resource->evt_list
123 enum resctrl_event_id evtid;
126 struct list_head list;
/**
 * union mon_data_bits - Monitoring details for each event file.
 * @priv:              Used to store monitoring event data in @u
 *                     as kernfs private data.
 * @u.rid:             Resource id associated with the event file.
 * @u.evtid:           Event id associated with the event file.
 * @u.sum:             Set when event must be summed across multiple
 *                     domains.
 * @u.domid:           When @u.sum is zero this is the domain to which
 *                     the event file belongs. When @u.sum is one this
 *                     is the id of the L3 cache that all domains to be
 *                     summed share.
 * @u:                 Name of the bit fields struct.
 */
143 union mon_data_bits {
146 unsigned int rid : 10;
147 enum resctrl_event_id evtid : 7;
148 unsigned int sum : 1;
149 unsigned int domid : 14;
154 * struct rmid_read - Data passed across smp_call*() to read event count.
155 * @rgrp: Resource group for which the counter is being read. If it is a parent
156 * resource group then its event count is summed with the count from all
157 * its child resource groups.
158 * @r: Resource describing the properties of the event being read.
159 * @d: Domain that the counter should be read from. If NULL then sum all
160 * domains in @r sharing L3 @ci.id
161 * @evtid: Which monitor event to read.
162 * @first: Initialize MBM counter when true.
163 * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
164 * @err: Error encountered when reading counter.
165 * @val: Returned value of event counter. If @rgrp is a parent resource group,
166 * @val includes the sum of event counts from its child resource groups.
167 * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id,
168 * (summed across child resource groups if @rgrp is a parent resource group).
169 * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only).
172 struct rdtgroup *rgrp;
173 struct rdt_resource *r;
174 struct rdt_mon_domain *d;
175 enum resctrl_event_id evtid;
177 struct cacheinfo *ci;
183 extern unsigned int rdt_mon_features;
184 extern struct list_head resctrl_schema_all;
185 extern bool resctrl_mounted;
187 enum rdt_group_type {
194 * enum rdtgrp_mode - Mode of a RDT resource group
195 * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
196 * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
197 * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
198 * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
199 * allowed AND the allocations are Cache Pseudo-Locked
200 * @RDT_NUM_MODES: Total number of modes
202 * The mode of a resource group enables control over the allowed overlap
203 * between allocations associated with different resource groups (classes
204 * of service). User is able to modify the mode of a resource group by
205 * writing to the "mode" resctrl file associated with the resource group.
207 * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
208 * writing the appropriate text to the "mode" file. A resource group enters
209 * "pseudo-locked" mode after the schemata is written while the resource
210 * group is in "pseudo-locksetup" mode.
213 RDT_MODE_SHAREABLE = 0,
215 RDT_MODE_PSEUDO_LOCKSETUP,
216 RDT_MODE_PSEUDO_LOCKED,
223 * struct mongroup - store mon group's data in resctrl fs.
224 * @mon_data_kn: kernfs node for the mon_data directory
225 * @parent: parent rdtgrp
226 * @crdtgrp_list: child rdtgroup node list
227 * @rmid: rmid for this rdtgroup
230 struct kernfs_node *mon_data_kn;
231 struct rdtgroup *parent;
232 struct list_head crdtgrp_list;
237 * struct pseudo_lock_region - pseudo-lock region information
238 * @s: Resctrl schema for the resource to which this
239 * pseudo-locked region belongs
240 * @d: RDT domain to which this pseudo-locked region
242 * @cbm: bitmask of the pseudo-locked region
243 * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread
245 * @thread_done: variable used by waitqueue to test if pseudo-locking
247 * @cpu: core associated with the cache on which the setup code
249 * @line_size: size of the cache lines
250 * @size: size of pseudo-locked region in bytes
251 * @kmem: the kernel memory associated with pseudo-locked region
252 * @minor: minor number of character device associated with this
254 * @debugfs_dir: pointer to this region's directory in the debugfs
256 * @pm_reqs: Power management QoS requests related to this region
258 struct pseudo_lock_region {
259 struct resctrl_schema *s;
260 struct rdt_ctrl_domain *d;
262 wait_queue_head_t lock_thread_wq;
265 unsigned int line_size;
269 struct dentry *debugfs_dir;
270 struct list_head pm_reqs;
274 * struct rdtgroup - store rdtgroup's data in resctrl file system.
276 * @rdtgroup_list: linked list for all rdtgroups
277 * @closid: closid for this rdtgroup
278 * @cpu_mask: CPUs assigned to this rdtgroup
279 * @flags: status bits
280 * @waitcount: how many cpus expect to find this
281 * group when they acquire rdtgroup_mutex
282 * @type: indicates type of this rdtgroup - either
283 * monitor only or ctrl_mon group
284 * @mon: mongroup related data
285 * @mode: mode of resource group
286 * @mba_mbps_event: input monitoring event id when mba_sc is enabled
287 * @plr: pseudo-locked region
290 struct kernfs_node *kn;
291 struct list_head rdtgroup_list;
293 struct cpumask cpu_mask;
296 enum rdt_group_type type;
298 enum rdtgrp_mode mode;
299 enum resctrl_event_id mba_mbps_event;
300 struct pseudo_lock_region *plr;
304 #define RDT_DELETED 1
307 #define RFTYPE_FLAGS_CPUS_LIST 1
310 * Define the file type flags for base and info directories.
312 #define RFTYPE_INFO BIT(0)
313 #define RFTYPE_BASE BIT(1)
314 #define RFTYPE_CTRL BIT(4)
315 #define RFTYPE_MON BIT(5)
316 #define RFTYPE_TOP BIT(6)
317 #define RFTYPE_RES_CACHE BIT(8)
318 #define RFTYPE_RES_MB BIT(9)
319 #define RFTYPE_DEBUG BIT(10)
320 #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
321 #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
322 #define RFTYPE_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP)
323 #define RFTYPE_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL)
324 #define RFTYPE_MON_BASE (RFTYPE_BASE | RFTYPE_MON)
326 /* List of all resource groups */
327 extern struct list_head rdt_all_groups;
329 extern int max_name_width, max_data_width;
331 int __init rdtgroup_init(void);
332 void __exit rdtgroup_exit(void);
335 * struct rftype - describe each file in the resctrl file system
338 * @kf_ops: File operations
339 * @flags: File specific RFTYPE_FLAGS_* flags
340 * @fflags: File specific RFTYPE_* flags
341 * @seq_show: Show content of the file
342 * @write: Write to the file
347 const struct kernfs_ops *kf_ops;
349 unsigned long fflags;
351 int (*seq_show)(struct kernfs_open_file *of,
352 struct seq_file *sf, void *v);
354 * write() is the generic write callback which maps directly to
355 * kernfs write operation and overrides all other operations.
356 * Maximum write size is determined by ->max_write_len.
358 ssize_t (*write)(struct kernfs_open_file *of,
359 char *buf, size_t nbytes, loff_t off);
363 * struct mbm_state - status for each MBM counter in each domain
364 * @prev_bw_bytes: Previous bytes value read for bandwidth calculation
365 * @prev_bw: The most recent bandwidth in MBps
/**
 * struct arch_mbm_state - values used to compute resctrl_arch_rmid_read()s
 *			   return value.
 * @chunks:	Total data moved (multiply by rdt_hw_resource.mon_scale to get
 *		bytes)
 * @prev_msr:	Value of IA32_QM_CTR last time it was read for the RMID used to
 *		find this struct.
 */
379 struct arch_mbm_state {
385 * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
386 * a resource for a control function
387 * @d_resctrl: Properties exposed to the resctrl file system
388 * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
390 * Members of this structure are accessed via helpers that provide abstraction.
392 struct rdt_hw_ctrl_domain {
393 struct rdt_ctrl_domain d_resctrl;
398 * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
399 * a resource for a monitor function
400 * @d_resctrl: Properties exposed to the resctrl file system
401 * @arch_mbm_total: arch private state for MBM total bandwidth
402 * @arch_mbm_local: arch private state for MBM local bandwidth
404 * Members of this structure are accessed via helpers that provide abstraction.
406 struct rdt_hw_mon_domain {
407 struct rdt_mon_domain d_resctrl;
408 struct arch_mbm_state *arch_mbm_total;
409 struct arch_mbm_state *arch_mbm_local;
412 static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
414 return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl);
417 static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r)
419 return container_of(r, struct rdt_hw_mon_domain, d_resctrl);
423 * struct msr_param - set a range of MSRs from a domain
424 * @res: The resource to use
425 * @dom: The domain to update
426 * @low: Beginning index from base MSR
430 struct rdt_resource *res;
431 struct rdt_ctrl_domain *dom;
436 static inline bool is_llc_occupancy_enabled(void)
438 return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
441 static inline bool is_mbm_total_enabled(void)
443 return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
446 static inline bool is_mbm_local_enabled(void)
448 return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
/* True when at least one of the MBM total / MBM local events is enabled. */
static inline bool is_mbm_enabled(void)
{
	if (is_mbm_total_enabled())
		return true;

	return is_mbm_local_enabled();
}
456 static inline bool is_mbm_event(int e)
458 return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
459 e <= QOS_L3_MBM_LOCAL_EVENT_ID);
462 struct rdt_parse_data {
463 struct rdtgroup *rdtgrp;
468 * struct rdt_hw_resource - arch private attributes of a resctrl resource
469 * @r_resctrl: Attributes of the resource used directly by resctrl.
470 * @num_closid: Maximum number of closid this hardware can support,
471 * regardless of CDP. This is exposed via
472 * resctrl_arch_get_num_closid() to avoid confusion
473 * with struct resctrl_schema's property of the same name,
474 * which has been corrected for features like CDP.
475 * @msr_base: Base MSR address for CBMs
476 * @msr_update: Function pointer to update QOS MSRs
477 * @mon_scale: cqm counter * mon_scale = occupancy in bytes
478 * @mbm_width: Monitor width, to detect and correct for overflow.
479 * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth
480 * Monitoring Event Configuration (BMEC) is supported.
481 * @cdp_enabled: CDP state of this resource
483 * Members of this structure are either private to the architecture
484 * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
485 * msr_update and msr_base.
487 struct rdt_hw_resource {
488 struct rdt_resource r_resctrl;
490 unsigned int msr_base;
491 void (*msr_update)(struct msr_param *m);
492 unsigned int mon_scale;
493 unsigned int mbm_width;
494 unsigned int mbm_cfg_mask;
498 static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
500 return container_of(r, struct rdt_hw_resource, r_resctrl);
503 int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
504 struct rdt_ctrl_domain *d);
505 int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
506 struct rdt_ctrl_domain *d);
508 extern struct mutex rdtgroup_mutex;
510 extern struct rdt_hw_resource rdt_resources_all[];
511 extern struct rdtgroup rdtgroup_default;
512 extern struct dentry *debugfs_resctrl;
513 extern enum resctrl_event_id mba_mbps_default_event;
515 enum resctrl_res_level {
521 /* Must be the last */
525 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
527 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
530 return &hw_res->r_resctrl;
533 static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
535 return rdt_resources_all[l].cdp_enabled;
538 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
540 void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d);
/*
 * Iterate over every entry of rdt_resources_all[], handing out the common
 * struct rdt_resource that is embedded in each struct rdt_hw_resource as
 * its r_resctrl member. The cursor is advanced with resctrl_inc() rather
 * than plain pointer arithmetic because the array elements are
 * struct rdt_hw_resource, not struct rdt_resource.
 *
 * @r is parenthesised in every expansion so that any lvalue expression of
 * type struct rdt_resource * (not only a plain identifier) can be used as
 * the cursor.
 */
#define for_each_rdt_resource(r)					      \
	for ((r) = &rdt_resources_all[0].r_resctrl;			      \
	     (r) <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl;     \
	     (r) = resctrl_inc(r))

/* Visit only the resources that are alloc_capable or mon_capable. */
#define for_each_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if ((r)->alloc_capable || (r)->mon_capable)

/* Visit only the resources that are alloc_capable. */
#define for_each_alloc_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if ((r)->alloc_capable)

/* Visit only the resources that are mon_capable. */
#define for_each_mon_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if ((r)->mon_capable)
563 /* CPUID.(EAX=10H, ECX=ResID=1).EAX */
564 union cpuid_0x10_1_eax {
566 unsigned int cbm_len:5;
571 /* CPUID.(EAX=10H, ECX=ResID=3).EAX */
572 union cpuid_0x10_3_eax {
574 unsigned int max_delay:12;
579 /* CPUID.(EAX=10H, ECX=ResID).ECX */
580 union cpuid_0x10_x_ecx {
582 unsigned int reserved:3;
583 unsigned int noncont:1;
588 /* CPUID.(EAX=10H, ECX=ResID).EDX */
589 union cpuid_0x10_x_edx {
591 unsigned int cos_max:16;
596 void rdt_last_cmd_clear(void);
597 void rdt_last_cmd_puts(const char *s);
599 void rdt_last_cmd_printf(const char *fmt, ...);
601 void rdt_ctrl_update(void *arg);
602 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
603 void rdtgroup_kn_unlock(struct kernfs_node *kn);
604 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
605 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
607 struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
608 struct list_head **pos);
609 ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
610 char *buf, size_t nbytes, loff_t off);
611 int rdtgroup_schemata_show(struct kernfs_open_file *of,
612 struct seq_file *s, void *v);
613 ssize_t rdtgroup_mba_mbps_event_write(struct kernfs_open_file *of,
614 char *buf, size_t nbytes, loff_t off);
615 int rdtgroup_mba_mbps_event_show(struct kernfs_open_file *of,
616 struct seq_file *s, void *v);
617 bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
618 unsigned long cbm, int closid, bool exclusive);
619 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
621 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
622 int rdtgroup_tasks_assigned(struct rdtgroup *r);
623 int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
624 int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
625 bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
626 bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
627 int rdt_pseudo_lock_init(void);
628 void rdt_pseudo_lock_release(void);
629 int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
630 void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
631 struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r);
632 struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r);
633 int closids_supported(void);
634 void closid_free(int closid);
635 int alloc_rmid(u32 closid);
636 void free_rmid(u32 closid, u32 rmid);
637 int rdt_get_mon_l3_config(struct rdt_resource *r);
638 void __exit rdt_put_mon_l3_config(void);
639 bool __init rdt_cpu_has(int flag);
640 void mon_event_count(void *info);
641 int rdtgroup_mondata_show(struct seq_file *m, void *arg);
642 void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
643 struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
644 cpumask_t *cpumask, int evtid, int first);
645 void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
646 unsigned long delay_ms,
648 void mbm_handle_overflow(struct work_struct *work);
649 void __init intel_rdt_mbm_apply_quirk(void);
650 bool is_mba_sc(struct rdt_resource *r);
651 void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
653 void cqm_handle_limbo(struct work_struct *work);
654 bool has_busy_rmid(struct rdt_mon_domain *d);
655 void __check_limbo(struct rdt_mon_domain *d, bool force_free);
656 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
657 void resctrl_file_fflags_init(const char *config, unsigned long fflags);
658 void rdt_staged_configs_clear(void);
659 bool closid_allocated(unsigned int closid);
660 int resctrl_find_cleanest_closid(void);
661 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */