Git Repo - J-linux.git/blob - arch/x86/kernel/cpu/mce/threshold.c
Merge tag 'vfs-6.13-rc7.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
[J-linux.git] / arch / x86 / kernel / cpu / mce / threshold.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Common corrected MCE threshold handler code:
4  */
5 #include <linux/interrupt.h>
6 #include <linux/kernel.h>
7
8 #include <asm/irq_vectors.h>
9 #include <asm/traps.h>
10 #include <asm/apic.h>
11 #include <asm/mce.h>
12 #include <asm/trace/irq_vectors.h>
13
14 #include "internal.h"
15
16 static void default_threshold_interrupt(void)
17 {
18         pr_err("Unexpected threshold interrupt at vector %x\n",
19                 THRESHOLD_APIC_VECTOR);
20 }
21
/*
 * Handler invoked by sysvec_threshold() for every threshold interrupt.
 * Starts out pointing at default_threshold_interrupt(); it is a writable,
 * non-static pointer so other code can install a different handler.
 */
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
23
/*
 * Entry point for THRESHOLD_APIC_VECTOR.
 *
 * The handler call is bracketed by the entry/exit tracepoints, the
 * irq_threshold_count statistic is bumped before dispatching through
 * mce_threshold_vector, and apic_eoi() is called last.
 */
DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
{
	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
	inc_irq_stat(irq_threshold_count);
	/* Indirect call: whichever handler is currently installed. */
	mce_threshold_vector();
	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
	apic_eoi();
}
32
/*
 * Per-CPU storm tracking state: per-bank history/timestamp/mode flags,
 * the count of banks currently in storm mode and the poll_mode flag used
 * by the accessors below.
 */
DEFINE_PER_CPU(struct mca_storm_desc, storm_desc);
34
35 void mce_inherit_storm(unsigned int bank)
36 {
37         struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
38
39         /*
40          * Previous CPU owning this bank had put it into storm mode,
41          * but the precise history of that storm is unknown. Assume
42          * the worst (all recent polls of the bank found a valid error
43          * logged). This will avoid the new owner prematurely declaring
44          * the storm has ended.
45          */
46         storm->banks[bank].history = ~0ull;
47         storm->banks[bank].timestamp = jiffies;
48 }
49
/* Return the current CPU's storm_desc.poll_mode flag. */
bool mce_get_storm_mode(void)
{
	return __this_cpu_read(storm_desc.poll_mode);
}
54
/* Set the current CPU's storm_desc.poll_mode flag to @storm. */
void mce_set_storm_mode(bool storm)
{
	__this_cpu_write(storm_desc.poll_mode, storm);
}
59
60 static void mce_handle_storm(unsigned int bank, bool on)
61 {
62         switch (boot_cpu_data.x86_vendor) {
63         case X86_VENDOR_INTEL:
64                 mce_intel_handle_storm(bank, on);
65                 break;
66         }
67 }
68
69 void cmci_storm_begin(unsigned int bank)
70 {
71         struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
72
73         __set_bit(bank, this_cpu_ptr(mce_poll_banks));
74         storm->banks[bank].in_storm_mode = true;
75
76         /*
77          * If this is the first bank on this CPU to enter storm mode
78          * start polling.
79          */
80         if (++storm->stormy_bank_count == 1)
81                 mce_timer_kick(true);
82 }
83
84 void cmci_storm_end(unsigned int bank)
85 {
86         struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
87
88         __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
89         storm->banks[bank].history = 0;
90         storm->banks[bank].in_storm_mode = false;
91
92         /* If no banks left in storm mode, stop polling. */
93         if (!this_cpu_dec_return(storm_desc.stormy_bank_count))
94                 mce_timer_kick(false);
95 }
96
97 void mce_track_storm(struct mce *mce)
98 {
99         struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
100         unsigned long now = jiffies, delta;
101         unsigned int shift = 1;
102         u64 history = 0;
103
104         /* No tracking needed for banks that do not support CMCI */
105         if (storm->banks[mce->bank].poll_only)
106                 return;
107
108         /*
109          * When a bank is in storm mode it is polled once per second and
110          * the history mask will record about the last minute of poll results.
111          * If it is not in storm mode, then the bank is only checked when
112          * there is a CMCI interrupt. Check how long it has been since
113          * this bank was last checked, and adjust the amount of "shift"
114          * to apply to history.
115          */
116         if (!storm->banks[mce->bank].in_storm_mode) {
117                 delta = now - storm->banks[mce->bank].timestamp;
118                 shift = (delta + HZ) / HZ;
119         }
120
121         /* If it has been a long time since the last poll, clear history. */
122         if (shift < NUM_HISTORY_BITS)
123                 history = storm->banks[mce->bank].history << shift;
124
125         storm->banks[mce->bank].timestamp = now;
126
127         /* History keeps track of corrected errors. VAL=1 && UC=0 */
128         if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(mce))
129                 history |= 1;
130
131         storm->banks[mce->bank].history = history;
132
133         if (storm->banks[mce->bank].in_storm_mode) {
134                 if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0))
135                         return;
136                 printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank);
137                 mce_handle_storm(mce->bank, false);
138                 cmci_storm_end(mce->bank);
139         } else {
140                 if (hweight64(history) < STORM_BEGIN_THRESHOLD)
141                         return;
142                 printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank);
143                 mce_handle_storm(mce->bank, true);
144                 cmci_storm_begin(mce->bank);
145         }
146 }
This page took 0.0339930000000001 seconds and 4 git commands to generate.