2 * Machine check exception handling.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * Copyright 2013 IBM Corporation
23 #define pr_fmt(fmt) "mce: " fmt
25 #include <linux/types.h>
26 #include <linux/ptrace.h>
27 #include <linux/percpu.h>
28 #include <linux/export.h>
29 #include <linux/irq_work.h>
/*
 * Per-CPU nesting depth and event buffer for machine check exceptions
 * taken while a previous one is still being processed (MCEs can nest).
 */
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
39 static void machine_check_process_queued_event(struct irq_work *work);
40 static struct irq_work mce_event_process_work = {
41 .func = machine_check_process_queued_event,
44 static void mce_set_error_info(struct machine_check_event *mce,
45 struct mce_error_info *mce_err)
47 mce->error_type = mce_err->error_type;
48 switch (mce_err->error_type) {
49 case MCE_ERROR_TYPE_UE:
50 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
52 case MCE_ERROR_TYPE_SLB:
53 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
55 case MCE_ERROR_TYPE_ERAT:
56 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
58 case MCE_ERROR_TYPE_TLB:
59 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
61 case MCE_ERROR_TYPE_USER:
62 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
64 case MCE_ERROR_TYPE_RA:
65 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
67 case MCE_ERROR_TYPE_LINK:
68 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
70 case MCE_ERROR_TYPE_UNKNOWN:
77 * Decode and save high level MCE information into per cpu buffer which
78 * is an array of machine_check_event structure.
80 void save_mce_event(struct pt_regs *regs, long handled,
81 struct mce_error_info *mce_err,
82 uint64_t nip, uint64_t addr)
84 int index = __this_cpu_inc_return(mce_nest_count) - 1;
85 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
88 * Return if we don't have enough space to log mce event.
89 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
90 * the check below will stop buffer overrun.
92 if (index >= MAX_MC_EVT)
95 /* Populate generic machine check info */
96 mce->version = MCE_V1;
98 mce->srr1 = regs->msr;
99 mce->gpr3 = regs->gpr[3];
102 /* Mark it recovered if we have handled it and MSR(RI=1). */
103 if (handled && (regs->msr & MSR_RI))
104 mce->disposition = MCE_DISPOSITION_RECOVERED;
106 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
108 mce->initiator = mce_err->initiator;
109 mce->severity = mce_err->severity;
112 * Populate the mce error_type and type-specific error_type.
114 mce_set_error_info(mce, mce_err);
119 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
120 mce->u.tlb_error.effective_address_provided = true;
121 mce->u.tlb_error.effective_address = addr;
122 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
123 mce->u.slb_error.effective_address_provided = true;
124 mce->u.slb_error.effective_address = addr;
125 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
126 mce->u.erat_error.effective_address_provided = true;
127 mce->u.erat_error.effective_address = addr;
128 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
129 mce->u.user_error.effective_address_provided = true;
130 mce->u.user_error.effective_address = addr;
131 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
132 mce->u.ra_error.effective_address_provided = true;
133 mce->u.ra_error.effective_address = addr;
134 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
135 mce->u.link_error.effective_address_provided = true;
136 mce->u.link_error.effective_address = addr;
137 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
138 mce->u.ue_error.effective_address_provided = true;
139 mce->u.ue_error.effective_address = addr;
146 * mce Pointer to machine_check_event structure to be filled.
147 * release Flag to indicate whether to free the event slot or not.
148 * 0 <= do not release the mce event. Caller will invoke
149 * release_mce_event() once event has been consumed.
150 * 1 <= release the slot.
155 * get_mce_event() will be called by platform specific machine check
156 * handle routine and in KVM.
157 * When we call get_mce_event(), we are still in interrupt context and
158 * preemption will not be scheduled until ret_from_expect() routine
161 int get_mce_event(struct machine_check_event *mce, bool release)
163 int index = __this_cpu_read(mce_nest_count) - 1;
164 struct machine_check_event *mc_evt;
171 /* Check if we have MCE info to process. */
172 if (index < MAX_MC_EVT) {
173 mc_evt = this_cpu_ptr(&mce_event[index]);
174 /* Copy the event structure and release the original */
181 /* Decrement the count to free the slot. */
183 __this_cpu_dec(mce_nest_count);
188 void release_mce_event(void)
190 get_mce_event(NULL, true);
194 * Queue up the MCE event which then can be handled later.
196 void machine_check_queue_event(void)
199 struct machine_check_event evt;
201 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
204 index = __this_cpu_inc_return(mce_queue_count) - 1;
205 /* If queue is full, just return for now. */
206 if (index >= MAX_MC_EVT) {
207 __this_cpu_dec(mce_queue_count);
210 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
212 /* Queue irq work to process this event later. */
213 irq_work_queue(&mce_event_process_work);
217 * process pending MCE event from the mce event queue. This function will be
218 * called during syscall exit.
220 static void machine_check_process_queued_event(struct irq_work *work)
224 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
227 * For now just print it to console.
228 * TODO: log this error event to FSP or nvram.
230 while (__this_cpu_read(mce_queue_count) > 0) {
231 index = __this_cpu_read(mce_queue_count) - 1;
232 machine_check_print_event_info(
233 this_cpu_ptr(&mce_event_queue[index]), false);
234 __this_cpu_dec(mce_queue_count);
238 void machine_check_print_event_info(struct machine_check_event *evt,
241 const char *level, *sevstr, *subtype;
242 static const char *mc_ue_types[] = {
245 "Page table walk ifetch",
247 "Page table walk Load/Store",
249 static const char *mc_slb_types[] = {
254 static const char *mc_erat_types[] = {
259 static const char *mc_tlb_types[] = {
264 static const char *mc_user_types[] = {
268 static const char *mc_ra_types[] = {
270 "Instruction fetch (bad)",
271 "Instruction fetch (foreign)",
272 "Page table walk ifetch (bad)",
273 "Page table walk ifetch (foreign)",
276 "Page table walk Load/Store (bad)",
277 "Page table walk Load/Store (foreign)",
278 "Load/Store (foreign)",
280 static const char *mc_link_types[] = {
282 "Instruction fetch (timeout)",
283 "Page table walk ifetch (timeout)",
286 "Page table walk Load/Store (timeout)",
289 /* Print things out */
290 if (evt->version != MCE_V1) {
291 pr_err("Machine Check Exception, Unknown event version %d !\n",
295 switch (evt->severity) {
296 case MCE_SEV_NO_ERROR:
300 case MCE_SEV_WARNING:
301 level = KERN_WARNING;
304 case MCE_SEV_ERROR_SYNC:
315 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
316 evt->disposition == MCE_DISPOSITION_RECOVERED ?
317 "Recovered" : "Not recovered");
320 printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
321 evt->srr0, current->pid, current->comm);
323 printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
327 printk("%s Initiator: %s\n", level,
328 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
329 switch (evt->error_type) {
330 case MCE_ERROR_TYPE_UE:
331 subtype = evt->u.ue_error.ue_error_type <
332 ARRAY_SIZE(mc_ue_types) ?
333 mc_ue_types[evt->u.ue_error.ue_error_type]
335 printk("%s Error type: UE [%s]\n", level, subtype);
336 if (evt->u.ue_error.effective_address_provided)
337 printk("%s Effective address: %016llx\n",
338 level, evt->u.ue_error.effective_address);
339 if (evt->u.ue_error.physical_address_provided)
340 printk("%s Physical address: %016llx\n",
341 level, evt->u.ue_error.physical_address);
343 case MCE_ERROR_TYPE_SLB:
344 subtype = evt->u.slb_error.slb_error_type <
345 ARRAY_SIZE(mc_slb_types) ?
346 mc_slb_types[evt->u.slb_error.slb_error_type]
348 printk("%s Error type: SLB [%s]\n", level, subtype);
349 if (evt->u.slb_error.effective_address_provided)
350 printk("%s Effective address: %016llx\n",
351 level, evt->u.slb_error.effective_address);
353 case MCE_ERROR_TYPE_ERAT:
354 subtype = evt->u.erat_error.erat_error_type <
355 ARRAY_SIZE(mc_erat_types) ?
356 mc_erat_types[evt->u.erat_error.erat_error_type]
358 printk("%s Error type: ERAT [%s]\n", level, subtype);
359 if (evt->u.erat_error.effective_address_provided)
360 printk("%s Effective address: %016llx\n",
361 level, evt->u.erat_error.effective_address);
363 case MCE_ERROR_TYPE_TLB:
364 subtype = evt->u.tlb_error.tlb_error_type <
365 ARRAY_SIZE(mc_tlb_types) ?
366 mc_tlb_types[evt->u.tlb_error.tlb_error_type]
368 printk("%s Error type: TLB [%s]\n", level, subtype);
369 if (evt->u.tlb_error.effective_address_provided)
370 printk("%s Effective address: %016llx\n",
371 level, evt->u.tlb_error.effective_address);
373 case MCE_ERROR_TYPE_USER:
374 subtype = evt->u.user_error.user_error_type <
375 ARRAY_SIZE(mc_user_types) ?
376 mc_user_types[evt->u.user_error.user_error_type]
378 printk("%s Error type: User [%s]\n", level, subtype);
379 if (evt->u.user_error.effective_address_provided)
380 printk("%s Effective address: %016llx\n",
381 level, evt->u.user_error.effective_address);
383 case MCE_ERROR_TYPE_RA:
384 subtype = evt->u.ra_error.ra_error_type <
385 ARRAY_SIZE(mc_ra_types) ?
386 mc_ra_types[evt->u.ra_error.ra_error_type]
388 printk("%s Error type: Real address [%s]\n", level, subtype);
389 if (evt->u.ra_error.effective_address_provided)
390 printk("%s Effective address: %016llx\n",
391 level, evt->u.ra_error.effective_address);
393 case MCE_ERROR_TYPE_LINK:
394 subtype = evt->u.link_error.link_error_type <
395 ARRAY_SIZE(mc_link_types) ?
396 mc_link_types[evt->u.link_error.link_error_type]
398 printk("%s Error type: Link [%s]\n", level, subtype);
399 if (evt->u.link_error.effective_address_provided)
400 printk("%s Effective address: %016llx\n",
401 level, evt->u.link_error.effective_address);
404 case MCE_ERROR_TYPE_UNKNOWN:
405 printk("%s Error type: Unknown\n", level);
409 EXPORT_SYMBOL_GPL(machine_check_print_event_info);
411 uint64_t get_mce_fault_addr(struct machine_check_event *evt)
413 switch (evt->error_type) {
414 case MCE_ERROR_TYPE_UE:
415 if (evt->u.ue_error.effective_address_provided)
416 return evt->u.ue_error.effective_address;
418 case MCE_ERROR_TYPE_SLB:
419 if (evt->u.slb_error.effective_address_provided)
420 return evt->u.slb_error.effective_address;
422 case MCE_ERROR_TYPE_ERAT:
423 if (evt->u.erat_error.effective_address_provided)
424 return evt->u.erat_error.effective_address;
426 case MCE_ERROR_TYPE_TLB:
427 if (evt->u.tlb_error.effective_address_provided)
428 return evt->u.tlb_error.effective_address;
430 case MCE_ERROR_TYPE_USER:
431 if (evt->u.user_error.effective_address_provided)
432 return evt->u.user_error.effective_address;
434 case MCE_ERROR_TYPE_RA:
435 if (evt->u.ra_error.effective_address_provided)
436 return evt->u.ra_error.effective_address;
438 case MCE_ERROR_TYPE_LINK:
439 if (evt->u.link_error.effective_address_provided)
440 return evt->u.link_error.effective_address;
443 case MCE_ERROR_TYPE_UNKNOWN:
448 EXPORT_SYMBOL(get_mce_fault_addr);