/*
 * UEFI Common Platform Error Record (CPER) support
 *
 * Copyright (C) 2010, Intel Corp.
 *
 * CPER is the format used to describe platform hardware error by
 * various tables, such as ERST, BERT and HEST etc.
 *
 * For more information about CPER, please refer to Appendix N of UEFI
 * Specification version 2.4.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/dmi.h>
32 #include <linux/acpi.h>
33 #include <linux/pci.h>
34 #include <linux/aer.h>
35 #include <linux/printk.h>
36 #include <linux/bcd.h>
37 #include <acpi/ghes.h>
38 #include <ras/ras_event.h>
40 static char rcd_decode_str[CPER_REC_LEN];
43 * CPER record ID need to be unique even after reboot, because record
44 * ID is used as index for ERST storage, while CPER records from
45 * multiple boot may co-exist in ERST.
47 u64 cper_next_record_id(void)
49 static atomic64_t seq;
51 if (!atomic64_read(&seq))
52 atomic64_set(&seq, ((u64)get_seconds()) << 32);
54 return atomic64_inc_return(&seq);
56 EXPORT_SYMBOL_GPL(cper_next_record_id);
/*
 * Printable names for CPER severities, indexed by the severity value
 * (see cper_severity_str()).
 *
 * NOTE(review): the table entries were not visible in this extract; they
 * are restored here following the CPER severity encoding (0 recoverable,
 * 1 fatal, 2 corrected, 3 informational).  Confirm the exact strings
 * against the tree before merging.
 */
static const char * const severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};
65 const char *cper_severity_str(unsigned int severity)
67 return severity < ARRAY_SIZE(severity_strs) ?
68 severity_strs[severity] : "unknown";
70 EXPORT_SYMBOL_GPL(cper_severity_str);
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * If the output length is longer than 80, multiple lines will be
 * printed, with @pfx printed at the beginning of each line.
 * NULL entries in @strs are skipped.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/* @bits has no positions beyond its width; shifting further is undefined. */
	if (strs_size > 8 * sizeof(bits))
		strs_size = 8 * sizeof(bits);

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush the pending line when this name would not fit on it. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns the number of characters actually
		 * stored, so len can never point past the end of buf.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf + len, sizeof(buf) - len,
					 ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
/*
 * String tables for the generic processor error section.
 *
 * NOTE(review): most entries were not visible in this extract (only
 * "micro-architectural error", "unknown or generic" and
 * "instruction execution" were); the rest are restored following the
 * field encodings of the CPER generic processor error section.  Confirm
 * the exact strings against the tree before merging.
 */

/* Indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	"IA32/X64",
	"IA64",
	"ARM",
};

/* Indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	"IA32",
	"IA64",
	"X64",
	"ARM A32/T32",
	"ARM A64",
};

/* Indexed by bit position in the proc_error_type field. */
const char * const cper_proc_error_type_strs[] = {
	"cache error",
	"TLB error",
	"bus error",
	"micro-architectural error",
};

/* Indexed by the operation field. */
static const char * const proc_op_strs[] = {
	"unknown or generic",
	"data read",
	"data write",
	"instruction execution",
};

/* Indexed by bit position in the flags field. */
static const char * const proc_flag_strs[] = {
	"restartable",
	"precise IP",
	"overflow",
	"corrected",
};
144 static void cper_print_proc_generic(const char *pfx,
145 const struct cper_sec_proc_generic *proc)
147 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
148 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
149 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
150 proc_type_strs[proc->proc_type] : "unknown");
151 if (proc->validation_bits & CPER_PROC_VALID_ISA)
152 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
153 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
154 proc_isa_strs[proc->proc_isa] : "unknown");
155 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
156 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
157 cper_print_bits(pfx, proc->proc_error_type,
158 cper_proc_error_type_strs,
159 ARRAY_SIZE(cper_proc_error_type_strs));
161 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
162 printk("%s""operation: %d, %s\n", pfx, proc->operation,
163 proc->operation < ARRAY_SIZE(proc_op_strs) ?
164 proc_op_strs[proc->operation] : "unknown");
165 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
166 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
167 cper_print_bits(pfx, proc->flags, proc_flag_strs,
168 ARRAY_SIZE(proc_flag_strs));
170 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
171 printk("%s""level: %d\n", pfx, proc->level);
172 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
173 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
174 if (proc->validation_bits & CPER_PROC_VALID_ID)
175 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
176 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
177 printk("%s""target_address: 0x%016llx\n",
178 pfx, proc->target_addr);
179 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
180 printk("%s""requestor_id: 0x%016llx\n",
181 pfx, proc->requestor_id);
182 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
183 printk("%s""responder_id: 0x%016llx\n",
184 pfx, proc->responder_id);
185 if (proc->validation_bits & CPER_PROC_VALID_IP)
186 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
/*
 * Printable names for memory error types, indexed by the error_type
 * value (see cper_mem_err_type_str()).
 *
 * NOTE(review): only entries 4, 5 and 13-15 were visible in this extract;
 * the others are restored following the memory error type encoding of
 * the CPER memory error section.  Confirm the exact strings against the
 * tree before merging.
 */
static const char * const mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};
208 const char *cper_mem_err_type_str(unsigned int etype)
210 return etype < ARRAY_SIZE(mem_err_type_strs) ?
211 mem_err_type_strs[etype] : "unknown";
213 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
215 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
223 len = CPER_REC_LEN - 1;
224 if (mem->validation_bits & CPER_MEM_VALID_NODE)
225 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
226 if (mem->validation_bits & CPER_MEM_VALID_CARD)
227 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
228 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
229 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
230 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
231 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
232 if (mem->validation_bits & CPER_MEM_VALID_BANK)
233 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
234 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
235 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
236 if (mem->validation_bits & CPER_MEM_VALID_ROW)
237 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
238 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
239 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
240 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
241 n += scnprintf(msg + n, len - n, "bit_position: %d ",
243 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
244 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
246 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
247 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
249 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
250 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
257 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
260 const char *bank = NULL, *device = NULL;
262 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
266 len = CPER_REC_LEN - 1;
267 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
269 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
271 n = snprintf(msg, len,
272 "DIMM location: not present. DMI handle: 0x%.4x ",
273 mem->mem_dev_handle);
279 void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
280 struct cper_mem_err_compact *cmem)
282 cmem->validation_bits = mem->validation_bits;
283 cmem->node = mem->node;
284 cmem->card = mem->card;
285 cmem->module = mem->module;
286 cmem->bank = mem->bank;
287 cmem->device = mem->device;
288 cmem->row = mem->row;
289 cmem->column = mem->column;
290 cmem->bit_pos = mem->bit_pos;
291 cmem->requestor_id = mem->requestor_id;
292 cmem->responder_id = mem->responder_id;
293 cmem->target_id = mem->target_id;
294 cmem->rank = mem->rank;
295 cmem->mem_array_handle = mem->mem_array_handle;
296 cmem->mem_dev_handle = mem->mem_dev_handle;
299 const char *cper_mem_err_unpack(struct trace_seq *p,
300 struct cper_mem_err_compact *cmem)
302 const char *ret = trace_seq_buffer_ptr(p);
304 if (cper_mem_err_location(cmem, rcd_decode_str))
305 trace_seq_printf(p, "%s", rcd_decode_str);
306 if (cper_dimm_err_location(cmem, rcd_decode_str))
307 trace_seq_printf(p, "%s", rcd_decode_str);
308 trace_seq_putc(p, '\0');
313 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
316 struct cper_mem_err_compact cmem;
318 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
319 if (len == sizeof(struct cper_sec_mem_err_old) &&
320 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
321 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
324 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
325 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
326 if (mem->validation_bits & CPER_MEM_VALID_PA)
327 printk("%s""physical_address: 0x%016llx\n",
328 pfx, mem->physical_addr);
329 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
330 printk("%s""physical_address_mask: 0x%016llx\n",
331 pfx, mem->physical_addr_mask);
332 cper_mem_err_pack(mem, &cmem);
333 if (cper_mem_err_location(&cmem, rcd_decode_str))
334 printk("%s%s\n", pfx, rcd_decode_str);
335 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
336 u8 etype = mem->error_type;
337 printk("%s""error_type: %d, %s\n", pfx, etype,
338 cper_mem_err_type_str(etype));
340 if (cper_dimm_err_location(&cmem, rcd_decode_str))
341 printk("%s%s\n", pfx, rcd_decode_str);
/*
 * Printable names for PCIe port types, indexed by the port_type field of
 * the PCIe error section.
 *
 * NOTE(review): entries 0 and 2-4 were not visible in this extract; they
 * are restored following the port type encoding of the CPER PCIe error
 * section (values 2 and 3 have no assigned meaning).  Confirm the exact
 * strings against the tree before merging.
 */
static const char * const pcie_port_type_strs[] = {
	"PCIe end point",
	"legacy PCI end point",
	"unknown",
	"unknown",
	"root port",
	"upstream switch port",
	"downstream switch port",
	"PCIe to PCI/PCI-X bridge",
	"PCI/PCI-X to PCIe bridge",
	"root complex integrated endpoint device",
	"root complex event collector",
};
358 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
359 const struct acpi_hest_generic_data *gdata)
361 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
362 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
363 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
364 pcie_port_type_strs[pcie->port_type] : "unknown");
365 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
366 printk("%s""version: %d.%d\n", pfx,
367 pcie->version.major, pcie->version.minor);
368 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
369 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
370 pcie->command, pcie->status);
371 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
373 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
374 pcie->device_id.segment, pcie->device_id.bus,
375 pcie->device_id.device, pcie->device_id.function);
376 printk("%s""slot: %d\n", pfx,
377 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
378 printk("%s""secondary_bus: 0x%02x\n", pfx,
379 pcie->device_id.secondary_bus);
380 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
381 pcie->device_id.vendor_id, pcie->device_id.device_id);
382 p = pcie->device_id.class_code;
383 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
385 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
386 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
387 pcie->serial_number.lower, pcie->serial_number.upper);
388 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
390 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
391 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
394 static void cper_print_tstamp(const char *pfx,
395 struct acpi_hest_generic_data_v300 *gdata)
397 __u8 hour, min, sec, day, mon, year, century, *timestamp;
399 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
400 timestamp = (__u8 *)&(gdata->time_stamp);
401 sec = bcd2bin(timestamp[0]);
402 min = bcd2bin(timestamp[1]);
403 hour = bcd2bin(timestamp[2]);
404 day = bcd2bin(timestamp[4]);
405 mon = bcd2bin(timestamp[5]);
406 year = bcd2bin(timestamp[6]);
407 century = bcd2bin(timestamp[7]);
409 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
410 (timestamp[3] & 0x1 ? "precise " : "imprecise "),
411 century, year, mon, day, hour, min, sec);
416 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
419 guid_t *sec_type = (guid_t *)gdata->section_type;
423 if (acpi_hest_get_version(gdata) >= 3)
424 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
426 severity = gdata->error_severity;
427 printk("%s""Error %d, type: %s\n", pfx, sec_no,
428 cper_severity_str(severity));
429 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
430 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
431 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
432 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
434 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
435 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
436 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
438 printk("%s""section_type: general processor error\n", newpfx);
439 if (gdata->error_data_length >= sizeof(*proc_err))
440 cper_print_proc_generic(newpfx, proc_err);
442 goto err_section_too_small;
443 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
444 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
446 printk("%s""section_type: memory error\n", newpfx);
447 if (gdata->error_data_length >=
448 sizeof(struct cper_sec_mem_err_old))
449 cper_print_mem(newpfx, mem_err,
450 gdata->error_data_length);
452 goto err_section_too_small;
453 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
454 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
456 printk("%s""section_type: PCIe error\n", newpfx);
457 if (gdata->error_data_length >= sizeof(*pcie))
458 cper_print_pcie(newpfx, pcie, gdata);
460 goto err_section_too_small;
461 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
462 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) {
463 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
465 printk("%ssection_type: ARM processor error\n", newpfx);
466 if (gdata->error_data_length >= sizeof(*arm_err))
467 cper_print_proc_arm(newpfx, arm_err);
469 goto err_section_too_small;
471 #if defined(CONFIG_UEFI_CPER_X86)
472 } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
473 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
475 printk("%ssection_type: IA32/X64 processor error\n", newpfx);
476 if (gdata->error_data_length >= sizeof(*ia_err))
477 cper_print_proc_ia(newpfx, ia_err);
479 goto err_section_too_small;
482 const void *err = acpi_hest_get_payload(gdata);
484 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
485 printk("%ssection length: %#x\n", newpfx,
486 gdata->error_data_length);
487 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
488 gdata->error_data_length, true);
493 err_section_too_small:
494 pr_err(FW_WARN "error section length is too small\n");
497 void cper_estatus_print(const char *pfx,
498 const struct acpi_hest_generic_status *estatus)
500 struct acpi_hest_generic_data *gdata;
505 severity = estatus->error_severity;
506 if (severity == CPER_SEV_CORRECTED)
507 printk("%s%s\n", pfx,
508 "It has been corrected by h/w "
509 "and requires no further action");
510 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
511 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
513 apei_estatus_for_each_section(estatus, gdata) {
514 cper_estatus_print_section(newpfx, gdata, sec_no);
518 EXPORT_SYMBOL_GPL(cper_estatus_print);
520 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
522 if (estatus->data_length &&
523 estatus->data_length < sizeof(struct acpi_hest_generic_data))
525 if (estatus->raw_data_length &&
526 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
531 EXPORT_SYMBOL_GPL(cper_estatus_check_header);
533 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
535 struct acpi_hest_generic_data *gdata;
536 unsigned int data_len, gedata_len;
539 rc = cper_estatus_check_header(estatus);
542 data_len = estatus->data_length;
544 apei_estatus_for_each_section(estatus, gdata) {
545 gedata_len = acpi_hest_get_error_length(gdata);
546 if (gedata_len > data_len - acpi_hest_get_size(gdata))
548 data_len -= acpi_hest_get_record_size(gdata);
555 EXPORT_SYMBOL_GPL(cper_estatus_check);