X-Git-Url: https://repo.jachan.dev/linux.git/blobdiff_plain/ae574a5d7aa1d80469dfcbaa757db2bea536ee66..db1a19b38f3a85f475b4ad716c71be133d8ca48e:/arch/ia64/kernel/mca_drv.c

Summary of the changes carried by this patch to arch/ia64/kernel/mca_drv.c:

  * Removes the first #include line of the file.
  * Adds recovery_status_t (MCA_NOT_RECOVERED = 0, MCA_RECOVERED = 1).
  * Adds fatal_mca() and mca_recovered().  Each formats its printf-style
    arguments into a 256-byte stack buffer with vsnprintf(), logs the result
    through ia64_mca_printk() as "MCA: <message>\n" (KERN_ALERT and KERN_INFO
    respectively) and returns MCA_NOT_RECOVERED / MCA_RECOVERED.
    Because the helpers append the newline themselves, messages passed to
    them must not end in "\n".
  * Replaces the bare "return 0" / "return 1" exits of
    recover_from_read_error(), recover_from_processor_error() and
    mca_try_to_recover() with calls to those helpers, so every exit logs the
    reason for its verdict.  The combined "pbci->ib || pbci->cc" test is
    split in two so each condition gets its own message.
  * mca_handler_bh() now calls ia64_mlogbuf_dump() before its printk().
  * Behaviour change: when n_proc_err == 0, mca_try_to_recover() used to
    return 1; it now returns fatal_mca("Weird SAL record"), i.e.
    MCA_NOT_RECOVERED.

diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 37c88eb55873..a45009d2bc90 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -8,7 +8,6 @@
  * Copyright (C) 2005 Keith Owens
  * Copyright (C) 2006 Russ Anderson
  */
-#include
 #include
 #include
 #include
@@ -62,6 +61,11 @@ typedef enum {
 	ISOLATE_NONE
 } isolate_status_t;
 
+typedef enum {
+	MCA_NOT_RECOVERED = 0,
+	MCA_RECOVERED = 1
+} recovery_status_t;
+
 /*
  * This pool keeps pointers to the section part of SAL error record
  */
@@ -71,6 +75,34 @@ static struct {
 	int max_idx; /* Maximum index of section pointer list pool */
 } slidx_pool;
 
+static int
+fatal_mca(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
+
+	return MCA_NOT_RECOVERED;
+}
+
+static int
+mca_recovered(const char *fmt, ...)
+{
+	va_list args;
+	char buf[256];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+	ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
+
+	return MCA_RECOVERED;
+}
+
 /**
  * mca_page_isolate - isolate a poisoned page in order not to use it later
  * @paddr: poisoned memory location
@@ -124,6 +156,7 @@ mca_page_isolate(unsigned long paddr)
 void
 mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
 {
+	ia64_mlogbuf_dump();
 	printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
 		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
 		raw_smp_processor_id(), current->pid, current->uid,
@@ -424,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx,
 
 	/* Is target address valid? */
 	if (!pbci->tv)
-		return 0;
+		return fatal_mca("target address not valid");
 
 	/*
 	 * cpu read or memory-mapped io read
@@ -442,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx,
 
 	/* Is minstate valid? */
 	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
-		return 0;
+		return fatal_mca("minstate not valid");
 	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
 	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
 
@@ -476,12 +509,14 @@ recover_from_read_error(slidx_table_t *slidx,
 			psr2->bn = 1;
 			psr2->i = 0;
 
-			return 1;
+			return mca_recovered("user memory corruption. "
+				"kill affected process - recovered.");
 		}
 
 	}
 
-	return 0;
+	return fatal_mca("kernel context not recovered, iip 0x%lx",
+		pmsa->pmsa_iip);
 }
 
 /**
@@ -567,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 	 * The machine check is corrected.
 	 */
 	if (psp->cm == 1)
-		return 1;
+		return mca_recovered("machine check is already corrected.");
 
 	/*
 	 * The error was not contained. Software must be reset.
 	 */
 	if (psp->us || psp->ci == 0)
-		return 0;
+		return fatal_mca("error not contained");
 
 	/*
 	 * The cache check and bus check bits have four possible states
@@ -584,20 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 	 * 1 1 Memory error, attempt recovery
 	 */
 	if (psp->bc == 0 || pbci == NULL)
-		return 0;
+		return fatal_mca("No bus check");
 
 	/*
 	 * Sorry, we cannot handle so many.
 	 */
 	if (peidx_bus_check_num(peidx) > 1)
-		return 0;
+		return fatal_mca("Too many bus checks");
 	/*
 	 * Well, here is only one bus error.
 	 */
-	if (pbci->ib || pbci->cc)
-		return 0;
+	if (pbci->ib)
+		return fatal_mca("Internal Bus error");
+	if (pbci->cc)
+		return fatal_mca("Cache-cache error");
 	if (pbci->eb && pbci->bsi > 0)
-		return 0;
+		return fatal_mca("External bus check fatal status");
 
 	/*
 	 * This is a local MCA and estimated as recoverble external bus error.
@@ -609,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
 	/*
 	 * On account of strange SAL error record, we cannot recover.
 	 */
-	return 0;
+	return fatal_mca("Strange SAL record");
 }
 
 /**
@@ -638,12 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
 	/* Now, OS can recover when there is one processor error section */
 	if (n_proc_err > 1)
-		return 0;
-	else if (n_proc_err == 0) {
-		/* Weird SAL record ... We need not to recover */
-
-		return 1;
-	}
+		return fatal_mca("Too Many Errors");
+	else if (n_proc_err == 0)
+		/* Weird SAL record ... We can't do anything */
+		return fatal_mca("Weird SAL record");
 
 	/* Make index of processor error section */
 	mca_make_peidx((sal_log_processor_info_t*)
@@ -654,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
 
 	/* Check whether MCA is global or not */
 	if (is_mca_global(&peidx, &pbci, sos))
-		return 0;
+		return fatal_mca("global MCA");
 
 	/* Try to recover a processor error */
 	return recover_from_processor_error(platform_err, &slidx, &peidx,