arch/powerpc/platforms/pseries/ras.c

   1 /*
   2  * Copyright (C) 2001 Dave Engebretsen IBM Corporation
   3  *
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  17  */
  18
  19 #include <linux/sched.h>
  20 #include <linux/interrupt.h>
  21 #include <linux/irq.h>
  22 #include <linux/of.h>
  23 #include <linux/fs.h>
  24 #include <linux/reboot.h>
  25 #include <linux/irq_work.h>
  26
  27 #include <asm/machdep.h>
  28 #include <asm/rtas.h>
  29 #include <asm/firmware.h>
  30 #include <asm/mce.h>
  31
  32 #include "pseries.h"
  33
  34 static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
  35 static DEFINE_SPINLOCK(ras_log_buf_lock);
  36
  37 static int ras_check_exception_token;
  38
  39 static void mce_process_errlog_event(struct irq_work *work);
  40 static struct irq_work mce_errlog_process_work = {
  41         .func = mce_process_errlog_event,
  42 };
  43
  44 #define EPOW_SENSOR_TOKEN       9
  45 #define EPOW_SENSOR_INDEX       0
  46
  47 /* EPOW events counter variable */
  48 static int num_epow_events;
  49
  50 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
  51 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
  52 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
  53
/*
 * RTAS pseries MCE errorlog section.
 *
 * This is the layout the MCE code in this file overlays on
 * pseries_errorlog->data. The structure is packed and its multi-byte
 * fields are declared big-endian; readers convert with be64_to_cpu().
 */
struct pseries_mc_errorlog {
        __be32  fru_id;
        __be32  proc_id;
        /* One of the MC_ERROR_TYPE_* values. */
        u8      error_type;
        /*
         * sub_err_type (1 byte). The bit fields depend on error_type.
         *
         * Bits are numbered MSB0, so bit 0 below is mask 0x80 and
         * bit 7 is mask 0x01.
         *
         *   MSB0
         *   |
         *   V
         *   01234567
         *   XXXXXXXX
         *
         * For error_type == MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Permanent or Transient UE.
         *    X         1: Effective address provided.
         *     X        1: Logical address provided.
         *      XX      2: Reserved.
         *        XXX   3: Type of UE error.
         *
         * For error_type != MC_ERROR_TYPE_UE
         *   XXXXXXXX
         *   X          1: Effective address provided.
         *    XXXXX     5: Reserved.
         *         XX   2: Type of SLB/ERAT/TLB error.
         */
        u8      sub_err_type;
        u8      reserved_1[6];
        /* Only meaningful when the "effective address provided" bit is set. */
        __be64  effective_address;
        __be64  logical_address;
} __packed;
  87
/*
 * RTAS pseries MCE error types: values of pseries_mc_errorlog.error_type.
 * The values are not contiguous; 0x03 and 0x06 have no name here.
 */
#define MC_ERROR_TYPE_UE                0x00
#define MC_ERROR_TYPE_SLB               0x01
#define MC_ERROR_TYPE_ERAT              0x02
#define MC_ERROR_TYPE_TLB               0x04
#define MC_ERROR_TYPE_D_CACHE           0x05
#define MC_ERROR_TYPE_I_CACHE           0x07

/*
 * RTAS pseries MCE error sub types, as returned by
 * rtas_mc_error_sub_type().
 */

/* UE sub types (low three bits of sub_err_type). */
#define MC_ERROR_UE_INDETERMINATE               0
#define MC_ERROR_UE_IFETCH                      1
#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH      2
#define MC_ERROR_UE_LOAD_STORE                  3
#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE  4

/* SLB sub types (low two bits of sub_err_type); numbering starts at 0. */
#define MC_ERROR_SLB_PARITY             0
#define MC_ERROR_SLB_MULTIHIT           1
#define MC_ERROR_SLB_INDETERMINATE      2

/* ERAT sub types (low two bits of sub_err_type); numbering starts at 1. */
#define MC_ERROR_ERAT_PARITY            1
#define MC_ERROR_ERAT_MULTIHIT          2
#define MC_ERROR_ERAT_INDETERMINATE     3

/* TLB sub types (low two bits of sub_err_type); numbering starts at 1. */
#define MC_ERROR_TLB_PARITY             1
#define MC_ERROR_TLB_MULTIHIT           2
#define MC_ERROR_TLB_INDETERMINATE      3
 114
 115 static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
 116 {
 117         switch (mlog->error_type) {
 118         case    MC_ERROR_TYPE_UE:
 119                 return (mlog->sub_err_type & 0x07);
 120         case    MC_ERROR_TYPE_SLB:
 121         case    MC_ERROR_TYPE_ERAT:
 122         case    MC_ERROR_TYPE_TLB:
 123                 return (mlog->sub_err_type & 0x03);
 124         default:
 125                 return 0;
 126         }
 127 }
 128
 129 static
 130 inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
 131 {
 132         __be64 addr = 0;
 133
 134         switch (mlog->error_type) {
 135         case    MC_ERROR_TYPE_UE:
 136                 if (mlog->sub_err_type & 0x40)
 137                         addr = mlog->effective_address;
 138                 break;
 139         case    MC_ERROR_TYPE_SLB:
 140         case    MC_ERROR_TYPE_ERAT:
 141         case    MC_ERROR_TYPE_TLB:
 142                 if (mlog->sub_err_type & 0x80)
 143                         addr = mlog->effective_address;
 144         default:
 145                 break;
 146         }
 147         return be64_to_cpu(addr);
 148 }
 149
 150 /*
 151  * Enable the hotplug interrupt late because processing them may touch other
 152  * devices or systems (e.g. hugepages) that have not been initialized at the
 153  * subsys stage.
 154  */
 155 int __init init_ras_hotplug_IRQ(void)
 156 {
 157         struct device_node *np;
 158
 159         /* Hotplug Events */
 160         np = of_find_node_by_path("/event-sources/hot-plug-events");
 161         if (np != NULL) {
 162                 if (dlpar_workqueue_init() == 0)
 163                         request_event_sources_irqs(np, ras_hotplug_interrupt,
 164                                                    "RAS_HOTPLUG");
 165                 of_node_put(np);
 166         }
 167
 168         return 0;
 169 }
 170 machine_late_initcall(pseries, init_ras_hotplug_IRQ);
 171
 172 /*
 173  * Initialize handlers for the set of interrupts caused by hardware errors
 174  * and power system events.
 175  */
 176 static int __init init_ras_IRQ(void)
 177 {
 178         struct device_node *np;
 179
 180         ras_check_exception_token = rtas_token("check-exception");
 181
 182         /* Internal Errors */
 183         np = of_find_node_by_path("/event-sources/internal-errors");
 184         if (np != NULL) {
 185                 request_event_sources_irqs(np, ras_error_interrupt,
 186                                            "RAS_ERROR");
 187                 of_node_put(np);
 188         }
 189
 190         /* EPOW Events */
 191         np = of_find_node_by_path("/event-sources/epow-events");
 192         if (np != NULL) {
 193                 request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
 194                 of_node_put(np);
 195         }
 196
 197         return 0;
 198 }
 199 machine_subsys_initcall(pseries, init_ras_IRQ);
 200
 201 #define EPOW_SHUTDOWN_NORMAL                            1
 202 #define EPOW_SHUTDOWN_ON_UPS                            2
 203 #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS        3
 204 #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH      4
 205
 206 static void handle_system_shutdown(char event_modifier)
 207 {
 208         switch (event_modifier) {
 209         case EPOW_SHUTDOWN_NORMAL:
 210                 pr_emerg("Power off requested\n");
 211                 orderly_poweroff(true);
 212                 break;
 213
 214         case EPOW_SHUTDOWN_ON_UPS:
 215                 pr_emerg("Loss of system power detected. System is running on"
 216                          " UPS/battery. Check RTAS error log for details\n");
 217                 orderly_poweroff(true);
 218                 break;
 219
 220         case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
 221                 pr_emerg("Loss of system critical functions detected. Check"
 222                          " RTAS error log for details\n");
 223                 orderly_poweroff(true);
 224                 break;
 225
 226         case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
 227                 pr_emerg("High ambient temperature detected. Check RTAS"
 228                          " error log for details\n");
 229                 orderly_poweroff(true);
 230                 break;
 231
 232         default:
 233                 pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
 234                         event_modifier);
 235         }
 236 }
 237
 238 struct epow_errorlog {
 239         unsigned char sensor_value;
 240         unsigned char event_modifier;
 241         unsigned char extended_modifier;
 242         unsigned char reserved;
 243         unsigned char platform_reason;
 244 };
 245
 246 #define EPOW_RESET                      0
 247 #define EPOW_WARN_COOLING               1
 248 #define EPOW_WARN_POWER                 2
 249 #define EPOW_SYSTEM_SHUTDOWN            3
 250 #define EPOW_SYSTEM_HALT                4
 251 #define EPOW_MAIN_ENCLOSURE             5
 252 #define EPOW_POWER_OFF                  7
 253
 254 static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 255 {
 256         struct pseries_errorlog *pseries_log;
 257         struct epow_errorlog *epow_log;
 258         char action_code;
 259         char modifier;
 260
 261         pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
 262         if (pseries_log == NULL)
 263                 return;
 264
 265         epow_log = (struct epow_errorlog *)pseries_log->data;
 266         action_code = epow_log->sensor_value & 0xF;     /* bottom 4 bits */
 267         modifier = epow_log->event_modifier & 0xF;      /* bottom 4 bits */
 268
 269         switch (action_code) {
 270         case EPOW_RESET:
 271                 if (num_epow_events) {
 272                         pr_info("Non critical power/cooling issue cleared\n");
 273                         num_epow_events--;
 274                 }
 275                 break;
 276
 277         case EPOW_WARN_COOLING:
 278                 pr_info("Non-critical cooling issue detected. Check RTAS error"
 279                         " log for details\n");
 280                 break;
 281
 282         case EPOW_WARN_POWER:
 283                 pr_info("Non-critical power issue detected. Check RTAS error"
 284                         " log for details\n");
 285                 break;
 286
 287         case EPOW_SYSTEM_SHUTDOWN:
 288                 handle_system_shutdown(epow_log->event_modifier);
 289                 break;
 290
 291         case EPOW_SYSTEM_HALT:
 292                 pr_emerg("Critical power/cooling issue detected. Check RTAS"
 293                          " error log for details. Powering off.\n");
 294                 orderly_poweroff(true);
 295                 break;
 296
 297         case EPOW_MAIN_ENCLOSURE:
 298         case EPOW_POWER_OFF:
 299                 pr_emerg("System about to lose power. Check RTAS error log "
 300                          " for details. Powering off immediately.\n");
 301                 emergency_sync();
 302                 kernel_power_off();
 303                 break;
 304
 305         default:
 306                 pr_err("Unknown power/cooling event (action code  = %d)\n",
 307                         action_code);
 308         }
 309
 310         /* Increment epow events counter variable */
 311         if (action_code != EPOW_RESET)
 312                 num_epow_events++;
 313 }
 314
 315 static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
 316 {
 317         struct pseries_errorlog *pseries_log;
 318         struct pseries_hp_errorlog *hp_elog;
 319
 320         spin_lock(&ras_log_buf_lock);
 321
 322         rtas_call(ras_check_exception_token, 6, 1, NULL,
 323                   RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
 324                   RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
 325                   rtas_get_error_log_max());
 326
 327         pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
 328                                            PSERIES_ELOG_SECT_ID_HOTPLUG);
 329         hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
 330
 331         /*
 332          * Since PCI hotplug is not currently supported on pseries, put PCI
 333          * hotplug events on the ras_log_buf to be handled by rtas_errd.
 334          */
 335         if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
 336             hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
 337             hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
 338                 queue_hotplug_event(hp_elog);
 339         else
 340                 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 341
 342         spin_unlock(&ras_log_buf_lock);
 343         return IRQ_HANDLED;
 344 }
 345
 346 /* Handle environmental and power warning (EPOW) interrupts. */
 347 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 348 {
 349         int status;
 350         int state;
 351         int critical;
 352
 353         status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX,
 354                                       &state);
 355
 356         if (state > 3)
 357                 critical = 1;           /* Time Critical */
 358         else
 359                 critical = 0;
 360
 361         spin_lock(&ras_log_buf_lock);
 362
 363         status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 364                            RTAS_VECTOR_EXTERNAL_INTERRUPT,
 365                            virq_to_hw(irq),
 366                            RTAS_EPOW_WARNING,
 367                            critical, __pa(&ras_log_buf),
 368                                 rtas_get_error_log_max());
 369
 370         log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 371
 372         rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
 373
 374         spin_unlock(&ras_log_buf_lock);
 375         return IRQ_HANDLED;
 376 }
 377
 378 /*
 379  * Handle hardware error interrupts.
 380  *
 381  * RTAS check-exception is called to collect data on the exception.  If
 382  * the error is deemed recoverable, we log a warning and return.
 383  * For nonrecoverable errors, an error is logged and we stop all processing
 384  * as quickly as possible in order to prevent propagation of the failure.
 385  */
 386 static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 387 {
 388         struct rtas_error_log *rtas_elog;
 389         int status;
 390         int fatal;
 391
 392         spin_lock(&ras_log_buf_lock);
 393
 394         status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 395                            RTAS_VECTOR_EXTERNAL_INTERRUPT,
 396                            virq_to_hw(irq),
 397                            RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
 398                            __pa(&ras_log_buf),
 399                                 rtas_get_error_log_max());
 400
 401         rtas_elog = (struct rtas_error_log *)ras_log_buf;
 402
 403         if (status == 0 &&
 404             rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
 405                 fatal = 1;
 406         else
 407                 fatal = 0;
 408
 409         /* format and print the extended information */
 410         log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
 411
 412         if (fatal) {
 413                 pr_emerg("Fatal hardware error detected. Check RTAS error"
 414                          " log for details. Powering off immediately\n");
 415                 emergency_sync();
 416                 kernel_power_off();
 417         } else {
 418                 pr_err("Recoverable hardware error detected\n");
 419         }
 420
 421         spin_unlock(&ras_log_buf_lock);
 422         return IRQ_HANDLED;
 423 }
 424
 425 /*
 426  * Some versions of FWNMI place the buffer inside the 4kB page starting at
 427  * 0x7000. Other versions place it inside the rtas buffer. We check both.
 428  */
 429 #define VALID_FWNMI_BUFFER(A) \
 430         ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
 431         (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
 432
 433 static inline struct rtas_error_log *fwnmi_get_errlog(void)
 434 {
 435         return (struct rtas_error_log *)local_paca->mce_data_buf;
 436 }
 437
 438 /*
 439  * Get the error information for errors coming through the
 440  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
 441  * the actual r3 if possible, and a ptr to the error log entry
 442  * will be returned if found.
 443  *
 444  * Use one buffer mce_data_buf per cpu to store RTAS error.
 445  *
 446  * The mce_data_buf does not have any locks or protection around it,
 447  * if a second machine check comes in, or a system reset is done
 448  * before we have logged the error, then we will get corruption in the
 449  * error log.  This is preferable over holding off on calling
 450  * ibm,nmi-interlock which would result in us checkstopping if a
 451  * second machine check did come in.
 452  */
 453 static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 454 {
 455         unsigned long *savep;
 456         struct rtas_error_log *h;
 457
 458         /* Mask top two bits */
 459         regs->gpr[3] &= ~(0x3UL << 62);
 460
 461         if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
 462                 printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 463                 return NULL;
 464         }
 465
 466         savep = __va(regs->gpr[3]);
 467         regs->gpr[3] = be64_to_cpu(savep[0]);   /* restore original r3 */
 468
 469         h = (struct rtas_error_log *)&savep[1];
 470         /* Use the per cpu buffer from paca to store rtas error log */
 471         memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
 472         if (!rtas_error_extended(h)) {
 473                 memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
 474         } else {
 475                 int len, error_log_length;
 476
 477                 error_log_length = 8 + rtas_error_extended_log_length(h);
 478                 len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
 479                 memcpy(local_paca->mce_data_buf, h, len);
 480         }
 481
 482         return (struct rtas_error_log *)local_paca->mce_data_buf;
 483 }
 484
 485 /* Call this when done with the data returned by FWNMI_get_errinfo.
 486  * It will release the saved data area for other CPUs in the
 487  * partition to receive FWNMI errors.
 488  */
 489 static void fwnmi_release_errinfo(void)
 490 {
 491         int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
 492         if (ret != 0)
 493                 printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
 494 }
 495
 496 int pSeries_system_reset_exception(struct pt_regs *regs)
 497 {
 498 #ifdef __LITTLE_ENDIAN__
 499         /*
 500          * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
 501          * to detect the bad SRR1 pattern here. Flip the NIP back to correct
 502          * endian for reporting purposes. Unfortunately the MSR can't be fixed,
 503          * so clear it. It will be missing MSR_RI so we won't try to recover.
 504          */
 505         if ((be64_to_cpu(regs->msr) &
 506                         (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
 507                          MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
 508                 regs->nip = be64_to_cpu((__be64)regs->nip);
 509                 regs->msr = 0;
 510         }
 511 #endif
 512
 513         if (fwnmi_active) {
 514                 struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
 515                 if (errhdr) {
 516                         /* XXX Should look at FWNMI information */
 517                 }
 518                 fwnmi_release_errinfo();
 519         }
 520
 521         if (smp_handle_nmi_ipi(regs))
 522                 return 1;
 523
 524         return 0; /* need to perform reset */
 525 }
 526
 527 #define VAL_TO_STRING(ar, val)  \
 528         (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
 529
 530 static void pseries_print_mce_info(struct pt_regs *regs,
 531                                    struct rtas_error_log *errp)
 532 {
 533         const char *level, *sevstr;
 534         struct pseries_errorlog *pseries_log;
 535         struct pseries_mc_errorlog *mce_log;
 536         u8 error_type, err_sub_type;
 537         u64 addr;
 538         u8 initiator = rtas_error_initiator(errp);
 539         int disposition = rtas_error_disposition(errp);
 540
 541         static const char * const initiators[] = {
 542                 "Unknown",
 543                 "CPU",
 544                 "PCI",
 545                 "ISA",
 546                 "Memory",
 547                 "Power Mgmt",
 548         };
 549         static const char * const mc_err_types[] = {
 550                 "UE",
 551                 "SLB",
 552                 "ERAT",
 553                 "TLB",
 554                 "D-Cache",
 555                 "Unknown",
 556                 "I-Cache",
 557         };
 558         static const char * const mc_ue_types[] = {
 559                 "Indeterminate",
 560                 "Instruction fetch",
 561                 "Page table walk ifetch",
 562                 "Load/Store",
 563                 "Page table walk Load/Store",
 564         };
 565
 566         /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
 567         static const char * const mc_slb_types[] = {
 568                 "Parity",
 569                 "Multihit",
 570                 "Indeterminate",
 571         };
 572
 573         /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
 574         static const char * const mc_soft_types[] = {
 575                 "Unknown",
 576                 "Parity",
 577                 "Multihit",
 578                 "Indeterminate",
 579         };
 580
 581         if (!rtas_error_extended(errp)) {
 582                 pr_err("Machine check interrupt: Missing extended error log\n");
 583                 return;
 584         }
 585
 586         pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
 587         if (pseries_log == NULL)
 588                 return;
 589
 590         mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
 591
 592         error_type = mce_log->error_type;
 593         err_sub_type = rtas_mc_error_sub_type(mce_log);
 594
 595         switch (rtas_error_severity(errp)) {
 596         case RTAS_SEVERITY_NO_ERROR:
 597                 level = KERN_INFO;
 598                 sevstr = "Harmless";
 599                 break;
 600         case RTAS_SEVERITY_WARNING:
 601                 level = KERN_WARNING;
 602                 sevstr = "";
 603                 break;
 604         case RTAS_SEVERITY_ERROR:
 605         case RTAS_SEVERITY_ERROR_SYNC:
 606                 level = KERN_ERR;
 607                 sevstr = "Severe";
 608                 break;
 609         case RTAS_SEVERITY_FATAL:
 610         default:
 611                 level = KERN_ERR;
 612                 sevstr = "Fatal";
 613                 break;
 614         }
 615
 616 #ifdef CONFIG_PPC_BOOK3S_64
 617         /* Display faulty slb contents for SLB errors. */
 618         if (error_type == MC_ERROR_TYPE_SLB)
 619                 slb_dump_contents(local_paca->mce_faulty_slbs);
 620 #endif
 621
 622         printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
 623                disposition == RTAS_DISP_FULLY_RECOVERED ?
 624                "Recovered" : "Not recovered");
 625         if (user_mode(regs)) {
 626                 printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
 627                        regs->nip, current->pid, current->comm);
 628         } else {
 629                 printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
 630                        (void *)regs->nip);
 631         }
 632         printk("%s  Initiator: %s\n", level,
 633                VAL_TO_STRING(initiators, initiator));
 634
 635         switch (error_type) {
 636         case MC_ERROR_TYPE_UE:
 637                 printk("%s  Error type: %s [%s]\n", level,
 638                        VAL_TO_STRING(mc_err_types, error_type),
 639                        VAL_TO_STRING(mc_ue_types, err_sub_type));
 640                 break;
 641         case MC_ERROR_TYPE_SLB:
 642                 printk("%s  Error type: %s [%s]\n", level,
 643                        VAL_TO_STRING(mc_err_types, error_type),
 644                        VAL_TO_STRING(mc_slb_types, err_sub_type));
 645                 break;
 646         case MC_ERROR_TYPE_ERAT:
 647         case MC_ERROR_TYPE_TLB:
 648                 printk("%s  Error type: %s [%s]\n", level,
 649                        VAL_TO_STRING(mc_err_types, error_type),
 650                        VAL_TO_STRING(mc_soft_types, err_sub_type));
 651                 break;
 652         default:
 653                 printk("%s  Error type: %s\n", level,
 654                        VAL_TO_STRING(mc_err_types, error_type));
 655                 break;
 656         }
 657
 658         addr = rtas_mc_get_effective_addr(mce_log);
 659         if (addr)
 660                 printk("%s    Effective address: %016llx\n", level, addr);
 661 }
 662
 663 static int mce_handle_error(struct rtas_error_log *errp)
 664 {
 665         struct pseries_errorlog *pseries_log;
 666         struct pseries_mc_errorlog *mce_log;
 667         int disposition = rtas_error_disposition(errp);
 668         u8 error_type;
 669
 670         if (!rtas_error_extended(errp))
 671                 goto out;
 672
 673         pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
 674         if (pseries_log == NULL)
 675                 goto out;
 676
 677         mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
 678         error_type = mce_log->error_type;
 679
 680 #ifdef CONFIG_PPC_BOOK3S_64
 681         if (disposition == RTAS_DISP_NOT_RECOVERED) {
 682                 switch (error_type) {
 683                 case    MC_ERROR_TYPE_SLB:
 684                 case    MC_ERROR_TYPE_ERAT:
 685                         /*
 686                          * Store the old slb content in paca before flushing.
 687                          * Print this when we go to virtual mode.
 688                          * There are chances that we may hit MCE again if there
 689                          * is a parity error on the SLB entry we trying to read
 690                          * for saving. Hence limit the slb saving to single
 691                          * level of recursion.
 692                          */
 693                         if (local_paca->in_mce == 1)
 694                                 slb_save_contents(local_paca->mce_faulty_slbs);
 695                         flush_and_reload_slb();
 696                         disposition = RTAS_DISP_FULLY_RECOVERED;
 697                         rtas_set_disposition_recovered(errp);
 698                         break;
 699                 default:
 700                         break;
 701                 }
 702         }
 703 #endif
 704
 705 out:
 706         return disposition;
 707 }
 708
 709 /*
 710  * Process MCE rtas errlog event.
 711  */
 712 static void mce_process_errlog_event(struct irq_work *work)
 713 {
 714         struct rtas_error_log *err;
 715
 716         err = fwnmi_get_errlog();
 717         log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
 718 }
 719
 720 /*
 721  * See if we can recover from a machine check exception.
 722  * This is only called on power4 (or above) and only via
 723  * the Firmware Non-Maskable Interrupts (fwnmi) handler
 724  * which provides the error analysis for us.
 725  *
 726  * Return 1 if corrected (or delivered a signal).
 727  * Return 0 if there is nothing we can do.
 728  */
 729 static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
 730 {
 731         int recovered = 0;
 732         int disposition = rtas_error_disposition(err);
 733
 734         pseries_print_mce_info(regs, err);
 735
 736         if (!(regs->msr & MSR_RI)) {
 737                 /* If MSR_RI isn't set, we cannot recover */
 738                 pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 739                 recovered = 0;
 740
 741         } else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
 742                 /* Platform corrected itself */
 743                 recovered = 1;
 744
 745         } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
 746                 /* Platform corrected itself but could be degraded */
 747                 printk(KERN_ERR "MCE: limited recovery, system may "
 748                        "be degraded\n");
 749                 recovered = 1;
 750
 751         } else if (user_mode(regs) && !is_global_init(current) &&
 752                    rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
 753
 754                 /*
 755                  * If we received a synchronous error when in userspace
 756                  * kill the task. Firmware may report details of the fail
 757                  * asynchronously, so we can't rely on the target and type
 758                  * fields being valid here.
 759                  */
 760                 printk(KERN_ERR "MCE: uncorrectable error, killing task "
 761                        "%s:%d\n", current->comm, current->pid);
 762
 763                 _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 764                 recovered = 1;
 765         }
 766
 767         /* Queue irq work to log this rtas event later. */
 768         irq_work_queue(&mce_errlog_process_work);
 769
 770         return recovered;
 771 }
 772
 773 /*
 774  * Handle a machine check.
 775  *
 776  * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
 777  * should be present.  If so the handler which called us tells us if the
 778  * error was recovered (never true if RI=0).
 779  *
 780  * On hardware prior to Power 4 these exceptions were asynchronous which
 781  * means we can't tell exactly where it occurred and so we can't recover.
 782  */
 783 int pSeries_machine_check_exception(struct pt_regs *regs)
 784 {
 785         struct rtas_error_log *errp;
 786
 787         if (fwnmi_active) {
 788                 fwnmi_release_errinfo();
 789                 errp = fwnmi_get_errlog();
 790                 if (errp && recover_mce(regs, errp))
 791                         return 1;
 792         }
 793
 794         return 0;
 795 }
 796
 797 long pseries_machine_check_realmode(struct pt_regs *regs)
 798 {
 799         struct rtas_error_log *errp;
 800         int disposition;
 801
 802         if (fwnmi_active) {
 803                 errp = fwnmi_get_errinfo(regs);
 804                 /*
 805                  * Call to fwnmi_release_errinfo() in real mode causes kernel
 806                  * to panic. Hence we will call it as soon as we go into
 807                  * virtual mode.
 808                  */
 809                 disposition = mce_handle_error(errp);
 810                 if (disposition == RTAS_DISP_FULLY_RECOVERED)
 811                         return 1;
 812         }
 813
 814         return 0;
 815 }