// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
#include <linux/of.h>

#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

#include "setup.h"

static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);

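/*
 * Notifiers on this chain are called from process context for each queued
 * UE event, before any memory-failure handling.
 */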
int mce_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);

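/*
 * Copy the type-specific error details from the decoded mce_error_info
 * into the event being saved.
 */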
static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

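/*
 * Get queued events processed once we are out of the machine check
 * handler: raise irq work via the decrementer and note in the paca
 * that MCE irq work is pending.
 */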
void mce_irq_work_queue(void)
{
	/* Raise decrementer interrupt */
	arch_irq_work_raise();
	set_mce_pending_irq_work();
}

/*
 * Decode and save high level MCE information into the per cpu buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = local_paca->mce_info->mce_nest_count++;
	struct machine_check_event *mce;

	mce = &local_paca->mce_info->mce_event[index];
	/*
	 * Return if we don't have enough space to log the mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);
	if (mce->error_type == MCE_ERROR_TYPE_UE)
		mce->u.ue_error.ignore_event = mce_err->ignore_event;

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform specific machine check
 * handle routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = local_paca->mce_info->mce_nest_count - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = &local_paca->mce_info->mce_event[index];
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		local_paca->mce_info->mce_nest_count--;

	return ret;
}

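/* Drop the most recently saved event without copying it out. */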
void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

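/* Kick off process-context handling of the queued UE events. */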
static void machine_check_ue_work(void)
{
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE UE event so that it can be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = local_paca->mce_info->mce_ue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_ue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
	       evt, sizeof(*evt));

	/* Queue work to process this event later. */
	mce_irq_work_queue();
}

/*
 * Queue up the MCE event so that it can be logged and reported later,
 * once we are out of the exception context.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = local_paca->mce_info->mce_queue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_queue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_event_queue[index],
	       &evt, sizeof(evt));

	mce_irq_work_queue();
}

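/*
 * If the UE hit a kernel address that has an exception-table fixup,
 * redirect execution to the fixup and mark the event to be ignored:
 * the fixup handler will do the error handling and reporting.
 */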
void mce_common_process_ue(struct pt_regs *regs,
			   struct mce_error_info *mce_err)
{
	const struct exception_table_entry *entry;

	entry = search_kernel_exception_table(regs->nip);
	if (entry) {
		mce_err->ignore_event = true;
		regs_set_return_ip(regs, extable_fixup(entry));
	}
}

/*
 * Process pending MCE UE events from the MCE UE event queue. This function
 * runs from the work queue, in process context.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (local_paca->mce_info->mce_ue_count > 0) {
		index = local_paca->mce_info->mce_ue_count - 1;
		evt = &local_paca->mce_info->mce_ue_event_queue[index];
		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but oh well.
		 *
		 * Don't report this machine check if the caller has asked
		 * us to ignore the event; it has a fixup handler which
		 * will do the appropriate error handling and reporting.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.ignore_event) {
				local_paca->mce_info->mce_ue_count--;
				continue;
			}

			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else {
				pr_warn("Failed to identify bad address from where the uncorrectable error (UE) was generated\n");
			}
		}
#endif
		local_paca->mce_info->mce_ue_count--;
	}
}

/*
 * Process pending MCE events from the MCE event queue. This function runs
 * from mce_run_irq_context_handlers() once the MCE irq work is processed.
 */
static void machine_check_process_queued_event(void)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (local_paca->mce_info->mce_queue_count > 0) {
		index = local_paca->mce_info->mce_queue_count - 1;
		evt = &local_paca->mce_info->mce_event_queue[index];

		if (evt->error_type == MCE_ERROR_TYPE_UE &&
		    evt->u.ue_error.ignore_event) {
			local_paca->mce_info->mce_queue_count--;
			continue;
		}
		machine_check_print_event_info(evt, false, false);
		local_paca->mce_info->mce_queue_count--;
	}
}

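/*
 * The pending flag lives in the paca so that it can be set from the
 * real-mode machine check handler.
 */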
void set_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 1;
}

void clear_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 0;
}

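/*
 * Run the work that the real-mode MCE handler deferred: log platform
 * errors, report queued events and kick off UE handling.
 */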
void mce_run_irq_context_handlers(void)
{
	if (unlikely(local_paca->mce_pending_irq_work)) {
		if (ppc_md.machine_check_log_err)
			ppc_md.machine_check_log_err();
		machine_check_process_queued_event();
		machine_check_ue_work();
		clear_mce_pending_irq_work();
	}
}

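/*
 * Decode one machine check event and print a human-readable report for it.
 */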
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type, *initiator;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
		"scv invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign/control memory)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign/control memory)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign/control memory)",
		"Load/Store (foreign/control memory)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->initiator) {
	case MCE_INITIATOR_CPU:
		initiator = "CPU";
		break;
	case MCE_INITIATOR_PCI:
		initiator = "PCI";
		break;
	case MCE_INITIATOR_ISA:
		initiator = "ISA";
		break;
	case MCE_INITIATOR_MEMORY:
		initiator = "Memory";
		break;
	case MCE_INITIATOR_POWERMGM:
		initiator = "Power Management";
		break;
	case MCE_INITIATOR_UNKNOWN:
	default:
		initiator = "Unknown";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	case MCE_ERROR_TYPE_DCACHE:
		err_type = "D-Cache";
		subtype = "Unknown";
		break;
	case MCE_ERROR_TYPE_ICACHE:
		err_type = "I-Cache";
		subtype = "Unknown";
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_64S_HASH_MMU
	/* Display faulty slb contents for SLB errors. */
	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain srr0 and srr1.
 */
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);

	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * To avoid problems with soft-disable, we only do the
		 * emulation if we are coming from host user space.
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values:
 * 0 means no further handling is required
 * 1 means further handling is required
 */
DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
	int ret;

	local_paca->hmi_irqs++;

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}

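/*
 * Allocate the per-CPU MCE event buffers at boot. The allocations are
 * kept below min(ppc64_bolted_size(), ppc64_rma_size) so that the
 * buffers remain accessible from the real-mode machine check handler.
 */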
void __init mce_init(void)
{
	struct mce_info *mce_info;
	u64 limit;
	int i;

	limit = min(ppc64_bolted_size(), ppc64_rma_size);
	for_each_possible_cpu(i) {
		mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
						  __alignof__(*mce_info),
						  MEMBLOCK_LOW_LIMIT,
						  limit, early_cpu_to_node(i));
		if (!mce_info)
			goto err;
		paca_ptrs[i]->mce_info = mce_info;
	}
	return;
err:
	panic("Failed to allocate memory for MCE event data\n");
}