// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
#include <linux/of.h>

#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

#include "setup.h"

static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);

int mce_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);

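/*
 * Copy the type-specific error subtype from the decoded mce_error_info
 * into the saved machine_check_event.
 */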
static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

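/*
 * Raise irq work via the decrementer and flag pending MCE work in the
 * PACA so that mce_run_irq_context_handlers() picks it up once interrupts
 * are replayed.
 */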
void mce_irq_work_queue(void)
{
	/* Raise decrementer interrupt */
	arch_irq_work_raise();
	set_mce_pending_irq_work();
}

/*
 * Decode and save high level MCE information into the per-CPU buffer,
 * which is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = local_paca->mce_info->mce_nest_count++;
	struct machine_check_event *mce;

	mce = &local_paca->mce_info->mce_event[index];
	/*
	 * Return if we don't have enough space to log the MCE event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's OK;
	 * the check below stops buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);
	if (mce->error_type == MCE_ERROR_TYPE_UE)
		mce->u.ue_error.ignore_event = mce_err->ignore_event;

	/*
	 * Raise irq work so that we don't miss logging the error for
	 * unrecoverable errors.
	 */
	if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED)
		mce_irq_work_queue();

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once the event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() is called by platform-specific machine check handlers
 * and by KVM.
 * When get_mce_event() is called we are still in interrupt context, and
 * preemption cannot occur until the interrupt returns.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = local_paca->mce_info->mce_nest_count - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = &local_paca->mce_info->mce_event[index];
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		local_paca->mce_info->mce_nest_count--;

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

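/* Punt UE event processing to process context via the workqueue. */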
static void machine_check_ue_work(void)
{
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the UE event on the per-CPU UE event queue so it can be
 * handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = local_paca->mce_info->mce_ue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_ue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
	       evt, sizeof(*evt));
}

/*
 * Queue up the MCE event on the per-CPU event queue so it can be
 * logged later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = local_paca->mce_info->mce_queue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_queue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_event_queue[index],
	       &evt, sizeof(evt));

	mce_irq_work_queue();
}

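/*
 * If the faulting kernel NIP has an exception table fixup, redirect the
 * return address to the fixup handler and mark the event so it is not
 * reported separately.
 */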
void mce_common_process_ue(struct pt_regs *regs,
			   struct mce_error_info *mce_err)
{
	const struct exception_table_entry *entry;

	entry = search_kernel_exception_table(regs->nip);
	if (entry) {
		mce_err->ignore_event = true;
		regs_set_return_ip(regs, extable_fixup(entry));
	}
}

/*
 * Process pending UE events from the per-CPU UE event queue. This runs
 * from the workqueue in process context.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (local_paca->mce_info->mce_ue_count > 0) {
		index = local_paca->mce_info->mce_ue_count - 1;
		evt = &local_paca->mce_info->mce_ue_event_queue[index];
		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but oh well.
		 *
		 * Don't report this machine check because the caller has
		 * asked us to ignore the event; it has a fixup handler
		 * which will do the appropriate error handling and
		 * reporting.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.ignore_event) {
				local_paca->mce_info->mce_ue_count--;
				continue;
			}

			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		local_paca->mce_info->mce_ue_count--;
	}
}

/*
 * Process pending MCE events from the per-CPU MCE event queue. This is
 * called from mce_run_irq_context_handlers() when irq work runs.
 */
static void machine_check_process_queued_event(void)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (local_paca->mce_info->mce_queue_count > 0) {
		index = local_paca->mce_info->mce_queue_count - 1;
		evt = &local_paca->mce_info->mce_event_queue[index];

		if (evt->error_type == MCE_ERROR_TYPE_UE &&
		    evt->u.ue_error.ignore_event) {
			local_paca->mce_info->mce_queue_count--;
			continue;
		}
		machine_check_print_event_info(evt, false, false);
		local_paca->mce_info->mce_queue_count--;
	}
}

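/*
 * mce_pending_irq_work in the PACA records that MCE irq work has been
 * raised; mce_run_irq_context_handlers() checks and clears it.
 */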
void set_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 1;
}

void clear_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 0;
}

void mce_run_irq_context_handlers(void)
{
	if (unlikely(local_paca->mce_pending_irq_work)) {
		if (ppc_md.machine_check_log_err)
			ppc_md.machine_check_log_err();
		machine_check_process_queued_event();
		machine_check_ue_work();
		clear_mce_pending_irq_work();
	}
}

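/*
 * Decode a saved MCE event and print it to the console at a log level
 * matching its severity.
 */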
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type, *initiator;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
		"scv invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign/control memory)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign/control memory)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign/control memory)",
		"Load/Store (foreign/control memory)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch(evt->initiator) {
	case MCE_INITIATOR_CPU:
		initiator = "CPU";
		break;
	case MCE_INITIATOR_PCI:
		initiator = "PCI";
		break;
	case MCE_INITIATOR_ISA:
		initiator = "ISA";
		break;
	case MCE_INITIATOR_MEMORY:
		initiator = "Memory";
		break;
	case MCE_INITIATOR_POWERMGM:
		initiator = "Power Management";
		break;
	case MCE_INITIATOR_UNKNOWN:
	default:
		initiator = "Unknown";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	case MCE_ERROR_TYPE_DCACHE:
		err_type = "D-Cache";
		subtype = "Unknown";
		break;
	case MCE_ERROR_TYPE_ICACHE:
		err_type = "I-Cache";
		subtype = "Unknown";
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_64S_HASH_MMU
	/* Display faulty slb contents for SLB errors. */
	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain srr0 and srr1.
 */
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if the platform is capable of handling the machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);

	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not an HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * To avoid problems with soft-disable we only do the
		 * emulation if we are coming from host user space.
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled.
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values:
 *  0 or 1 from hmi_handle_debugtrig() if it recognised the cause,
 *  otherwise 1 once the platform handler has run and the timebase
 *  has resynchronised.
 */
DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
	int ret;

	local_paca->hmi_irqs++;

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}

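/*
 * Allocate per-CPU MCE event buffers. The allocation is kept below the
 * bolted-SLB/RMA limit so it stays accessible to the early, real-mode
 * machine check handler.
 */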
void __init mce_init(void)
{
	struct mce_info *mce_info;
	u64 limit;
	int i;

	limit = min(ppc64_bolted_size(), ppc64_rma_size);
	for_each_possible_cpu(i) {
		mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
						  __alignof__(*mce_info),
						  MEMBLOCK_LOW_LIMIT,
						  limit, early_cpu_to_node(i));
		if (!mce_info)
			goto err;
		paca_ptrs[i]->mce_info = mce_info;
	}
	return;
err:
	panic("Failed to allocate memory for MCE event data\n");
}