/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

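/*
 * Illustrative sketch only (not part of this file): a CPU specific
 * machine check handler is assumed to fill a mce_error_info and hand
 * it to save_mce_event() roughly like this. The mce_error_info fields
 * and constants follow asm/mce.h; regs, handled, addr and phys_addr
 * are assumed locals of that handler.
 *
 *	struct mce_error_info mce_err = { 0 };
 *
 *	mce_err.error_type = MCE_ERROR_TYPE_SLB;
 *	mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
 *	mce_err.severity = MCE_SEV_ERROR_SYNC;
 *	mce_err.initiator = MCE_INITIATOR_CPU;
 *	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
 */
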
/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by the platform specific machine check
 * handler routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

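/*
 * Illustrative sketch only (not part of this file): a consumer such as
 * the platform machine check handler is assumed to use the two
 * routines above roughly as follows:
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE))
 *		machine_check_print_event_info(&evt, user_mode(regs));
 *
 * Passing MCE_EVENT_RELEASE frees the per cpu slot immediately and evt
 * is a private copy. A caller that only wants to peek (for example
 * KVM) passes false instead and calls release_mce_event() once the
 * event has been consumed.
 */
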
/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}

/*
 * process pending MCE event from the mce event queue. This function will be
 * called during syscall exit.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * process pending MCE event from the mce event queue. This function will be
 * called during syscall exit.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false);
		__this_cpu_dec(mce_queue_count);
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode)
{
	const char *level, *sevstr, *subtype;
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case MCE_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (user_mode) {
		printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
		       evt->srr0, current->pid, current->comm);
	} else {
		printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
		       (void *)evt->srr0);
	}

	printk("%s Initiator: %s\n", level,
	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		printk("%s Error type: UE [%s]\n", level, subtype);
		if (evt->u.ue_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.ue_error.effective_address);
		if (evt->u.ue_error.physical_address_provided)
			printk("%s Physical address: %016llx\n",
			       level, evt->u.ue_error.physical_address);
		break;
	case MCE_ERROR_TYPE_SLB:
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_USER:
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		printk("%s Error type: User [%s]\n", level, subtype);
		if (evt->u.user_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.user_error.effective_address);
		break;
	case MCE_ERROR_TYPE_RA:
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		printk("%s Error type: Real address [%s]\n", level, subtype);
		if (evt->u.ra_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.ra_error.effective_address);
		break;
	case MCE_ERROR_TYPE_LINK:
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		printk("%s Error type: Link [%s]\n", level, subtype);
		if (evt->u.link_error.effective_address_provided)
			printk("%s Effective address: %016llx\n",
			       level, evt->u.link_error.effective_address);
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		printk("%s Error type: Unknown\n", level);
		break;
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

	__this_cpu_inc(irq_stat.mce_exceptions);

	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
		handled = cur_cpu_spec->machine_check_early(regs);
	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

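/*
 * For illustration only: the device tree property parsed above is
 * assumed to look roughly like this in a CPU node (only the two
 * strings matched by init_debug_trig_function() are meaningful;
 * the node name is illustrative):
 *
 *	cpus {
 *		PowerPC,POWER9@0 {
 *			ibm,hmi-special-triggers = "bit17-tm-suspend-escape";
 *		};
 *	};
 */
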
/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values: as for hmi_handle_debugtrig():
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Machine check exception handling.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
#include <linux/of.h>

#include <asm/interrupt.h>
#include <asm/machdep.h>
#include <asm/mce.h>
#include <asm/nmi.h>

#include "setup.h"

static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);

static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);

int mce_register_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_register_notifier);

int mce_unregister_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_notifier);

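/*
 * Illustrative sketch only (an assumed consumer, not part of this
 * file): a client interested in UE events before memory_failure()
 * runs could register a callback like this; the event is passed as
 * the notifier data (see machine_process_ue_event() below).
 *
 *	static int example_mce_notify(struct notifier_block *nb,
 *				      unsigned long action, void *data)
 *	{
 *		struct machine_check_event *evt = data;
 *
 *		if (evt->error_type == MCE_ERROR_TYPE_UE &&
 *		    evt->u.ue_error.physical_address_provided)
 *			pr_debug("UE at paddr %016llx\n",
 *				 evt->u.ue_error.physical_address);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_mce_nb = {
 *		.notifier_call = example_mce_notify,
 *	};
 *
 *	mce_register_notifier(&example_mce_nb);
 */
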
static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

void mce_irq_work_queue(void)
{
	/* Raise decrementer interrupt */
	arch_irq_work_raise();
	set_mce_pending_irq_work();
}

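/*
 * Sketch of the deferred path (an assumption about the caller, which
 * is not in this file): the decrementer interrupt raised above is
 * expected, once taken in virtual mode, to end up calling
 *
 *	mce_run_irq_context_handlers();
 *
 * which checks mce_pending_irq_work and then logs and drains the
 * queues filled by machine_check_queue_event() and
 * machine_check_ue_event() below.
 */
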
/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structures.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = local_paca->mce_info->mce_nest_count++;
	struct machine_check_event *mce;

	mce = &local_paca->mce_info->mce_event[index];
	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;
	mce->cpu = get_paca()->paca_index;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;
	mce->sync_error = mce_err->sync_error;
	mce->error_class = mce_err->error_class;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);
	if (mce->error_type == MCE_ERROR_TYPE_UE)
		mce->u.ue_error.ignore_event = mce_err->ignore_event;

	/*
	 * Raise irq work so that we don't miss logging the error for
	 * unrecoverable errors.
	 */
	if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED)
		mce_irq_work_queue();

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		0 <= do not release the mce event. Caller will invoke
 *		     release_mce_event() once event has been consumed.
 *		1 <= release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by the platform specific machine check
 * handler routine and in KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = local_paca->mce_info->mce_nest_count - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = &local_paca->mce_info->mce_event[index];
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		local_paca->mce_info->mce_nest_count--;

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

static void machine_check_ue_work(void)
{
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
static void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = local_paca->mce_info->mce_ue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_ue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
	       evt, sizeof(*evt));
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = local_paca->mce_info->mce_queue_count++;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		local_paca->mce_info->mce_queue_count--;
		return;
	}
	memcpy(&local_paca->mce_info->mce_event_queue[index],
	       &evt, sizeof(evt));

	mce_irq_work_queue();
}

void mce_common_process_ue(struct pt_regs *regs,
			   struct mce_error_info *mce_err)
{
	const struct exception_table_entry *entry;

	entry = search_kernel_exception_table(regs->nip);
	if (entry) {
		mce_err->ignore_event = true;
		regs_set_return_ip(regs, extable_fixup(entry));
	}
}

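/*
 * Sketch of the assumed caller (CPU specific UE handling, not in this
 * file): when a UE is attributed to a kernel load/store that has an
 * exception table entry, the handler can turn it into a fixup rather
 * than reporting it, roughly:
 *
 *	if (mce_err.error_type == MCE_ERROR_TYPE_UE)
 *		mce_common_process_ue(regs, &mce_err);
 */
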
/*
 * process pending MCE event from the mce event queue. This function will be
 * called during syscall exit.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (local_paca->mce_info->mce_ue_count > 0) {
		index = local_paca->mce_info->mce_ue_count - 1;
		evt = &local_paca->mce_info->mce_ue_event_queue[index];
		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well
		 *
		 * Don't report this machine check because the caller has
		 * asked us to ignore the event, it has a fixup handler
		 * which will do the appropriate error handling and
		 * reporting.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.ignore_event) {
				local_paca->mce_info->mce_ue_count--;
				continue;
			}

			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else
				pr_warn("Failed to identify bad address from "
					"where the uncorrectable error (UE) "
					"was generated\n");
		}
#endif
		local_paca->mce_info->mce_ue_count--;
	}
}

/*
 * process pending MCE event from the mce event queue. This function will be
 * called during syscall exit.
 */
static void machine_check_process_queued_event(void)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (local_paca->mce_info->mce_queue_count > 0) {
		index = local_paca->mce_info->mce_queue_count - 1;
		evt = &local_paca->mce_info->mce_event_queue[index];

		if (evt->error_type == MCE_ERROR_TYPE_UE &&
		    evt->u.ue_error.ignore_event) {
			local_paca->mce_info->mce_queue_count--;
			continue;
		}
		machine_check_print_event_info(evt, false, false);
		local_paca->mce_info->mce_queue_count--;
	}
}

void set_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 1;
}

void clear_mce_pending_irq_work(void)
{
	local_paca->mce_pending_irq_work = 0;
}

void mce_run_irq_context_handlers(void)
{
	if (unlikely(local_paca->mce_pending_irq_work)) {
		if (ppc_md.machine_check_log_err)
			ppc_md.machine_check_log_err();
		machine_check_process_queued_event();
		machine_check_ue_work();
		clear_mce_pending_irq_work();
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type, *initiator;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
		"scv invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign/control memory)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign/control memory)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign/control memory)",
		"Load/Store (foreign/control memory)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	switch (evt->initiator) {
	case MCE_INITIATOR_CPU:
		initiator = "CPU";
		break;
	case MCE_INITIATOR_PCI:
		initiator = "PCI";
		break;
	case MCE_INITIATOR_ISA:
		initiator = "ISA";
		break;
	case MCE_INITIATOR_MEMORY:
		initiator = "Memory";
		break;
	case MCE_INITIATOR_POWERMGM:
		initiator = "Power Management";
		break;
	case MCE_INITIATOR_UNKNOWN:
	default:
		initiator = "Unknown";
		break;
	}

	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	case MCE_ERROR_TYPE_DCACHE:
		err_type = "D-Cache";
		subtype = "Unknown";
		break;
	case MCE_ERROR_TYPE_ICACHE:
		err_type = "I-Cache";
		subtype = "Unknown";
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);

	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);

#ifdef CONFIG_PPC_64S_HASH_MMU
	/* Display faulty slb contents for SLB errors. */
	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
		slb_dump_contents(local_paca->mce_faulty_slbs);
#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

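/*
 * For illustration only, the console output produced above is roughly
 * of this shape (all values made up):
 *
 *	MCE: CPU12: machine check (Severe)  UE Load/Store DAR: 00007fff9c2b0000 paddr: 0000000202b40000 [Not recovered]
 *	MCE: CPU12: PID: 4321 Comm: example NIP: [0000000010012a4c]
 *	MCE: CPU12: Initiator CPU
 *	MCE: CPU12: Hardware error
 */
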
/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain SRR0 and SRR1.
 */
DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
	long handled = 0;

	hv_nmi_check_nonrecoverable(regs);

	/*
	 * See if platform is capable of handling machine check.
	 */
	if (ppc_md.machine_check_early)
		handled = ppc_md.machine_check_early(regs);

	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values: as for hmi_handle_debugtrig():
 *  0 means no further handling is required
 *  1 means further handling is required
 */
DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
{
	int ret;

	local_paca->hmi_irqs++;

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}

void __init mce_init(void)
{
	struct mce_info *mce_info;
	u64 limit;
	int i;

	limit = min(ppc64_bolted_size(), ppc64_rma_size);
	for_each_possible_cpu(i) {
		mce_info = memblock_alloc_try_nid(sizeof(*mce_info),
						  __alignof__(*mce_info),
						  MEMBLOCK_LOW_LIMIT,
						  limit, early_cpu_to_node(i));
		if (!mce_info)
			goto err;
		paca_ptrs[i]->mce_info = mce_info;
	}
	return;
err:
	panic("Failed to allocate memory for MCE event data\n");
}