Linux Audio

Check our new training course

Loading...
v5.9
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * Machine check exception handling.
  4 *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  5 * Copyright 2013 IBM Corporation
  6 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
  7 */
  8
  9#undef DEBUG
 10#define pr_fmt(fmt) "mce: " fmt
 11
 12#include <linux/hardirq.h>
 13#include <linux/types.h>
 14#include <linux/ptrace.h>
 15#include <linux/percpu.h>
 16#include <linux/export.h>
 17#include <linux/irq_work.h>
 18#include <linux/extable.h>
 19#include <linux/ftrace.h>
 20
 21#include <asm/machdep.h>
 22#include <asm/mce.h>
 23#include <asm/nmi.h>
 24
 25static DEFINE_PER_CPU(int, mce_nest_count);
 26static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
 27
 28/* Queue for delayed MCE events. */
 29static DEFINE_PER_CPU(int, mce_queue_count);
 30static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
 31
 32/* Queue for delayed MCE UE events. */
 33static DEFINE_PER_CPU(int, mce_ue_count);
 34static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
 35					mce_ue_event_queue);
 36
 37static void machine_check_process_queued_event(struct irq_work *work);
 38static void machine_check_ue_irq_work(struct irq_work *work);
 39static void machine_check_ue_event(struct machine_check_event *evt);
 40static void machine_process_ue_event(struct work_struct *work);
 41
 42static struct irq_work mce_event_process_work = {
 43        .func = machine_check_process_queued_event,
 44};
 45
 46static struct irq_work mce_ue_event_irq_work = {
 47	.func = machine_check_ue_irq_work,
 48};
 49
 50DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 51
 52static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
 53
 54int mce_register_notifier(struct notifier_block *nb)
 55{
 56	return blocking_notifier_chain_register(&mce_notifier_list, nb);
 57}
 58EXPORT_SYMBOL_GPL(mce_register_notifier);
 59
 60int mce_unregister_notifier(struct notifier_block *nb)
 61{
 62	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
 63}
 64EXPORT_SYMBOL_GPL(mce_unregister_notifier);
 65
 66static void mce_set_error_info(struct machine_check_event *mce,
 67			       struct mce_error_info *mce_err)
 68{
 69	mce->error_type = mce_err->error_type;
 70	switch (mce_err->error_type) {
 71	case MCE_ERROR_TYPE_UE:
 72		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
 73		break;
 74	case MCE_ERROR_TYPE_SLB:
 75		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
 76		break;
 77	case MCE_ERROR_TYPE_ERAT:
 78		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
 79		break;
 80	case MCE_ERROR_TYPE_TLB:
 81		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
 82		break;
 83	case MCE_ERROR_TYPE_USER:
 84		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
 85		break;
 86	case MCE_ERROR_TYPE_RA:
 87		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
 88		break;
 89	case MCE_ERROR_TYPE_LINK:
 90		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
 91		break;
 92	case MCE_ERROR_TYPE_UNKNOWN:
 93	default:
 94		break;
 95	}
 96}
 97
 98/*
 99 * Decode and save high level MCE information into per cpu buffer which
100 * is an array of machine_check_event structure.
101 */
102void save_mce_event(struct pt_regs *regs, long handled,
103		    struct mce_error_info *mce_err,
104		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
105{
 
106	int index = __this_cpu_inc_return(mce_nest_count) - 1;
107	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
108
109	/*
110	 * Return if we don't have enough space to log mce event.
111	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
112	 * the check below will stop buffer overrun.
113	 */
114	if (index >= MAX_MC_EVT)
115		return;
116
117	/* Populate generic machine check info */
118	mce->version = MCE_V1;
119	mce->srr0 = nip;
120	mce->srr1 = regs->msr;
121	mce->gpr3 = regs->gpr[3];
122	mce->in_use = 1;
123	mce->cpu = get_paca()->paca_index;
124
125	/* Mark it recovered if we have handled it and MSR(RI=1). */
126	if (handled && (regs->msr & MSR_RI))
127		mce->disposition = MCE_DISPOSITION_RECOVERED;
128	else
129		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
 
130
131	mce->initiator = mce_err->initiator;
132	mce->severity = mce_err->severity;
133	mce->sync_error = mce_err->sync_error;
134	mce->error_class = mce_err->error_class;
135
136	/*
137	 * Populate the mce error_type and type-specific error_type.
138	 */
139	mce_set_error_info(mce, mce_err);
140
141	if (!addr)
142		return;
143
144	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
145		mce->u.tlb_error.effective_address_provided = true;
146		mce->u.tlb_error.effective_address = addr;
147	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
148		mce->u.slb_error.effective_address_provided = true;
149		mce->u.slb_error.effective_address = addr;
150	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
151		mce->u.erat_error.effective_address_provided = true;
152		mce->u.erat_error.effective_address = addr;
153	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
154		mce->u.user_error.effective_address_provided = true;
155		mce->u.user_error.effective_address = addr;
156	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
157		mce->u.ra_error.effective_address_provided = true;
158		mce->u.ra_error.effective_address = addr;
159	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
160		mce->u.link_error.effective_address_provided = true;
161		mce->u.link_error.effective_address = addr;
162	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
163		mce->u.ue_error.effective_address_provided = true;
164		mce->u.ue_error.effective_address = addr;
165		if (phys_addr != ULONG_MAX) {
166			mce->u.ue_error.physical_address_provided = true;
167			mce->u.ue_error.physical_address = phys_addr;
168			mce->u.ue_error.ignore_event = mce_err->ignore_event;
169			machine_check_ue_event(mce);
170		}
171	}
172	return;
173}
174
175/*
176 * get_mce_event:
177 *	mce	Pointer to machine_check_event structure to be filled.
178 *	release Flag to indicate whether to free the event slot or not.
179 *		0 <= do not release the mce event. Caller will invoke
180 *		     release_mce_event() once event has been consumed.
181 *		1 <= release the slot.
182 *
183 *	return	1 = success
184 *		0 = failure
185 *
186 * get_mce_event() will be called by platform specific machine check
187 * handle routine and in KVM.
188 * When we call get_mce_event(), we are still in interrupt context and
189 * preemption will not be scheduled until ret_from_expect() routine
190 * is called.
191 */
192int get_mce_event(struct machine_check_event *mce, bool release)
193{
194	int index = __this_cpu_read(mce_nest_count) - 1;
195	struct machine_check_event *mc_evt;
196	int ret = 0;
197
198	/* Sanity check */
199	if (index < 0)
200		return ret;
201
202	/* Check if we have MCE info to process. */
203	if (index < MAX_MC_EVT) {
204		mc_evt = this_cpu_ptr(&mce_event[index]);
205		/* Copy the event structure and release the original */
206		if (mce)
207			*mce = *mc_evt;
208		if (release)
209			mc_evt->in_use = 0;
210		ret = 1;
211	}
212	/* Decrement the count to free the slot. */
213	if (release)
214		__this_cpu_dec(mce_nest_count);
215
216	return ret;
217}
218
219void release_mce_event(void)
220{
221	get_mce_event(NULL, true);
222}
223
224static void machine_check_ue_irq_work(struct irq_work *work)
225{
226	schedule_work(&mce_ue_event_work);
227}
228
229/*
230 * Queue up the MCE event which then can be handled later.
231 */
232static void machine_check_ue_event(struct machine_check_event *evt)
233{
234	int index;
235
236	index = __this_cpu_inc_return(mce_ue_count) - 1;
237	/* If queue is full, just return for now. */
238	if (index >= MAX_MC_EVT) {
239		__this_cpu_dec(mce_ue_count);
240		return;
241	}
242	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
243
244	/* Queue work to process this event later. */
245	irq_work_queue(&mce_ue_event_irq_work);
246}
247
248/*
249 * Queue up the MCE event which then can be handled later.
250 */
251void machine_check_queue_event(void)
252{
253	int index;
254	struct machine_check_event evt;
255
256	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
257		return;
258
259	index = __this_cpu_inc_return(mce_queue_count) - 1;
260	/* If queue is full, just return for now. */
261	if (index >= MAX_MC_EVT) {
262		__this_cpu_dec(mce_queue_count);
263		return;
264	}
265	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
266
267	/* Queue irq work to process this event later. */
268	irq_work_queue(&mce_event_process_work);
269}
270
271void mce_common_process_ue(struct pt_regs *regs,
272			   struct mce_error_info *mce_err)
273{
274	const struct exception_table_entry *entry;
275
276	entry = search_kernel_exception_table(regs->nip);
277	if (entry) {
278		mce_err->ignore_event = true;
279		regs->nip = extable_fixup(entry);
280	}
281}
282
283/*
284 * process pending MCE event from the mce event queue. This function will be
285 * called during syscall exit.
286 */
287static void machine_process_ue_event(struct work_struct *work)
288{
289	int index;
290	struct machine_check_event *evt;
291
292	while (__this_cpu_read(mce_ue_count) > 0) {
293		index = __this_cpu_read(mce_ue_count) - 1;
294		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
295		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
296#ifdef CONFIG_MEMORY_FAILURE
297		/*
298		 * This should probably queued elsewhere, but
299		 * oh! well
300		 *
301		 * Don't report this machine check because the caller has a
302		 * asked us to ignore the event, it has a fixup handler which
303		 * will do the appropriate error handling and reporting.
304		 */
305		if (evt->error_type == MCE_ERROR_TYPE_UE) {
306			if (evt->u.ue_error.ignore_event) {
307				__this_cpu_dec(mce_ue_count);
308				continue;
309			}
310
311			if (evt->u.ue_error.physical_address_provided) {
312				unsigned long pfn;
313
314				pfn = evt->u.ue_error.physical_address >>
315					PAGE_SHIFT;
316				memory_failure(pfn, 0);
317			} else
318				pr_warn("Failed to identify bad address from "
319					"where the uncorrectable error (UE) "
320					"was generated\n");
321		}
322#endif
323		__this_cpu_dec(mce_ue_count);
324	}
325}
326/*
327 * process pending MCE event from the mce event queue. This function will be
328 * called during syscall exit.
329 */
330static void machine_check_process_queued_event(struct irq_work *work)
331{
332	int index;
333	struct machine_check_event *evt;
334
335	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
336
337	/*
338	 * For now just print it to console.
339	 * TODO: log this error event to FSP or nvram.
340	 */
341	while (__this_cpu_read(mce_queue_count) > 0) {
342		index = __this_cpu_read(mce_queue_count) - 1;
343		evt = this_cpu_ptr(&mce_event_queue[index]);
344
345		if (evt->error_type == MCE_ERROR_TYPE_UE &&
346		    evt->u.ue_error.ignore_event) {
347			__this_cpu_dec(mce_queue_count);
348			continue;
349		}
350		machine_check_print_event_info(evt, false, false);
351		__this_cpu_dec(mce_queue_count);
352	}
353}
354
355void machine_check_print_event_info(struct machine_check_event *evt,
356				    bool user_mode, bool in_guest)
357{
358	const char *level, *sevstr, *subtype, *err_type, *initiator;
359	uint64_t ea = 0, pa = 0;
360	int n = 0;
361	char dar_str[50];
362	char pa_str[50];
363	static const char *mc_ue_types[] = {
364		"Indeterminate",
365		"Instruction fetch",
366		"Page table walk ifetch",
367		"Load/Store",
368		"Page table walk Load/Store",
369	};
370	static const char *mc_slb_types[] = {
371		"Indeterminate",
372		"Parity",
373		"Multihit",
374	};
375	static const char *mc_erat_types[] = {
376		"Indeterminate",
377		"Parity",
378		"Multihit",
379	};
380	static const char *mc_tlb_types[] = {
381		"Indeterminate",
382		"Parity",
383		"Multihit",
384	};
385	static const char *mc_user_types[] = {
386		"Indeterminate",
387		"tlbie(l) invalid",
388		"scv invalid",
389	};
390	static const char *mc_ra_types[] = {
391		"Indeterminate",
392		"Instruction fetch (bad)",
393		"Instruction fetch (foreign)",
394		"Page table walk ifetch (bad)",
395		"Page table walk ifetch (foreign)",
396		"Load (bad)",
397		"Store (bad)",
398		"Page table walk Load/Store (bad)",
399		"Page table walk Load/Store (foreign)",
400		"Load/Store (foreign)",
401	};
402	static const char *mc_link_types[] = {
403		"Indeterminate",
404		"Instruction fetch (timeout)",
405		"Page table walk ifetch (timeout)",
406		"Load (timeout)",
407		"Store (timeout)",
408		"Page table walk Load/Store (timeout)",
409	};
410	static const char *mc_error_class[] = {
411		"Unknown",
412		"Hardware error",
413		"Probable Hardware error (some chance of software cause)",
414		"Software error",
415		"Probable Software error (some chance of hardware cause)",
416	};
417
418	/* Print things out */
419	if (evt->version != MCE_V1) {
420		pr_err("Machine Check Exception, Unknown event version %d !\n",
421		       evt->version);
422		return;
423	}
424	switch (evt->severity) {
425	case MCE_SEV_NO_ERROR:
426		level = KERN_INFO;
427		sevstr = "Harmless";
428		break;
429	case MCE_SEV_WARNING:
430		level = KERN_WARNING;
431		sevstr = "Warning";
432		break;
433	case MCE_SEV_SEVERE:
434		level = KERN_ERR;
435		sevstr = "Severe";
436		break;
437	case MCE_SEV_FATAL:
438	default:
439		level = KERN_ERR;
440		sevstr = "Fatal";
441		break;
442	}
443
444	switch(evt->initiator) {
445	case MCE_INITIATOR_CPU:
446		initiator = "CPU";
447		break;
448	case MCE_INITIATOR_PCI:
449		initiator = "PCI";
450		break;
451	case MCE_INITIATOR_ISA:
452		initiator = "ISA";
453		break;
454	case MCE_INITIATOR_MEMORY:
455		initiator = "Memory";
456		break;
457	case MCE_INITIATOR_POWERMGM:
458		initiator = "Power Management";
459		break;
460	case MCE_INITIATOR_UNKNOWN:
461	default:
462		initiator = "Unknown";
463		break;
464	}
465
466	switch (evt->error_type) {
467	case MCE_ERROR_TYPE_UE:
468		err_type = "UE";
469		subtype = evt->u.ue_error.ue_error_type <
470			ARRAY_SIZE(mc_ue_types) ?
471			mc_ue_types[evt->u.ue_error.ue_error_type]
472			: "Unknown";
 
473		if (evt->u.ue_error.effective_address_provided)
474			ea = evt->u.ue_error.effective_address;
 
475		if (evt->u.ue_error.physical_address_provided)
476			pa = evt->u.ue_error.physical_address;
 
477		break;
478	case MCE_ERROR_TYPE_SLB:
479		err_type = "SLB";
480		subtype = evt->u.slb_error.slb_error_type <
481			ARRAY_SIZE(mc_slb_types) ?
482			mc_slb_types[evt->u.slb_error.slb_error_type]
483			: "Unknown";
 
484		if (evt->u.slb_error.effective_address_provided)
485			ea = evt->u.slb_error.effective_address;
 
486		break;
487	case MCE_ERROR_TYPE_ERAT:
488		err_type = "ERAT";
489		subtype = evt->u.erat_error.erat_error_type <
490			ARRAY_SIZE(mc_erat_types) ?
491			mc_erat_types[evt->u.erat_error.erat_error_type]
492			: "Unknown";
 
493		if (evt->u.erat_error.effective_address_provided)
494			ea = evt->u.erat_error.effective_address;
 
495		break;
496	case MCE_ERROR_TYPE_TLB:
497		err_type = "TLB";
498		subtype = evt->u.tlb_error.tlb_error_type <
499			ARRAY_SIZE(mc_tlb_types) ?
500			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
501			: "Unknown";
 
502		if (evt->u.tlb_error.effective_address_provided)
503			ea = evt->u.tlb_error.effective_address;
504		break;
505	case MCE_ERROR_TYPE_USER:
506		err_type = "User";
507		subtype = evt->u.user_error.user_error_type <
508			ARRAY_SIZE(mc_user_types) ?
509			mc_user_types[evt->u.user_error.user_error_type]
510			: "Unknown";
511		if (evt->u.user_error.effective_address_provided)
512			ea = evt->u.user_error.effective_address;
513		break;
514	case MCE_ERROR_TYPE_RA:
515		err_type = "Real address";
516		subtype = evt->u.ra_error.ra_error_type <
517			ARRAY_SIZE(mc_ra_types) ?
518			mc_ra_types[evt->u.ra_error.ra_error_type]
519			: "Unknown";
520		if (evt->u.ra_error.effective_address_provided)
521			ea = evt->u.ra_error.effective_address;
522		break;
523	case MCE_ERROR_TYPE_LINK:
524		err_type = "Link";
525		subtype = evt->u.link_error.link_error_type <
526			ARRAY_SIZE(mc_link_types) ?
527			mc_link_types[evt->u.link_error.link_error_type]
528			: "Unknown";
529		if (evt->u.link_error.effective_address_provided)
530			ea = evt->u.link_error.effective_address;
531		break;
532	case MCE_ERROR_TYPE_DCACHE:
533		err_type = "D-Cache";
534		subtype = "Unknown";
535		break;
536	case MCE_ERROR_TYPE_ICACHE:
537		err_type = "I-Cache";
538		subtype = "Unknown";
539		break;
540	default:
541	case MCE_ERROR_TYPE_UNKNOWN:
542		err_type = "Unknown";
543		subtype = "";
544		break;
545	}
546
547	dar_str[0] = pa_str[0] = '\0';
548	if (ea && evt->srr0 != ea) {
549		/* Load/Store address */
550		n = sprintf(dar_str, "DAR: %016llx ", ea);
551		if (pa)
552			sprintf(dar_str + n, "paddr: %016llx ", pa);
553	} else if (pa) {
554		sprintf(pa_str, " paddr: %016llx", pa);
555	}
556
557	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
558		level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
559		err_type, subtype, dar_str,
560		evt->disposition == MCE_DISPOSITION_RECOVERED ?
561		"Recovered" : "Not recovered");
562
563	if (in_guest || user_mode) {
564		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
565			level, evt->cpu, current->pid, current->comm,
566			in_guest ? "Guest " : "", evt->srr0, pa_str);
567	} else {
568		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
569			level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
570	}
571
572	printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
573
574	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
575		mc_error_class[evt->error_class] : "Unknown";
576	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
577
578#ifdef CONFIG_PPC_BOOK3S_64
579	/* Display faulty slb contents for SLB errors. */
580	if (evt->error_type == MCE_ERROR_TYPE_SLB)
581		slb_dump_contents(local_paca->mce_faulty_slbs);
582#endif
583}
584EXPORT_SYMBOL_GPL(machine_check_print_event_info);
585
586/*
587 * This function is called in real mode. Strictly no printk's please.
588 *
589 * regs->nip and regs->msr contains srr0 and ssr1.
590 */
591long notrace machine_check_early(struct pt_regs *regs)
592{
593	long handled = 0;
594	bool nested = in_nmi();
595	u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
596
597	this_cpu_set_ftrace_enabled(0);
598
599	if (!nested)
600		nmi_enter();
601
602	hv_nmi_check_nonrecoverable(regs);
603
604	/*
605	 * See if platform is capable of handling machine check.
606	 */
607	if (ppc_md.machine_check_early)
608		handled = ppc_md.machine_check_early(regs);
609
610	if (!nested)
611		nmi_exit();
612
613	this_cpu_set_ftrace_enabled(ftrace_enabled);
614
615	return handled;
616}
617
618/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
619static enum {
620	DTRIG_UNKNOWN,
621	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
622	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
623} hmer_debug_trig_function;
624
625static int init_debug_trig_function(void)
626{
627	int pvr;
628	struct device_node *cpun;
629	struct property *prop = NULL;
630	const char *str;
631
632	/* First look in the device tree */
633	preempt_disable();
634	cpun = of_get_cpu_node(smp_processor_id(), NULL);
635	if (cpun) {
636		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
637					    prop, str) {
638			if (strcmp(str, "bit17-vector-ci-load") == 0)
639				hmer_debug_trig_function = DTRIG_VECTOR_CI;
640			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
641				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
642		}
643		of_node_put(cpun);
644	}
645	preempt_enable();
646
647	/* If we found the property, don't look at PVR */
648	if (prop)
649		goto out;
650
651	pvr = mfspr(SPRN_PVR);
652	/* Check for POWER9 Nimbus (scale-out) */
653	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
654		/* DD2.2 and later */
655		if ((pvr & 0xfff) >= 0x202)
656			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
657		/* DD2.0 and DD2.1 - used for vector CI load emulation */
658		else if ((pvr & 0xfff) >= 0x200)
659			hmer_debug_trig_function = DTRIG_VECTOR_CI;
660	}
661
662 out:
663	switch (hmer_debug_trig_function) {
664	case DTRIG_VECTOR_CI:
665		pr_debug("HMI debug trigger used for vector CI load\n");
666		break;
667	case DTRIG_SUSPEND_ESCAPE:
668		pr_debug("HMI debug trigger used for TM suspend escape\n");
 
669		break;
670	default:
 
 
671		break;
672	}
673	return 0;
674}
675__initcall(init_debug_trig_function);
676
677/*
678 * Handle HMIs that occur as a result of a debug trigger.
679 * Return values:
680 * -1 means this is not a HMI cause that we know about
681 *  0 means no further handling is required
682 *  1 means further handling is required
683 */
684long hmi_handle_debugtrig(struct pt_regs *regs)
685{
686	unsigned long hmer = mfspr(SPRN_HMER);
687	long ret = 0;
688
689	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
690	if (!((hmer & HMER_DEBUG_TRIG)
691	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
692		return -1;
693		
694	hmer &= ~HMER_DEBUG_TRIG;
695	/* HMER is a write-AND register */
696	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
697
698	switch (hmer_debug_trig_function) {
699	case DTRIG_VECTOR_CI:
700		/*
701		 * Now to avoid problems with soft-disable we
702		 * only do the emulation if we are coming from
703		 * host user space
704		 */
705		if (regs && user_mode(regs))
706			ret = local_paca->hmi_p9_special_emu = 1;
707
708		break;
709
710	default:
 
711		break;
712	}
713
714	/*
715	 * See if any other HMI causes remain to be handled
716	 */
717	if (hmer & mfspr(SPRN_HMEER))
718		return -1;
719
720	return ret;
721}
722
723/*
724 * Return values:
725 */
726long hmi_exception_realmode(struct pt_regs *regs)
727{	
728	int ret;
729
730	local_paca->hmi_irqs++;
731
732	ret = hmi_handle_debugtrig(regs);
733	if (ret >= 0)
734		return ret;
735
736	wait_for_subcore_guest_exit();
737
738	if (ppc_md.hmi_exception_early)
739		ppc_md.hmi_exception_early(regs);
740
741	wait_for_tb_resync();
742
743	return 1;
744}
v4.6
 
  1/*
  2 * Machine check exception handling.
  3 *
  4 * This program is free software; you can redistribute it and/or modify
  5 * it under the terms of the GNU General Public License as published by
  6 * the Free Software Foundation; either version 2 of the License, or
  7 * (at your option) any later version.
  8 *
  9 * This program is distributed in the hope that it will be useful,
 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 * GNU General Public License for more details.
 13 *
 14 * You should have received a copy of the GNU General Public License
 15 * along with this program; if not, write to the Free Software
 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 *
 18 * Copyright 2013 IBM Corporation
 19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 20 */
 21
 22#undef DEBUG
 23#define pr_fmt(fmt) "mce: " fmt
 24
 
 25#include <linux/types.h>
 26#include <linux/ptrace.h>
 27#include <linux/percpu.h>
 28#include <linux/export.h>
 29#include <linux/irq_work.h>
 
 
 
 
 30#include <asm/mce.h>
 
 31
 32static DEFINE_PER_CPU(int, mce_nest_count);
 33static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
 34
 35/* Queue for delayed MCE events. */
 36static DEFINE_PER_CPU(int, mce_queue_count);
 37static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
 38
 
 
 
 
 
 39static void machine_check_process_queued_event(struct irq_work *work);
 40struct irq_work mce_event_process_work = {
 
 
 
 
 41        .func = machine_check_process_queued_event,
 42};
 43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 44static void mce_set_error_info(struct machine_check_event *mce,
 45			       struct mce_error_info *mce_err)
 46{
 47	mce->error_type = mce_err->error_type;
 48	switch (mce_err->error_type) {
 49	case MCE_ERROR_TYPE_UE:
 50		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
 51		break;
 52	case MCE_ERROR_TYPE_SLB:
 53		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
 54		break;
 55	case MCE_ERROR_TYPE_ERAT:
 56		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
 57		break;
 58	case MCE_ERROR_TYPE_TLB:
 59		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
 60		break;
 
 
 
 
 
 
 
 
 
 61	case MCE_ERROR_TYPE_UNKNOWN:
 62	default:
 63		break;
 64	}
 65}
 66
 67/*
 68 * Decode and save high level MCE information into per cpu buffer which
 69 * is an array of machine_check_event structure.
 70 */
 71void save_mce_event(struct pt_regs *regs, long handled,
 72		    struct mce_error_info *mce_err,
 73		    uint64_t nip, uint64_t addr)
 74{
 75	uint64_t srr1;
 76	int index = __this_cpu_inc_return(mce_nest_count) - 1;
 77	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
 78
 79	/*
 80	 * Return if we don't have enough space to log mce event.
 81	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
 82	 * the check below will stop buffer overrun.
 83	 */
 84	if (index >= MAX_MC_EVT)
 85		return;
 86
 87	/* Populate generic machine check info */
 88	mce->version = MCE_V1;
 89	mce->srr0 = nip;
 90	mce->srr1 = regs->msr;
 91	mce->gpr3 = regs->gpr[3];
 92	mce->in_use = 1;
 
 93
 94	mce->initiator = MCE_INITIATOR_CPU;
 95	if (handled)
 96		mce->disposition = MCE_DISPOSITION_RECOVERED;
 97	else
 98		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
 99	mce->severity = MCE_SEV_ERROR_SYNC;
100
101	srr1 = regs->msr;
 
 
 
102
103	/*
104	 * Populate the mce error_type and type-specific error_type.
105	 */
106	mce_set_error_info(mce, mce_err);
107
108	if (!addr)
109		return;
110
111	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
112		mce->u.tlb_error.effective_address_provided = true;
113		mce->u.tlb_error.effective_address = addr;
114	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
115		mce->u.slb_error.effective_address_provided = true;
116		mce->u.slb_error.effective_address = addr;
117	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
118		mce->u.erat_error.effective_address_provided = true;
119		mce->u.erat_error.effective_address = addr;
 
 
 
 
 
 
 
 
 
120	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
121		mce->u.ue_error.effective_address_provided = true;
122		mce->u.ue_error.effective_address = addr;
 
 
 
 
 
 
123	}
124	return;
125}
126
127/*
128 * get_mce_event:
129 *	mce	Pointer to machine_check_event structure to be filled.
130 *	release Flag to indicate whether to free the event slot or not.
131 *		0 <= do not release the mce event. Caller will invoke
132 *		     release_mce_event() once event has been consumed.
133 *		1 <= release the slot.
134 *
135 *	return	1 = success
136 *		0 = failure
137 *
138 * get_mce_event() will be called by platform specific machine check
139 * handle routine and in KVM.
140 * When we call get_mce_event(), we are still in interrupt context and
141 * preemption will not be scheduled until ret_from_expect() routine
142 * is called.
143 */
144int get_mce_event(struct machine_check_event *mce, bool release)
145{
146	int index = __this_cpu_read(mce_nest_count) - 1;
147	struct machine_check_event *mc_evt;
148	int ret = 0;
149
150	/* Sanity check */
151	if (index < 0)
152		return ret;
153
154	/* Check if we have MCE info to process. */
155	if (index < MAX_MC_EVT) {
156		mc_evt = this_cpu_ptr(&mce_event[index]);
157		/* Copy the event structure and release the original */
158		if (mce)
159			*mce = *mc_evt;
160		if (release)
161			mc_evt->in_use = 0;
162		ret = 1;
163	}
164	/* Decrement the count to free the slot. */
165	if (release)
166		__this_cpu_dec(mce_nest_count);
167
168	return ret;
169}
170
171void release_mce_event(void)
172{
173	get_mce_event(NULL, true);
174}
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176/*
177 * Queue up the MCE event which then can be handled later.
178 */
179void machine_check_queue_event(void)
180{
181	int index;
182	struct machine_check_event evt;
183
184	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
185		return;
186
187	index = __this_cpu_inc_return(mce_queue_count) - 1;
188	/* If queue is full, just return for now. */
189	if (index >= MAX_MC_EVT) {
190		__this_cpu_dec(mce_queue_count);
191		return;
192	}
193	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
194
195	/* Queue irq work to process this event later. */
196	irq_work_queue(&mce_event_process_work);
197}
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199/*
200 * process pending MCE event from the mce event queue. This function will be
201 * called during syscall exit.
202 */
203static void machine_check_process_queued_event(struct irq_work *work)
204{
205	int index;
 
 
 
206
207	/*
208	 * For now just print it to console.
209	 * TODO: log this error event to FSP or nvram.
210	 */
211	while (__this_cpu_read(mce_queue_count) > 0) {
212		index = __this_cpu_read(mce_queue_count) - 1;
213		machine_check_print_event_info(
214				this_cpu_ptr(&mce_event_queue[index]));
 
 
 
 
 
 
215		__this_cpu_dec(mce_queue_count);
216	}
217}
218
219void machine_check_print_event_info(struct machine_check_event *evt)
 
220{
221	const char *level, *sevstr, *subtype;
 
 
 
 
222	static const char *mc_ue_types[] = {
223		"Indeterminate",
224		"Instruction fetch",
225		"Page table walk ifetch",
226		"Load/Store",
227		"Page table walk Load/Store",
228	};
229	static const char *mc_slb_types[] = {
230		"Indeterminate",
231		"Parity",
232		"Multihit",
233	};
234	static const char *mc_erat_types[] = {
235		"Indeterminate",
236		"Parity",
237		"Multihit",
238	};
239	static const char *mc_tlb_types[] = {
240		"Indeterminate",
241		"Parity",
242		"Multihit",
243	};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
245	/* Print things out */
246	if (evt->version != MCE_V1) {
247		pr_err("Machine Check Exception, Unknown event version %d !\n",
248		       evt->version);
249		return;
250	}
251	switch (evt->severity) {
252	case MCE_SEV_NO_ERROR:
253		level = KERN_INFO;
254		sevstr = "Harmless";
255		break;
256	case MCE_SEV_WARNING:
257		level = KERN_WARNING;
258		sevstr = "";
259		break;
260	case MCE_SEV_ERROR_SYNC:
261		level = KERN_ERR;
262		sevstr = "Severe";
263		break;
264	case MCE_SEV_FATAL:
265	default:
266		level = KERN_ERR;
267		sevstr = "Fatal";
268		break;
269	}
270
271	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
272	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
273	       "Recovered" : "[Not recovered");
274	printk("%s  Initiator: %s\n", level,
275	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276	switch (evt->error_type) {
277	case MCE_ERROR_TYPE_UE:
 
278		subtype = evt->u.ue_error.ue_error_type <
279			ARRAY_SIZE(mc_ue_types) ?
280			mc_ue_types[evt->u.ue_error.ue_error_type]
281			: "Unknown";
282		printk("%s  Error type: UE [%s]\n", level, subtype);
283		if (evt->u.ue_error.effective_address_provided)
284			printk("%s    Effective address: %016llx\n",
285			       level, evt->u.ue_error.effective_address);
286		if (evt->u.ue_error.physical_address_provided)
287			printk("%s      Physial address: %016llx\n",
288			       level, evt->u.ue_error.physical_address);
289		break;
290	case MCE_ERROR_TYPE_SLB:
 
291		subtype = evt->u.slb_error.slb_error_type <
292			ARRAY_SIZE(mc_slb_types) ?
293			mc_slb_types[evt->u.slb_error.slb_error_type]
294			: "Unknown";
295		printk("%s  Error type: SLB [%s]\n", level, subtype);
296		if (evt->u.slb_error.effective_address_provided)
297			printk("%s    Effective address: %016llx\n",
298			       level, evt->u.slb_error.effective_address);
299		break;
300	case MCE_ERROR_TYPE_ERAT:
 
301		subtype = evt->u.erat_error.erat_error_type <
302			ARRAY_SIZE(mc_erat_types) ?
303			mc_erat_types[evt->u.erat_error.erat_error_type]
304			: "Unknown";
305		printk("%s  Error type: ERAT [%s]\n", level, subtype);
306		if (evt->u.erat_error.effective_address_provided)
307			printk("%s    Effective address: %016llx\n",
308			       level, evt->u.erat_error.effective_address);
309		break;
310	case MCE_ERROR_TYPE_TLB:
 
311		subtype = evt->u.tlb_error.tlb_error_type <
312			ARRAY_SIZE(mc_tlb_types) ?
313			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
314			: "Unknown";
315		printk("%s  Error type: TLB [%s]\n", level, subtype);
316		if (evt->u.tlb_error.effective_address_provided)
317			printk("%s    Effective address: %016llx\n",
318			       level, evt->u.tlb_error.effective_address);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319		break;
320	default:
321	case MCE_ERROR_TYPE_UNKNOWN:
322		printk("%s  Error type: Unknown\n", level);
 
323		break;
324	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325}
 
326
327uint64_t get_mce_fault_addr(struct machine_check_event *evt)
 
 
 
 
 
328{
329	switch (evt->error_type) {
330	case MCE_ERROR_TYPE_UE:
331		if (evt->u.ue_error.effective_address_provided)
332			return evt->u.ue_error.effective_address;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333		break;
334	case MCE_ERROR_TYPE_SLB:
335		if (evt->u.slb_error.effective_address_provided)
336			return evt->u.slb_error.effective_address;
337		break;
338	case MCE_ERROR_TYPE_ERAT:
339		if (evt->u.erat_error.effective_address_provided)
340			return evt->u.erat_error.effective_address;
341		break;
342	case MCE_ERROR_TYPE_TLB:
343		if (evt->u.tlb_error.effective_address_provided)
344			return evt->u.tlb_error.effective_address;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345		break;
 
346	default:
347	case MCE_ERROR_TYPE_UNKNOWN:
348		break;
349	}
350	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351}
352EXPORT_SYMBOL(get_mce_fault_addr);