v4.17
/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/machdep.h>
#include <asm/mce.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Queue for delayed MCE UE events. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);
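
/*
 * Note: machine checks can nest (a new one can arrive while an earlier
 * one is still being logged), which is why each CPU keeps MAX_MC_EVT
 * event slots; mce_nest_count indexes the slot for the current depth.
 */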

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
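
/*
 * Two deferral mechanisms are used here: irq_work, which runs soon after
 * the exception once interrupts are enabled and is used to print/log the
 * queued events, and a regular workqueue, which provides the process
 * context that memory_failure() needs when handling UE events.
 */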

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_USER:
		mce->u.user_error.user_error_type = mce_err->u.user_error_type;
		break;
	case MCE_ERROR_TYPE_RA:
		mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
		break;
	case MCE_ERROR_TYPE_LINK:
		mce->u.link_error.link_error_type = mce_err->u.link_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;

	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;

	mce->initiator = mce_err->initiator;
	mce->severity = mce_err->severity;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

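	/*
	 * Attach the effective address to the type-specific payload; for
	 * UE errors, also record the physical address when the caller was
	 * able to translate one, and queue the event for memory_failure().
	 */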
	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_USER) {
		mce->u.user_error.effective_address_provided = true;
		mce->u.user_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_RA) {
		mce->u.ra_error.effective_address_provided = true;
		mce->u.ra_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
		mce->u.link_error.effective_address_provided = true;
		mce->u.link_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
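		/* ULONG_MAX is the callers' sentinel for "physical address unknown". */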
		if (phys_addr != ULONG_MAX) {
			mce->u.ue_error.physical_address_provided = true;
			mce->u.ue_error.physical_address = phys_addr;
			machine_check_ue_event(mce);
		}
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		false = do not release the mce event. Caller will invoke
 *			release_mce_event() once the event has been consumed.
 *		true  = release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform-specific machine check
 * handler routines and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}
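
/*
 * A minimal usage sketch (not from this file): a platform machine check
 * handler that wants to inspect the current event without freeing the
 * slot might do:
 *
 *	struct machine_check_event evt;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_DONTRELEASE)) {
 *		... act on evt ...
 *		release_mce_event();
 *	}
 *
 * MCE_EVENT_RELEASE/MCE_EVENT_DONTRELEASE are the bool aliases used by
 * callers of this interface.
 */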

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

/*
 * Queue up the uncorrectable error (UE) event so that it can be handled
 * later in process context.
 */
void machine_check_ue_event(struct machine_check_event *evt)
{
	int index;

	index = __this_cpu_inc_return(mce_ue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_ue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));

	/* Queue work to process this event later. */
	schedule_work(&mce_ue_event_work);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}
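
/*
 * Note that get_mce_event(..., MCE_EVENT_RELEASE) above copies the event
 * out of the per-CPU nest buffer and frees that slot immediately, so a
 * nested machine check can be logged while this one waits in the queue.
 */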

/*
 * Process pending MCE UE events from the mce_ue_event_queue. This function
 * runs from the workqueue, since memory_failure() needs process context.
 */
static void machine_process_ue_event(struct work_struct *work)
{
	int index;
	struct machine_check_event *evt;

	while (__this_cpu_read(mce_ue_count) > 0) {
		index = __this_cpu_read(mce_ue_count) - 1;
		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
#ifdef CONFIG_MEMORY_FAILURE
		/*
		 * This should probably be queued elsewhere, but
		 * oh well.
		 */
		if (evt->error_type == MCE_ERROR_TYPE_UE) {
			if (evt->u.ue_error.physical_address_provided) {
				unsigned long pfn;

				pfn = evt->u.ue_error.physical_address >>
					PAGE_SHIFT;
				memory_failure(pfn, 0);
			} else {
				pr_warn("Failed to identify bad address from where the uncorrectable error (UE) was generated\n");
			}
		}
#endif
		__this_cpu_dec(mce_ue_count);
	}
}

/*
 * Process pending MCE events from the mce_event_queue. This function is
 * called from irq_work, i.e. shortly after the machine check, once
 * interrupts have been re-enabled.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;
	struct machine_check_event *evt;

	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		evt = this_cpu_ptr(&mce_event_queue[index]);
		machine_check_print_event_info(evt, false);
		__this_cpu_dec(mce_queue_count);
	}
}

void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode)
{
	const char *level, *sevstr, *subtype;
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case MCE_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (user_mode) {
		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
			evt->srr0, current->pid, current->comm);
	} else {
		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
		       (void *)evt->srr0);
	}

	printk("%s  Initiator: %s\n", level,
	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		printk("%s  Error type: UE [%s]\n", level, subtype);
		if (evt->u.ue_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.ue_error.effective_address);
		if (evt->u.ue_error.physical_address_provided)
			printk("%s    Physical address:  %016llx\n",
			       level, evt->u.ue_error.physical_address);
		break;
	case MCE_ERROR_TYPE_SLB:
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s  Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s  Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s  Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_USER:
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		printk("%s  Error type: User [%s]\n", level, subtype);
		if (evt->u.user_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.user_error.effective_address);
		break;
	case MCE_ERROR_TYPE_RA:
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		printk("%s  Error type: Real address [%s]\n", level, subtype);
		if (evt->u.ra_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.ra_error.effective_address);
		break;
	case MCE_ERROR_TYPE_LINK:
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		printk("%s  Error type: Link [%s]\n", level, subtype);
		if (evt->u.link_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.link_error.effective_address);
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		printk("%s  Error type: Unknown\n", level);
		break;
	}
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);

/*
 * This function is called in real mode. Strictly no printk's please.
 *
 * regs->nip and regs->msr contain srr0 and srr1.
 */
long machine_check_early(struct pt_regs *regs)
{
	long handled = 0;

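	/* Interrupts are hard-disabled here, so a plain per-CPU increment is safe. */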
	__this_cpu_inc(irq_stat.mce_exceptions);

	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
		handled = cur_cpu_spec->machine_check_early(regs);
	return handled;
}

/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
static enum {
	DTRIG_UNKNOWN,
	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
} hmer_debug_trig_function;

static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* First look in the device tree */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* If we found the property, don't look at PVR */
	if (prop)
		goto out;

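	/*
	 * Per the checks below: the low 12 bits of the POWER9 PVR hold the
	 * DD revision (0x200 = DD2.0, 0x202 = DD2.2, ...), and the 0xe000
	 * field is zero on the scale-out (Nimbus) parts.
	 */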
	pvr = mfspr(SPRN_PVR);
	/* Check for POWER9 Nimbus (scale-out) */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* DD2.2 and later */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* DD2.0 and DD2.1 - used for vector CI load emulation */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);

/*
 * Handle HMIs that occur as a result of a debug trigger.
 * Return values:
 * -1 means this is not a HMI cause that we know about
 *  0 means no further handling is required
 *  1 means further handling is required
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/* HMER is a write-AND register */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
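	/*
	 * A write to HMER ANDs the written value into the register, so the
	 * store above clears only HMER_DEBUG_TRIG and leaves all other
	 * pending HMI causes intact.
	 */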

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Now to avoid problems with soft-disable we
		 * only do the emulation if we are coming from
		 * host user space
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * See if any other HMI causes remain to be handled. HMEER enables
	 * the causes that can raise an HMI, so any enabled cause still set
	 * in HMER needs the full handler.
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}

/*
 * Return values:
 *  0 = no further handling is required
 *  1 = further handling is required in virtual mode
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}
v4.10.11
/*
 * Machine check exception handling.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2013 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt

#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>

#include <asm/mce.h>

static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Queue for delayed MCE events. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);

static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

static void mce_set_error_info(struct machine_check_event *mce,
			       struct mce_error_info *mce_err)
{
	mce->error_type = mce_err->error_type;
	switch (mce_err->error_type) {
	case MCE_ERROR_TYPE_UE:
		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
		break;
	case MCE_ERROR_TYPE_SLB:
		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
		break;
	case MCE_ERROR_TYPE_ERAT:
		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
		break;
	case MCE_ERROR_TYPE_TLB:
		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
		break;
	case MCE_ERROR_TYPE_UNKNOWN:
	default:
		break;
	}
}

/*
 * Decode and save high level MCE information into per cpu buffer which
 * is an array of machine_check_event structure.
 */
void save_mce_event(struct pt_regs *regs, long handled,
		    struct mce_error_info *mce_err,
		    uint64_t nip, uint64_t addr)
{
	int index = __this_cpu_inc_return(mce_nest_count) - 1;
	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);

	/*
	 * Return if we don't have enough space to log mce event.
	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
	 * the check below will stop buffer overrun.
	 */
	if (index >= MAX_MC_EVT)
		return;

	/* Populate generic machine check info */
	mce->version = MCE_V1;
	mce->srr0 = nip;
	mce->srr1 = regs->msr;
	mce->gpr3 = regs->gpr[3];
	mce->in_use = 1;

	mce->initiator = MCE_INITIATOR_CPU;
	/* Mark it recovered if we have handled it and MSR(RI=1). */
	if (handled && (regs->msr & MSR_RI))
		mce->disposition = MCE_DISPOSITION_RECOVERED;
	else
		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
	mce->severity = MCE_SEV_ERROR_SYNC;

	/*
	 * Populate the mce error_type and type-specific error_type.
	 */
	mce_set_error_info(mce, mce_err);

	if (!addr)
		return;

	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
		mce->u.tlb_error.effective_address_provided = true;
		mce->u.tlb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
		mce->u.slb_error.effective_address_provided = true;
		mce->u.slb_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
		mce->u.erat_error.effective_address_provided = true;
		mce->u.erat_error.effective_address = addr;
	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
		mce->u.ue_error.effective_address_provided = true;
		mce->u.ue_error.effective_address = addr;
	}
	return;
}

/*
 * get_mce_event:
 *	mce	Pointer to machine_check_event structure to be filled.
 *	release	Flag to indicate whether to free the event slot or not.
 *		false = do not release the mce event. Caller will invoke
 *			release_mce_event() once the event has been consumed.
 *		true  = release the slot.
 *
 *	return	1 = success
 *		0 = failure
 *
 * get_mce_event() will be called by platform-specific machine check
 * handler routines and by KVM.
 * When we call get_mce_event(), we are still in interrupt context and
 * preemption will not be scheduled until the ret_from_except() routine
 * is called.
 */
int get_mce_event(struct machine_check_event *mce, bool release)
{
	int index = __this_cpu_read(mce_nest_count) - 1;
	struct machine_check_event *mc_evt;
	int ret = 0;

	/* Sanity check */
	if (index < 0)
		return ret;

	/* Check if we have MCE info to process. */
	if (index < MAX_MC_EVT) {
		mc_evt = this_cpu_ptr(&mce_event[index]);
		/* Copy the event structure and release the original */
		if (mce)
			*mce = *mc_evt;
		if (release)
			mc_evt->in_use = 0;
		ret = 1;
	}
	/* Decrement the count to free the slot. */
	if (release)
		__this_cpu_dec(mce_nest_count);

	return ret;
}

void release_mce_event(void)
{
	get_mce_event(NULL, true);
}

/*
 * Queue up the MCE event which then can be handled later.
 */
void machine_check_queue_event(void)
{
	int index;
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return;

	index = __this_cpu_inc_return(mce_queue_count) - 1;
	/* If queue is full, just return for now. */
	if (index >= MAX_MC_EVT) {
		__this_cpu_dec(mce_queue_count);
		return;
	}
	memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));

	/* Queue irq work to process this event later. */
	irq_work_queue(&mce_event_process_work);
}

/*
 * Process pending MCE events from the mce_event_queue. This function is
 * called from irq_work, shortly after the machine check.
 */
static void machine_check_process_queued_event(struct irq_work *work)
{
	int index;

	/*
	 * For now just print it to console.
	 * TODO: log this error event to FSP or nvram.
	 */
	while (__this_cpu_read(mce_queue_count) > 0) {
		index = __this_cpu_read(mce_queue_count) - 1;
		machine_check_print_event_info(
				this_cpu_ptr(&mce_event_queue[index]));
		__this_cpu_dec(mce_queue_count);
	}
}

void machine_check_print_event_info(struct machine_check_event *evt)
{
	const char *level, *sevstr, *subtype;
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};

	/* Print things out */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "";
		break;
	case MCE_SEV_ERROR_SYNC:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");
	printk("%s  Initiator: %s\n", level,
	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		printk("%s  Error type: UE [%s]\n", level, subtype);
		if (evt->u.ue_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.ue_error.effective_address);
		if (evt->u.ue_error.physical_address_provided)
			printk("%s      Physical address: %016llx\n",
			       level, evt->u.ue_error.physical_address);
		break;
	case MCE_ERROR_TYPE_SLB:
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		printk("%s  Error type: SLB [%s]\n", level, subtype);
		if (evt->u.slb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.slb_error.effective_address);
		break;
	case MCE_ERROR_TYPE_ERAT:
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		printk("%s  Error type: ERAT [%s]\n", level, subtype);
		if (evt->u.erat_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.erat_error.effective_address);
		break;
	case MCE_ERROR_TYPE_TLB:
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		printk("%s  Error type: TLB [%s]\n", level, subtype);
		if (evt->u.tlb_error.effective_address_provided)
			printk("%s    Effective address: %016llx\n",
			       level, evt->u.tlb_error.effective_address);
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		printk("%s  Error type: Unknown\n", level);
		break;
	}
}

uint64_t get_mce_fault_addr(struct machine_check_event *evt)
{
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		if (evt->u.ue_error.effective_address_provided)
			return evt->u.ue_error.effective_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		if (evt->u.slb_error.effective_address_provided)
			return evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		if (evt->u.erat_error.effective_address_provided)
			return evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		if (evt->u.tlb_error.effective_address_provided)
			return evt->u.tlb_error.effective_address;
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		break;
	}
	return 0;
}
350EXPORT_SYMBOL(get_mce_fault_addr);
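
/*
 * A minimal usage sketch (not from this file): a caller holding an event
 * can fetch whichever effective address was recorded for it:
 *
 *	struct machine_check_event evt;
 *	uint64_t addr = 0;
 *
 *	if (get_mce_event(&evt, MCE_EVENT_RELEASE))
 *		addr = get_mce_fault_addr(&evt);
 *
 * A return value of 0 means no effective address was provided.
 */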