Linux Audio

Check our new training course

Loading...
v4.17
  1/*
  2 * Machine check exception handling CPU-side for power7 and power8
  3 *
  4 * This program is free software; you can redistribute it and/or modify
  5 * it under the terms of the GNU General Public License as published by
  6 * the Free Software Foundation; either version 2 of the License, or
  7 * (at your option) any later version.
  8 *
  9 * This program is distributed in the hope that it will be useful,
 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 * GNU General Public License for more details.
 13 *
 14 * You should have received a copy of the GNU General Public License
 15 * along with this program; if not, write to the Free Software
 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 *
 18 * Copyright 2013 IBM Corporation
 19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 20 */
 21
 22#undef DEBUG
 23#define pr_fmt(fmt) "mce_power: " fmt
 24
 25#include <linux/types.h>
 26#include <linux/ptrace.h>
 27#include <asm/mmu.h>
 28#include <asm/mce.h>
 29#include <asm/machdep.h>
 30#include <asm/pgtable.h>
 31#include <asm/pte-walk.h>
 32#include <asm/sstep.h>
 33#include <asm/exception-64s.h>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 34
 35/*
 36 * Convert an address related to an mm to a PFN. NOTE: we are in real
 37 * mode, we could potentially race with page table updates.
 
 
 
 38 */
 39static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
 
 
 
 
 
 
 
 
 
 
 40{
 41	pte_t *ptep;
 42	unsigned long flags;
 43	struct mm_struct *mm;
 44
 45	if (user_mode(regs))
 46		mm = current->mm;
 47	else
 48		mm = &init_mm;
 49
 50	local_irq_save(flags);
 51	if (mm == current->mm)
 52		ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
 53	else
 54		ptep = find_init_mm_pte(addr, NULL);
 55	local_irq_restore(flags);
 56	if (!ptep || pte_special(*ptep))
 57		return ULONG_MAX;
 58	return pte_pfn(*ptep);
 59}
 60
 
 61/* flush SLBs and reload */
 62#ifdef CONFIG_PPC_BOOK3S_64
 63static void flush_and_reload_slb(void)
 64{
 65	struct slb_shadow *slb;
 66	unsigned long i, n;
 67
 68	/* Invalidate all SLBs */
 69	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
 70
 71#ifdef CONFIG_KVM_BOOK3S_HANDLER
 72	/*
 73	 * If machine check is hit when in guest or in transition, we will
 74	 * only flush the SLBs and continue.
 75	 */
 76	if (get_paca()->kvm_hstate.in_guest)
 77		return;
 78#endif
 79
 80	/* For host kernel, reload the SLBs from shadow SLB buffer. */
 81	slb = get_slb_shadow();
 82	if (!slb)
 83		return;
 84
 85	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
 86
 87	/* Load up the SLB entries from shadow SLB */
 88	for (i = 0; i < n; i++) {
 89		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
 90		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
 91
 92		rb = (rb & ~0xFFFul) | i;
 93		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
 94	}
 95}
 96#endif
 97
 98static void flush_erat(void)
 99{
100	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
101}
102
#define MCE_FLUSH_SLB 1
#define MCE_FLUSH_TLB 2
#define MCE_FLUSH_ERAT 3

/*
 * Flush the translation structure selected by @what.
 * Returns 1 when a flush was performed, 0 when the request is not
 * supported in this configuration.
 */
static int mce_flush(int what)
{
	switch (what) {
#ifdef CONFIG_PPC_BOOK3S_64
	case MCE_FLUSH_SLB:
		flush_and_reload_slb();
		return 1;
#endif
	case MCE_FLUSH_ERAT:
		flush_erat();
		return 1;
	case MCE_FLUSH_TLB:
		tlbiel_all();
		return 1;
	default:
		return 0;
	}
}
126
127#define SRR1_MC_LOADSTORE(srr1)	((srr1) & PPC_BIT(42))
128
129struct mce_ierror_table {
130	unsigned long srr1_mask;
131	unsigned long srr1_value;
132	bool nip_valid; /* nip is a valid indicator of faulting address */
133	unsigned int error_type;
134	unsigned int error_subtype;
135	unsigned int initiator;
136	unsigned int severity;
137};
138
139static const struct mce_ierror_table mce_p7_ierror_table[] = {
140{ 0x00000000001c0000, 0x0000000000040000, true,
141  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
142  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
143{ 0x00000000001c0000, 0x0000000000080000, true,
144  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
145  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
146{ 0x00000000001c0000, 0x00000000000c0000, true,
147  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
148  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
149{ 0x00000000001c0000, 0x0000000000100000, true,
150  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
151  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
152{ 0x00000000001c0000, 0x0000000000140000, true,
153  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
154  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
155{ 0x00000000001c0000, 0x0000000000180000, true,
156  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
157  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
158{ 0x00000000001c0000, 0x00000000001c0000, true,
159  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
160  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
161{ 0, 0, 0, 0, 0, 0 } };
162
163static const struct mce_ierror_table mce_p8_ierror_table[] = {
164{ 0x00000000081c0000, 0x0000000000040000, true,
165  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
166  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
167{ 0x00000000081c0000, 0x0000000000080000, true,
168  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
169  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
170{ 0x00000000081c0000, 0x00000000000c0000, true,
171  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
172  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
173{ 0x00000000081c0000, 0x0000000000100000, true,
174  MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
175  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
176{ 0x00000000081c0000, 0x0000000000140000, true,
177  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
178  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
179{ 0x00000000081c0000, 0x0000000000180000, true,
180  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
181  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
182{ 0x00000000081c0000, 0x00000000001c0000, true,
183  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
184  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
185{ 0x00000000081c0000, 0x0000000008000000, true,
186  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
187  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
188{ 0x00000000081c0000, 0x0000000008040000, true,
189  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
190  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
191{ 0, 0, 0, 0, 0, 0 } };
192
193static const struct mce_ierror_table mce_p9_ierror_table[] = {
194{ 0x00000000081c0000, 0x0000000000040000, true,
195  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
196  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
197{ 0x00000000081c0000, 0x0000000000080000, true,
198  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
199  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
200{ 0x00000000081c0000, 0x00000000000c0000, true,
201  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
202  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
203{ 0x00000000081c0000, 0x0000000000100000, true,
204  MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
205  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
206{ 0x00000000081c0000, 0x0000000000140000, true,
207  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
208  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
209{ 0x00000000081c0000, 0x0000000000180000, true,
210  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
211  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
212{ 0x00000000081c0000, 0x00000000001c0000, true,
213  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH_FOREIGN,
214  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
215{ 0x00000000081c0000, 0x0000000008000000, true,
216  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
217  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
218{ 0x00000000081c0000, 0x0000000008040000, true,
219  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
220  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
221{ 0x00000000081c0000, 0x00000000080c0000, true,
222  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH,
223  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
224{ 0x00000000081c0000, 0x0000000008100000, true,
225  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
226  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
227{ 0x00000000081c0000, 0x0000000008140000, false,
228  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_STORE,
229  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, }, /* ASYNC is fatal */
230{ 0x00000000081c0000, 0x0000000008180000, false,
231  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
232  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, }, /* ASYNC is fatal */
233{ 0x00000000081c0000, 0x00000000081c0000, true,
234  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
235  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
236{ 0, 0, 0, 0, 0, 0 } };
237
238struct mce_derror_table {
239	unsigned long dsisr_value;
240	bool dar_valid; /* dar is a valid indicator of faulting address */
241	unsigned int error_type;
242	unsigned int error_subtype;
243	unsigned int initiator;
244	unsigned int severity;
245};
246
247static const struct mce_derror_table mce_p7_derror_table[] = {
248{ 0x00008000, false,
249  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
250  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
251{ 0x00004000, true,
252  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
253  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
254{ 0x00000800, true,
255  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
256  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
257{ 0x00000400, true,
258  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
259  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
260{ 0x00000100, true,
261  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
262  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
263{ 0x00000080, true,
264  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,
265  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
266{ 0x00000040, true,
267  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
268  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
269{ 0, false, 0, 0, 0, 0 } };
270
271static const struct mce_derror_table mce_p8_derror_table[] = {
272{ 0x00008000, false,
273  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
274  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
275{ 0x00004000, true,
276  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
277  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
278{ 0x00002000, true,
279  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
280  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
281{ 0x00001000, true,
282  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
283  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
284{ 0x00000800, true,
285  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
286  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
287{ 0x00000400, true,
288  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
289  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
290{ 0x00000200, true,
291  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
292  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
293{ 0x00000100, true,
294  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
295  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
296{ 0x00000080, true,
297  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,
298  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
299{ 0, false, 0, 0, 0, 0 } };
300
301static const struct mce_derror_table mce_p9_derror_table[] = {
302{ 0x00008000, false,
303  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
304  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
305{ 0x00004000, true,
306  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
307  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
308{ 0x00002000, true,
309  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
310  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
311{ 0x00001000, true,
312  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
313  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
314{ 0x00000800, true,
315  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
316  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
317{ 0x00000400, true,
318  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
319  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
320{ 0x00000200, false,
321  MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
322  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
323{ 0x00000100, true,
324  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
325  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
326{ 0x00000080, true,
327  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,
328  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
329{ 0x00000040, true,
330  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD,
331  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
332{ 0x00000020, false,
333  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
334  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
335{ 0x00000010, false,
336  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
337  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
338{ 0x00000008, false,
339  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD_STORE_FOREIGN,
340  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
341{ 0, false, 0, 0, 0, 0 } };
342
343static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
344					uint64_t *phys_addr)
345{
346	/*
347	 * Carefully look at the NIP to determine
348	 * the instruction to analyse. Reading the NIP
349	 * in real-mode is tricky and can lead to recursive
350	 * faults
351	 */
352	int instr;
353	unsigned long pfn, instr_addr;
354	struct instruction_op op;
355	struct pt_regs tmp = *regs;
356
357	pfn = addr_to_pfn(regs, regs->nip);
358	if (pfn != ULONG_MAX) {
359		instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
360		instr = *(unsigned int *)(instr_addr);
361		if (!analyse_instr(&op, &tmp, instr)) {
362			pfn = addr_to_pfn(regs, op.ea);
363			*addr = op.ea;
364			*phys_addr = (pfn << PAGE_SHIFT);
365			return 0;
366		}
367		/*
368		 * analyse_instr() might fail if the instruction
369		 * is not a load/store, although this is unexpected
370		 * for load/store errors or if we got the NIP
371		 * wrong
372		 */
373	}
374	*addr = 0;
375	return -1;
376}
377
378static int mce_handle_ierror(struct pt_regs *regs,
379		const struct mce_ierror_table table[],
380		struct mce_error_info *mce_err, uint64_t *addr,
381		uint64_t *phys_addr)
382{
383	uint64_t srr1 = regs->msr;
384	int handled = 0;
385	int i;
386
387	*addr = 0;
388
389	for (i = 0; table[i].srr1_mask; i++) {
390		if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
391			continue;
392
393		/* attempt to correct the error */
394		switch (table[i].error_type) {
395		case MCE_ERROR_TYPE_SLB:
396			handled = mce_flush(MCE_FLUSH_SLB);
397			break;
398		case MCE_ERROR_TYPE_ERAT:
399			handled = mce_flush(MCE_FLUSH_ERAT);
400			break;
401		case MCE_ERROR_TYPE_TLB:
402			handled = mce_flush(MCE_FLUSH_TLB);
403			break;
404		}
405
406		/* now fill in mce_error_info */
407		mce_err->error_type = table[i].error_type;
408		switch (table[i].error_type) {
409		case MCE_ERROR_TYPE_UE:
410			mce_err->u.ue_error_type = table[i].error_subtype;
411			break;
412		case MCE_ERROR_TYPE_SLB:
413			mce_err->u.slb_error_type = table[i].error_subtype;
414			break;
415		case MCE_ERROR_TYPE_ERAT:
416			mce_err->u.erat_error_type = table[i].error_subtype;
417			break;
418		case MCE_ERROR_TYPE_TLB:
419			mce_err->u.tlb_error_type = table[i].error_subtype;
420			break;
421		case MCE_ERROR_TYPE_USER:
422			mce_err->u.user_error_type = table[i].error_subtype;
423			break;
424		case MCE_ERROR_TYPE_RA:
425			mce_err->u.ra_error_type = table[i].error_subtype;
426			break;
427		case MCE_ERROR_TYPE_LINK:
428			mce_err->u.link_error_type = table[i].error_subtype;
429			break;
430		}
431		mce_err->severity = table[i].severity;
432		mce_err->initiator = table[i].initiator;
433		if (table[i].nip_valid) {
434			*addr = regs->nip;
435			if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
436				table[i].error_type == MCE_ERROR_TYPE_UE) {
437				unsigned long pfn;
438
439				if (get_paca()->in_mce < MAX_MCE_DEPTH) {
440					pfn = addr_to_pfn(regs, regs->nip);
441					if (pfn != ULONG_MAX) {
442						*phys_addr =
443							(pfn << PAGE_SHIFT);
444					}
445				}
446			}
447		}
448		return handled;
 
 
 
449	}
450
451	mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
452	mce_err->severity = MCE_SEV_ERROR_SYNC;
453	mce_err->initiator = MCE_INITIATOR_CPU;
454
455	return 0;
456}
457
458static int mce_handle_derror(struct pt_regs *regs,
459		const struct mce_derror_table table[],
460		struct mce_error_info *mce_err, uint64_t *addr,
461		uint64_t *phys_addr)
462{
463	uint64_t dsisr = regs->dsisr;
464	int handled = 0;
465	int found = 0;
466	int i;
467
468	*addr = 0;
469
470	for (i = 0; table[i].dsisr_value; i++) {
471		if (!(dsisr & table[i].dsisr_value))
472			continue;
473
474		/* attempt to correct the error */
475		switch (table[i].error_type) {
476		case MCE_ERROR_TYPE_SLB:
477			if (mce_flush(MCE_FLUSH_SLB))
478				handled = 1;
479			break;
480		case MCE_ERROR_TYPE_ERAT:
481			if (mce_flush(MCE_FLUSH_ERAT))
482				handled = 1;
483			break;
484		case MCE_ERROR_TYPE_TLB:
485			if (mce_flush(MCE_FLUSH_TLB))
486				handled = 1;
487			break;
488		}
489
490		/*
491		 * Attempt to handle multiple conditions, but only return
492		 * one. Ensure uncorrectable errors are first in the table
493		 * to match.
494		 */
495		if (found)
496			continue;
497
498		/* now fill in mce_error_info */
499		mce_err->error_type = table[i].error_type;
500		switch (table[i].error_type) {
501		case MCE_ERROR_TYPE_UE:
502			mce_err->u.ue_error_type = table[i].error_subtype;
503			break;
504		case MCE_ERROR_TYPE_SLB:
505			mce_err->u.slb_error_type = table[i].error_subtype;
506			break;
507		case MCE_ERROR_TYPE_ERAT:
508			mce_err->u.erat_error_type = table[i].error_subtype;
509			break;
510		case MCE_ERROR_TYPE_TLB:
511			mce_err->u.tlb_error_type = table[i].error_subtype;
512			break;
513		case MCE_ERROR_TYPE_USER:
514			mce_err->u.user_error_type = table[i].error_subtype;
515			break;
516		case MCE_ERROR_TYPE_RA:
517			mce_err->u.ra_error_type = table[i].error_subtype;
518			break;
519		case MCE_ERROR_TYPE_LINK:
520			mce_err->u.link_error_type = table[i].error_subtype;
521			break;
522		}
523		mce_err->severity = table[i].severity;
524		mce_err->initiator = table[i].initiator;
525		if (table[i].dar_valid)
526			*addr = regs->dar;
527		else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
528				table[i].error_type == MCE_ERROR_TYPE_UE) {
529			/*
530			 * We do a maximum of 4 nested MCE calls, see
531			 * kernel/exception-64s.h
532			 */
533			if (get_paca()->in_mce < MAX_MCE_DEPTH)
534				mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
535		}
536		found = 1;
537	}
 
 
 
538
539	if (found)
540		return handled;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
542	mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
543	mce_err->severity = MCE_SEV_ERROR_SYNC;
544	mce_err->initiator = MCE_INITIATOR_CPU;
 
 
 
 
 
545
546	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547}
548
549static long mce_handle_ue_error(struct pt_regs *regs)
550{
551	long handled = 0;
552
553	/*
554	 * On specific SCOM read via MMIO we may get a machine check
555	 * exception with SRR0 pointing inside opal. If that is the
556	 * case OPAL may have recovery address to re-read SCOM data in
557	 * different way and hence we can recover from this MC.
558	 */
559
560	if (ppc_md.mce_check_early_recovery) {
561		if (ppc_md.mce_check_early_recovery(regs))
562			handled = 1;
563	}
564	return handled;
565}
566
567static long mce_handle_error(struct pt_regs *regs,
568		const struct mce_derror_table dtable[],
569		const struct mce_ierror_table itable[])
570{
571	struct mce_error_info mce_err = { 0 };
572	uint64_t addr, phys_addr = ULONG_MAX;
573	uint64_t srr1 = regs->msr;
574	long handled;
575
576	if (SRR1_MC_LOADSTORE(srr1))
577		handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
578				&phys_addr);
579	else
580		handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
581				&phys_addr);
582
583	if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
584		handled = mce_handle_ue_error(regs);
585
586	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
 
588	return handled;
589}
590
591long __machine_check_early_realmode_p7(struct pt_regs *regs)
592{
593	/* P7 DD1 leaves top bits of DSISR undefined */
594	regs->dsisr &= 0x0000ffff;
 
 
 
 
595
596	return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
 
 
 
 
 
 
597}
598
599long __machine_check_early_realmode_p8(struct pt_regs *regs)
600{
601	return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
 
 
 
 
 
 
 
 
 
 
602}
603
604long __machine_check_early_realmode_p9(struct pt_regs *regs)
605{
606	/*
607	 * On POWER9 DD2.1 and below, it's possible to get a machine check
608	 * caused by a paste instruction where only DSISR bit 25 is set. This
609	 * will result in the MCE handler seeing an unknown event and the kernel
610	 * crashing. An MCE that occurs like this is spurious, so we don't need
611	 * to do anything in terms of servicing it. If there is something that
612	 * needs to be serviced, the CPU will raise the MCE again with the
613	 * correct DSISR so that it can be serviced properly. So detect this
614	 * case and mark it as handled.
615	 */
616	if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
617		return 1;
 
 
 
 
 
 
 
 
 
618
619	return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
 
 
 
 
 
620}
v4.10.11
  1/*
  2 * Machine check exception handling CPU-side for power7 and power8
  3 *
  4 * This program is free software; you can redistribute it and/or modify
  5 * it under the terms of the GNU General Public License as published by
  6 * the Free Software Foundation; either version 2 of the License, or
  7 * (at your option) any later version.
  8 *
  9 * This program is distributed in the hope that it will be useful,
 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 * GNU General Public License for more details.
 13 *
 14 * You should have received a copy of the GNU General Public License
 15 * along with this program; if not, write to the Free Software
 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 17 *
 18 * Copyright 2013 IBM Corporation
 19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 20 */
 21
 22#undef DEBUG
 23#define pr_fmt(fmt) "mce_power: " fmt
 24
 25#include <linux/types.h>
 26#include <linux/ptrace.h>
 27#include <asm/mmu.h>
 28#include <asm/mce.h>
 29#include <asm/machdep.h>
 30
 31static void flush_tlb_206(unsigned int num_sets, unsigned int action)
 32{
 33	unsigned long rb;
 34	unsigned int i;
 35
 36	switch (action) {
 37	case TLB_INVAL_SCOPE_GLOBAL:
 38		rb = TLBIEL_INVAL_SET;
 39		break;
 40	case TLB_INVAL_SCOPE_LPID:
 41		rb = TLBIEL_INVAL_SET_LPID;
 42		break;
 43	default:
 44		BUG();
 45		break;
 46	}
 47
 48	asm volatile("ptesync" : : : "memory");
 49	for (i = 0; i < num_sets; i++) {
 50		asm volatile("tlbiel %0" : : "r" (rb));
 51		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
 52	}
 53	asm volatile("ptesync" : : : "memory");
 54}
 55
 56/*
 57 * Generic routines to flush TLB on POWER processors. These routines
 58 * are used as flush_tlb hook in the cpu_spec.
 59 *
 60 * action => TLB_INVAL_SCOPE_GLOBAL:  Invalidate all TLBs.
 61 *	     TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
 62 */
 63void __flush_tlb_power7(unsigned int action)
 64{
 65	flush_tlb_206(POWER7_TLB_SETS, action);
 66}
 67
 68void __flush_tlb_power8(unsigned int action)
 69{
 70	flush_tlb_206(POWER8_TLB_SETS, action);
 71}
 72
 73void __flush_tlb_power9(unsigned int action)
 74{
 75	if (radix_enabled())
 76		flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
 77
 78	flush_tlb_206(POWER9_TLB_SETS_HASH, action);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 79}
 80
 81
 82/* flush SLBs and reload */
 83#ifdef CONFIG_PPC_STD_MMU_64
 84static void flush_and_reload_slb(void)
 85{
 86	struct slb_shadow *slb;
 87	unsigned long i, n;
 88
 89	/* Invalidate all SLBs */
 90	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
 91
 92#ifdef CONFIG_KVM_BOOK3S_HANDLER
 93	/*
 94	 * If machine check is hit when in guest or in transition, we will
 95	 * only flush the SLBs and continue.
 96	 */
 97	if (get_paca()->kvm_hstate.in_guest)
 98		return;
 99#endif
100
101	/* For host kernel, reload the SLBs from shadow SLB buffer. */
102	slb = get_slb_shadow();
103	if (!slb)
104		return;
105
106	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
107
108	/* Load up the SLB entries from shadow SLB */
109	for (i = 0; i < n; i++) {
110		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
111		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
112
113		rb = (rb & ~0xFFFul) | i;
114		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
115	}
116}
117#endif
118
119static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
120{
121	long handled = 1;
 
 
 
 
 
122
123	/*
124	 * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
125	 * reset the error bits whenever we handle them so that at the end
126	 * we can check whether we handled all of them or not.
127	 * */
128#ifdef CONFIG_PPC_STD_MMU_64
129	if (dsisr & slb_error_bits) {
130		flush_and_reload_slb();
131		/* reset error bits */
132		dsisr &= ~(slb_error_bits);
133	}
134	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
135		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
136			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
137		/* reset error bits */
138		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
139	}
140#endif
141	/* Any other errors we don't understand? */
142	if (dsisr & 0xffffffffUL)
143		handled = 0;
144
145	return handled;
146}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
148static long mce_handle_derror_p7(uint64_t dsisr)
 
149{
150	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
151}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
153static long mce_handle_common_ierror(uint64_t srr1)
154{
155	long handled = 0;
156
157	switch (P7_SRR1_MC_IFETCH(srr1)) {
158	case 0:
159		break;
160#ifdef CONFIG_PPC_STD_MMU_64
161	case P7_SRR1_MC_IFETCH_SLB_PARITY:
162	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
163		/* flush and reload SLBs for SLB errors. */
164		flush_and_reload_slb();
165		handled = 1;
166		break;
167	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
168		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
169			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
170			handled = 1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171		}
172		break;
173#endif
174	default:
175		break;
176	}
177
178	return handled;
179}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
181static long mce_handle_ierror_p7(uint64_t srr1)
182{
183	long handled = 0;
184
185	handled = mce_handle_common_ierror(srr1);
186
187#ifdef CONFIG_PPC_STD_MMU_64
188	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
189		flush_and_reload_slb();
190		handled = 1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191	}
192#endif
193	return handled;
194}
195
196static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
197{
198	switch (P7_SRR1_MC_IFETCH(srr1)) {
199	case P7_SRR1_MC_IFETCH_SLB_PARITY:
200		mce_err->error_type = MCE_ERROR_TYPE_SLB;
201		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
202		break;
203	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
204		mce_err->error_type = MCE_ERROR_TYPE_SLB;
205		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
206		break;
207	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
208		mce_err->error_type = MCE_ERROR_TYPE_TLB;
209		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
210		break;
211	case P7_SRR1_MC_IFETCH_UE:
212	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
213		mce_err->error_type = MCE_ERROR_TYPE_UE;
214		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
215		break;
216	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
217		mce_err->error_type = MCE_ERROR_TYPE_UE;
218		mce_err->u.ue_error_type =
219				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
220		break;
221	}
222}
223
224static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
225{
226	mce_get_common_ierror(mce_err, srr1);
227	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
228		mce_err->error_type = MCE_ERROR_TYPE_SLB;
229		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
230	}
231}
232
233static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
234{
235	if (dsisr & P7_DSISR_MC_UE) {
236		mce_err->error_type = MCE_ERROR_TYPE_UE;
237		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
238	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
239		mce_err->error_type = MCE_ERROR_TYPE_UE;
240		mce_err->u.ue_error_type =
241				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
242	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
243		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
244		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
245	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
246		mce_err->error_type = MCE_ERROR_TYPE_SLB;
247		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
248	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
249		mce_err->error_type = MCE_ERROR_TYPE_SLB;
250		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
251	} else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
252		mce_err->error_type = MCE_ERROR_TYPE_TLB;
253		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
254	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
255		mce_err->error_type = MCE_ERROR_TYPE_SLB;
256		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
257	}
258}
259
260static long mce_handle_ue_error(struct pt_regs *regs)
261{
262	long handled = 0;
263
264	/*
265	 * On specific SCOM read via MMIO we may get a machine check
266	 * exception with SRR0 pointing inside opal. If that is the
267	 * case OPAL may have recovery address to re-read SCOM data in
268	 * different way and hence we can recover from this MC.
269	 */
270
271	if (ppc_md.mce_check_early_recovery) {
272		if (ppc_md.mce_check_early_recovery(regs))
273			handled = 1;
274	}
275	return handled;
276}
277
278long __machine_check_early_realmode_p7(struct pt_regs *regs)
279{
280	uint64_t srr1, nip, addr;
281	long handled = 1;
282	struct mce_error_info mce_error_info = { 0 };
 
 
 
 
 
 
 
 
 
 
283
284	srr1 = regs->msr;
285	nip = regs->nip;
286
287	/*
288	 * Handle memory errors depending whether this was a load/store or
289	 * ifetch exception. Also, populate the mce error_type and
290	 * type-specific error_type from either SRR1 or DSISR, depending
291	 * whether this was a load/store or ifetch exception
292	 */
293	if (P7_SRR1_MC_LOADSTORE(srr1)) {
294		handled = mce_handle_derror_p7(regs->dsisr);
295		mce_get_derror_p7(&mce_error_info, regs->dsisr);
296		addr = regs->dar;
297	} else {
298		handled = mce_handle_ierror_p7(srr1);
299		mce_get_ierror_p7(&mce_error_info, srr1);
300		addr = regs->nip;
301	}
302
303	/* Handle UE error. */
304	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
305		handled = mce_handle_ue_error(regs);
306
307	save_mce_event(regs, handled, &mce_error_info, nip, addr);
308	return handled;
309}
310
311static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
312{
313	mce_get_common_ierror(mce_err, srr1);
314	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
315		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
316		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
317	}
318}
319
320static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
321{
322	mce_get_derror_p7(mce_err, dsisr);
323	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
324		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
325		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
326	}
327}
328
329static long mce_handle_ierror_p8(uint64_t srr1)
330{
331	long handled = 0;
332
333	handled = mce_handle_common_ierror(srr1);
334
335#ifdef CONFIG_PPC_STD_MMU_64
336	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
337		flush_and_reload_slb();
338		handled = 1;
339	}
340#endif
341	return handled;
342}
343
344static long mce_handle_derror_p8(uint64_t dsisr)
345{
346	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
347}
348
349long __machine_check_early_realmode_p8(struct pt_regs *regs)
350{
351	uint64_t srr1, nip, addr;
352	long handled = 1;
353	struct mce_error_info mce_error_info = { 0 };
354
355	srr1 = regs->msr;
356	nip = regs->nip;
357
358	if (P7_SRR1_MC_LOADSTORE(srr1)) {
359		handled = mce_handle_derror_p8(regs->dsisr);
360		mce_get_derror_p8(&mce_error_info, regs->dsisr);
361		addr = regs->dar;
362	} else {
363		handled = mce_handle_ierror_p8(srr1);
364		mce_get_ierror_p8(&mce_error_info, srr1);
365		addr = regs->nip;
366	}
367
368	/* Handle UE error. */
369	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
370		handled = mce_handle_ue_error(regs);
371
372	save_mce_event(regs, handled, &mce_error_info, nip, addr);
373	return handled;
374}