1/*
2 * Machine check exception handling CPU-side for power7 and power8
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2013 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22#undef DEBUG
23#define pr_fmt(fmt) "mce_power: " fmt
24
25#include <linux/types.h>
26#include <linux/ptrace.h>
27#include <asm/mmu.h>
28#include <asm/mce.h>
29#include <asm/machdep.h>
30#include <asm/pgtable.h>
31#include <asm/pte-walk.h>
32#include <asm/sstep.h>
33#include <asm/exception-64s.h>
34
35/*
36 * Convert an address related to an mm to a PFN. NOTE: we are in real
37 * mode, we could potentially race with page table updates.
38 */
static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
	pte_t *ptep;
	unsigned long flags;
	struct mm_struct *mm;

	/*
	 * A user-mode fault is translated through the current process's
	 * page tables; anything else is resolved against init_mm.
	 */
	if (user_mode(regs))
		mm = current->mm;
	else
		mm = &init_mm;

	/* Disable interrupts so the PTE walk is not preempted mid-lookup. */
	local_irq_save(flags);
	if (mm == current->mm)
		ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
	else
		ptep = find_init_mm_pte(addr, NULL);
	local_irq_restore(flags);
	/*
	 * No mapping, or a "special" PTE (no usable struct page / pfn
	 * semantics): report failure with the ULONG_MAX sentinel.
	 */
	if (!ptep || pte_special(*ptep))
		return ULONG_MAX;
	return pte_pfn(*ptep);
}
60
61/* flush SLBs and reload */
62#ifdef CONFIG_PPC_BOOK3S_64
/*
 * Invalidate the entire SLB, then (for the host kernel) repopulate it
 * from the shadow SLB buffer so execution can continue.
 */
static void flush_and_reload_slb(void)
{
	struct slb_shadow *slb;
	unsigned long i, n;

	/* Invalidate all SLBs */
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));

#ifdef CONFIG_KVM_BOOK3S_HANDLER
	/*
	 * If machine check is hit when in guest or in transition, we will
	 * only flush the SLBs and continue.
	 */
	if (get_paca()->kvm_hstate.in_guest)
		return;
#endif

	/* For host kernel, reload the SLBs from shadow SLB buffer. */
	slb = get_slb_shadow();
	if (!slb)
		return;

	/* Cap at SLB_MIN_SIZE so we never exceed the shadow save area. */
	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);

	/* Load up the SLB entries from shadow SLB */
	for (i = 0; i < n; i++) {
		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);

		/* Low 12 bits of RB carry the SLB entry index. */
		rb = (rb & ~0xFFFul) | i;
		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
	}
}
96#endif
97
/* Invalidate the ERAT (effective-to-real address translation cache). */
static void flush_erat(void)
{
	asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
102
#define MCE_FLUSH_SLB 1
#define MCE_FLUSH_TLB 2
#define MCE_FLUSH_ERAT 3

/*
 * Perform the requested flush as a recovery attempt. Returns 1 when the
 * flush was carried out, 0 when the request is unknown (or SLB flushing
 * is not built in for this configuration).
 */
static int mce_flush(int what)
{
	switch (what) {
#ifdef CONFIG_PPC_BOOK3S_64
	case MCE_FLUSH_SLB:
		flush_and_reload_slb();
		return 1;
#endif
	case MCE_FLUSH_ERAT:
		flush_erat();
		return 1;
	case MCE_FLUSH_TLB:
		tlbiel_all();
		return 1;
	default:
		return 0;
	}
}
126
127#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
128
/* One decode entry for an instruction-side (ifetch) machine check. */
struct mce_ierror_table {
	unsigned long srr1_mask;	/* SRR1 bits to examine */
	unsigned long srr1_value;	/* value identifying this cause */
	bool nip_valid; /* nip is a valid indicator of faulting address */
	unsigned int error_type;	/* MCE_ERROR_TYPE_* classification */
	unsigned int error_subtype;	/* type-specific MCE_*_ERROR_* code */
	unsigned int initiator;		/* MCE_INITIATOR_* */
	unsigned int severity;		/* MCE_SEV_* */
};
138
/*
 * POWER7 ifetch machine check decode table; matched in order against
 * SRR1 by mce_handle_ierror(). A zero srr1_mask terminates the table.
 */
static const struct mce_ierror_table mce_p7_ierror_table[] = {
{ 0x00000000001c0000, 0x0000000000040000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000001c0000, 0x0000000000080000, true,
  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000001c0000, 0x00000000000c0000, true,
  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000001c0000, 0x0000000000100000, true,
  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000001c0000, 0x0000000000140000, true,
  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000001c0000, 0x0000000000180000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000001c0000, 0x00000000001c0000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0, 0, 0, 0, 0, 0 } };
162
/*
 * POWER8 ifetch machine check decode table; matched in order against
 * SRR1 by mce_handle_ierror(). A zero srr1_mask terminates the table.
 */
static const struct mce_ierror_table mce_p8_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000080000, true,
  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x00000000000c0000, true,
  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000100000, true,
  MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000140000, true,
  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000180000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x00000000001c0000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008000000, true,
  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008040000, true,
  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0, 0, 0, 0, 0, 0 } };
192
/*
 * POWER9 ifetch machine check decode table; matched in order against
 * SRR1 by mce_handle_ierror(). A zero srr1_mask terminates the table.
 * Entries marked ASYNC have no valid NIP and are treated as fatal.
 */
static const struct mce_ierror_table mce_p9_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000080000, true,
  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x00000000000c0000, true,
  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000100000, true,
  MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000140000, true,
  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000000180000, true,
  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x00000000001c0000, true,
  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH_FOREIGN,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008000000, true,
  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008040000, true,
  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x00000000080c0000, true,
  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008100000, true,
  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008140000, false,
  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_STORE,
  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, }, /* ASYNC is fatal */
{ 0x00000000081c0000, 0x0000000008180000, false,
  MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, }, /* ASYNC is fatal */
{ 0x00000000081c0000, 0x00000000081c0000, true,
  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
{ 0, 0, 0, 0, 0, 0 } };
237
/* One decode entry for a data-side (load/store) machine check. */
struct mce_derror_table {
	unsigned long dsisr_value;	/* single DSISR bit identifying the cause */
	bool dar_valid; /* dar is a valid indicator of faulting address */
	unsigned int error_type;	/* MCE_ERROR_TYPE_* classification */
	unsigned int error_subtype;	/* type-specific MCE_*_ERROR_* code */
	unsigned int initiator;		/* MCE_INITIATOR_* */
	unsigned int severity;		/* MCE_SEV_* */
};
246
/*
 * POWER7 load/store machine check decode table; mce_handle_derror()
 * tests each DSISR bit in table order. A zero dsisr_value terminates.
 */
static const struct mce_derror_table mce_p7_derror_table[] = {
{ 0x00008000, false,
  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00004000, true,
  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000800, true,
  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000400, true,
  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000100, true,
  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000080, true,
  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000040, true,
  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0, false, 0, 0, 0, 0 } };
270
/*
 * POWER8 load/store machine check decode table; mce_handle_derror()
 * tests each DSISR bit in table order. A zero dsisr_value terminates.
 */
static const struct mce_derror_table mce_p8_derror_table[] = {
{ 0x00008000, false,
  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00004000, true,
  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00002000, true,
  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00001000, true,
  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000800, true,
  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000400, true,
  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000200, true,
  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000100, true,
  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000080, true,
  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0, false, 0, 0, 0, 0 } };
300
/*
 * POWER9 load/store machine check decode table; mce_handle_derror()
 * tests each DSISR bit in table order. A zero dsisr_value terminates.
 */
static const struct mce_derror_table mce_p9_derror_table[] = {
{ 0x00008000, false,
  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00004000, true,
  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00002000, true,
  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00001000, true,
  MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000800, true,
  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000400, true,
  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000200, false,
  MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000100, true,
  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000080, true,
  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000040, true,
  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000020, false,
  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000010, false,
  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0x00000008, false,
  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD_STORE_FOREIGN,
  MCE_INITIATOR_CPU,   MCE_SEV_ERROR_SYNC, },
{ 0, false, 0, 0, 0, 0 } };
342
/*
 * For a data-side error with no valid DAR, recover the faulting
 * effective and physical address by decoding the instruction at NIP.
 * Returns 0 on success (*addr and *phys_addr set), -1 on failure
 * (*addr set to 0, *phys_addr untouched).
 */
static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
					uint64_t *phys_addr)
{
	/*
	 * Carefully look at the NIP to determine
	 * the instruction to analyse. Reading the NIP
	 * in real-mode is tricky and can lead to recursive
	 * faults
	 */
	int instr;
	unsigned long pfn, instr_addr;
	struct instruction_op op;
	/* Work on a scratch copy so analyse_instr() can't clobber regs. */
	struct pt_regs tmp = *regs;

	pfn = addr_to_pfn(regs, regs->nip);
	if (pfn != ULONG_MAX) {
		/* Real mode: read the instruction through its physical address. */
		instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
		instr = *(unsigned int *)(instr_addr);
		if (!analyse_instr(&op, &tmp, instr)) {
			/*
			 * NOTE(review): if op.ea fails to translate here,
			 * pfn is ULONG_MAX and *phys_addr becomes a
			 * wrapped value — callers appear to rely on the
			 * return path below instead; confirm.
			 */
			pfn = addr_to_pfn(regs, op.ea);
			*addr = op.ea;
			*phys_addr = (pfn << PAGE_SHIFT);
			return 0;
		}
		/*
		 * analyse_instr() might fail if the instruction
		 * is not a load/store, although this is unexpected
		 * for load/store errors or if we got the NIP
		 * wrong
		 */
	}
	*addr = 0;
	return -1;
}
377
/*
 * Decode and attempt to recover an instruction-side machine check.
 *
 * @regs:      exception frame; regs->msr carries the SRR1 image.
 * @table:     CPU-specific SRR1 decode table (zero srr1_mask terminates).
 * @mce_err:   filled with the decoded classification.
 * @addr:      set to NIP when the matching entry says nip is valid, else 0.
 * @phys_addr: set to the physical address of the NIP page for
 *             synchronous UE errors when translation succeeds;
 *             otherwise left untouched.
 *
 * Returns 1 if a corrective flush was performed, 0 otherwise. Only the
 * first matching table entry is acted on.
 */
static int mce_handle_ierror(struct pt_regs *regs,
		const struct mce_ierror_table table[],
		struct mce_error_info *mce_err, uint64_t *addr,
		uint64_t *phys_addr)
{
	uint64_t srr1 = regs->msr;
	int handled = 0;
	int i;

	*addr = 0;

	for (i = 0; table[i].srr1_mask; i++) {
		if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
			continue;

		/* attempt to correct the error */
		switch (table[i].error_type) {
		case MCE_ERROR_TYPE_SLB:
			handled = mce_flush(MCE_FLUSH_SLB);
			break;
		case MCE_ERROR_TYPE_ERAT:
			handled = mce_flush(MCE_FLUSH_ERAT);
			break;
		case MCE_ERROR_TYPE_TLB:
			handled = mce_flush(MCE_FLUSH_TLB);
			break;
		}

		/* now fill in mce_error_info */
		mce_err->error_type = table[i].error_type;
		switch (table[i].error_type) {
		case MCE_ERROR_TYPE_UE:
			mce_err->u.ue_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_SLB:
			mce_err->u.slb_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_ERAT:
			mce_err->u.erat_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_TLB:
			mce_err->u.tlb_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_USER:
			mce_err->u.user_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_RA:
			mce_err->u.ra_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_LINK:
			mce_err->u.link_error_type = table[i].error_subtype;
			break;
		}
		mce_err->severity = table[i].severity;
		mce_err->initiator = table[i].initiator;
		if (table[i].nip_valid) {
			*addr = regs->nip;
			if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
					table[i].error_type == MCE_ERROR_TYPE_UE) {
				unsigned long pfn;

				/*
				 * Only walk page tables below the nested-MCE
				 * limit, since addr_to_pfn() could itself
				 * take a machine check.
				 */
				if (get_paca()->in_mce < MAX_MCE_DEPTH) {
					pfn = addr_to_pfn(regs, regs->nip);
					if (pfn != ULONG_MAX) {
						*phys_addr =
							(pfn << PAGE_SHIFT);
					}
				}
			}
		}
		return handled;
	}

	/* No table entry matched: report an unknown synchronous error. */
	mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
	mce_err->severity = MCE_SEV_ERROR_SYNC;
	mce_err->initiator = MCE_INITIATOR_CPU;

	return 0;
}
457
/*
 * Decode and attempt to recover a data-side (load/store) machine check.
 *
 * Unlike the ierror path, several DSISR bits may be set at once: every
 * matching entry gets its corrective flush, but only the FIRST match is
 * reported in @mce_err / @addr / @phys_addr.
 *
 * Returns 1 if any corrective flush was performed, 0 otherwise.
 */
static int mce_handle_derror(struct pt_regs *regs,
		const struct mce_derror_table table[],
		struct mce_error_info *mce_err, uint64_t *addr,
		uint64_t *phys_addr)
{
	uint64_t dsisr = regs->dsisr;
	int handled = 0;
	int found = 0;
	int i;

	*addr = 0;

	for (i = 0; table[i].dsisr_value; i++) {
		if (!(dsisr & table[i].dsisr_value))
			continue;

		/* attempt to correct the error */
		switch (table[i].error_type) {
		case MCE_ERROR_TYPE_SLB:
			if (mce_flush(MCE_FLUSH_SLB))
				handled = 1;
			break;
		case MCE_ERROR_TYPE_ERAT:
			if (mce_flush(MCE_FLUSH_ERAT))
				handled = 1;
			break;
		case MCE_ERROR_TYPE_TLB:
			if (mce_flush(MCE_FLUSH_TLB))
				handled = 1;
			break;
		}

		/*
		 * Attempt to handle multiple conditions, but only return
		 * one. Ensure uncorrectable errors are first in the table
		 * to match.
		 */
		if (found)
			continue;

		/* now fill in mce_error_info */
		mce_err->error_type = table[i].error_type;
		switch (table[i].error_type) {
		case MCE_ERROR_TYPE_UE:
			mce_err->u.ue_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_SLB:
			mce_err->u.slb_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_ERAT:
			mce_err->u.erat_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_TLB:
			mce_err->u.tlb_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_USER:
			mce_err->u.user_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_RA:
			mce_err->u.ra_error_type = table[i].error_subtype;
			break;
		case MCE_ERROR_TYPE_LINK:
			mce_err->u.link_error_type = table[i].error_subtype;
			break;
		}
		mce_err->severity = table[i].severity;
		mce_err->initiator = table[i].initiator;
		if (table[i].dar_valid)
			*addr = regs->dar;
		else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
				table[i].error_type == MCE_ERROR_TYPE_UE) {
			/*
			 * We do a maximum of 4 nested MCE calls, see
			 * kernel/exception-64s.h
			 */
			if (get_paca()->in_mce < MAX_MCE_DEPTH)
				mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
		}
		found = 1;
	}

	if (found)
		return handled;

	/* No DSISR bit recognised: report an unknown synchronous error. */
	mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
	mce_err->severity = MCE_SEV_ERROR_SYNC;
	mce_err->initiator = MCE_INITIATOR_CPU;

	return 0;
}
548
549static long mce_handle_ue_error(struct pt_regs *regs)
550{
551 long handled = 0;
552
553 /*
554 * On specific SCOM read via MMIO we may get a machine check
555 * exception with SRR0 pointing inside opal. If that is the
556 * case OPAL may have recovery address to re-read SCOM data in
557 * different way and hence we can recover from this MC.
558 */
559
560 if (ppc_md.mce_check_early_recovery) {
561 if (ppc_md.mce_check_early_recovery(regs))
562 handled = 1;
563 }
564 return handled;
565}
566
/*
 * Common machine check entry: dispatch to the data-side or
 * instruction-side decoder based on SRR1 bit 42 (see
 * SRR1_MC_LOADSTORE above), try platform UE recovery for unhandled
 * UEs, and record the event. Returns nonzero when handled.
 */
static long mce_handle_error(struct pt_regs *regs,
		const struct mce_derror_table dtable[],
		const struct mce_ierror_table itable[])
{
	struct mce_error_info mce_err = { 0 };
	/* phys_addr stays ULONG_MAX unless a decoder resolves it. */
	uint64_t addr, phys_addr = ULONG_MAX;
	uint64_t srr1 = regs->msr;
	long handled;

	if (SRR1_MC_LOADSTORE(srr1))
		handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
				&phys_addr);
	else
		handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
				&phys_addr);

	/* Unhandled UEs may still be recoverable by platform firmware. */
	if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);

	return handled;
}
590
/* Early real-mode machine check entry point for POWER7. */
long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
	/* P7 DD1 leaves top bits of DSISR undefined */
	regs->dsisr &= 0x0000ffff;

	return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
}
598
/* Early real-mode machine check entry point for POWER8. */
long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
	return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
}
603
/* Early real-mode machine check entry point for POWER9. */
long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
	/*
	 * On POWER9 DD2.1 and below, it's possible to get a machine check
	 * caused by a paste instruction where only DSISR bit 25 is set. This
	 * will result in the MCE handler seeing an unknown event and the kernel
	 * crashing. An MCE that occurs like this is spurious, so we don't need
	 * to do anything in terms of servicing it. If there is something that
	 * needs to be serviced, the CPU will raise the MCE again with the
	 * correct DSISR so that it can be serviced properly. So detect this
	 * case and mark it as handled.
	 */
	if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
		return 1;

	return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
}
1/*
2 * Machine check exception handling CPU-side for power7 and power8
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2013 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22#undef DEBUG
23#define pr_fmt(fmt) "mce_power: " fmt
24
25#include <linux/types.h>
26#include <linux/ptrace.h>
27#include <asm/mmu.h>
28#include <asm/mce.h>
29#include <asm/machdep.h>
30
/*
 * Flush the TLB by issuing one tlbiel per congruence-class set.
 *
 * @num_sets: number of TLB sets on this CPU model.
 * @action:   TLB_INVAL_SCOPE_GLOBAL or TLB_INVAL_SCOPE_LPID; selects
 *            the starting RB encoding for the tlbiel sequence.
 */
static void flush_tlb_206(unsigned int num_sets, unsigned int action)
{
	unsigned long rb;
	unsigned int i;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		rb = TLBIEL_INVAL_SET;
		break;
	case TLB_INVAL_SCOPE_LPID:
		rb = TLBIEL_INVAL_SET_LPID;
		break;
	default:
		/* Unknown scope is a programming error. */
		BUG();
		break;
	}

	/* ptesync before and after orders the tlbiel sequence. */
	asm volatile("ptesync" : : : "memory");
	for (i = 0; i < num_sets; i++) {
		asm volatile("tlbiel %0" : : "r" (rb));
		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
	}
	asm volatile("ptesync" : : : "memory");
}
55
56/*
57 * Generic routines to flush TLB on POWER processors. These routines
58 * are used as flush_tlb hook in the cpu_spec.
59 *
60 * action => TLB_INVAL_SCOPE_GLOBAL: Invalidate all TLBs.
61 * TLB_INVAL_SCOPE_LPID: Invalidate TLB for current LPID.
62 */
/* POWER7 flush_tlb hook: flush all POWER7 TLB sets with the given scope. */
void __flush_tlb_power7(unsigned int action)
{
	flush_tlb_206(POWER7_TLB_SETS, action);
}
67
/* POWER8 flush_tlb hook: flush all POWER8 TLB sets with the given scope. */
void __flush_tlb_power8(unsigned int action)
{
	flush_tlb_206(POWER8_TLB_SETS, action);
}
72
73void __flush_tlb_power9(unsigned int action)
74{
75 if (radix_enabled())
76 flush_tlb_206(POWER9_TLB_SETS_RADIX, action);
77
78 flush_tlb_206(POWER9_TLB_SETS_HASH, action);
79}
80
81
82/* flush SLBs and reload */
83#ifdef CONFIG_PPC_STD_MMU_64
/*
 * Invalidate the entire SLB, then (for the host kernel) repopulate it
 * from the shadow SLB buffer so execution can continue.
 */
static void flush_and_reload_slb(void)
{
	struct slb_shadow *slb;
	unsigned long i, n;

	/* Invalidate all SLBs */
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));

#ifdef CONFIG_KVM_BOOK3S_HANDLER
	/*
	 * If machine check is hit when in guest or in transition, we will
	 * only flush the SLBs and continue.
	 */
	if (get_paca()->kvm_hstate.in_guest)
		return;
#endif

	/* For host kernel, reload the SLBs from shadow SLB buffer. */
	slb = get_slb_shadow();
	if (!slb)
		return;

	/* Cap at SLB_MIN_SIZE so we never exceed the shadow save area. */
	n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);

	/* Load up the SLB entries from shadow SLB */
	for (i = 0; i < n; i++) {
		unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
		unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);

		/* Low 12 bits of RB carry the SLB entry index. */
		rb = (rb & ~0xFFFul) | i;
		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
	}
}
117#endif
118
/*
 * Attempt to recover a load/store machine check from DSISR.
 *
 * @dsisr:          DSISR image at the time of the machine check.
 * @slb_error_bits: CPU-specific mask of DSISR bits indicating SLB errors.
 *
 * Returns 1 when every set error bit was handled, 0 when any
 * unrecognised bit remains.
 */
static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
{
	long handled = 1;

	/*
	 * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
	 * reset the error bits whenever we handle them so that at the end
	 * we can check whether we handled all of them or not.
	 * */
#ifdef CONFIG_PPC_STD_MMU_64
	if (dsisr & slb_error_bits) {
		flush_and_reload_slb();
		/* reset error bits */
		dsisr &= ~(slb_error_bits);
	}
	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
		/* TLB flush goes through the per-CPU flush_tlb hook, if set. */
		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
		/* reset error bits */
		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
	}
#endif
	/* Any other errors we don't understand? */
	if (dsisr & 0xffffffffUL)
		handled = 0;

	return handled;
}
147
/* POWER7: handle data-side errors using the P7 SLB error bit mask. */
static long mce_handle_derror_p7(uint64_t dsisr)
{
	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
}
152
/*
 * Attempt to recover instruction-side causes common to POWER7/POWER8.
 * Returns 1 when a flush-based recovery was performed, 0 otherwise;
 * CPU-specific causes are left to the per-CPU callers.
 */
static long mce_handle_common_ierror(uint64_t srr1)
{
	long handled = 0;

	switch (P7_SRR1_MC_IFETCH(srr1)) {
	case 0:
		break;
#ifdef CONFIG_PPC_STD_MMU_64
	case P7_SRR1_MC_IFETCH_SLB_PARITY:
	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
		/* flush and reload SLBs for SLB errors. */
		flush_and_reload_slb();
		handled = 1;
		break;
	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
		/* TLB flush goes through the per-CPU flush_tlb hook, if set. */
		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
			cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
			handled = 1;
		}
		break;
#endif
	default:
		break;
	}

	return handled;
}
180
/*
 * POWER7: handle instruction-side errors — the common causes plus the
 * P7-specific "SLB both" (parity + multihit) case.
 */
static long mce_handle_ierror_p7(uint64_t srr1)
{
	long handled = 0;

	handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
		flush_and_reload_slb();
		handled = 1;
	}
#endif
	return handled;
}
195
196static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
197{
198 switch (P7_SRR1_MC_IFETCH(srr1)) {
199 case P7_SRR1_MC_IFETCH_SLB_PARITY:
200 mce_err->error_type = MCE_ERROR_TYPE_SLB;
201 mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
202 break;
203 case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
204 mce_err->error_type = MCE_ERROR_TYPE_SLB;
205 mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
206 break;
207 case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
208 mce_err->error_type = MCE_ERROR_TYPE_TLB;
209 mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
210 break;
211 case P7_SRR1_MC_IFETCH_UE:
212 case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
213 mce_err->error_type = MCE_ERROR_TYPE_UE;
214 mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
215 break;
216 case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
217 mce_err->error_type = MCE_ERROR_TYPE_UE;
218 mce_err->u.ue_error_type =
219 MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
220 break;
221 }
222}
223
/*
 * POWER7: classify an ifetch error — the common causes plus the
 * P7-specific "SLB both" case, reported as an indeterminate SLB error.
 */
static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
{
	mce_get_common_ierror(mce_err, srr1);
	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
		mce_err->error_type = MCE_ERROR_TYPE_SLB;
		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
	}
}
232
233static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
234{
235 if (dsisr & P7_DSISR_MC_UE) {
236 mce_err->error_type = MCE_ERROR_TYPE_UE;
237 mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
238 } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
239 mce_err->error_type = MCE_ERROR_TYPE_UE;
240 mce_err->u.ue_error_type =
241 MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
242 } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
243 mce_err->error_type = MCE_ERROR_TYPE_ERAT;
244 mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
245 } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
246 mce_err->error_type = MCE_ERROR_TYPE_SLB;
247 mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
248 } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
249 mce_err->error_type = MCE_ERROR_TYPE_SLB;
250 mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
251 } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
252 mce_err->error_type = MCE_ERROR_TYPE_TLB;
253 mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
254 } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
255 mce_err->error_type = MCE_ERROR_TYPE_SLB;
256 mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
257 }
258}
259
260static long mce_handle_ue_error(struct pt_regs *regs)
261{
262 long handled = 0;
263
264 /*
265 * On specific SCOM read via MMIO we may get a machine check
266 * exception with SRR0 pointing inside opal. If that is the
267 * case OPAL may have recovery address to re-read SCOM data in
268 * different way and hence we can recover from this MC.
269 */
270
271 if (ppc_md.mce_check_early_recovery) {
272 if (ppc_md.mce_check_early_recovery(regs))
273 handled = 1;
274 }
275 return handled;
276}
277
/*
 * Early real-mode machine check entry point for POWER7: attempt
 * recovery, classify the error, and record the event. Returns nonzero
 * when the error was handled.
 */
long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
	uint64_t srr1, nip, addr;
	long handled = 1;
	struct mce_error_info mce_error_info = { 0 };

	srr1 = regs->msr;
	nip = regs->nip;

	/*
	 * Handle memory errors depending whether this was a load/store or
	 * ifetch exception. Also, populate the mce error_type and
	 * type-specific error_type from either SRR1 or DSISR, depending
	 * whether this was a load/store or ifetch exception
	 */
	if (P7_SRR1_MC_LOADSTORE(srr1)) {
		handled = mce_handle_derror_p7(regs->dsisr);
		mce_get_derror_p7(&mce_error_info, regs->dsisr);
		addr = regs->dar;
	} else {
		handled = mce_handle_ierror_p7(srr1);
		mce_get_ierror_p7(&mce_error_info, srr1);
		addr = regs->nip;
	}

	/* Handle UE error. */
	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_error_info, nip, addr);
	return handled;
}
310
/*
 * POWER8: classify an ifetch error — the common causes plus the
 * P8-specific ERAT multihit cause.
 */
static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
{
	mce_get_common_ierror(mce_err, srr1);
	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	}
}
319
/*
 * POWER8: classify a load/store error — the P7 causes plus the
 * P8-specific secondary-ERAT multihit bit (which takes precedence).
 */
static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
{
	mce_get_derror_p7(mce_err, dsisr);
	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
	}
}
328
/*
 * POWER8: handle instruction-side errors — the common causes plus the
 * P8-specific ERAT multihit, recovered by an SLB flush and reload.
 */
static long mce_handle_ierror_p8(uint64_t srr1)
{
	long handled = 0;

	handled = mce_handle_common_ierror(srr1);

#ifdef CONFIG_PPC_STD_MMU_64
	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
		flush_and_reload_slb();
		handled = 1;
	}
#endif
	return handled;
}
343
/* POWER8: handle data-side errors using the P8 SLB error bit mask. */
static long mce_handle_derror_p8(uint64_t dsisr)
{
	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
}
348
/*
 * Early real-mode machine check entry point for POWER8: attempt
 * recovery, classify the error, and record the event. Returns nonzero
 * when the error was handled.
 */
long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
	uint64_t srr1, nip, addr;
	long handled = 1;
	struct mce_error_info mce_error_info = { 0 };

	srr1 = regs->msr;
	nip = regs->nip;

	/* SRR1 load/store bit selects DSISR vs SRR1 decoding. */
	if (P7_SRR1_MC_LOADSTORE(srr1)) {
		handled = mce_handle_derror_p8(regs->dsisr);
		mce_get_derror_p8(&mce_error_info, regs->dsisr);
		addr = regs->dar;
	} else {
		handled = mce_handle_ierror_p8(srr1);
		mce_get_ierror_p8(&mce_error_info, srr1);
		addr = regs->nip;
	}

	/* Handle UE error. */
	if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
		handled = mce_handle_ue_error(regs);

	save_mce_event(regs, handled, &mce_error_info, nip, addr);
	return handled;
}