Linux Audio

Check our new training course

Embedded Linux training

Mar 31-Apr 8, 2025
Register
Loading...
v5.4
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Machine check injection support.
  4 * Copyright 2008 Intel Corporation.
  5 *
  6 * Authors:
  7 * Andi Kleen
  8 * Ying Huang
  9 *
 10 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 11 * for testing different aspects of the RAS code. This driver should be
 12 * built as module so that it can be loaded on production kernels for
 13 * testing purposes.
 14 *
 15 * Copyright (c) 2010-17:  Borislav Petkov <bp@alien8.de>
 16 *			   Advanced Micro Devices Inc.
 17 */
 18
 19#include <linux/cpu.h>
 20#include <linux/debugfs.h>
 21#include <linux/kernel.h>
 22#include <linux/module.h>
 23#include <linux/notifier.h>
 24#include <linux/pci.h>
 25#include <linux/uaccess.h>
 26
 27#include <asm/amd_nb.h>
 28#include <asm/apic.h>
 29#include <asm/irq_vectors.h>
 30#include <asm/mce.h>
 31#include <asm/nmi.h>
 32#include <asm/smp.h>
 33
 34#include "internal.h"
 35
 
 
 36/*
 37 * Collect all the MCi_XXX settings
 38 */
 39static struct mce i_mce;
 40static struct dentry *dfs_inj;
 41
 42#define MAX_FLAG_OPT_SIZE	4
 43#define NBCFG			0x44
 44
 45enum injection_type {
 46	SW_INJ = 0,	/* SW injection, simply decode the error */
 47	HW_INJ,		/* Trigger a #MC */
 48	DFR_INT_INJ,    /* Trigger Deferred error interrupt */
 49	THR_INT_INJ,    /* Trigger threshold interrupt */
 50	N_INJ_TYPES,
 51};
 52
 53static const char * const flags_options[] = {
 54	[SW_INJ] = "sw",
 55	[HW_INJ] = "hw",
 56	[DFR_INT_INJ] = "df",
 57	[THR_INT_INJ] = "th",
 58	NULL
 59};
 60
 61/* Set default injection to SW_INJ */
 62static enum injection_type inj_type = SW_INJ;
 63
 64#define MCE_INJECT_SET(reg)						\
 65static int inj_##reg##_set(void *data, u64 val)				\
 66{									\
 67	struct mce *m = (struct mce *)data;				\
 68									\
 69	m->reg = val;							\
 70	return 0;							\
 71}
 72
 73MCE_INJECT_SET(status);
 74MCE_INJECT_SET(misc);
 75MCE_INJECT_SET(addr);
 76MCE_INJECT_SET(synd);
 77
 78#define MCE_INJECT_GET(reg)						\
 79static int inj_##reg##_get(void *data, u64 *val)			\
 80{									\
 81	struct mce *m = (struct mce *)data;				\
 82									\
 83	*val = m->reg;							\
 84	return 0;							\
 85}
 86
 87MCE_INJECT_GET(status);
 88MCE_INJECT_GET(misc);
 89MCE_INJECT_GET(addr);
 90MCE_INJECT_GET(synd);
 
 91
 92DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
 93DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
 94DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
 95DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
 96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 97static void setup_inj_struct(struct mce *m)
 98{
 99	memset(m, 0, sizeof(struct mce));
100
101	m->cpuvendor = boot_cpu_data.x86_vendor;
102	m->time	     = ktime_get_real_seconds();
103	m->cpuid     = cpuid_eax(1);
104	m->microcode = boot_cpu_data.microcode;
105}
106
107/* Update fake mce registers on current CPU. */
108static void inject_mce(struct mce *m)
109{
110	struct mce *i = &per_cpu(injectm, m->extcpu);
111
112	/* Make sure no one reads partially written injectm */
113	i->finished = 0;
114	mb();
115	m->finished = 0;
116	/* First set the fields after finished */
117	i->extcpu = m->extcpu;
118	mb();
119	/* Now write record in order, finished last (except above) */
120	memcpy(i, m, sizeof(struct mce));
121	/* Finally activate it */
122	mb();
123	i->finished = 1;
124}
125
126static void raise_poll(struct mce *m)
127{
128	unsigned long flags;
129	mce_banks_t b;
130
131	memset(&b, 0xff, sizeof(mce_banks_t));
132	local_irq_save(flags);
133	machine_check_poll(0, &b);
134	local_irq_restore(flags);
135	m->finished = 0;
136}
137
138static void raise_exception(struct mce *m, struct pt_regs *pregs)
139{
140	struct pt_regs regs;
141	unsigned long flags;
142
143	if (!pregs) {
144		memset(&regs, 0, sizeof(struct pt_regs));
145		regs.ip = m->ip;
146		regs.cs = m->cs;
147		pregs = &regs;
148	}
149	/* in mcheck exeception handler, irq will be disabled */
150	local_irq_save(flags);
151	do_machine_check(pregs, 0);
152	local_irq_restore(flags);
153	m->finished = 0;
154}
155
156static cpumask_var_t mce_inject_cpumask;
157static DEFINE_MUTEX(mce_inject_mutex);
158
159static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
160{
161	int cpu = smp_processor_id();
162	struct mce *m = this_cpu_ptr(&injectm);
163	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
164		return NMI_DONE;
165	cpumask_clear_cpu(cpu, mce_inject_cpumask);
166	if (m->inject_flags & MCJ_EXCEPTION)
167		raise_exception(m, regs);
168	else if (m->status)
169		raise_poll(m);
170	return NMI_HANDLED;
171}
172
173static void mce_irq_ipi(void *info)
174{
175	int cpu = smp_processor_id();
176	struct mce *m = this_cpu_ptr(&injectm);
177
178	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
179			m->inject_flags & MCJ_EXCEPTION) {
180		cpumask_clear_cpu(cpu, mce_inject_cpumask);
181		raise_exception(m, NULL);
182	}
183}
184
185/* Inject mce on current CPU */
186static int raise_local(void)
187{
188	struct mce *m = this_cpu_ptr(&injectm);
189	int context = MCJ_CTX(m->inject_flags);
190	int ret = 0;
191	int cpu = m->extcpu;
192
193	if (m->inject_flags & MCJ_EXCEPTION) {
194		pr_info("Triggering MCE exception on CPU %d\n", cpu);
195		switch (context) {
196		case MCJ_CTX_IRQ:
197			/*
198			 * Could do more to fake interrupts like
199			 * calling irq_enter, but the necessary
200			 * machinery isn't exported currently.
201			 */
202			/*FALL THROUGH*/
203		case MCJ_CTX_PROCESS:
204			raise_exception(m, NULL);
205			break;
206		default:
207			pr_info("Invalid MCE context\n");
208			ret = -EINVAL;
209		}
210		pr_info("MCE exception done on CPU %d\n", cpu);
211	} else if (m->status) {
212		pr_info("Starting machine check poll CPU %d\n", cpu);
213		raise_poll(m);
214		mce_notify_irq();
215		pr_info("Machine check poll done on CPU %d\n", cpu);
216	} else
217		m->finished = 0;
218
219	return ret;
220}
221
222static void __maybe_unused raise_mce(struct mce *m)
223{
224	int context = MCJ_CTX(m->inject_flags);
225
226	inject_mce(m);
227
228	if (context == MCJ_CTX_RANDOM)
229		return;
230
231	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
232		unsigned long start;
233		int cpu;
234
235		get_online_cpus();
236		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
237		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
238		for_each_online_cpu(cpu) {
239			struct mce *mcpu = &per_cpu(injectm, cpu);
240			if (!mcpu->finished ||
241			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
242				cpumask_clear_cpu(cpu, mce_inject_cpumask);
243		}
244		if (!cpumask_empty(mce_inject_cpumask)) {
245			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
246				/*
247				 * don't wait because mce_irq_ipi is necessary
248				 * to be sync with following raise_local
249				 */
250				preempt_disable();
251				smp_call_function_many(mce_inject_cpumask,
252					mce_irq_ipi, NULL, 0);
253				preempt_enable();
254			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
255				apic->send_IPI_mask(mce_inject_cpumask,
256						NMI_VECTOR);
257		}
258		start = jiffies;
259		while (!cpumask_empty(mce_inject_cpumask)) {
260			if (!time_before(jiffies, start + 2*HZ)) {
261				pr_err("Timeout waiting for mce inject %lx\n",
262					*cpumask_bits(mce_inject_cpumask));
263				break;
264			}
265			cpu_relax();
266		}
267		raise_local();
268		put_cpu();
269		put_online_cpus();
270	} else {
271		preempt_disable();
272		raise_local();
273		preempt_enable();
274	}
275}
276
277static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
278			    void *data)
279{
280	struct mce *m = (struct mce *)data;
281
282	if (!m)
283		return NOTIFY_DONE;
284
285	mutex_lock(&mce_inject_mutex);
286	raise_mce(m);
287	mutex_unlock(&mce_inject_mutex);
288
289	return NOTIFY_DONE;
290}
291
292static struct notifier_block inject_nb = {
293	.notifier_call  = mce_inject_raise,
294};
295
296/*
297 * Caller needs to be make sure this cpu doesn't disappear
298 * from under us, i.e.: get_cpu/put_cpu.
299 */
300static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
301{
302	u32 l, h;
303	int err;
304
305	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
306	if (err) {
307		pr_err("%s: error reading HWCR\n", __func__);
308		return err;
309	}
310
311	enable ? (l |= BIT(18)) : (l &= ~BIT(18));
312
313	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
314	if (err)
315		pr_err("%s: error writing HWCR\n", __func__);
316
317	return err;
318}
319
320static int __set_inj(const char *buf)
321{
322	int i;
323
324	for (i = 0; i < N_INJ_TYPES; i++) {
325		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
 
 
326			inj_type = i;
327			return 0;
328		}
329	}
330	return -EINVAL;
331}
332
333static ssize_t flags_read(struct file *filp, char __user *ubuf,
334			  size_t cnt, loff_t *ppos)
335{
336	char buf[MAX_FLAG_OPT_SIZE];
337	int n;
338
339	n = sprintf(buf, "%s\n", flags_options[inj_type]);
340
341	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
342}
343
344static ssize_t flags_write(struct file *filp, const char __user *ubuf,
345			   size_t cnt, loff_t *ppos)
346{
347	char buf[MAX_FLAG_OPT_SIZE], *__buf;
348	int err;
349
350	if (cnt > MAX_FLAG_OPT_SIZE)
351		return -EINVAL;
352
353	if (copy_from_user(&buf, ubuf, cnt))
354		return -EFAULT;
355
356	buf[cnt - 1] = 0;
357
358	/* strip whitespace */
359	__buf = strstrip(buf);
360
361	err = __set_inj(__buf);
362	if (err) {
363		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
364		return err;
365	}
366
367	*ppos += cnt;
368
369	return cnt;
370}
371
372static const struct file_operations flags_fops = {
373	.read           = flags_read,
374	.write          = flags_write,
375	.llseek         = generic_file_llseek,
376};
377
378/*
379 * On which CPU to inject?
380 */
381MCE_INJECT_GET(extcpu);
382
383static int inj_extcpu_set(void *data, u64 val)
384{
385	struct mce *m = (struct mce *)data;
386
387	if (val >= nr_cpu_ids || !cpu_online(val)) {
388		pr_err("%s: Invalid CPU: %llu\n", __func__, val);
389		return -EINVAL;
390	}
391	m->extcpu = val;
392	return 0;
393}
394
395DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
396
397static void trigger_mce(void *info)
398{
399	asm volatile("int $18");
400}
401
402static void trigger_dfr_int(void *info)
403{
404	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
405}
406
407static void trigger_thr_int(void *info)
408{
409	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
410}
411
412static u32 get_nbc_for_node(int node_id)
413{
414	struct cpuinfo_x86 *c = &boot_cpu_data;
415	u32 cores_per_node;
416
417	cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
418
419	return cores_per_node * node_id;
420}
421
422static void toggle_nb_mca_mst_cpu(u16 nid)
423{
424	struct amd_northbridge *nb;
425	struct pci_dev *F3;
426	u32 val;
427	int err;
428
429	nb = node_to_amd_nb(nid);
430	if (!nb)
431		return;
432
433	F3 = nb->misc;
434	if (!F3)
435		return;
436
437	err = pci_read_config_dword(F3, NBCFG, &val);
438	if (err) {
439		pr_err("%s: Error reading F%dx%03x.\n",
440		       __func__, PCI_FUNC(F3->devfn), NBCFG);
441		return;
442	}
443
444	if (val & BIT(27))
445		return;
446
447	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
448	       __func__);
449
450	val |= BIT(27);
451	err = pci_write_config_dword(F3, NBCFG, val);
452	if (err)
453		pr_err("%s: Error writing F%dx%03x.\n",
454		       __func__, PCI_FUNC(F3->devfn), NBCFG);
455}
456
457static void prepare_msrs(void *info)
458{
459	struct mce m = *(struct mce *)info;
460	u8 b = m.bank;
461
462	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
463
464	if (boot_cpu_has(X86_FEATURE_SMCA)) {
465		if (m.inject_flags == DFR_INT_INJ) {
466			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
467			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
468		} else {
469			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
470			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
471		}
472
473		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
474		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
475	} else {
476		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
477		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
478		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
479	}
480}
481
482static void do_inject(void)
483{
484	u64 mcg_status = 0;
485	unsigned int cpu = i_mce.extcpu;
486	u8 b = i_mce.bank;
487
488	i_mce.tsc = rdtsc_ordered();
489
 
 
490	if (i_mce.misc)
491		i_mce.status |= MCI_STATUS_MISCV;
492
493	if (i_mce.synd)
494		i_mce.status |= MCI_STATUS_SYNDV;
495
496	if (inj_type == SW_INJ) {
497		mce_inject_log(&i_mce);
498		return;
499	}
500
501	/* prep MCE global settings for the injection */
502	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
503
504	if (!(i_mce.status & MCI_STATUS_PCC))
505		mcg_status |= MCG_STATUS_RIPV;
506
507	/*
508	 * Ensure necessary status bits for deferred errors:
509	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
510	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
511	 */
512	if (inj_type == DFR_INT_INJ) {
513		i_mce.status |= MCI_STATUS_DEFERRED;
514		i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
515	}
516
517	/*
518	 * For multi node CPUs, logging and reporting of bank 4 errors happens
519	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
520	 * Fam10h and later BKDGs.
521	 */
522	if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
523	    b == 4 &&
524	    boot_cpu_data.x86 < 0x17) {
525		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
526		cpu = get_nbc_for_node(amd_get_nb_id(cpu));
527	}
528
529	get_online_cpus();
530	if (!cpu_online(cpu))
531		goto err;
532
533	toggle_hw_mce_inject(cpu, true);
534
535	i_mce.mcgstatus = mcg_status;
536	i_mce.inject_flags = inj_type;
537	smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
538
539	toggle_hw_mce_inject(cpu, false);
540
541	switch (inj_type) {
542	case DFR_INT_INJ:
543		smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
544		break;
545	case THR_INT_INJ:
546		smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
547		break;
548	default:
549		smp_call_function_single(cpu, trigger_mce, NULL, 0);
550	}
551
552err:
553	put_online_cpus();
554
555}
556
557/*
558 * This denotes into which bank we're injecting and triggers
559 * the injection, at the same time.
560 */
561static int inj_bank_set(void *data, u64 val)
562{
563	struct mce *m = (struct mce *)data;
564	u8 n_banks;
565	u64 cap;
566
567	/* Get bank count on target CPU so we can handle non-uniform values. */
568	rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
569	n_banks = cap & MCG_BANKCNT_MASK;
570
571	if (val >= n_banks) {
572		pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
573		return -EINVAL;
574	}
575
576	m->bank = val;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577	do_inject();
578
579	/* Reset injection struct */
580	setup_inj_struct(&i_mce);
581
582	return 0;
583}
584
585MCE_INJECT_GET(bank);
586
587DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
588
/*
 * Text returned by the README debugfs file. It describes every file of
 * the injection interface; the "df" entry had a duplicated "is" across a
 * line break, which is removed here.
 */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t  - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t    format only. Safe to use.\n"
"\t  - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t    before injecting.\n"
"\t  - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t    error APIC interrupt handler to handle the error if the feature \n"
"\t    is present in hardware. \n"
"\t  - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t    APIC interrupt handler to handle the error. \n"
"\n";
633
634static ssize_t
635inj_readme_read(struct file *filp, char __user *ubuf,
636		       size_t cnt, loff_t *ppos)
637{
638	return simple_read_from_buffer(ubuf, cnt, ppos,
639					readme_msg, strlen(readme_msg));
640}
641
642static const struct file_operations readme_fops = {
643	.read		= inj_readme_read,
644};
645
646static struct dfs_node {
647	char *name;
648	const struct file_operations *fops;
649	umode_t perm;
650} dfs_fls[] = {
651	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
652	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
653	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
654	{ .name = "synd",	.fops = &synd_fops,   .perm = S_IRUSR | S_IWUSR },
 
655	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
656	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
657	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
658	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
659};
660
661static void __init debugfs_init(void)
662{
663	unsigned int i;
664
665	dfs_inj = debugfs_create_dir("mce-inject", NULL);
666
667	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
668		debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
669				    &i_mce, dfs_fls[i].fops);
670}
671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672static int __init inject_init(void)
673{
674	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
675		return -ENOMEM;
 
 
676
677	debugfs_init();
678
679	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
680	mce_register_injector_chain(&inject_nb);
681
682	setup_inj_struct(&i_mce);
683
684	pr_info("Machine check injector initialized\n");
685
686	return 0;
687}
688
689static void __exit inject_exit(void)
690{
691
692	mce_unregister_injector_chain(&inject_nb);
693	unregister_nmi_handler(NMI_LOCAL, "mce_notify");
694
695	debugfs_remove_recursive(dfs_inj);
696	dfs_inj = NULL;
697
698	memset(&dfs_fls, 0, sizeof(dfs_fls));
699
700	free_cpumask_var(mce_inject_cpumask);
701}
702
703module_init(inject_init);
704module_exit(inject_exit);
705MODULE_LICENSE("GPL");
v6.8
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Machine check injection support.
  4 * Copyright 2008 Intel Corporation.
  5 *
  6 * Authors:
  7 * Andi Kleen
  8 * Ying Huang
  9 *
 10 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 11 * for testing different aspects of the RAS code. This driver should be
 12 * built as module so that it can be loaded on production kernels for
 13 * testing purposes.
 14 *
 15 * Copyright (c) 2010-17:  Borislav Petkov <bp@alien8.de>
 16 *			   Advanced Micro Devices Inc.
 17 */
 18
 19#include <linux/cpu.h>
 20#include <linux/debugfs.h>
 21#include <linux/kernel.h>
 22#include <linux/module.h>
 23#include <linux/notifier.h>
 24#include <linux/pci.h>
 25#include <linux/uaccess.h>
 26
 27#include <asm/amd_nb.h>
 28#include <asm/apic.h>
 29#include <asm/irq_vectors.h>
 30#include <asm/mce.h>
 31#include <asm/nmi.h>
 32#include <asm/smp.h>
 33
 34#include "internal.h"
 35
 36static bool hw_injection_possible = true;
 37
 38/*
 39 * Collect all the MCi_XXX settings
 40 */
 41static struct mce i_mce;
 42static struct dentry *dfs_inj;
 43
 44#define MAX_FLAG_OPT_SIZE	4
 45#define NBCFG			0x44
 46
 47enum injection_type {
 48	SW_INJ = 0,	/* SW injection, simply decode the error */
 49	HW_INJ,		/* Trigger a #MC */
 50	DFR_INT_INJ,    /* Trigger Deferred error interrupt */
 51	THR_INT_INJ,    /* Trigger threshold interrupt */
 52	N_INJ_TYPES,
 53};
 54
 55static const char * const flags_options[] = {
 56	[SW_INJ] = "sw",
 57	[HW_INJ] = "hw",
 58	[DFR_INT_INJ] = "df",
 59	[THR_INT_INJ] = "th",
 60	NULL
 61};
 62
 63/* Set default injection to SW_INJ */
 64static enum injection_type inj_type = SW_INJ;
 65
 66#define MCE_INJECT_SET(reg)						\
 67static int inj_##reg##_set(void *data, u64 val)				\
 68{									\
 69	struct mce *m = (struct mce *)data;				\
 70									\
 71	m->reg = val;							\
 72	return 0;							\
 73}
 74
 75MCE_INJECT_SET(status);
 76MCE_INJECT_SET(misc);
 77MCE_INJECT_SET(addr);
 78MCE_INJECT_SET(synd);
 79
 80#define MCE_INJECT_GET(reg)						\
 81static int inj_##reg##_get(void *data, u64 *val)			\
 82{									\
 83	struct mce *m = (struct mce *)data;				\
 84									\
 85	*val = m->reg;							\
 86	return 0;							\
 87}
 88
 89MCE_INJECT_GET(status);
 90MCE_INJECT_GET(misc);
 91MCE_INJECT_GET(addr);
 92MCE_INJECT_GET(synd);
 93MCE_INJECT_GET(ipid);
 94
 95DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
 96DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
 97DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
 98DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
 99
100/* Use the user provided IPID value on a sw injection. */
101static int inj_ipid_set(void *data, u64 val)
102{
103	struct mce *m = (struct mce *)data;
104
105	if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
106		if (inj_type == SW_INJ)
107			m->ipid = val;
108	}
109
110	return 0;
111}
112
113DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
114
115static void setup_inj_struct(struct mce *m)
116{
117	memset(m, 0, sizeof(struct mce));
118
119	m->cpuvendor = boot_cpu_data.x86_vendor;
120	m->time	     = ktime_get_real_seconds();
121	m->cpuid     = cpuid_eax(1);
122	m->microcode = boot_cpu_data.microcode;
123}
124
125/* Update fake mce registers on current CPU. */
126static void inject_mce(struct mce *m)
127{
128	struct mce *i = &per_cpu(injectm, m->extcpu);
129
130	/* Make sure no one reads partially written injectm */
131	i->finished = 0;
132	mb();
133	m->finished = 0;
134	/* First set the fields after finished */
135	i->extcpu = m->extcpu;
136	mb();
137	/* Now write record in order, finished last (except above) */
138	memcpy(i, m, sizeof(struct mce));
139	/* Finally activate it */
140	mb();
141	i->finished = 1;
142}
143
144static void raise_poll(struct mce *m)
145{
146	unsigned long flags;
147	mce_banks_t b;
148
149	memset(&b, 0xff, sizeof(mce_banks_t));
150	local_irq_save(flags);
151	machine_check_poll(0, &b);
152	local_irq_restore(flags);
153	m->finished = 0;
154}
155
156static void raise_exception(struct mce *m, struct pt_regs *pregs)
157{
158	struct pt_regs regs;
159	unsigned long flags;
160
161	if (!pregs) {
162		memset(&regs, 0, sizeof(struct pt_regs));
163		regs.ip = m->ip;
164		regs.cs = m->cs;
165		pregs = &regs;
166	}
167	/* do_machine_check() expects interrupts disabled -- at least */
168	local_irq_save(flags);
169	do_machine_check(pregs);
170	local_irq_restore(flags);
171	m->finished = 0;
172}
173
174static cpumask_var_t mce_inject_cpumask;
175static DEFINE_MUTEX(mce_inject_mutex);
176
177static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
178{
179	int cpu = smp_processor_id();
180	struct mce *m = this_cpu_ptr(&injectm);
181	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
182		return NMI_DONE;
183	cpumask_clear_cpu(cpu, mce_inject_cpumask);
184	if (m->inject_flags & MCJ_EXCEPTION)
185		raise_exception(m, regs);
186	else if (m->status)
187		raise_poll(m);
188	return NMI_HANDLED;
189}
190
191static void mce_irq_ipi(void *info)
192{
193	int cpu = smp_processor_id();
194	struct mce *m = this_cpu_ptr(&injectm);
195
196	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
197			m->inject_flags & MCJ_EXCEPTION) {
198		cpumask_clear_cpu(cpu, mce_inject_cpumask);
199		raise_exception(m, NULL);
200	}
201}
202
203/* Inject mce on current CPU */
204static int raise_local(void)
205{
206	struct mce *m = this_cpu_ptr(&injectm);
207	int context = MCJ_CTX(m->inject_flags);
208	int ret = 0;
209	int cpu = m->extcpu;
210
211	if (m->inject_flags & MCJ_EXCEPTION) {
212		pr_info("Triggering MCE exception on CPU %d\n", cpu);
213		switch (context) {
214		case MCJ_CTX_IRQ:
215			/*
216			 * Could do more to fake interrupts like
217			 * calling irq_enter, but the necessary
218			 * machinery isn't exported currently.
219			 */
220			fallthrough;
221		case MCJ_CTX_PROCESS:
222			raise_exception(m, NULL);
223			break;
224		default:
225			pr_info("Invalid MCE context\n");
226			ret = -EINVAL;
227		}
228		pr_info("MCE exception done on CPU %d\n", cpu);
229	} else if (m->status) {
230		pr_info("Starting machine check poll CPU %d\n", cpu);
231		raise_poll(m);
232		mce_notify_irq();
233		pr_info("Machine check poll done on CPU %d\n", cpu);
234	} else
235		m->finished = 0;
236
237	return ret;
238}
239
240static void __maybe_unused raise_mce(struct mce *m)
241{
242	int context = MCJ_CTX(m->inject_flags);
243
244	inject_mce(m);
245
246	if (context == MCJ_CTX_RANDOM)
247		return;
248
249	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
250		unsigned long start;
251		int cpu;
252
253		cpus_read_lock();
254		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
255		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
256		for_each_online_cpu(cpu) {
257			struct mce *mcpu = &per_cpu(injectm, cpu);
258			if (!mcpu->finished ||
259			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
260				cpumask_clear_cpu(cpu, mce_inject_cpumask);
261		}
262		if (!cpumask_empty(mce_inject_cpumask)) {
263			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
264				/*
265				 * don't wait because mce_irq_ipi is necessary
266				 * to be sync with following raise_local
267				 */
268				preempt_disable();
269				smp_call_function_many(mce_inject_cpumask,
270					mce_irq_ipi, NULL, 0);
271				preempt_enable();
272			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
273				__apic_send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
 
274		}
275		start = jiffies;
276		while (!cpumask_empty(mce_inject_cpumask)) {
277			if (!time_before(jiffies, start + 2*HZ)) {
278				pr_err("Timeout waiting for mce inject %lx\n",
279					*cpumask_bits(mce_inject_cpumask));
280				break;
281			}
282			cpu_relax();
283		}
284		raise_local();
285		put_cpu();
286		cpus_read_unlock();
287	} else {
288		preempt_disable();
289		raise_local();
290		preempt_enable();
291	}
292}
293
294static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
295			    void *data)
296{
297	struct mce *m = (struct mce *)data;
298
299	if (!m)
300		return NOTIFY_DONE;
301
302	mutex_lock(&mce_inject_mutex);
303	raise_mce(m);
304	mutex_unlock(&mce_inject_mutex);
305
306	return NOTIFY_DONE;
307}
308
309static struct notifier_block inject_nb = {
310	.notifier_call  = mce_inject_raise,
311};
312
313/*
314 * Caller needs to be make sure this cpu doesn't disappear
315 * from under us, i.e.: get_cpu/put_cpu.
316 */
317static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
318{
319	u32 l, h;
320	int err;
321
322	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
323	if (err) {
324		pr_err("%s: error reading HWCR\n", __func__);
325		return err;
326	}
327
328	enable ? (l |= BIT(18)) : (l &= ~BIT(18));
329
330	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
331	if (err)
332		pr_err("%s: error writing HWCR\n", __func__);
333
334	return err;
335}
336
337static int __set_inj(const char *buf)
338{
339	int i;
340
341	for (i = 0; i < N_INJ_TYPES; i++) {
342		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
343			if (i > SW_INJ && !hw_injection_possible)
344				continue;
345			inj_type = i;
346			return 0;
347		}
348	}
349	return -EINVAL;
350}
351
352static ssize_t flags_read(struct file *filp, char __user *ubuf,
353			  size_t cnt, loff_t *ppos)
354{
355	char buf[MAX_FLAG_OPT_SIZE];
356	int n;
357
358	n = sprintf(buf, "%s\n", flags_options[inj_type]);
359
360	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
361}
362
363static ssize_t flags_write(struct file *filp, const char __user *ubuf,
364			   size_t cnt, loff_t *ppos)
365{
366	char buf[MAX_FLAG_OPT_SIZE], *__buf;
367	int err;
368
369	if (!cnt || cnt > MAX_FLAG_OPT_SIZE)
370		return -EINVAL;
371
372	if (copy_from_user(&buf, ubuf, cnt))
373		return -EFAULT;
374
375	buf[cnt - 1] = 0;
376
377	/* strip whitespace */
378	__buf = strstrip(buf);
379
380	err = __set_inj(__buf);
381	if (err) {
382		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
383		return err;
384	}
385
386	*ppos += cnt;
387
388	return cnt;
389}
390
391static const struct file_operations flags_fops = {
392	.read           = flags_read,
393	.write          = flags_write,
394	.llseek         = generic_file_llseek,
395};
396
397/*
398 * On which CPU to inject?
399 */
400MCE_INJECT_GET(extcpu);
401
402static int inj_extcpu_set(void *data, u64 val)
403{
404	struct mce *m = (struct mce *)data;
405
406	if (val >= nr_cpu_ids || !cpu_online(val)) {
407		pr_err("%s: Invalid CPU: %llu\n", __func__, val);
408		return -EINVAL;
409	}
410	m->extcpu = val;
411	return 0;
412}
413
414DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
415
416static void trigger_mce(void *info)
417{
418	asm volatile("int $18");
419}
420
421static void trigger_dfr_int(void *info)
422{
423	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
424}
425
426static void trigger_thr_int(void *info)
427{
428	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
429}
430
431static u32 get_nbc_for_node(int node_id)
432{
433	struct cpuinfo_x86 *c = &boot_cpu_data;
434	u32 cores_per_node;
435
436	cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
437
438	return cores_per_node * node_id;
439}
440
441static void toggle_nb_mca_mst_cpu(u16 nid)
442{
443	struct amd_northbridge *nb;
444	struct pci_dev *F3;
445	u32 val;
446	int err;
447
448	nb = node_to_amd_nb(nid);
449	if (!nb)
450		return;
451
452	F3 = nb->misc;
453	if (!F3)
454		return;
455
456	err = pci_read_config_dword(F3, NBCFG, &val);
457	if (err) {
458		pr_err("%s: Error reading F%dx%03x.\n",
459		       __func__, PCI_FUNC(F3->devfn), NBCFG);
460		return;
461	}
462
463	if (val & BIT(27))
464		return;
465
466	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
467	       __func__);
468
469	val |= BIT(27);
470	err = pci_write_config_dword(F3, NBCFG, val);
471	if (err)
472		pr_err("%s: Error writing F%dx%03x.\n",
473		       __func__, PCI_FUNC(F3->devfn), NBCFG);
474}
475
476static void prepare_msrs(void *info)
477{
478	struct mce m = *(struct mce *)info;
479	u8 b = m.bank;
480
481	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
482
483	if (boot_cpu_has(X86_FEATURE_SMCA)) {
484		if (m.inject_flags == DFR_INT_INJ) {
485			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
486			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
487		} else {
488			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
489			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
490		}
491
492		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
493		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
494	} else {
495		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
496		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
497		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
498	}
499}
500
501static void do_inject(void)
502{
503	u64 mcg_status = 0;
504	unsigned int cpu = i_mce.extcpu;
505	u8 b = i_mce.bank;
506
507	i_mce.tsc = rdtsc_ordered();
508
509	i_mce.status |= MCI_STATUS_VAL;
510
511	if (i_mce.misc)
512		i_mce.status |= MCI_STATUS_MISCV;
513
514	if (i_mce.synd)
515		i_mce.status |= MCI_STATUS_SYNDV;
516
517	if (inj_type == SW_INJ) {
518		mce_log(&i_mce);
519		return;
520	}
521
522	/* prep MCE global settings for the injection */
523	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
524
525	if (!(i_mce.status & MCI_STATUS_PCC))
526		mcg_status |= MCG_STATUS_RIPV;
527
528	/*
529	 * Ensure necessary status bits for deferred errors:
530	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
531	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
532	 */
533	if (inj_type == DFR_INT_INJ) {
534		i_mce.status |= MCI_STATUS_DEFERRED;
535		i_mce.status &= ~MCI_STATUS_UC;
536	}
537
538	/*
539	 * For multi node CPUs, logging and reporting of bank 4 errors happens
540	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
541	 * Fam10h and later BKDGs.
542	 */
543	if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
544	    b == 4 &&
545	    boot_cpu_data.x86 < 0x17) {
546		toggle_nb_mca_mst_cpu(topology_die_id(cpu));
547		cpu = get_nbc_for_node(topology_die_id(cpu));
548	}
549
550	cpus_read_lock();
551	if (!cpu_online(cpu))
552		goto err;
553
554	toggle_hw_mce_inject(cpu, true);
555
556	i_mce.mcgstatus = mcg_status;
557	i_mce.inject_flags = inj_type;
558	smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
559
560	toggle_hw_mce_inject(cpu, false);
561
562	switch (inj_type) {
563	case DFR_INT_INJ:
564		smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
565		break;
566	case THR_INT_INJ:
567		smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
568		break;
569	default:
570		smp_call_function_single(cpu, trigger_mce, NULL, 0);
571	}
572
573err:
574	cpus_read_unlock();
575
576}
577
578/*
579 * This denotes into which bank we're injecting and triggers
580 * the injection, at the same time.
581 */
582static int inj_bank_set(void *data, u64 val)
583{
584	struct mce *m = (struct mce *)data;
585	u8 n_banks;
586	u64 cap;
587
588	/* Get bank count on target CPU so we can handle non-uniform values. */
589	rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
590	n_banks = cap & MCG_BANKCNT_MASK;
591
592	if (val >= n_banks) {
593		pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
594		return -EINVAL;
595	}
596
597	m->bank = val;
598
599	/*
600	 * sw-only injection allows to write arbitrary values into the MCA
601	 * registers because it tests only the decoding paths.
602	 */
603	if (inj_type == SW_INJ)
604		goto inject;
605
606	/*
607	 * Read IPID value to determine if a bank is populated on the target
608	 * CPU.
609	 */
610	if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
611		u64 ipid;
612
613		if (rdmsrl_on_cpu(m->extcpu, MSR_AMD64_SMCA_MCx_IPID(val), &ipid)) {
614			pr_err("Error reading IPID on CPU%d\n", m->extcpu);
615			return -EINVAL;
616		}
617
618		if (!ipid) {
619			pr_err("Cannot inject into unpopulated bank %llu\n", val);
620			return -ENODEV;
621		}
622	}
623
624inject:
625	do_inject();
626
627	/* Reset injection struct */
628	setup_inj_struct(&i_mce);
629
630	return 0;
631}
632
633MCE_INJECT_GET(bank);
634
635DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
636
/* Help text returned verbatim by reads of the debugfs "README" file. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t  - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t    format only. Safe to use.\n"
"\t  - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t    before injecting.\n"
"\t  - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t    error APIC interrupt handler to handle the error if the feature is \n"
"\t    present in hardware. \n"
"\t  - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t    APIC interrupt handler to handle the error. \n"
"\n"
"ipid:\t IPID (AMD-specific)\n"
"\n";
683
684static ssize_t
685inj_readme_read(struct file *filp, char __user *ubuf,
686		       size_t cnt, loff_t *ppos)
687{
688	return simple_read_from_buffer(ubuf, cnt, ppos,
689					readme_msg, strlen(readme_msg));
690}
691
692static const struct file_operations readme_fops = {
693	.read		= inj_readme_read,
694};
695
696static struct dfs_node {
697	char *name;
698	const struct file_operations *fops;
699	umode_t perm;
700} dfs_fls[] = {
701	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
702	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
703	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
704	{ .name = "synd",	.fops = &synd_fops,   .perm = S_IRUSR | S_IWUSR },
705	{ .name = "ipid",	.fops = &ipid_fops,   .perm = S_IRUSR | S_IWUSR },
706	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
707	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
708	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
709	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
710};
711
712static void __init debugfs_init(void)
713{
714	unsigned int i;
715
716	dfs_inj = debugfs_create_dir("mce-inject", NULL);
717
718	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
719		debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
720				    &i_mce, dfs_fls[i].fops);
721}
722
723static void check_hw_inj_possible(void)
724{
725	int cpu;
726	u8 bank;
727
728	/*
729	 * This behavior exists only on SMCA systems though its not directly
730	 * related to SMCA.
731	 */
732	if (!cpu_feature_enabled(X86_FEATURE_SMCA))
733		return;
734
735	cpu = get_cpu();
736
737	for (bank = 0; bank < MAX_NR_BANKS; ++bank) {
738		u64 status = MCI_STATUS_VAL, ipid;
739
740		/* Check whether bank is populated */
741		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
742		if (!ipid)
743			continue;
744
745		toggle_hw_mce_inject(cpu, true);
746
747		wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
748		rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
749		wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), 0);
750
751		if (!status) {
752			hw_injection_possible = false;
753			pr_warn("Platform does not allow *hardware* error injection."
754				"Try using APEI EINJ instead.\n");
755		}
756
757		toggle_hw_mce_inject(cpu, false);
758
759		break;
760	}
761
762	put_cpu();
763}
764
765static int __init inject_init(void)
766{
767	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
768		return -ENOMEM;
769
770	check_hw_inj_possible();
771
772	debugfs_init();
773
774	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
775	mce_register_injector_chain(&inject_nb);
776
777	setup_inj_struct(&i_mce);
778
779	pr_info("Machine check injector initialized\n");
780
781	return 0;
782}
783
784static void __exit inject_exit(void)
785{
786
787	mce_unregister_injector_chain(&inject_nb);
788	unregister_nmi_handler(NMI_LOCAL, "mce_notify");
789
790	debugfs_remove_recursive(dfs_inj);
791	dfs_inj = NULL;
792
793	memset(&dfs_fls, 0, sizeof(dfs_fls));
794
795	free_cpumask_var(mce_inject_cpumask);
796}
797
798module_init(inject_init);
799module_exit(inject_exit);
800MODULE_LICENSE("GPL");