Linux Audio

Check our new training course

Loading...
v4.17
  1/*
  2 * Machine check injection support.
  3 * Copyright 2008 Intel Corporation.
  4 *
  5 * This program is free software; you can redistribute it and/or
  6 * modify it under the terms of the GNU General Public License
  7 * as published by the Free Software Foundation; version 2
  8 * of the License.
  9 *
 10 * Authors:
 11 * Andi Kleen
 12 * Ying Huang
 13 *
 14 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 15 * for testing different aspects of the RAS code. This driver should be
 16 * built as module so that it can be loaded on production kernels for
 17 * testing purposes.
 18 *
 19 * This file may be distributed under the terms of the GNU General Public
 20 * License version 2.
 21 *
 22 * Copyright (c) 2010-17:  Borislav Petkov <bp@alien8.de>
 23 *			   Advanced Micro Devices Inc.
 24 */
 25
 26#include <linux/cpu.h>
 27#include <linux/debugfs.h>
 28#include <linux/kernel.h>
 29#include <linux/module.h>
 
 
 
 
 
 
 30#include <linux/notifier.h>
 31#include <linux/pci.h>
 32#include <linux/uaccess.h>
 33
 34#include <asm/amd_nb.h>
 35#include <asm/apic.h>
 36#include <asm/irq_vectors.h>
 37#include <asm/mce.h>
 
 38#include <asm/nmi.h>
 39#include <asm/smp.h>
 40
 41#include "mce-internal.h"
 42
 43/*
 44 * Collect all the MCi_XXX settings
 45 */
 46static struct mce i_mce;
 47static struct dentry *dfs_inj;
 48
 49static u8 n_banks;
 50
 51#define MAX_FLAG_OPT_SIZE	3
 52#define NBCFG			0x44
 53
 54enum injection_type {
 55	SW_INJ = 0,	/* SW injection, simply decode the error */
 56	HW_INJ,		/* Trigger a #MC */
 57	DFR_INT_INJ,    /* Trigger Deferred error interrupt */
 58	THR_INT_INJ,    /* Trigger threshold interrupt */
 59	N_INJ_TYPES,
 60};
 61
 62static const char * const flags_options[] = {
 63	[SW_INJ] = "sw",
 64	[HW_INJ] = "hw",
 65	[DFR_INT_INJ] = "df",
 66	[THR_INT_INJ] = "th",
 67	NULL
 68};
 69
 70/* Set default injection to SW_INJ */
 71static enum injection_type inj_type = SW_INJ;
 72
/*
 * Generate a debugfs setter, inj_<reg>_set(): store @val into the
 * corresponding field of the staged struct mce passed via @data.
 */
#define MCE_INJECT_SET(reg)						\
static int inj_##reg##_set(void *data, u64 val)				\
{									\
	struct mce *m = (struct mce *)data;				\
									\
	m->reg = val;							\
	return 0;							\
}

MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
MCE_INJECT_SET(synd);

/* Matching getter: read the staged field back into *@val. */
#define MCE_INJECT_GET(reg)						\
static int inj_##reg##_get(void *data, u64 *val)			\
{									\
	struct mce *m = (struct mce *)data;				\
									\
	*val = m->reg;							\
	return 0;							\
}

MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
MCE_INJECT_GET(synd);

/* debugfs file ops tying each getter/setter pair together (hex format). */
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
105
106static void setup_inj_struct(struct mce *m)
107{
108	memset(m, 0, sizeof(struct mce));
109
110	m->cpuvendor = boot_cpu_data.x86_vendor;
111}
112
/*
 * Update fake mce registers on current CPU.
 *
 * Publish @m into the target CPU's per-cpu injectm slot. ->finished acts
 * as the "record valid" flag: it is cleared first and set last, with full
 * memory barriers in between, so a concurrent reader never sees a
 * partially written record as valid.
 */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}
131
/*
 * Run the machine-check poll handler on the current CPU with interrupts
 * disabled, then mark the injected record as consumed.
 */
static void raise_poll(struct mce *m)
{
	unsigned long flags;
	mce_banks_t b;

	/* Poll every bank: set all bits in the bank mask. */
	memset(&b, 0xff, sizeof(mce_banks_t));
	local_irq_save(flags);
	machine_check_poll(0, &b);
	local_irq_restore(flags);
	m->finished = 0;
}
143
/*
 * Invoke the #MC handler directly on this CPU. If no pt_regs were
 * supplied (software-triggered path), fabricate a minimal set from the
 * injected record's ip/cs so the handler has something to report.
 */
static void raise_exception(struct mce *m, struct pt_regs *pregs)
{
	struct pt_regs regs;
	unsigned long flags;

	if (!pregs) {
		memset(&regs, 0, sizeof(struct pt_regs));
		regs.ip = m->ip;
		regs.cs = m->cs;
		pregs = &regs;
	}
	/* in mcheck exception handler, irq will be disabled */
	local_irq_save(flags);
	do_machine_check(pregs, 0);
	local_irq_restore(flags);
	m->finished = 0;
}
161
/* CPUs still expected to consume a broadcast injection. */
static cpumask_var_t mce_inject_cpumask;
/* Serializes raise_mce() against concurrent injections. */
static DEFINE_MUTEX(mce_inject_mutex);

/*
 * NMI handler for MCJ_NMI_BROADCAST injections: if this CPU is a pending
 * target, remove it from the mask and raise the prepared error locally.
 * Runs in NMI context.
 */
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);
	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
		return NMI_DONE;
	cpumask_clear_cpu(cpu, mce_inject_cpumask);
	if (m->inject_flags & MCJ_EXCEPTION)
		raise_exception(m, regs);
	else if (m->status)
		raise_poll(m);
	return NMI_HANDLED;
}
178
179static void mce_irq_ipi(void *info)
180{
181	int cpu = smp_processor_id();
182	struct mce *m = this_cpu_ptr(&injectm);
183
184	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
185			m->inject_flags & MCJ_EXCEPTION) {
186		cpumask_clear_cpu(cpu, mce_inject_cpumask);
187		raise_exception(m, NULL);
188	}
189}
190
/*
 * Inject mce on current CPU.
 *
 * Consume this CPU's prepared per-cpu injectm record: either call the
 * #MC handler directly (MCJ_EXCEPTION) or run the poll path. Returns
 * -EINVAL for an unsupported injection context, 0 otherwise.
 */
static int raise_local(void)
{
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);
	int ret = 0;
	int cpu = m->extcpu;

	if (m->inject_flags & MCJ_EXCEPTION) {
		pr_info("Triggering MCE exception on CPU %d\n", cpu);
		switch (context) {
		case MCJ_CTX_IRQ:
			/*
			 * Could do more to fake interrupts like
			 * calling irq_enter, but the necessary
			 * machinery isn't exported currently.
			 */
			/*FALL THROUGH*/
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			break;
		default:
			pr_info("Invalid MCE context\n");
			ret = -EINVAL;
		}
		pr_info("MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		pr_info("Starting machine check poll CPU %d\n", cpu);
		raise_poll(m);
		mce_notify_irq();
		pr_info("Machine check poll done on CPU %d\n", cpu);
	} else
		/* Nothing to raise; just mark the record consumed. */
		m->finished = 0;

	return ret;
}
227
/*
 * Trigger the injection described by @m.
 *
 * Publishes the record, then — unless the context is MCJ_CTX_RANDOM,
 * where CPUs pick it up on their own — raises it: optionally broadcast
 * first to all other eligible online CPUs via IRQ or NMI IPI (waiting
 * up to 2s for them to consume it), and finally on the local CPU.
 */
static void __maybe_unused raise_mce(struct mce *m)
{
	int context = MCJ_CTX(m->inject_flags);

	inject_mce(m);

	if (context == MCJ_CTX_RANDOM)
		return;

	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		unsigned long start;
		int cpu;

		get_online_cpus();
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);
			/* Only target CPUs holding a prepared "random" record. */
			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		}
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * don't wait because mce_irq_ipi is necessary
				 * to be sync with following raise_local
				 */
				preempt_disable();
				smp_call_function_many(mce_inject_cpumask,
					mce_irq_ipi, NULL, 0);
				preempt_enable();
			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
				apic->send_IPI_mask(mce_inject_cpumask,
						NMI_VECTOR);
		}
		/* Bounded wait until every targeted CPU has responded. */
		start = jiffies;
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2*HZ)) {
				pr_err("Timeout waiting for mce inject %lx\n",
					*cpumask_bits(mce_inject_cpumask));
				break;
			}
			cpu_relax();
		}
		raise_local();
		put_cpu();
		put_online_cpus();
	} else {
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}
282
283static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
284			    void *data)
285{
286	struct mce *m = (struct mce *)data;
287
288	if (!m)
289		return NOTIFY_DONE;
290
291	mutex_lock(&mce_inject_mutex);
292	raise_mce(m);
293	mutex_unlock(&mce_inject_mutex);
294
295	return NOTIFY_DONE;
296}
297
298static struct notifier_block inject_nb = {
299	.notifier_call  = mce_inject_raise,
300};
301
302/*
303 * Caller needs to be make sure this cpu doesn't disappear
304 * from under us, i.e.: get_cpu/put_cpu.
305 */
306static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
307{
308	u32 l, h;
309	int err;
310
311	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
312	if (err) {
313		pr_err("%s: error reading HWCR\n", __func__);
314		return err;
315	}
316
317	enable ? (l |= BIT(18)) : (l &= ~BIT(18));
318
319	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
320	if (err)
321		pr_err("%s: error writing HWCR\n", __func__);
322
323	return err;
324}
325
326static int __set_inj(const char *buf)
327{
328	int i;
329
330	for (i = 0; i < N_INJ_TYPES; i++) {
331		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
332			inj_type = i;
333			return 0;
334		}
335	}
336	return -EINVAL;
337}
338
339static ssize_t flags_read(struct file *filp, char __user *ubuf,
340			  size_t cnt, loff_t *ppos)
341{
342	char buf[MAX_FLAG_OPT_SIZE];
343	int n;
344
345	n = sprintf(buf, "%s\n", flags_options[inj_type]);
346
347	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
348}
349
350static ssize_t flags_write(struct file *filp, const char __user *ubuf,
351			   size_t cnt, loff_t *ppos)
352{
353	char buf[MAX_FLAG_OPT_SIZE], *__buf;
354	int err;
355
356	if (cnt > MAX_FLAG_OPT_SIZE)
357		return -EINVAL;
358
359	if (copy_from_user(&buf, ubuf, cnt))
 
 
360		return -EFAULT;
361
362	buf[cnt - 1] = 0;
363
364	/* strip whitespace */
365	__buf = strstrip(buf);
366
367	err = __set_inj(__buf);
368	if (err) {
369		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
370		return err;
371	}
372
373	*ppos += cnt;
374
375	return cnt;
376}
377
/* File operations for the "flags" debugfs file. */
static const struct file_operations flags_fops = {
	.read           = flags_read,
	.write          = flags_write,
	.llseek         = generic_file_llseek,
};
383
384/*
385 * On which CPU to inject?
386 */
387MCE_INJECT_GET(extcpu);
388
389static int inj_extcpu_set(void *data, u64 val)
390{
391	struct mce *m = (struct mce *)data;
392
393	if (val >= nr_cpu_ids || !cpu_online(val)) {
394		pr_err("%s: Invalid CPU: %llu\n", __func__, val);
395		return -EINVAL;
396	}
397	m->extcpu = val;
398	return 0;
399}
400
401DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
402
/* Raise a machine-check exception on the calling CPU (vector 18 == #MC). */
static void trigger_mce(void *info)
{
	asm volatile("int $18");
}

/* Fire the APIC deferred-error interrupt vector on the calling CPU. */
static void trigger_dfr_int(void *info)
{
	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
}

/* Fire the APIC threshold interrupt vector on the calling CPU. */
static void trigger_thr_int(void *info)
{
	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
}
417
418static u32 get_nbc_for_node(int node_id)
419{
420	struct cpuinfo_x86 *c = &boot_cpu_data;
421	u32 cores_per_node;
422
423	cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
424
425	return cores_per_node * node_id;
426}
427
/*
 * Make sure D18F3x44[NbMcaToMstCpuEn] (bit 27) is set for node @nid so
 * northbridge MCA errors are reported on the node's master core.
 * Best effort: bails out silently if the northbridge or its F3 PCI
 * device can't be found; logs on config-space access errors.
 */
static void toggle_nb_mca_mst_cpu(u16 nid)
{
	struct amd_northbridge *nb;
	struct pci_dev *F3;
	u32 val;
	int err;

	nb = node_to_amd_nb(nid);
	if (!nb)
		return;

	F3 = nb->misc;
	if (!F3)
		return;

	err = pci_read_config_dword(F3, NBCFG, &val);
	if (err) {
		pr_err("%s: Error reading F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);
		return;
	}

	/* Already enabled by BIOS? Nothing to do. */
	if (val & BIT(27))
		return;

	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
	       __func__);

	val |= BIT(27);
	err = pci_write_config_dword(F3, NBCFG, val);
	if (err)
		pr_err("%s: Error writing F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);
}
462
/*
 * Write the staged error values into the MCA MSRs of the current CPU
 * (runs on the target CPU via smp_call_function_single()). On SMCA
 * systems, deferred-error injections go to the DESTAT/DEADDR registers;
 * everything else goes to the regular STATUS/ADDR (and MISC/SYND where
 * applicable) registers.
 *
 * @info: pointer to the staged struct mce; copied locally by value.
 */
static void prepare_msrs(void *info)
{
	struct mce m = *(struct mce *)info;
	u8 b = m.bank;

	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);

	if (boot_cpu_has(X86_FEATURE_SMCA)) {
		if (m.inject_flags == DFR_INT_INJ) {
			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
		} else {
			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
		}

		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
	} else {
		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
	}
}
487
488static void do_inject(void)
489{
490	u64 mcg_status = 0;
491	unsigned int cpu = i_mce.extcpu;
492	u8 b = i_mce.bank;
493
494	i_mce.tsc = rdtsc_ordered();
495
496	if (i_mce.misc)
497		i_mce.status |= MCI_STATUS_MISCV;
498
499	if (i_mce.synd)
500		i_mce.status |= MCI_STATUS_SYNDV;
501
502	if (inj_type == SW_INJ) {
503		mce_inject_log(&i_mce);
504		return;
505	}
506
507	/* prep MCE global settings for the injection */
508	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
509
510	if (!(i_mce.status & MCI_STATUS_PCC))
511		mcg_status |= MCG_STATUS_RIPV;
512
513	/*
514	 * Ensure necessary status bits for deferred errors:
515	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
516	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
517	 */
518	if (inj_type == DFR_INT_INJ) {
519		i_mce.status |= MCI_STATUS_DEFERRED;
520		i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
521	}
522
523	/*
524	 * For multi node CPUs, logging and reporting of bank 4 errors happens
525	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
526	 * Fam10h and later BKDGs.
527	 */
528	if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
529	    b == 4 &&
530	    boot_cpu_data.x86 < 0x17) {
531		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
532		cpu = get_nbc_for_node(amd_get_nb_id(cpu));
533	}
534
535	get_online_cpus();
536	if (!cpu_online(cpu))
537		goto err;
538
539	toggle_hw_mce_inject(cpu, true);
540
541	i_mce.mcgstatus = mcg_status;
542	i_mce.inject_flags = inj_type;
543	smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
544
545	toggle_hw_mce_inject(cpu, false);
546
547	switch (inj_type) {
548	case DFR_INT_INJ:
549		smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
550		break;
551	case THR_INT_INJ:
552		smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
553		break;
554	default:
555		smp_call_function_single(cpu, trigger_mce, NULL, 0);
556	}
557
558err:
559	put_online_cpus();
560
 
 
 
 
561}
562
563/*
564 * This denotes into which bank we're injecting and triggers
565 * the injection, at the same time.
566 */
567static int inj_bank_set(void *data, u64 val)
568{
569	struct mce *m = (struct mce *)data;
570
571	if (val >= n_banks) {
572		pr_err("Non-existent MCE bank: %llu\n", val);
573		return -EINVAL;
574	}
575
576	m->bank = val;
577	do_inject();
578
579	return 0;
580}
581
/* Getter plus file ops for the "bank" debugfs file (decimal format). */
MCE_INJECT_GET(bank);

DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
585
/* Contents served by the debugfs README file (see readme_fops below). */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t  - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t    format only. Safe to use.\n"
"\t  - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t    before injecting.\n"
"\t  - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t    error APIC interrupt handler to handle the error if the feature is \n"
"\t    is present in hardware. \n"
"\t  - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t    APIC interrupt handler to handle the error. \n"
"\n";
630
631static ssize_t
632inj_readme_read(struct file *filp, char __user *ubuf,
633		       size_t cnt, loff_t *ppos)
634{
635	return simple_read_from_buffer(ubuf, cnt, ppos,
636					readme_msg, strlen(readme_msg));
637}
638
/* Read-only file operations for the README file. */
static const struct file_operations readme_fops = {
	.read		= inj_readme_read,
};
642
/* Table describing every debugfs file this driver creates. */
static struct dfs_node {
	char *name;				/* file name under mce-inject/ */
	struct dentry *d;			/* dentry, filled in at init */
	const struct file_operations *fops;	/* handlers for this file */
	umode_t perm;				/* file permissions */
} dfs_fls[] = {
	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "synd",	.fops = &synd_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
};
658
/*
 * Read the MCA bank count from MCG_CAP and create the mce-inject debugfs
 * directory with all files from dfs_fls[]. On any failure, tear down
 * whatever was created so far.
 */
static int __init debugfs_init(void)
{
	unsigned int i;
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	/* Low byte of MCG_CAP is the number of banks. */
	n_banks = cap & MCG_BANKCNT_MASK;

	dfs_inj = debugfs_create_dir("mce-inject", NULL);
	if (!dfs_inj)
		return -EINVAL;

	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
		/* All files share the same staged record as private data. */
		dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
						    dfs_fls[i].perm,
						    dfs_inj,
						    &i_mce,
						    dfs_fls[i].fops);

		if (!dfs_fls[i].d)
			goto err_dfs_add;
	}

	return 0;

err_dfs_add:
	/* Remove the files created before the failure, then the dir. */
	while (i-- > 0)
		debugfs_remove(dfs_fls[i].d);

	debugfs_remove(dfs_inj);
	dfs_inj = NULL;

	return -ENODEV;
}
693
/*
 * Module init: allocate the broadcast cpumask, create the debugfs
 * interface, hook the NMI handler and the MCE injector chain, and
 * pre-fill the staged record's CPU vendor.
 */
static int __init inject_init(void)
{
	int err;

	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
		return -ENOMEM;

	err = debugfs_init();
	if (err) {
		free_cpumask_var(mce_inject_cpumask);
		return err;
	}

	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
	mce_register_injector_chain(&inject_nb);

	setup_inj_struct(&i_mce);

	pr_info("Machine check injector initialized\n");

	return 0;
}
716
/* Module teardown: unhook notifiers, remove debugfs files, free the mask. */
static void __exit inject_exit(void)
{

	mce_unregister_injector_chain(&inject_nb);
	unregister_nmi_handler(NMI_LOCAL, "mce_notify");

	debugfs_remove_recursive(dfs_inj);
	dfs_inj = NULL;

	/* Poison the table so stale dentry pointers can't be reused. */
	memset(&dfs_fls, 0, sizeof(dfs_fls));

	free_cpumask_var(mce_inject_cpumask);
}
730
/* This version supports unloading: inject_exit() undoes everything. */
module_init(inject_init);
module_exit(inject_exit);

MODULE_LICENSE("GPL");
v4.6
  1/*
  2 * Machine check injection support.
  3 * Copyright 2008 Intel Corporation.
  4 *
  5 * This program is free software; you can redistribute it and/or
  6 * modify it under the terms of the GNU General Public License
  7 * as published by the Free Software Foundation; version 2
  8 * of the License.
  9 *
 10 * Authors:
 11 * Andi Kleen
 12 * Ying Huang
 
 
 
 
 
 
 
 
 
 
 
 13 */
 14#include <linux/uaccess.h>
 
 
 
 15#include <linux/module.h>
 16#include <linux/timer.h>
 17#include <linux/kernel.h>
 18#include <linux/string.h>
 19#include <linux/fs.h>
 20#include <linux/preempt.h>
 21#include <linux/smp.h>
 22#include <linux/notifier.h>
 23#include <linux/kdebug.h>
 24#include <linux/cpu.h>
 25#include <linux/sched.h>
 26#include <linux/gfp.h>
 
 
 27#include <asm/mce.h>
 28#include <asm/apic.h>
 29#include <asm/nmi.h>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 30
/*
 * Update fake mce registers on current CPU.
 *
 * Publish @m into the target CPU's per-cpu injectm slot. ->finished acts
 * as the "record valid" flag: cleared first, set last, with full memory
 * barriers so a concurrent reader never sees a partial record as valid.
 */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}
 49
/*
 * Run the machine-check poll handler on the current CPU with interrupts
 * disabled, then mark the injected record as consumed.
 */
static void raise_poll(struct mce *m)
{
	unsigned long flags;
	mce_banks_t b;

	/* Poll every bank: set all bits in the bank mask. */
	memset(&b, 0xff, sizeof(mce_banks_t));
	local_irq_save(flags);
	machine_check_poll(0, &b);
	local_irq_restore(flags);
	m->finished = 0;
}
 61
/*
 * Invoke the #MC handler directly on this CPU. If no pt_regs were
 * supplied (software-triggered path), fabricate a minimal set from the
 * injected record's ip/cs so the handler has something to report.
 */
static void raise_exception(struct mce *m, struct pt_regs *pregs)
{
	struct pt_regs regs;
	unsigned long flags;

	if (!pregs) {
		memset(&regs, 0, sizeof(struct pt_regs));
		regs.ip = m->ip;
		regs.cs = m->cs;
		pregs = &regs;
	}
	/* in mcheck exception handler, irq will be disabled */
	local_irq_save(flags);
	do_machine_check(pregs, 0);
	local_irq_restore(flags);
	m->finished = 0;
}
 79
/* CPUs still expected to consume a broadcast injection. */
static cpumask_var_t mce_inject_cpumask;
/* Serializes raise_mce() against concurrent injections. */
static DEFINE_MUTEX(mce_inject_mutex);

/*
 * NMI handler for MCJ_NMI_BROADCAST injections: if this CPU is a pending
 * target, remove it from the mask and raise the prepared error locally.
 * Runs in NMI context.
 */
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);
	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
		return NMI_DONE;
	cpumask_clear_cpu(cpu, mce_inject_cpumask);
	if (m->inject_flags & MCJ_EXCEPTION)
		raise_exception(m, regs);
	else if (m->status)
		raise_poll(m);
	return NMI_HANDLED;
}
 96
 97static void mce_irq_ipi(void *info)
 98{
 99	int cpu = smp_processor_id();
100	struct mce *m = this_cpu_ptr(&injectm);
101
102	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
103			m->inject_flags & MCJ_EXCEPTION) {
104		cpumask_clear_cpu(cpu, mce_inject_cpumask);
105		raise_exception(m, NULL);
106	}
107}
108
/*
 * Inject mce on current CPU.
 *
 * Consume this CPU's prepared per-cpu injectm record: either call the
 * #MC handler directly (MCJ_EXCEPTION) or run the poll path. Returns
 * -EINVAL for an unsupported injection context, 0 otherwise.
 */
static int raise_local(void)
{
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);
	int ret = 0;
	int cpu = m->extcpu;

	if (m->inject_flags & MCJ_EXCEPTION) {
		pr_info("Triggering MCE exception on CPU %d\n", cpu);
		switch (context) {
		case MCJ_CTX_IRQ:
			/*
			 * Could do more to fake interrupts like
			 * calling irq_enter, but the necessary
			 * machinery isn't exported currently.
			 */
			/*FALL THROUGH*/
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			break;
		default:
			pr_info("Invalid MCE context\n");
			ret = -EINVAL;
		}
		pr_info("MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		pr_info("Starting machine check poll CPU %d\n", cpu);
		raise_poll(m);
		mce_notify_irq();
		pr_info("Machine check poll done on CPU %d\n", cpu);
	} else
		/* Nothing to raise; just mark the record consumed. */
		m->finished = 0;

	return ret;
}
145
/*
 * Trigger the injection described by @m.
 *
 * Publishes the record, then — unless the context is MCJ_CTX_RANDOM,
 * where CPUs pick it up on their own — raises it: optionally broadcast
 * first to all other eligible online CPUs via IRQ or NMI IPI (waiting
 * up to 2s for them to consume it), and finally on the local CPU.
 */
static void raise_mce(struct mce *m)
{
	int context = MCJ_CTX(m->inject_flags);

	inject_mce(m);

	if (context == MCJ_CTX_RANDOM)
		return;

#ifdef CONFIG_X86_LOCAL_APIC
	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		unsigned long start;
		int cpu;

		get_online_cpus();
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);
			/* Only target CPUs holding a prepared "random" record. */
			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		}
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * don't wait because mce_irq_ipi is necessary
				 * to be sync with following raise_local
				 */
				preempt_disable();
				smp_call_function_many(mce_inject_cpumask,
					mce_irq_ipi, NULL, 0);
				preempt_enable();
			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
				apic->send_IPI_mask(mce_inject_cpumask,
						NMI_VECTOR);
		}
		/* Bounded wait until every targeted CPU has responded. */
		start = jiffies;
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2*HZ)) {
				pr_err("Timeout waiting for mce inject %lx\n",
					*cpumask_bits(mce_inject_cpumask));
				break;
			}
			cpu_relax();
		}
		raise_local();
		put_cpu();
		put_online_cpus();
	} else
#endif
	{
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}
203
204/* Error injection interface */
205static ssize_t mce_write(struct file *filp, const char __user *ubuf,
206			 size_t usize, loff_t *off)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207{
208	struct mce m;
 
209
210	if (!capable(CAP_SYS_ADMIN))
211		return -EPERM;
212	/*
213	 * There are some cases where real MSR reads could slip
214	 * through.
215	 */
216	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
217		return -EIO;
 
 
 
 
 
218
219	if ((unsigned long)usize > sizeof(struct mce))
220		usize = sizeof(struct mce);
221	if (copy_from_user(&m, ubuf, usize))
222		return -EFAULT;
223
224	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225		return -EINVAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
227	/*
228	 * Need to give user space some time to set everything up,
229	 * so do it a jiffie or two later everywhere.
 
230	 */
231	schedule_timeout(2);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
233	mutex_lock(&mce_inject_mutex);
234	raise_mce(&m);
235	mutex_unlock(&mce_inject_mutex);
236	return usize;
237}
238
239static int inject_init(void)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240{
 
 
241	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
242		return -ENOMEM;
 
 
 
 
 
 
 
 
 
 
 
 
243	pr_info("Machine check injector initialized\n");
244	register_mce_write_callback(mce_write);
245	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
246				"mce_notify");
247	return 0;
248}
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
module_init(inject_init);
/*
 * No module_exit() on purpose: cannot tolerate unloading currently
 * because we cannot guarantee all openers of mce_chrdev will get a
 * reference to us.
 */
MODULE_LICENSE("GPL");