Linux Audio

Check our new training course

Linux debugging, profiling, tracing and performance analysis training

Apr 14-17, 2025
Register
Loading...
Note: File does not exist in v6.13.7.
  1/*
  2 * A simple MCE injection facility for testing different aspects of the RAS
  3 * code. This driver should be built as module so that it can be loaded
  4 * on production kernels for testing purposes.
  5 *
  6 * This file may be distributed under the terms of the GNU General Public
  7 * License version 2.
  8 *
  9 * Copyright (c) 2010-15:  Borislav Petkov <bp@alien8.de>
 10 *			Advanced Micro Devices Inc.
 11 */
 12
 13#include <linux/kobject.h>
 14#include <linux/debugfs.h>
 15#include <linux/device.h>
 16#include <linux/module.h>
 17#include <linux/cpu.h>
 18#include <linux/string.h>
 19#include <linux/uaccess.h>
 20#include <linux/pci.h>
 21
 22#include <asm/mce.h>
 23#include <asm/smp.h>
 24#include <asm/amd_nb.h>
 25#include <asm/irq_vectors.h>
 26
 27#include "../kernel/cpu/mcheck/mce-internal.h"
 28
/*
 * Collect all the MCi_XXX settings
 *
 * i_mce is the record handed to every debugfs file in this module as its
 * private data; do_inject() logs it or writes it to the MSRs.
 */
static struct mce i_mce;
/* The "mce-inject" debugfs directory created at module init. */
static struct dentry *dfs_inj;

/* Bank count read from MSR_IA32_MCG_CAP at module init. */
static u8 n_banks;
 36
/* Maximum number of bytes accepted by a write to the flags file. */
#define MAX_FLAG_OPT_SIZE	3
/* PCI config offset accessed by toggle_nb_mca_mst_cpu() (D18F3x44). */
#define NBCFG			0x44
 39
/*
 * Injection types selectable through the flags file. The values index
 * flags_options[].
 */
enum injection_type {
	SW_INJ = 0,	/* SW injection, simply decode the error */
	HW_INJ,		/* Trigger a #MC */
	DFR_INT_INJ,    /* Trigger Deferred error interrupt */
	THR_INT_INJ,    /* Trigger threshold interrupt */
	N_INJ_TYPES,	/* count of the types above; loop bound in __set_inj() */
};
 47
/*
 * Textual names of the injection types, indexed by enum injection_type.
 * These are the strings matched by __set_inj() and printed by flags_read().
 * The table carries a trailing NULL entry.
 */
static const char * const flags_options[] = {
	[SW_INJ] = "sw",
	[HW_INJ] = "hw",
	[DFR_INT_INJ] = "df",
	[THR_INT_INJ] = "th",
	NULL
};
 55
/*
 * Currently selected injection type; updated by __set_inj().
 * Set default injection to SW_INJ.
 */
static enum injection_type inj_type = SW_INJ;
 58
 59#define MCE_INJECT_SET(reg)						\
 60static int inj_##reg##_set(void *data, u64 val)				\
 61{									\
 62	struct mce *m = (struct mce *)data;				\
 63									\
 64	m->reg = val;							\
 65	return 0;							\
 66}
 67
 68MCE_INJECT_SET(status);
 69MCE_INJECT_SET(misc);
 70MCE_INJECT_SET(addr);
 71MCE_INJECT_SET(synd);
 72
 73#define MCE_INJECT_GET(reg)						\
 74static int inj_##reg##_get(void *data, u64 *val)			\
 75{									\
 76	struct mce *m = (struct mce *)data;				\
 77									\
 78	*val = m->reg;							\
 79	return 0;							\
 80}
 81
 82MCE_INJECT_GET(status);
 83MCE_INJECT_GET(misc);
 84MCE_INJECT_GET(addr);
 85MCE_INJECT_GET(synd);
 86
/*
 * File operations for the status, misc, addr and synd files, built from
 * the generated getter/setter pairs. Values are formatted as hex.
 */
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
 91
 92/*
 93 * Caller needs to be make sure this cpu doesn't disappear
 94 * from under us, i.e.: get_cpu/put_cpu.
 95 */
 96static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
 97{
 98	u32 l, h;
 99	int err;
100
101	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
102	if (err) {
103		pr_err("%s: error reading HWCR\n", __func__);
104		return err;
105	}
106
107	enable ? (l |= BIT(18)) : (l &= ~BIT(18));
108
109	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
110	if (err)
111		pr_err("%s: error writing HWCR\n", __func__);
112
113	return err;
114}
115
116static int __set_inj(const char *buf)
117{
118	int i;
119
120	for (i = 0; i < N_INJ_TYPES; i++) {
121		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
122			inj_type = i;
123			return 0;
124		}
125	}
126	return -EINVAL;
127}
128
129static ssize_t flags_read(struct file *filp, char __user *ubuf,
130			  size_t cnt, loff_t *ppos)
131{
132	char buf[MAX_FLAG_OPT_SIZE];
133	int n;
134
135	n = sprintf(buf, "%s\n", flags_options[inj_type]);
136
137	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
138}
139
140static ssize_t flags_write(struct file *filp, const char __user *ubuf,
141			   size_t cnt, loff_t *ppos)
142{
143	char buf[MAX_FLAG_OPT_SIZE], *__buf;
144	int err;
145
146	if (cnt > MAX_FLAG_OPT_SIZE)
147		return -EINVAL;
148
149	if (copy_from_user(&buf, ubuf, cnt))
150		return -EFAULT;
151
152	buf[cnt - 1] = 0;
153
154	/* strip whitespace */
155	__buf = strstrip(buf);
156
157	err = __set_inj(__buf);
158	if (err) {
159		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
160		return err;
161	}
162
163	*ppos += cnt;
164
165	return cnt;
166}
167
/* File operations of the flags file (see flags_read()/flags_write()). */
static const struct file_operations flags_fops = {
	.read           = flags_read,
	.write          = flags_write,
	.llseek         = generic_file_llseek,
};
173
/*
 * On which CPU to inject?
 *
 * Only the getter is macro-generated; the setter below validates the CPU
 * number before storing it.
 */
MCE_INJECT_GET(extcpu);
178
179static int inj_extcpu_set(void *data, u64 val)
180{
181	struct mce *m = (struct mce *)data;
182
183	if (val >= nr_cpu_ids || !cpu_online(val)) {
184		pr_err("%s: Invalid CPU: %llu\n", __func__, val);
185		return -EINVAL;
186	}
187	m->extcpu = val;
188	return 0;
189}
190
/* File operations of the cpu file; the value is formatted as decimal. */
DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
192
/*
 * Raise software interrupt vector 18 on the CPU executing this function.
 * do_inject() runs it on the target CPU for injection types other than
 * "sw", "df" and "th". @info is unused.
 */
static void trigger_mce(void *info)
{
	asm volatile("int $18");
}
197
/*
 * Raise the DEFERRED_ERROR_VECTOR software interrupt on the CPU executing
 * this function. do_inject() runs it on the target CPU for DFR_INT_INJ.
 * @info is unused.
 */
static void trigger_dfr_int(void *info)
{
	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
}
202
/*
 * Raise the THRESHOLD_APIC_VECTOR software interrupt on the CPU executing
 * this function. do_inject() runs it on the target CPU for THR_INT_INJ.
 * @info is unused.
 */
static void trigger_thr_int(void *info)
{
	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
}
207
208static u32 get_nbc_for_node(int node_id)
209{
210	struct cpuinfo_x86 *c = &boot_cpu_data;
211	u32 cores_per_node;
212
213	cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
214
215	return cores_per_node * node_id;
216}
217
218static void toggle_nb_mca_mst_cpu(u16 nid)
219{
220	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
221	u32 val;
222	int err;
223
224	if (!F3)
225		return;
226
227	err = pci_read_config_dword(F3, NBCFG, &val);
228	if (err) {
229		pr_err("%s: Error reading F%dx%03x.\n",
230		       __func__, PCI_FUNC(F3->devfn), NBCFG);
231		return;
232	}
233
234	if (val & BIT(27))
235		return;
236
237	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
238	       __func__);
239
240	val |= BIT(27);
241	err = pci_write_config_dword(F3, NBCFG, val);
242	if (err)
243		pr_err("%s: Error writing F%dx%03x.\n",
244		       __func__, PCI_FUNC(F3->devfn), NBCFG);
245}
246
247static void prepare_msrs(void *info)
248{
249	struct mce m = *(struct mce *)info;
250	u8 b = m.bank;
251
252	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
253
254	if (boot_cpu_has(X86_FEATURE_SMCA)) {
255		if (m.inject_flags == DFR_INT_INJ) {
256			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
257			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
258		} else {
259			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
260			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
261		}
262
263		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
264		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
265	} else {
266		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
267		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
268		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
269	}
270}
271
272static void do_inject(void)
273{
274	u64 mcg_status = 0;
275	unsigned int cpu = i_mce.extcpu;
276	u8 b = i_mce.bank;
277
278	rdtscll(i_mce.tsc);
279
280	if (i_mce.misc)
281		i_mce.status |= MCI_STATUS_MISCV;
282
283	if (i_mce.synd)
284		i_mce.status |= MCI_STATUS_SYNDV;
285
286	if (inj_type == SW_INJ) {
287		mce_inject_log(&i_mce);
288		return;
289	}
290
291	/* prep MCE global settings for the injection */
292	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
293
294	if (!(i_mce.status & MCI_STATUS_PCC))
295		mcg_status |= MCG_STATUS_RIPV;
296
297	/*
298	 * Ensure necessary status bits for deferred errors:
299	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
300	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
301	 */
302	if (inj_type == DFR_INT_INJ) {
303		i_mce.status |= MCI_STATUS_DEFERRED;
304		i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
305	}
306
307	/*
308	 * For multi node CPUs, logging and reporting of bank 4 errors happens
309	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
310	 * Fam10h and later BKDGs.
311	 */
312	if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
313	    b == 4 &&
314	    boot_cpu_data.x86 < 0x17) {
315		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
316		cpu = get_nbc_for_node(amd_get_nb_id(cpu));
317	}
318
319	get_online_cpus();
320	if (!cpu_online(cpu))
321		goto err;
322
323	toggle_hw_mce_inject(cpu, true);
324
325	i_mce.mcgstatus = mcg_status;
326	i_mce.inject_flags = inj_type;
327	smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
328
329	toggle_hw_mce_inject(cpu, false);
330
331	switch (inj_type) {
332	case DFR_INT_INJ:
333		smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
334		break;
335	case THR_INT_INJ:
336		smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
337		break;
338	default:
339		smp_call_function_single(cpu, trigger_mce, NULL, 0);
340	}
341
342err:
343	put_online_cpus();
344
345}
346
347/*
348 * This denotes into which bank we're injecting and triggers
349 * the injection, at the same time.
350 */
351static int inj_bank_set(void *data, u64 val)
352{
353	struct mce *m = (struct mce *)data;
354
355	if (val >= n_banks) {
356		pr_err("Non-existent MCE bank: %llu\n", val);
357		return -EINVAL;
358	}
359
360	m->bank = val;
361	do_inject();
362
363	return 0;
364}
365
/* Macro-generated getter for the bank field. */
MCE_INJECT_GET(bank);

/* File operations of the bank file; the value is formatted as decimal. */
DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
369
/* Help text returned verbatim when the README file is read. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t  - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t    format only. Safe to use.\n"
"\t  - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t    handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t    is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t    before injecting.\n"
"\t  - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t    error APIC interrupt handler to handle the error if the feature is \n"
"\t    present in hardware. \n"
"\t  - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t    APIC interrupt handler to handle the error. \n"
"\n";
414
415static ssize_t
416inj_readme_read(struct file *filp, char __user *ubuf,
417		       size_t cnt, loff_t *ppos)
418{
419	return simple_read_from_buffer(ubuf, cnt, ppos,
420					readme_msg, strlen(readme_msg));
421}
422
/* File operations of the README file; only reading is implemented. */
static const struct file_operations readme_fops = {
	.read		= inj_readme_read,
};
426
427static struct dfs_node {
428	char *name;
429	struct dentry *d;
430	const struct file_operations *fops;
431	umode_t perm;
432} dfs_fls[] = {
433	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
434	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
435	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
436	{ .name = "synd",	.fops = &synd_fops,   .perm = S_IRUSR | S_IWUSR },
437	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
438	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
439	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
440	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
441};
442
443static int __init init_mce_inject(void)
444{
445	unsigned int i;
446	u64 cap;
447
448	rdmsrl(MSR_IA32_MCG_CAP, cap);
449	n_banks = cap & MCG_BANKCNT_MASK;
450
451	dfs_inj = debugfs_create_dir("mce-inject", NULL);
452	if (!dfs_inj)
453		return -EINVAL;
454
455	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
456		dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
457						    dfs_fls[i].perm,
458						    dfs_inj,
459						    &i_mce,
460						    dfs_fls[i].fops);
461
462		if (!dfs_fls[i].d)
463			goto err_dfs_add;
464	}
465
466	return 0;
467
468err_dfs_add:
469	while (i-- > 0)
470		debugfs_remove(dfs_fls[i].d);
471
472	debugfs_remove(dfs_inj);
473	dfs_inj = NULL;
474
475	return -ENODEV;
476}
477
478static void __exit exit_mce_inject(void)
479{
480
481	debugfs_remove_recursive(dfs_inj);
482	dfs_inj = NULL;
483
484	memset(&dfs_fls, 0, sizeof(dfs_fls));
485}
/* Register the module's init and exit handlers. */
module_init(init_mce_inject);
module_exit(exit_mce_inject);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>");
MODULE_AUTHOR("AMD Inc.");
MODULE_DESCRIPTION("MCE injection facility for RAS testing");