Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1/*
  2 * UEFI Common Platform Error Record (CPER) support
  3 *
  4 * Copyright (C) 2010, Intel Corp.
  5 *	Author: Huang Ying <ying.huang@intel.com>
  6 *
  7 * CPER is the format used to describe platform hardware error by
  8 * various tables, such as ERST, BERT and HEST etc.
  9 *
 10 * For more information about CPER, please refer to Appendix N of UEFI
 11 * Specification version 2.4.
 12 *
 13 * This program is free software; you can redistribute it and/or
 14 * modify it under the terms of the GNU General Public License version
 15 * 2 as published by the Free Software Foundation.
 16 *
 17 * This program is distributed in the hope that it will be useful,
 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 * GNU General Public License for more details.
 21 *
 22 * You should have received a copy of the GNU General Public License
 23 * along with this program; if not, write to the Free Software
 24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 25 */
 26
 27#include <linux/kernel.h>
 28#include <linux/module.h>
 29#include <linux/time.h>
 30#include <linux/cper.h>
 31#include <linux/dmi.h>
 32#include <linux/acpi.h>
 33#include <linux/pci.h>
 34#include <linux/aer.h>
 35#include <linux/printk.h>
 36#include <linux/bcd.h>
 37#include <acpi/ghes.h>
 38#include <ras/ras_event.h>
 39
/* One extra level of indentation, appended to a caller's prefix for nested output. */
#define INDENT_SP	" "

/*
 * File-wide scratch buffer of CPER_REC_LEN bytes that the memory-error
 * location helpers format into.  It is shared by every caller in this
 * file; no locking around it is visible here.
 */
static char rcd_decode_str[CPER_REC_LEN];
 43
 44/*
 45 * CPER record ID need to be unique even after reboot, because record
 46 * ID is used as index for ERST storage, while CPER records from
 47 * multiple boot may co-exist in ERST.
 48 */
 49u64 cper_next_record_id(void)
 50{
 51	static atomic64_t seq;
 52
 53	if (!atomic64_read(&seq))
 54		atomic64_set(&seq, ((u64)get_seconds()) << 32);
 55
 56	return atomic64_inc_return(&seq);
 57}
 58EXPORT_SYMBOL_GPL(cper_next_record_id);
 59
 60static const char * const severity_strs[] = {
 61	"recoverable",
 62	"fatal",
 63	"corrected",
 64	"info",
 65};
 66
 67const char *cper_severity_str(unsigned int severity)
 68{
 69	return severity < ARRAY_SIZE(severity_strs) ?
 70		severity_strs[severity] : "unknown";
 71}
 72EXPORT_SYMBOL_GPL(cper_severity_str);
 73
 74/*
 75 * cper_print_bits - print strings for set bits
 76 * @pfx: prefix for each line, including log level and prefix string
 77 * @bits: bit mask
 78 * @strs: string array, indexed by bit position
 79 * @strs_size: size of the string array: @strs
 80 *
 81 * For each set bit in @bits, print the corresponding string in @strs.
 82 * If the output length is longer than 80, multiple line will be
 83 * printed, with @pfx is printed at the beginning of each line.
 84 */
 85void cper_print_bits(const char *pfx, unsigned int bits,
 86		     const char * const strs[], unsigned int strs_size)
 87{
 88	int i, len = 0;
 89	const char *str;
 90	char buf[84];
 91
 92	for (i = 0; i < strs_size; i++) {
 93		if (!(bits & (1U << i)))
 94			continue;
 95		str = strs[i];
 96		if (!str)
 97			continue;
 98		if (len && len + strlen(str) + 2 > 80) {
 99			printk("%s\n", buf);
100			len = 0;
101		}
102		if (!len)
103			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
104		else
105			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
106	}
107	if (len)
108		printk("%s\n", buf);
109}
110
111static const char * const proc_type_strs[] = {
112	"IA32/X64",
113	"IA64",
114	"ARM",
115};
116
117static const char * const proc_isa_strs[] = {
118	"IA32",
119	"IA64",
120	"X64",
121	"ARM A32/T32",
122	"ARM A64",
123};
124
125const char * const cper_proc_error_type_strs[] = {
126	"cache error",
127	"TLB error",
128	"bus error",
129	"micro-architectural error",
130};
131
132static const char * const proc_op_strs[] = {
133	"unknown or generic",
134	"data read",
135	"data write",
136	"instruction execution",
137};
138
139static const char * const proc_flag_strs[] = {
140	"restartable",
141	"precise IP",
142	"overflow",
143	"corrected",
144};
145
146static void cper_print_proc_generic(const char *pfx,
147				    const struct cper_sec_proc_generic *proc)
148{
149	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
150		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
151		       proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
152		       proc_type_strs[proc->proc_type] : "unknown");
153	if (proc->validation_bits & CPER_PROC_VALID_ISA)
154		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
155		       proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
156		       proc_isa_strs[proc->proc_isa] : "unknown");
157	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
158		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
159		cper_print_bits(pfx, proc->proc_error_type,
160				cper_proc_error_type_strs,
161				ARRAY_SIZE(cper_proc_error_type_strs));
162	}
163	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
164		printk("%s""operation: %d, %s\n", pfx, proc->operation,
165		       proc->operation < ARRAY_SIZE(proc_op_strs) ?
166		       proc_op_strs[proc->operation] : "unknown");
167	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
168		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
169		cper_print_bits(pfx, proc->flags, proc_flag_strs,
170				ARRAY_SIZE(proc_flag_strs));
171	}
172	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
173		printk("%s""level: %d\n", pfx, proc->level);
174	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
175		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
176	if (proc->validation_bits & CPER_PROC_VALID_ID)
177		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
178	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
179		printk("%s""target_address: 0x%016llx\n",
180		       pfx, proc->target_addr);
181	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
182		printk("%s""requestor_id: 0x%016llx\n",
183		       pfx, proc->requestor_id);
184	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
185		printk("%s""responder_id: 0x%016llx\n",
186		       pfx, proc->responder_id);
187	if (proc->validation_bits & CPER_PROC_VALID_IP)
188		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
189}
190
/* Memory error type names, indexed by the numeric error type. */
static const char * const mem_err_type_strs[] = {
	"unknown",
	"no error",
	"single-bit ECC",
	"multi-bit ECC",
	"single-symbol chipkill ECC",
	"multi-symbol chipkill ECC",
	"master abort",
	"target abort",
	"parity error",
	"watchdog timeout",
	"invalid address",
	"mirror Broken",
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};

/*
 * cper_mem_err_type_str - translate a memory error type into its name
 * @etype: numeric error type, used as an index into mem_err_type_strs
 *
 * Return: the matching name, or "unknown" when @etype lies outside the
 * table.
 */
const char *cper_mem_err_type_str(unsigned int etype)
{
	if (etype >= ARRAY_SIZE(mem_err_type_strs))
		return "unknown";

	return mem_err_type_strs[etype];
}
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
216
217static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
218{
219	u32 len, n;
220
221	if (!msg)
222		return 0;
223
224	n = 0;
225	len = CPER_REC_LEN - 1;
226	if (mem->validation_bits & CPER_MEM_VALID_NODE)
227		n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
228	if (mem->validation_bits & CPER_MEM_VALID_CARD)
229		n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
230	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
231		n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
232	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
233		n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
234	if (mem->validation_bits & CPER_MEM_VALID_BANK)
235		n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
236	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
237		n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
238	if (mem->validation_bits & CPER_MEM_VALID_ROW)
239		n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
240	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
241		n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
242	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
243		n += scnprintf(msg + n, len - n, "bit_position: %d ",
244			       mem->bit_pos);
245	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
246		n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
247			       mem->requestor_id);
248	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
249		n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
250			       mem->responder_id);
251	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
252		scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
253			  mem->target_id);
254
255	msg[n] = '\0';
256	return n;
257}
258
259static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
260{
261	u32 len, n;
262	const char *bank = NULL, *device = NULL;
263
264	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
265		return 0;
266
267	n = 0;
268	len = CPER_REC_LEN - 1;
269	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
270	if (bank && device)
271		n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
272	else
273		n = snprintf(msg, len,
274			     "DIMM location: not present. DMI handle: 0x%.4x ",
275			     mem->mem_dev_handle);
276
277	msg[n] = '\0';
278	return n;
279}
280
281void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
282		       struct cper_mem_err_compact *cmem)
283{
284	cmem->validation_bits = mem->validation_bits;
285	cmem->node = mem->node;
286	cmem->card = mem->card;
287	cmem->module = mem->module;
288	cmem->bank = mem->bank;
289	cmem->device = mem->device;
290	cmem->row = mem->row;
291	cmem->column = mem->column;
292	cmem->bit_pos = mem->bit_pos;
293	cmem->requestor_id = mem->requestor_id;
294	cmem->responder_id = mem->responder_id;
295	cmem->target_id = mem->target_id;
296	cmem->rank = mem->rank;
297	cmem->mem_array_handle = mem->mem_array_handle;
298	cmem->mem_dev_handle = mem->mem_dev_handle;
299}
300
301const char *cper_mem_err_unpack(struct trace_seq *p,
302				struct cper_mem_err_compact *cmem)
303{
304	const char *ret = trace_seq_buffer_ptr(p);
305
306	if (cper_mem_err_location(cmem, rcd_decode_str))
307		trace_seq_printf(p, "%s", rcd_decode_str);
308	if (cper_dimm_err_location(cmem, rcd_decode_str))
309		trace_seq_printf(p, "%s", rcd_decode_str);
310	trace_seq_putc(p, '\0');
311
312	return ret;
313}
314
315static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
316	int len)
317{
318	struct cper_mem_err_compact cmem;
319
320	/* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
321	if (len == sizeof(struct cper_sec_mem_err_old) &&
322	    (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
323		pr_err(FW_WARN "valid bits set for fields beyond structure\n");
324		return;
325	}
326	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
327		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
328	if (mem->validation_bits & CPER_MEM_VALID_PA)
329		printk("%s""physical_address: 0x%016llx\n",
330		       pfx, mem->physical_addr);
331	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
332		printk("%s""physical_address_mask: 0x%016llx\n",
333		       pfx, mem->physical_addr_mask);
334	cper_mem_err_pack(mem, &cmem);
335	if (cper_mem_err_location(&cmem, rcd_decode_str))
336		printk("%s%s\n", pfx, rcd_decode_str);
337	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
338		u8 etype = mem->error_type;
339		printk("%s""error_type: %d, %s\n", pfx, etype,
340		       cper_mem_err_type_str(etype));
341	}
342	if (cper_dimm_err_location(&cmem, rcd_decode_str))
343		printk("%s%s\n", pfx, rcd_decode_str);
344}
345
346static const char * const pcie_port_type_strs[] = {
347	"PCIe end point",
348	"legacy PCI end point",
349	"unknown",
350	"unknown",
351	"root port",
352	"upstream switch port",
353	"downstream switch port",
354	"PCIe to PCI/PCI-X bridge",
355	"PCI/PCI-X to PCIe bridge",
356	"root complex integrated endpoint device",
357	"root complex event collector",
358};
359
360static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
361			    const struct acpi_hest_generic_data *gdata)
362{
363	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
364		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
365		       pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
366		       pcie_port_type_strs[pcie->port_type] : "unknown");
367	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
368		printk("%s""version: %d.%d\n", pfx,
369		       pcie->version.major, pcie->version.minor);
370	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
371		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
372		       pcie->command, pcie->status);
373	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
374		const __u8 *p;
375		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
376		       pcie->device_id.segment, pcie->device_id.bus,
377		       pcie->device_id.device, pcie->device_id.function);
378		printk("%s""slot: %d\n", pfx,
379		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
380		printk("%s""secondary_bus: 0x%02x\n", pfx,
381		       pcie->device_id.secondary_bus);
382		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
383		       pcie->device_id.vendor_id, pcie->device_id.device_id);
384		p = pcie->device_id.class_code;
385		printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
386	}
387	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
388		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
389		       pcie->serial_number.lower, pcie->serial_number.upper);
390	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
391		printk(
392	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
393	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
394}
395
396static void cper_print_tstamp(const char *pfx,
397				   struct acpi_hest_generic_data_v300 *gdata)
398{
399	__u8 hour, min, sec, day, mon, year, century, *timestamp;
400
401	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
402		timestamp = (__u8 *)&(gdata->time_stamp);
403		sec       = bcd2bin(timestamp[0]);
404		min       = bcd2bin(timestamp[1]);
405		hour      = bcd2bin(timestamp[2]);
406		day       = bcd2bin(timestamp[4]);
407		mon       = bcd2bin(timestamp[5]);
408		year      = bcd2bin(timestamp[6]);
409		century   = bcd2bin(timestamp[7]);
410
411		printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
412		       (timestamp[3] & 0x1 ? "precise " : "imprecise "),
413		       century, year, mon, day, hour, min, sec);
414	}
415}
416
417static void
418cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
419			   int sec_no)
420{
421	guid_t *sec_type = (guid_t *)gdata->section_type;
422	__u16 severity;
423	char newpfx[64];
424
425	if (acpi_hest_get_version(gdata) >= 3)
426		cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
427
428	severity = gdata->error_severity;
429	printk("%s""Error %d, type: %s\n", pfx, sec_no,
430	       cper_severity_str(severity));
431	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
432		printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
433	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
434		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
435
436	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
437	if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
438		struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
439
440		printk("%s""section_type: general processor error\n", newpfx);
441		if (gdata->error_data_length >= sizeof(*proc_err))
442			cper_print_proc_generic(newpfx, proc_err);
443		else
444			goto err_section_too_small;
445	} else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
446		struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
447
448		printk("%s""section_type: memory error\n", newpfx);
449		if (gdata->error_data_length >=
450		    sizeof(struct cper_sec_mem_err_old))
451			cper_print_mem(newpfx, mem_err,
452				       gdata->error_data_length);
453		else
454			goto err_section_too_small;
455	} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
456		struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
457
458		printk("%s""section_type: PCIe error\n", newpfx);
459		if (gdata->error_data_length >= sizeof(*pcie))
460			cper_print_pcie(newpfx, pcie, gdata);
461		else
462			goto err_section_too_small;
463#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
464	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) {
465		struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
466
467		printk("%ssection_type: ARM processor error\n", newpfx);
468		if (gdata->error_data_length >= sizeof(*arm_err))
469			cper_print_proc_arm(newpfx, arm_err);
470		else
471			goto err_section_too_small;
472#endif
473	} else {
474		const void *err = acpi_hest_get_payload(gdata);
475
476		printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
477		printk("%ssection length: %#x\n", newpfx,
478		       gdata->error_data_length);
479		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
480			       gdata->error_data_length, true);
481	}
482
483	return;
484
485err_section_too_small:
486	pr_err(FW_WARN "error section length is too small\n");
487}
488
489void cper_estatus_print(const char *pfx,
490			const struct acpi_hest_generic_status *estatus)
491{
492	struct acpi_hest_generic_data *gdata;
493	int sec_no = 0;
494	char newpfx[64];
495	__u16 severity;
496
497	severity = estatus->error_severity;
498	if (severity == CPER_SEV_CORRECTED)
499		printk("%s%s\n", pfx,
500		       "It has been corrected by h/w "
501		       "and requires no further action");
502	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
503	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
504
505	apei_estatus_for_each_section(estatus, gdata) {
506		cper_estatus_print_section(newpfx, gdata, sec_no);
507		sec_no++;
508	}
509}
510EXPORT_SYMBOL_GPL(cper_estatus_print);
511
512int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
513{
514	if (estatus->data_length &&
515	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
516		return -EINVAL;
517	if (estatus->raw_data_length &&
518	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
519		return -EINVAL;
520
521	return 0;
522}
523EXPORT_SYMBOL_GPL(cper_estatus_check_header);
524
525int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
526{
527	struct acpi_hest_generic_data *gdata;
528	unsigned int data_len, gedata_len;
529	int rc;
530
531	rc = cper_estatus_check_header(estatus);
532	if (rc)
533		return rc;
534	data_len = estatus->data_length;
535
536	apei_estatus_for_each_section(estatus, gdata) {
537		gedata_len = acpi_hest_get_error_length(gdata);
538		if (gedata_len > data_len - acpi_hest_get_size(gdata))
539			return -EINVAL;
540		data_len -= acpi_hest_get_record_size(gdata);
541	}
542	if (data_len)
543		return -EINVAL;
544
545	return 0;
546}
547EXPORT_SYMBOL_GPL(cper_estatus_check);