Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * UEFI Common Platform Error Record (CPER) support
  4 *
  5 * Copyright (C) 2010, Intel Corp.
  6 *	Author: Huang Ying <ying.huang@intel.com>
  7 *
  8 * CPER is the format used to describe platform hardware error by
  9 * various tables, such as ERST, BERT and HEST etc.
 10 *
 11 * For more information about CPER, please refer to Appendix N of UEFI
 12 * Specification version 2.4.
 13 */
 14
 15#include <linux/kernel.h>
 16#include <linux/module.h>
 17#include <linux/time.h>
 18#include <linux/cper.h>
 19#include <linux/dmi.h>
 20#include <linux/acpi.h>
 21#include <linux/pci.h>
 22#include <linux/aer.h>
 23#include <linux/printk.h>
 24#include <linux/bcd.h>
 25#include <acpi/ghes.h>
 26#include <ras/ras_event.h>
 27
/*
 * Scratch buffer that cper_mem_err_location() and cper_dimm_err_location()
 * format into when called from cper_mem_err_unpack() and cper_print_mem().
 * NOTE(review): it is a single file-scope buffer and no locking around it is
 * visible here -- confirm that callers serialize access.
 */
static char rcd_decode_str[CPER_REC_LEN];
 29
 30/*
 31 * CPER record ID need to be unique even after reboot, because record
 32 * ID is used as index for ERST storage, while CPER records from
 33 * multiple boot may co-exist in ERST.
 34 */
 35u64 cper_next_record_id(void)
 36{
 37	static atomic64_t seq;
 38
 39	if (!atomic64_read(&seq)) {
 40		time64_t time = ktime_get_real_seconds();
 41
 42		/*
 43		 * This code is unlikely to still be needed in year 2106,
 44		 * but just in case, let's use a few more bits for timestamps
 45		 * after y2038 to be sure they keep increasing monotonically
 46		 * for the next few hundred years...
 47		 */
 48		if (time < 0x80000000)
 49			atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
 50		else
 51			atomic64_set(&seq, 0x8000000000000000ull |
 52					   ktime_get_real_seconds() << 24);
 53	}
 54
 55	return atomic64_inc_return(&seq);
 56}
 57EXPORT_SYMBOL_GPL(cper_next_record_id);
 58
 59static const char * const severity_strs[] = {
 60	"recoverable",
 61	"fatal",
 62	"corrected",
 63	"info",
 64};
 65
 66const char *cper_severity_str(unsigned int severity)
 67{
 68	return severity < ARRAY_SIZE(severity_strs) ?
 69		severity_strs[severity] : "unknown";
 70}
 71EXPORT_SYMBOL_GPL(cper_severity_str);
 72
 73/*
 74 * cper_print_bits - print strings for set bits
 75 * @pfx: prefix for each line, including log level and prefix string
 76 * @bits: bit mask
 77 * @strs: string array, indexed by bit position
 78 * @strs_size: size of the string array: @strs
 79 *
 80 * For each set bit in @bits, print the corresponding string in @strs.
 81 * If the output length is longer than 80, multiple line will be
 82 * printed, with @pfx is printed at the beginning of each line.
 83 */
 84void cper_print_bits(const char *pfx, unsigned int bits,
 85		     const char * const strs[], unsigned int strs_size)
 86{
 87	int i, len = 0;
 88	const char *str;
 89	char buf[84];
 90
 91	for (i = 0; i < strs_size; i++) {
 92		if (!(bits & (1U << i)))
 93			continue;
 94		str = strs[i];
 95		if (!str)
 96			continue;
 97		if (len && len + strlen(str) + 2 > 80) {
 98			printk("%s\n", buf);
 99			len = 0;
100		}
101		if (!len)
102			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
103		else
104			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
105	}
106	if (len)
107		printk("%s\n", buf);
108}
109
/* Processor type names, indexed by the proc_type field value. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
	[2] = "ARM",
};

/* Instruction set names, indexed by the proc_isa field value. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
	[3] = "ARM A32/T32",
	[4] = "ARM A64",
};

/* Processor error type names, indexed by bit position. */
const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field value. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
144
145static void cper_print_proc_generic(const char *pfx,
146				    const struct cper_sec_proc_generic *proc)
147{
148	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
149		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
150		       proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
151		       proc_type_strs[proc->proc_type] : "unknown");
152	if (proc->validation_bits & CPER_PROC_VALID_ISA)
153		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
154		       proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
155		       proc_isa_strs[proc->proc_isa] : "unknown");
156	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
157		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
158		cper_print_bits(pfx, proc->proc_error_type,
159				cper_proc_error_type_strs,
160				ARRAY_SIZE(cper_proc_error_type_strs));
161	}
162	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
163		printk("%s""operation: %d, %s\n", pfx, proc->operation,
164		       proc->operation < ARRAY_SIZE(proc_op_strs) ?
165		       proc_op_strs[proc->operation] : "unknown");
166	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
167		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
168		cper_print_bits(pfx, proc->flags, proc_flag_strs,
169				ARRAY_SIZE(proc_flag_strs));
170	}
171	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
172		printk("%s""level: %d\n", pfx, proc->level);
173	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
174		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
175	if (proc->validation_bits & CPER_PROC_VALID_ID)
176		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
177	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
178		printk("%s""target_address: 0x%016llx\n",
179		       pfx, proc->target_addr);
180	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
181		printk("%s""requestor_id: 0x%016llx\n",
182		       pfx, proc->requestor_id);
183	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
184		printk("%s""responder_id: 0x%016llx\n",
185		       pfx, proc->responder_id);
186	if (proc->validation_bits & CPER_PROC_VALID_IP)
187		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
188}
189
/* Memory error type names, indexed by the numeric error type. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};
208
209const char *cper_mem_err_type_str(unsigned int etype)
210{
211	return etype < ARRAY_SIZE(mem_err_type_strs) ?
212		mem_err_type_strs[etype] : "unknown";
213}
214EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
215
216static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
217{
218	u32 len, n;
219
220	if (!msg)
221		return 0;
222
223	n = 0;
224	len = CPER_REC_LEN - 1;
225	if (mem->validation_bits & CPER_MEM_VALID_NODE)
226		n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
227	if (mem->validation_bits & CPER_MEM_VALID_CARD)
228		n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
229	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
230		n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
231	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
232		n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
233	if (mem->validation_bits & CPER_MEM_VALID_BANK)
234		n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
235	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
236		n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
237	if (mem->validation_bits & CPER_MEM_VALID_ROW)
238		n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
239	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
240		n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
241	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
242		n += scnprintf(msg + n, len - n, "bit_position: %d ",
243			       mem->bit_pos);
244	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
245		n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
246			       mem->requestor_id);
247	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
248		n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
249			       mem->responder_id);
250	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
251		scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
252			  mem->target_id);
253
254	msg[n] = '\0';
255	return n;
256}
257
258static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
259{
260	u32 len, n;
261	const char *bank = NULL, *device = NULL;
262
263	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
264		return 0;
265
266	n = 0;
267	len = CPER_REC_LEN - 1;
268	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
269	if (bank && device)
270		n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
271	else
272		n = snprintf(msg, len,
273			     "DIMM location: not present. DMI handle: 0x%.4x ",
274			     mem->mem_dev_handle);
275
276	msg[n] = '\0';
277	return n;
278}
279
280void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
281		       struct cper_mem_err_compact *cmem)
282{
283	cmem->validation_bits = mem->validation_bits;
284	cmem->node = mem->node;
285	cmem->card = mem->card;
286	cmem->module = mem->module;
287	cmem->bank = mem->bank;
288	cmem->device = mem->device;
289	cmem->row = mem->row;
290	cmem->column = mem->column;
291	cmem->bit_pos = mem->bit_pos;
292	cmem->requestor_id = mem->requestor_id;
293	cmem->responder_id = mem->responder_id;
294	cmem->target_id = mem->target_id;
295	cmem->rank = mem->rank;
296	cmem->mem_array_handle = mem->mem_array_handle;
297	cmem->mem_dev_handle = mem->mem_dev_handle;
298}
299
300const char *cper_mem_err_unpack(struct trace_seq *p,
301				struct cper_mem_err_compact *cmem)
302{
303	const char *ret = trace_seq_buffer_ptr(p);
304
305	if (cper_mem_err_location(cmem, rcd_decode_str))
306		trace_seq_printf(p, "%s", rcd_decode_str);
307	if (cper_dimm_err_location(cmem, rcd_decode_str))
308		trace_seq_printf(p, "%s", rcd_decode_str);
309	trace_seq_putc(p, '\0');
310
311	return ret;
312}
313
314static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
315	int len)
316{
317	struct cper_mem_err_compact cmem;
318
319	/* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
320	if (len == sizeof(struct cper_sec_mem_err_old) &&
321	    (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
322		pr_err(FW_WARN "valid bits set for fields beyond structure\n");
323		return;
324	}
325	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
326		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
327	if (mem->validation_bits & CPER_MEM_VALID_PA)
328		printk("%s""physical_address: 0x%016llx\n",
329		       pfx, mem->physical_addr);
330	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
331		printk("%s""physical_address_mask: 0x%016llx\n",
332		       pfx, mem->physical_addr_mask);
333	cper_mem_err_pack(mem, &cmem);
334	if (cper_mem_err_location(&cmem, rcd_decode_str))
335		printk("%s%s\n", pfx, rcd_decode_str);
336	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
337		u8 etype = mem->error_type;
338		printk("%s""error_type: %d, %s\n", pfx, etype,
339		       cper_mem_err_type_str(etype));
340	}
341	if (cper_dimm_err_location(&cmem, rcd_decode_str))
342		printk("%s%s\n", pfx, rcd_decode_str);
343}
344
/* PCIe port type names, indexed by the port_type field value. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
358
359static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
360			    const struct acpi_hest_generic_data *gdata)
361{
362	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
363		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
364		       pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
365		       pcie_port_type_strs[pcie->port_type] : "unknown");
366	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
367		printk("%s""version: %d.%d\n", pfx,
368		       pcie->version.major, pcie->version.minor);
369	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
370		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
371		       pcie->command, pcie->status);
372	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
373		const __u8 *p;
374		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
375		       pcie->device_id.segment, pcie->device_id.bus,
376		       pcie->device_id.device, pcie->device_id.function);
377		printk("%s""slot: %d\n", pfx,
378		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
379		printk("%s""secondary_bus: 0x%02x\n", pfx,
380		       pcie->device_id.secondary_bus);
381		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
382		       pcie->device_id.vendor_id, pcie->device_id.device_id);
383		p = pcie->device_id.class_code;
384		printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
385	}
386	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
387		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
388		       pcie->serial_number.lower, pcie->serial_number.upper);
389	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
390		printk(
391	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
392	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
393
394	/* Fatal errors call __ghes_panic() before AER handler prints this */
395	if ((pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) &&
396	    (gdata->error_severity & CPER_SEV_FATAL)) {
397		struct aer_capability_regs *aer;
398
399		aer = (struct aer_capability_regs *)pcie->aer_info;
400		printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
401		       pfx, aer->uncor_status, aer->uncor_mask);
402		printk("%saer_uncor_severity: 0x%08x\n",
403		       pfx, aer->uncor_severity);
404		printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
405		       aer->header_log.dw0, aer->header_log.dw1,
406		       aer->header_log.dw2, aer->header_log.dw3);
407	}
408}
409
410static void cper_print_tstamp(const char *pfx,
411				   struct acpi_hest_generic_data_v300 *gdata)
412{
413	__u8 hour, min, sec, day, mon, year, century, *timestamp;
414
415	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
416		timestamp = (__u8 *)&(gdata->time_stamp);
417		sec       = bcd2bin(timestamp[0]);
418		min       = bcd2bin(timestamp[1]);
419		hour      = bcd2bin(timestamp[2]);
420		day       = bcd2bin(timestamp[4]);
421		mon       = bcd2bin(timestamp[5]);
422		year      = bcd2bin(timestamp[6]);
423		century   = bcd2bin(timestamp[7]);
424
425		printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
426		       (timestamp[3] & 0x1 ? "precise " : "imprecise "),
427		       century, year, mon, day, hour, min, sec);
428	}
429}
430
431static void
432cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
433			   int sec_no)
434{
435	guid_t *sec_type = (guid_t *)gdata->section_type;
436	__u16 severity;
437	char newpfx[64];
438
439	if (acpi_hest_get_version(gdata) >= 3)
440		cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
441
442	severity = gdata->error_severity;
443	printk("%s""Error %d, type: %s\n", pfx, sec_no,
444	       cper_severity_str(severity));
445	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
446		printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
447	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
448		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
449
450	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
451	if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
452		struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
453
454		printk("%s""section_type: general processor error\n", newpfx);
455		if (gdata->error_data_length >= sizeof(*proc_err))
456			cper_print_proc_generic(newpfx, proc_err);
457		else
458			goto err_section_too_small;
459	} else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
460		struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
461
462		printk("%s""section_type: memory error\n", newpfx);
463		if (gdata->error_data_length >=
464		    sizeof(struct cper_sec_mem_err_old))
465			cper_print_mem(newpfx, mem_err,
466				       gdata->error_data_length);
467		else
468			goto err_section_too_small;
469	} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
470		struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
471
472		printk("%s""section_type: PCIe error\n", newpfx);
473		if (gdata->error_data_length >= sizeof(*pcie))
474			cper_print_pcie(newpfx, pcie, gdata);
475		else
476			goto err_section_too_small;
477#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
478	} else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
479		struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
480
481		printk("%ssection_type: ARM processor error\n", newpfx);
482		if (gdata->error_data_length >= sizeof(*arm_err))
483			cper_print_proc_arm(newpfx, arm_err);
484		else
485			goto err_section_too_small;
486#endif
487#if defined(CONFIG_UEFI_CPER_X86)
488	} else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
489		struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
490
491		printk("%ssection_type: IA32/X64 processor error\n", newpfx);
492		if (gdata->error_data_length >= sizeof(*ia_err))
493			cper_print_proc_ia(newpfx, ia_err);
494		else
495			goto err_section_too_small;
496#endif
497	} else {
498		const void *err = acpi_hest_get_payload(gdata);
499
500		printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
501		printk("%ssection length: %#x\n", newpfx,
502		       gdata->error_data_length);
503		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
504			       gdata->error_data_length, true);
505	}
506
507	return;
508
509err_section_too_small:
510	pr_err(FW_WARN "error section length is too small\n");
511}
512
513void cper_estatus_print(const char *pfx,
514			const struct acpi_hest_generic_status *estatus)
515{
516	struct acpi_hest_generic_data *gdata;
517	int sec_no = 0;
518	char newpfx[64];
519	__u16 severity;
520
521	severity = estatus->error_severity;
522	if (severity == CPER_SEV_CORRECTED)
523		printk("%s%s\n", pfx,
524		       "It has been corrected by h/w "
525		       "and requires no further action");
526	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
527	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
528
529	apei_estatus_for_each_section(estatus, gdata) {
530		cper_estatus_print_section(newpfx, gdata, sec_no);
531		sec_no++;
532	}
533}
534EXPORT_SYMBOL_GPL(cper_estatus_print);
535
536int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
537{
538	if (estatus->data_length &&
539	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
540		return -EINVAL;
541	if (estatus->raw_data_length &&
542	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
543		return -EINVAL;
544
545	return 0;
546}
547EXPORT_SYMBOL_GPL(cper_estatus_check_header);
548
549int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
550{
551	struct acpi_hest_generic_data *gdata;
552	unsigned int data_len, record_size;
553	int rc;
554
555	rc = cper_estatus_check_header(estatus);
556	if (rc)
557		return rc;
558
559	data_len = estatus->data_length;
560
561	apei_estatus_for_each_section(estatus, gdata) {
562		if (sizeof(struct acpi_hest_generic_data) > data_len)
563			return -EINVAL;
564
565		record_size = acpi_hest_get_record_size(gdata);
566		if (record_size > data_len)
567			return -EINVAL;
568
569		data_len -= record_size;
570	}
571	if (data_len)
572		return -EINVAL;
573
574	return 0;
575}
576EXPORT_SYMBOL_GPL(cper_estatus_check);