Linux Audio

Check our new training course

Loading...
v5.9
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 * UEFI Common Platform Error Record (CPER) support
  4 *
  5 * Copyright (C) 2010, Intel Corp.
  6 *	Author: Huang Ying <ying.huang@intel.com>
  7 *
  8 * CPER is the format used to describe platform hardware error by
  9 * various tables, such as ERST, BERT and HEST etc.
 10 *
 11 * For more information about CPER, please refer to Appendix N of UEFI
 12 * Specification version 2.4.
 
 
 
 
 
 
 
 
 
 
 
 
 
 13 */
 14
 15#include <linux/kernel.h>
 16#include <linux/module.h>
 17#include <linux/time.h>
 18#include <linux/cper.h>
 19#include <linux/dmi.h>
 20#include <linux/acpi.h>
 21#include <linux/pci.h>
 22#include <linux/aer.h>
 23#include <linux/printk.h>
 24#include <linux/bcd.h>
 25#include <acpi/ghes.h>
 26#include <ras/ras_event.h>
 27
 28static char rcd_decode_str[CPER_REC_LEN];
 29
 30/*
 31 * CPER record ID need to be unique even after reboot, because record
 32 * ID is used as index for ERST storage, while CPER records from
 33 * multiple boot may co-exist in ERST.
 34 */
 35u64 cper_next_record_id(void)
 36{
 37	static atomic64_t seq;
 38
 39	if (!atomic64_read(&seq)) {
 40		time64_t time = ktime_get_real_seconds();
 41
 42		/*
 43		 * This code is unlikely to still be needed in year 2106,
 44		 * but just in case, let's use a few more bits for timestamps
 45		 * after y2038 to be sure they keep increasing monotonically
 46		 * for the next few hundred years...
 47		 */
 48		if (time < 0x80000000)
 49			atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
 50		else
 51			atomic64_set(&seq, 0x8000000000000000ull |
 52					   ktime_get_real_seconds() << 24);
 53	}
 54
 55	return atomic64_inc_return(&seq);
 56}
 57EXPORT_SYMBOL_GPL(cper_next_record_id);
 58
 59static const char * const severity_strs[] = {
 60	"recoverable",
 61	"fatal",
 62	"corrected",
 63	"info",
 64};
 65
 66const char *cper_severity_str(unsigned int severity)
 67{
 68	return severity < ARRAY_SIZE(severity_strs) ?
 69		severity_strs[severity] : "unknown";
 70}
 71EXPORT_SYMBOL_GPL(cper_severity_str);
 72
 73/*
 74 * cper_print_bits - print strings for set bits
 75 * @pfx: prefix for each line, including log level and prefix string
 76 * @bits: bit mask
 77 * @strs: string array, indexed by bit position
 78 * @strs_size: size of the string array: @strs
 79 *
 80 * For each set bit in @bits, print the corresponding string in @strs.
 81 * If the output length is longer than 80, multiple line will be
 82 * printed, with @pfx is printed at the beginning of each line.
 83 */
 84void cper_print_bits(const char *pfx, unsigned int bits,
 85		     const char * const strs[], unsigned int strs_size)
 86{
 87	int i, len = 0;
 88	const char *str;
 89	char buf[84];
 90
 91	for (i = 0; i < strs_size; i++) {
 92		if (!(bits & (1U << i)))
 93			continue;
 94		str = strs[i];
 95		if (!str)
 96			continue;
 97		if (len && len + strlen(str) + 2 > 80) {
 98			printk("%s\n", buf);
 99			len = 0;
100		}
101		if (!len)
102			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
103		else
104			len += scnprintf(buf+len, sizeof(buf)-len, ", %s", str);
105	}
106	if (len)
107		printk("%s\n", buf);
108}
109
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
	[2] = "ARM",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
	[3] = "ARM A32/T32",
	[4] = "ARM A64",
};

/* Processor error type names, indexed by bit position. */
const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
144
145static void cper_print_proc_generic(const char *pfx,
146				    const struct cper_sec_proc_generic *proc)
147{
148	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
149		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
150		       proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
151		       proc_type_strs[proc->proc_type] : "unknown");
152	if (proc->validation_bits & CPER_PROC_VALID_ISA)
153		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
154		       proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
155		       proc_isa_strs[proc->proc_isa] : "unknown");
156	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
157		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
158		cper_print_bits(pfx, proc->proc_error_type,
159				cper_proc_error_type_strs,
160				ARRAY_SIZE(cper_proc_error_type_strs));
161	}
162	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
163		printk("%s""operation: %d, %s\n", pfx, proc->operation,
164		       proc->operation < ARRAY_SIZE(proc_op_strs) ?
165		       proc_op_strs[proc->operation] : "unknown");
166	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
167		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
168		cper_print_bits(pfx, proc->flags, proc_flag_strs,
169				ARRAY_SIZE(proc_flag_strs));
170	}
171	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
172		printk("%s""level: %d\n", pfx, proc->level);
173	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
174		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
175	if (proc->validation_bits & CPER_PROC_VALID_ID)
176		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
177	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
178		printk("%s""target_address: 0x%016llx\n",
179		       pfx, proc->target_addr);
180	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
181		printk("%s""requestor_id: 0x%016llx\n",
182		       pfx, proc->requestor_id);
183	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
184		printk("%s""responder_id: 0x%016llx\n",
185		       pfx, proc->responder_id);
186	if (proc->validation_bits & CPER_PROC_VALID_IP)
187		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
188}
189
/* Memory error type names, indexed by the numeric error type. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};

/*
 * cper_mem_err_type_str - translate a memory error type into a string
 * @etype: numeric memory error type to look up
 *
 * Returns the matching entry of mem_err_type_strs[], or "unknown" when
 * @etype lies beyond the end of that table.
 */
const char *cper_mem_err_type_str(unsigned int etype)
{
	if (etype >= ARRAY_SIZE(mem_err_type_strs))
		return "unknown";

	return mem_err_type_strs[etype];
}
214EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
215
216static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
217{
218	u32 len, n;
219
220	if (!msg)
221		return 0;
222
223	n = 0;
224	len = CPER_REC_LEN - 1;
225	if (mem->validation_bits & CPER_MEM_VALID_NODE)
226		n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
227	if (mem->validation_bits & CPER_MEM_VALID_CARD)
228		n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
229	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
230		n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
231	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
232		n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
233	if (mem->validation_bits & CPER_MEM_VALID_BANK)
234		n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
235	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
236		n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
237	if (mem->validation_bits & CPER_MEM_VALID_ROW)
238		n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
239	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
240		n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
241	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
242		n += scnprintf(msg + n, len - n, "bit_position: %d ",
243			       mem->bit_pos);
244	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
245		n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
246			       mem->requestor_id);
247	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
248		n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
249			       mem->responder_id);
250	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
251		scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
252			  mem->target_id);
253
254	msg[n] = '\0';
255	return n;
256}
257
258static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
259{
260	u32 len, n;
261	const char *bank = NULL, *device = NULL;
262
263	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
264		return 0;
265
266	n = 0;
267	len = CPER_REC_LEN - 1;
268	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
269	if (bank && device)
270		n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
271	else
272		n = snprintf(msg, len,
273			     "DIMM location: not present. DMI handle: 0x%.4x ",
274			     mem->mem_dev_handle);
275
276	msg[n] = '\0';
277	return n;
278}
279
280void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
281		       struct cper_mem_err_compact *cmem)
282{
283	cmem->validation_bits = mem->validation_bits;
284	cmem->node = mem->node;
285	cmem->card = mem->card;
286	cmem->module = mem->module;
287	cmem->bank = mem->bank;
288	cmem->device = mem->device;
289	cmem->row = mem->row;
290	cmem->column = mem->column;
291	cmem->bit_pos = mem->bit_pos;
292	cmem->requestor_id = mem->requestor_id;
293	cmem->responder_id = mem->responder_id;
294	cmem->target_id = mem->target_id;
295	cmem->rank = mem->rank;
296	cmem->mem_array_handle = mem->mem_array_handle;
297	cmem->mem_dev_handle = mem->mem_dev_handle;
298}
299
300const char *cper_mem_err_unpack(struct trace_seq *p,
301				struct cper_mem_err_compact *cmem)
302{
303	const char *ret = trace_seq_buffer_ptr(p);
304
305	if (cper_mem_err_location(cmem, rcd_decode_str))
306		trace_seq_printf(p, "%s", rcd_decode_str);
307	if (cper_dimm_err_location(cmem, rcd_decode_str))
308		trace_seq_printf(p, "%s", rcd_decode_str);
309	trace_seq_putc(p, '\0');
310
311	return ret;
312}
313
314static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
315	int len)
316{
317	struct cper_mem_err_compact cmem;
318
319	/* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
320	if (len == sizeof(struct cper_sec_mem_err_old) &&
321	    (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
322		pr_err(FW_WARN "valid bits set for fields beyond structure\n");
323		return;
324	}
325	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
326		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
327	if (mem->validation_bits & CPER_MEM_VALID_PA)
328		printk("%s""physical_address: 0x%016llx\n",
329		       pfx, mem->physical_addr);
330	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
331		printk("%s""physical_address_mask: 0x%016llx\n",
332		       pfx, mem->physical_addr_mask);
333	cper_mem_err_pack(mem, &cmem);
334	if (cper_mem_err_location(&cmem, rcd_decode_str))
335		printk("%s%s\n", pfx, rcd_decode_str);
336	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
337		u8 etype = mem->error_type;
338		printk("%s""error_type: %d, %s\n", pfx, etype,
339		       cper_mem_err_type_str(etype));
340	}
341	if (cper_dimm_err_location(&cmem, rcd_decode_str))
342		printk("%s%s\n", pfx, rcd_decode_str);
343}
344
/* PCIe port type names, indexed by the port_type field. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
358
359static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
360			    const struct acpi_hest_generic_data *gdata)
361{
362	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
363		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
364		       pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
365		       pcie_port_type_strs[pcie->port_type] : "unknown");
366	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
367		printk("%s""version: %d.%d\n", pfx,
368		       pcie->version.major, pcie->version.minor);
369	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
370		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
371		       pcie->command, pcie->status);
372	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
373		const __u8 *p;
374		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
375		       pcie->device_id.segment, pcie->device_id.bus,
376		       pcie->device_id.device, pcie->device_id.function);
377		printk("%s""slot: %d\n", pfx,
378		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
379		printk("%s""secondary_bus: 0x%02x\n", pfx,
380		       pcie->device_id.secondary_bus);
381		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
382		       pcie->device_id.vendor_id, pcie->device_id.device_id);
383		p = pcie->device_id.class_code;
384		printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
385	}
386	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
387		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
388		       pcie->serial_number.lower, pcie->serial_number.upper);
389	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
390		printk(
391	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
392	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
393
394	/* Fatal errors call __ghes_panic() before AER handler prints this */
395	if ((pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) &&
396	    (gdata->error_severity & CPER_SEV_FATAL)) {
397		struct aer_capability_regs *aer;
398
399		aer = (struct aer_capability_regs *)pcie->aer_info;
400		printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
401		       pfx, aer->uncor_status, aer->uncor_mask);
402		printk("%saer_uncor_severity: 0x%08x\n",
403		       pfx, aer->uncor_severity);
404		printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
405		       aer->header_log.dw0, aer->header_log.dw1,
406		       aer->header_log.dw2, aer->header_log.dw3);
407	}
408}
409
410static const char * const fw_err_rec_type_strs[] = {
411	"IPF SAL Error Record",
412	"SOC Firmware Error Record Type1 (Legacy CrashLog Support)",
413	"SOC Firmware Error Record Type2",
414};
415
416static void cper_print_fw_err(const char *pfx,
417			      struct acpi_hest_generic_data *gdata,
418			      const struct cper_sec_fw_err_rec_ref *fw_err)
419{
420	void *buf = acpi_hest_get_payload(gdata);
421	u32 offset, length = gdata->error_data_length;
422
423	printk("%s""Firmware Error Record Type: %s\n", pfx,
424	       fw_err->record_type < ARRAY_SIZE(fw_err_rec_type_strs) ?
425	       fw_err_rec_type_strs[fw_err->record_type] : "unknown");
426	printk("%s""Revision: %d\n", pfx, fw_err->revision);
427
428	/* Record Type based on UEFI 2.7 */
429	if (fw_err->revision == 0) {
430		printk("%s""Record Identifier: %08llx\n", pfx,
431		       fw_err->record_identifier);
432	} else if (fw_err->revision == 2) {
433		printk("%s""Record Identifier: %pUl\n", pfx,
434		       &fw_err->record_identifier_guid);
435	}
436
437	/*
438	 * The FW error record may contain trailing data beyond the
439	 * structure defined by the specification. As the fields
440	 * defined (and hence the offset of any trailing data) vary
441	 * with the revision, set the offset to account for this
442	 * variation.
443	 */
444	if (fw_err->revision == 0) {
445		/* record_identifier_guid not defined */
446		offset = offsetof(struct cper_sec_fw_err_rec_ref,
447				  record_identifier_guid);
448	} else if (fw_err->revision == 1) {
449		/* record_identifier not defined */
450		offset = offsetof(struct cper_sec_fw_err_rec_ref,
451				  record_identifier);
452	} else {
453		offset = sizeof(*fw_err);
454	}
455
456	buf += offset;
457	length -= offset;
458
459	print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, buf, length, true);
460}
461
462static void cper_print_tstamp(const char *pfx,
463				   struct acpi_hest_generic_data_v300 *gdata)
464{
465	__u8 hour, min, sec, day, mon, year, century, *timestamp;
466
467	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
468		timestamp = (__u8 *)&(gdata->time_stamp);
469		sec       = bcd2bin(timestamp[0]);
470		min       = bcd2bin(timestamp[1]);
471		hour      = bcd2bin(timestamp[2]);
472		day       = bcd2bin(timestamp[4]);
473		mon       = bcd2bin(timestamp[5]);
474		year      = bcd2bin(timestamp[6]);
475		century   = bcd2bin(timestamp[7]);
476
477		printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
478		       (timestamp[3] & 0x1 ? "precise " : "imprecise "),
479		       century, year, mon, day, hour, min, sec);
480	}
481}
482
483static void
484cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
485			   int sec_no)
486{
487	guid_t *sec_type = (guid_t *)gdata->section_type;
488	__u16 severity;
489	char newpfx[64];
490
491	if (acpi_hest_get_version(gdata) >= 3)
492		cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
493
494	severity = gdata->error_severity;
495	printk("%s""Error %d, type: %s\n", pfx, sec_no,
496	       cper_severity_str(severity));
497	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
498		printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
499	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
500		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
501
502	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
503	if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
504		struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
505
506		printk("%s""section_type: general processor error\n", newpfx);
507		if (gdata->error_data_length >= sizeof(*proc_err))
508			cper_print_proc_generic(newpfx, proc_err);
509		else
510			goto err_section_too_small;
511	} else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
512		struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
513
514		printk("%s""section_type: memory error\n", newpfx);
515		if (gdata->error_data_length >=
516		    sizeof(struct cper_sec_mem_err_old))
517			cper_print_mem(newpfx, mem_err,
518				       gdata->error_data_length);
519		else
520			goto err_section_too_small;
521	} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
522		struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
523
524		printk("%s""section_type: PCIe error\n", newpfx);
525		if (gdata->error_data_length >= sizeof(*pcie))
526			cper_print_pcie(newpfx, pcie, gdata);
527		else
528			goto err_section_too_small;
529#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
530	} else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
531		struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
532
533		printk("%ssection_type: ARM processor error\n", newpfx);
534		if (gdata->error_data_length >= sizeof(*arm_err))
535			cper_print_proc_arm(newpfx, arm_err);
536		else
537			goto err_section_too_small;
538#endif
539#if defined(CONFIG_UEFI_CPER_X86)
540	} else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
541		struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
542
543		printk("%ssection_type: IA32/X64 processor error\n", newpfx);
544		if (gdata->error_data_length >= sizeof(*ia_err))
545			cper_print_proc_ia(newpfx, ia_err);
546		else
547			goto err_section_too_small;
548#endif
549	} else if (guid_equal(sec_type, &CPER_SEC_FW_ERR_REC_REF)) {
550		struct cper_sec_fw_err_rec_ref *fw_err = acpi_hest_get_payload(gdata);
551
552		printk("%ssection_type: Firmware Error Record Reference\n",
553		       newpfx);
554		/* The minimal FW Error Record contains 16 bytes */
555		if (gdata->error_data_length >= SZ_16)
556			cper_print_fw_err(newpfx, gdata, fw_err);
557		else
558			goto err_section_too_small;
559	} else {
560		const void *err = acpi_hest_get_payload(gdata);
561
562		printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
563		printk("%ssection length: %#x\n", newpfx,
564		       gdata->error_data_length);
565		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
566			       gdata->error_data_length, true);
567	}
568
569	return;
570
571err_section_too_small:
572	pr_err(FW_WARN "error section length is too small\n");
573}
574
575void cper_estatus_print(const char *pfx,
576			const struct acpi_hest_generic_status *estatus)
577{
578	struct acpi_hest_generic_data *gdata;
 
579	int sec_no = 0;
580	char newpfx[64];
581	__u16 severity;
582
583	severity = estatus->error_severity;
584	if (severity == CPER_SEV_CORRECTED)
585		printk("%s%s\n", pfx,
586		       "It has been corrected by h/w "
587		       "and requires no further action");
588	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
589	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
590
591	apei_estatus_for_each_section(estatus, gdata) {
 
 
592		cper_estatus_print_section(newpfx, gdata, sec_no);
 
 
593		sec_no++;
594	}
595}
596EXPORT_SYMBOL_GPL(cper_estatus_print);
597
598int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
599{
600	if (estatus->data_length &&
601	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
602		return -EINVAL;
603	if (estatus->raw_data_length &&
604	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
605		return -EINVAL;
606
607	return 0;
608}
609EXPORT_SYMBOL_GPL(cper_estatus_check_header);
610
611int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
612{
613	struct acpi_hest_generic_data *gdata;
614	unsigned int data_len, record_size;
615	int rc;
616
617	rc = cper_estatus_check_header(estatus);
618	if (rc)
619		return rc;
620
621	data_len = estatus->data_length;
622
623	apei_estatus_for_each_section(estatus, gdata) {
624		if (sizeof(struct acpi_hest_generic_data) > data_len)
 
625			return -EINVAL;
626
627		record_size = acpi_hest_get_record_size(gdata);
628		if (record_size > data_len)
629			return -EINVAL;
630
631		data_len -= record_size;
632	}
633	if (data_len)
634		return -EINVAL;
635
636	return 0;
637}
638EXPORT_SYMBOL_GPL(cper_estatus_check);
v4.6
 
  1/*
  2 * UEFI Common Platform Error Record (CPER) support
  3 *
  4 * Copyright (C) 2010, Intel Corp.
  5 *	Author: Huang Ying <ying.huang@intel.com>
  6 *
  7 * CPER is the format used to describe platform hardware error by
  8 * various tables, such as ERST, BERT and HEST etc.
  9 *
 10 * For more information about CPER, please refer to Appendix N of UEFI
 11 * Specification version 2.4.
 12 *
 13 * This program is free software; you can redistribute it and/or
 14 * modify it under the terms of the GNU General Public License version
 15 * 2 as published by the Free Software Foundation.
 16 *
 17 * This program is distributed in the hope that it will be useful,
 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20 * GNU General Public License for more details.
 21 *
 22 * You should have received a copy of the GNU General Public License
 23 * along with this program; if not, write to the Free Software
 24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 25 */
 26
 27#include <linux/kernel.h>
 28#include <linux/module.h>
 29#include <linux/time.h>
 30#include <linux/cper.h>
 31#include <linux/dmi.h>
 32#include <linux/acpi.h>
 33#include <linux/pci.h>
 34#include <linux/aer.h>
 35
 36#define INDENT_SP	" "
 
 
 37
 38static char rcd_decode_str[CPER_REC_LEN];
 39
 40/*
 41 * CPER record ID need to be unique even after reboot, because record
 42 * ID is used as index for ERST storage, while CPER records from
 43 * multiple boot may co-exist in ERST.
 44 */
 45u64 cper_next_record_id(void)
 46{
 47	static atomic64_t seq;
 48
 49	if (!atomic64_read(&seq))
 50		atomic64_set(&seq, ((u64)get_seconds()) << 32);
 
 
 
 
 
 
 
 
 
 
 
 
 
 51
 52	return atomic64_inc_return(&seq);
 53}
 54EXPORT_SYMBOL_GPL(cper_next_record_id);
 55
 56static const char * const severity_strs[] = {
 57	"recoverable",
 58	"fatal",
 59	"corrected",
 60	"info",
 61};
 62
 63const char *cper_severity_str(unsigned int severity)
 64{
 65	return severity < ARRAY_SIZE(severity_strs) ?
 66		severity_strs[severity] : "unknown";
 67}
 68EXPORT_SYMBOL_GPL(cper_severity_str);
 69
 70/*
 71 * cper_print_bits - print strings for set bits
 72 * @pfx: prefix for each line, including log level and prefix string
 73 * @bits: bit mask
 74 * @strs: string array, indexed by bit position
 75 * @strs_size: size of the string array: @strs
 76 *
 77 * For each set bit in @bits, print the corresponding string in @strs.
 78 * If the output length is longer than 80, multiple line will be
 79 * printed, with @pfx is printed at the beginning of each line.
 80 */
 81void cper_print_bits(const char *pfx, unsigned int bits,
 82		     const char * const strs[], unsigned int strs_size)
 83{
 84	int i, len = 0;
 85	const char *str;
 86	char buf[84];
 87
 88	for (i = 0; i < strs_size; i++) {
 89		if (!(bits & (1U << i)))
 90			continue;
 91		str = strs[i];
 92		if (!str)
 93			continue;
 94		if (len && len + strlen(str) + 2 > 80) {
 95			printk("%s\n", buf);
 96			len = 0;
 97		}
 98		if (!len)
 99			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
100		else
101			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
102	}
103	if (len)
104		printk("%s\n", buf);
105}
106
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, indexed by bit position. */
static const char * const proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
138
139static void cper_print_proc_generic(const char *pfx,
140				    const struct cper_sec_proc_generic *proc)
141{
142	if (proc->validation_bits & CPER_PROC_VALID_TYPE)
143		printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
144		       proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
145		       proc_type_strs[proc->proc_type] : "unknown");
146	if (proc->validation_bits & CPER_PROC_VALID_ISA)
147		printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
148		       proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
149		       proc_isa_strs[proc->proc_isa] : "unknown");
150	if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
151		printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
152		cper_print_bits(pfx, proc->proc_error_type,
153				proc_error_type_strs,
154				ARRAY_SIZE(proc_error_type_strs));
155	}
156	if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
157		printk("%s""operation: %d, %s\n", pfx, proc->operation,
158		       proc->operation < ARRAY_SIZE(proc_op_strs) ?
159		       proc_op_strs[proc->operation] : "unknown");
160	if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
161		printk("%s""flags: 0x%02x\n", pfx, proc->flags);
162		cper_print_bits(pfx, proc->flags, proc_flag_strs,
163				ARRAY_SIZE(proc_flag_strs));
164	}
165	if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
166		printk("%s""level: %d\n", pfx, proc->level);
167	if (proc->validation_bits & CPER_PROC_VALID_VERSION)
168		printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
169	if (proc->validation_bits & CPER_PROC_VALID_ID)
170		printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
171	if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
172		printk("%s""target_address: 0x%016llx\n",
173		       pfx, proc->target_addr);
174	if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
175		printk("%s""requestor_id: 0x%016llx\n",
176		       pfx, proc->requestor_id);
177	if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
178		printk("%s""responder_id: 0x%016llx\n",
179		       pfx, proc->responder_id);
180	if (proc->validation_bits & CPER_PROC_VALID_IP)
181		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
182}
183
/* Memory error type names, indexed by the numeric error type. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};

/*
 * cper_mem_err_type_str - translate a memory error type into a string
 * @etype: numeric memory error type to look up
 *
 * Returns the matching entry of mem_err_type_strs[], or "unknown" when
 * @etype lies beyond the end of that table.
 */
const char *cper_mem_err_type_str(unsigned int etype)
{
	if (etype >= ARRAY_SIZE(mem_err_type_strs))
		return "unknown";

	return mem_err_type_strs[etype];
}
208EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
209
210static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
211{
212	u32 len, n;
213
214	if (!msg)
215		return 0;
216
217	n = 0;
218	len = CPER_REC_LEN - 1;
219	if (mem->validation_bits & CPER_MEM_VALID_NODE)
220		n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
221	if (mem->validation_bits & CPER_MEM_VALID_CARD)
222		n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
223	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
224		n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
225	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
226		n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
227	if (mem->validation_bits & CPER_MEM_VALID_BANK)
228		n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
229	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
230		n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
231	if (mem->validation_bits & CPER_MEM_VALID_ROW)
232		n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
233	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
234		n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
235	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
236		n += scnprintf(msg + n, len - n, "bit_position: %d ",
237			       mem->bit_pos);
238	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
239		n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
240			       mem->requestor_id);
241	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
242		n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
243			       mem->responder_id);
244	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
245		scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
246			  mem->target_id);
247
248	msg[n] = '\0';
249	return n;
250}
251
252static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
253{
254	u32 len, n;
255	const char *bank = NULL, *device = NULL;
256
257	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
258		return 0;
259
260	n = 0;
261	len = CPER_REC_LEN - 1;
262	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
263	if (bank && device)
264		n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
265	else
266		n = snprintf(msg, len,
267			     "DIMM location: not present. DMI handle: 0x%.4x ",
268			     mem->mem_dev_handle);
269
270	msg[n] = '\0';
271	return n;
272}
273
274void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
275		       struct cper_mem_err_compact *cmem)
276{
277	cmem->validation_bits = mem->validation_bits;
278	cmem->node = mem->node;
279	cmem->card = mem->card;
280	cmem->module = mem->module;
281	cmem->bank = mem->bank;
282	cmem->device = mem->device;
283	cmem->row = mem->row;
284	cmem->column = mem->column;
285	cmem->bit_pos = mem->bit_pos;
286	cmem->requestor_id = mem->requestor_id;
287	cmem->responder_id = mem->responder_id;
288	cmem->target_id = mem->target_id;
289	cmem->rank = mem->rank;
290	cmem->mem_array_handle = mem->mem_array_handle;
291	cmem->mem_dev_handle = mem->mem_dev_handle;
292}
293
294const char *cper_mem_err_unpack(struct trace_seq *p,
295				struct cper_mem_err_compact *cmem)
296{
297	const char *ret = trace_seq_buffer_ptr(p);
298
299	if (cper_mem_err_location(cmem, rcd_decode_str))
300		trace_seq_printf(p, "%s", rcd_decode_str);
301	if (cper_dimm_err_location(cmem, rcd_decode_str))
302		trace_seq_printf(p, "%s", rcd_decode_str);
303	trace_seq_putc(p, '\0');
304
305	return ret;
306}
307
308static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
309	int len)
310{
311	struct cper_mem_err_compact cmem;
312
313	/* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
314	if (len == sizeof(struct cper_sec_mem_err_old) &&
315	    (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
316		pr_err(FW_WARN "valid bits set for fields beyond structure\n");
317		return;
318	}
319	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
320		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
321	if (mem->validation_bits & CPER_MEM_VALID_PA)
322		printk("%s""physical_address: 0x%016llx\n",
323		       pfx, mem->physical_addr);
324	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
325		printk("%s""physical_address_mask: 0x%016llx\n",
326		       pfx, mem->physical_addr_mask);
327	cper_mem_err_pack(mem, &cmem);
328	if (cper_mem_err_location(&cmem, rcd_decode_str))
329		printk("%s%s\n", pfx, rcd_decode_str);
330	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
331		u8 etype = mem->error_type;
332		printk("%s""error_type: %d, %s\n", pfx, etype,
333		       cper_mem_err_type_str(etype));
334	}
335	if (cper_dimm_err_location(&cmem, rcd_decode_str))
336		printk("%s%s\n", pfx, rcd_decode_str);
337}
338
/*
 * Human-readable names for the PCIe port type field, indexed by the raw
 * port type value.  Values 2 and 3 have no dedicated name in this table.
 */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
352
353static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
354			    const struct acpi_hest_generic_data *gdata)
355{
356	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
357		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
358		       pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
359		       pcie_port_type_strs[pcie->port_type] : "unknown");
360	if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
361		printk("%s""version: %d.%d\n", pfx,
362		       pcie->version.major, pcie->version.minor);
363	if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
364		printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
365		       pcie->command, pcie->status);
366	if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
367		const __u8 *p;
368		printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
369		       pcie->device_id.segment, pcie->device_id.bus,
370		       pcie->device_id.device, pcie->device_id.function);
371		printk("%s""slot: %d\n", pfx,
372		       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
373		printk("%s""secondary_bus: 0x%02x\n", pfx,
374		       pcie->device_id.secondary_bus);
375		printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
376		       pcie->device_id.vendor_id, pcie->device_id.device_id);
377		p = pcie->device_id.class_code;
378		printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
379	}
380	if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
381		printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
382		       pcie->serial_number.lower, pcie->serial_number.upper);
383	if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
384		printk(
385	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
386	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387}
388
389static void cper_estatus_print_section(
390	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391{
392	uuid_le *sec_type = (uuid_le *)gdata->section_type;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393	__u16 severity;
394	char newpfx[64];
395
 
 
 
396	severity = gdata->error_severity;
397	printk("%s""Error %d, type: %s\n", pfx, sec_no,
398	       cper_severity_str(severity));
399	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
400		printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
401	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
402		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
403
404	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
405	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
406		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
 
407		printk("%s""section_type: general processor error\n", newpfx);
408		if (gdata->error_data_length >= sizeof(*proc_err))
409			cper_print_proc_generic(newpfx, proc_err);
410		else
411			goto err_section_too_small;
412	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
413		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
 
414		printk("%s""section_type: memory error\n", newpfx);
415		if (gdata->error_data_length >=
416		    sizeof(struct cper_sec_mem_err_old))
417			cper_print_mem(newpfx, mem_err,
418				       gdata->error_data_length);
419		else
420			goto err_section_too_small;
421	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
422		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
 
423		printk("%s""section_type: PCIe error\n", newpfx);
424		if (gdata->error_data_length >= sizeof(*pcie))
425			cper_print_pcie(newpfx, pcie, gdata);
426		else
427			goto err_section_too_small;
428	} else
429		printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
431	return;
432
433err_section_too_small:
434	pr_err(FW_WARN "error section length is too small\n");
435}
436
437void cper_estatus_print(const char *pfx,
438			const struct acpi_hest_generic_status *estatus)
439{
440	struct acpi_hest_generic_data *gdata;
441	unsigned int data_len, gedata_len;
442	int sec_no = 0;
443	char newpfx[64];
444	__u16 severity;
445
446	severity = estatus->error_severity;
447	if (severity == CPER_SEV_CORRECTED)
448		printk("%s%s\n", pfx,
449		       "It has been corrected by h/w "
450		       "and requires no further action");
451	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
452	data_len = estatus->data_length;
453	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
454	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
455	while (data_len >= sizeof(*gdata)) {
456		gedata_len = gdata->error_data_length;
457		cper_estatus_print_section(newpfx, gdata, sec_no);
458		data_len -= gedata_len + sizeof(*gdata);
459		gdata = (void *)(gdata + 1) + gedata_len;
460		sec_no++;
461	}
462}
463EXPORT_SYMBOL_GPL(cper_estatus_print);
464
465int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
466{
467	if (estatus->data_length &&
468	    estatus->data_length < sizeof(struct acpi_hest_generic_data))
469		return -EINVAL;
470	if (estatus->raw_data_length &&
471	    estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
472		return -EINVAL;
473
474	return 0;
475}
476EXPORT_SYMBOL_GPL(cper_estatus_check_header);
477
478int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
479{
480	struct acpi_hest_generic_data *gdata;
481	unsigned int data_len, gedata_len;
482	int rc;
483
484	rc = cper_estatus_check_header(estatus);
485	if (rc)
486		return rc;
 
487	data_len = estatus->data_length;
488	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
489	while (data_len >= sizeof(*gdata)) {
490		gedata_len = gdata->error_data_length;
491		if (gedata_len > data_len - sizeof(*gdata))
492			return -EINVAL;
493		data_len -= gedata_len + sizeof(*gdata);
494		gdata = (void *)(gdata + 1) + gedata_len;
 
 
 
 
495	}
496	if (data_len)
497		return -EINVAL;
498
499	return 0;
500}
501EXPORT_SYMBOL_GPL(cper_estatus_check);