Loading...
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * UEFI Common Platform Error Record (CPER) support
4 *
5 * Copyright (C) 2010, Intel Corp.
6 * Author: Huang Ying <ying.huang@intel.com>
7 *
8 * CPER is the format used to describe platform hardware error by
9 * various tables, such as ERST, BERT and HEST etc.
10 *
11 * For more information about CPER, please refer to Appendix N of UEFI
12 * Specification version 2.4.
13 */
14
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/time.h>
18#include <linux/cper.h>
19#include <linux/dmi.h>
20#include <linux/acpi.h>
21#include <linux/pci.h>
22#include <linux/aer.h>
23#include <linux/printk.h>
24#include <linux/bcd.h>
25#include <acpi/ghes.h>
26#include <ras/ras_event.h>
27
/*
 * Scratch buffer of CPER_REC_LEN bytes.  cper_mem_err_unpack() and
 * cper_print_mem() hand it to cper_mem_err_location() and
 * cper_dimm_err_location() to be filled, then print its contents.
 * NOTE(review): no locking is visible in this file - confirm that the
 * callers are serialized.
 */
static char rcd_decode_str[CPER_REC_LEN];
29
30/*
31 * CPER record ID need to be unique even after reboot, because record
32 * ID is used as index for ERST storage, while CPER records from
33 * multiple boot may co-exist in ERST.
34 */
35u64 cper_next_record_id(void)
36{
37 static atomic64_t seq;
38
39 if (!atomic64_read(&seq)) {
40 time64_t time = ktime_get_real_seconds();
41
42 /*
43 * This code is unlikely to still be needed in year 2106,
44 * but just in case, let's use a few more bits for timestamps
45 * after y2038 to be sure they keep increasing monotonically
46 * for the next few hundred years...
47 */
48 if (time < 0x80000000)
49 atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
50 else
51 atomic64_set(&seq, 0x8000000000000000ull |
52 ktime_get_real_seconds() << 24);
53 }
54
55 return atomic64_inc_return(&seq);
56}
57EXPORT_SYMBOL_GPL(cper_next_record_id);
58
/* Severity names, indexed by the CPER severity value. */
static const char * const severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};

/*
 * cper_severity_str - translate a severity value into its name
 * @severity: index into severity_strs
 *
 * Return: the matching name, or "unknown" if @severity is out of range.
 */
const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(severity_strs))
		return "unknown";

	return severity_strs[severity];
}
EXPORT_SYMBOL_GPL(cper_severity_str);
72
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * If the output would be longer than 80 characters, multiple lines are
 * printed, with @pfx printed at the beginning of each line.  Entries of
 * @strs that are NULL are skipped.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/* Never shift by the width of @bits or more: that is undefined. */
	for (i = 0; i < strs_size && i < 8 * sizeof(bits); i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns the number of characters actually
		 * stored, so len can never point past the end of buf even
		 * when @pfx plus the string do not fit.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
109
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
	[2] = "ARM",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
	[3] = "ARM A32/T32",
	[4] = "ARM A64",
};

/* Processor error type names, indexed by bit position. */
const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
144
145static void cper_print_proc_generic(const char *pfx,
146 const struct cper_sec_proc_generic *proc)
147{
148 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
149 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
150 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
151 proc_type_strs[proc->proc_type] : "unknown");
152 if (proc->validation_bits & CPER_PROC_VALID_ISA)
153 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
154 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
155 proc_isa_strs[proc->proc_isa] : "unknown");
156 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
157 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
158 cper_print_bits(pfx, proc->proc_error_type,
159 cper_proc_error_type_strs,
160 ARRAY_SIZE(cper_proc_error_type_strs));
161 }
162 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
163 printk("%s""operation: %d, %s\n", pfx, proc->operation,
164 proc->operation < ARRAY_SIZE(proc_op_strs) ?
165 proc_op_strs[proc->operation] : "unknown");
166 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
167 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
168 cper_print_bits(pfx, proc->flags, proc_flag_strs,
169 ARRAY_SIZE(proc_flag_strs));
170 }
171 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
172 printk("%s""level: %d\n", pfx, proc->level);
173 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
174 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
175 if (proc->validation_bits & CPER_PROC_VALID_ID)
176 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
177 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
178 printk("%s""target_address: 0x%016llx\n",
179 pfx, proc->target_addr);
180 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
181 printk("%s""requestor_id: 0x%016llx\n",
182 pfx, proc->requestor_id);
183 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
184 printk("%s""responder_id: 0x%016llx\n",
185 pfx, proc->responder_id);
186 if (proc->validation_bits & CPER_PROC_VALID_IP)
187 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
188}
189
/* Memory error type names, indexed by the error_type field. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};

/*
 * cper_mem_err_type_str - translate a memory error type into its name
 * @etype: index into mem_err_type_strs
 *
 * Return: the matching name, or "unknown" if @etype is out of range.
 */
const char *cper_mem_err_type_str(unsigned int etype)
{
	if (etype >= ARRAY_SIZE(mem_err_type_strs))
		return "unknown";

	return mem_err_type_strs[etype];
}
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
215
216static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
217{
218 u32 len, n;
219
220 if (!msg)
221 return 0;
222
223 n = 0;
224 len = CPER_REC_LEN - 1;
225 if (mem->validation_bits & CPER_MEM_VALID_NODE)
226 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
227 if (mem->validation_bits & CPER_MEM_VALID_CARD)
228 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
229 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
230 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
231 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
232 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
233 if (mem->validation_bits & CPER_MEM_VALID_BANK)
234 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
235 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
236 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
237 if (mem->validation_bits & CPER_MEM_VALID_ROW)
238 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
239 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
240 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
241 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
242 n += scnprintf(msg + n, len - n, "bit_position: %d ",
243 mem->bit_pos);
244 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
245 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
246 mem->requestor_id);
247 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
248 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
249 mem->responder_id);
250 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
251 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
252 mem->target_id);
253
254 msg[n] = '\0';
255 return n;
256}
257
258static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
259{
260 u32 len, n;
261 const char *bank = NULL, *device = NULL;
262
263 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
264 return 0;
265
266 n = 0;
267 len = CPER_REC_LEN - 1;
268 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
269 if (bank && device)
270 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
271 else
272 n = snprintf(msg, len,
273 "DIMM location: not present. DMI handle: 0x%.4x ",
274 mem->mem_dev_handle);
275
276 msg[n] = '\0';
277 return n;
278}
279
280void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
281 struct cper_mem_err_compact *cmem)
282{
283 cmem->validation_bits = mem->validation_bits;
284 cmem->node = mem->node;
285 cmem->card = mem->card;
286 cmem->module = mem->module;
287 cmem->bank = mem->bank;
288 cmem->device = mem->device;
289 cmem->row = mem->row;
290 cmem->column = mem->column;
291 cmem->bit_pos = mem->bit_pos;
292 cmem->requestor_id = mem->requestor_id;
293 cmem->responder_id = mem->responder_id;
294 cmem->target_id = mem->target_id;
295 cmem->rank = mem->rank;
296 cmem->mem_array_handle = mem->mem_array_handle;
297 cmem->mem_dev_handle = mem->mem_dev_handle;
298}
299
300const char *cper_mem_err_unpack(struct trace_seq *p,
301 struct cper_mem_err_compact *cmem)
302{
303 const char *ret = trace_seq_buffer_ptr(p);
304
305 if (cper_mem_err_location(cmem, rcd_decode_str))
306 trace_seq_printf(p, "%s", rcd_decode_str);
307 if (cper_dimm_err_location(cmem, rcd_decode_str))
308 trace_seq_printf(p, "%s", rcd_decode_str);
309 trace_seq_putc(p, '\0');
310
311 return ret;
312}
313
314static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
315 int len)
316{
317 struct cper_mem_err_compact cmem;
318
319 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
320 if (len == sizeof(struct cper_sec_mem_err_old) &&
321 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
322 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
323 return;
324 }
325 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
326 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
327 if (mem->validation_bits & CPER_MEM_VALID_PA)
328 printk("%s""physical_address: 0x%016llx\n",
329 pfx, mem->physical_addr);
330 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
331 printk("%s""physical_address_mask: 0x%016llx\n",
332 pfx, mem->physical_addr_mask);
333 cper_mem_err_pack(mem, &cmem);
334 if (cper_mem_err_location(&cmem, rcd_decode_str))
335 printk("%s%s\n", pfx, rcd_decode_str);
336 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
337 u8 etype = mem->error_type;
338 printk("%s""error_type: %d, %s\n", pfx, etype,
339 cper_mem_err_type_str(etype));
340 }
341 if (cper_dimm_err_location(&cmem, rcd_decode_str))
342 printk("%s%s\n", pfx, rcd_decode_str);
343}
344
/* PCIe port type names, indexed by the port_type field. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
358
359static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
360 const struct acpi_hest_generic_data *gdata)
361{
362 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
363 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
364 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
365 pcie_port_type_strs[pcie->port_type] : "unknown");
366 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
367 printk("%s""version: %d.%d\n", pfx,
368 pcie->version.major, pcie->version.minor);
369 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
370 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
371 pcie->command, pcie->status);
372 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
373 const __u8 *p;
374 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
375 pcie->device_id.segment, pcie->device_id.bus,
376 pcie->device_id.device, pcie->device_id.function);
377 printk("%s""slot: %d\n", pfx,
378 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
379 printk("%s""secondary_bus: 0x%02x\n", pfx,
380 pcie->device_id.secondary_bus);
381 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
382 pcie->device_id.vendor_id, pcie->device_id.device_id);
383 p = pcie->device_id.class_code;
384 printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
385 }
386 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
387 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
388 pcie->serial_number.lower, pcie->serial_number.upper);
389 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
390 printk(
391 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
392 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
393
394 /* Fatal errors call __ghes_panic() before AER handler prints this */
395 if ((pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) &&
396 (gdata->error_severity & CPER_SEV_FATAL)) {
397 struct aer_capability_regs *aer;
398
399 aer = (struct aer_capability_regs *)pcie->aer_info;
400 printk("%saer_uncor_status: 0x%08x, aer_uncor_mask: 0x%08x\n",
401 pfx, aer->uncor_status, aer->uncor_mask);
402 printk("%saer_uncor_severity: 0x%08x\n",
403 pfx, aer->uncor_severity);
404 printk("%sTLP Header: %08x %08x %08x %08x\n", pfx,
405 aer->header_log.dw0, aer->header_log.dw1,
406 aer->header_log.dw2, aer->header_log.dw3);
407 }
408}
409
410static const char * const fw_err_rec_type_strs[] = {
411 "IPF SAL Error Record",
412 "SOC Firmware Error Record Type1 (Legacy CrashLog Support)",
413 "SOC Firmware Error Record Type2",
414};
415
416static void cper_print_fw_err(const char *pfx,
417 struct acpi_hest_generic_data *gdata,
418 const struct cper_sec_fw_err_rec_ref *fw_err)
419{
420 void *buf = acpi_hest_get_payload(gdata);
421 u32 offset, length = gdata->error_data_length;
422
423 printk("%s""Firmware Error Record Type: %s\n", pfx,
424 fw_err->record_type < ARRAY_SIZE(fw_err_rec_type_strs) ?
425 fw_err_rec_type_strs[fw_err->record_type] : "unknown");
426 printk("%s""Revision: %d\n", pfx, fw_err->revision);
427
428 /* Record Type based on UEFI 2.7 */
429 if (fw_err->revision == 0) {
430 printk("%s""Record Identifier: %08llx\n", pfx,
431 fw_err->record_identifier);
432 } else if (fw_err->revision == 2) {
433 printk("%s""Record Identifier: %pUl\n", pfx,
434 &fw_err->record_identifier_guid);
435 }
436
437 /*
438 * The FW error record may contain trailing data beyond the
439 * structure defined by the specification. As the fields
440 * defined (and hence the offset of any trailing data) vary
441 * with the revision, set the offset to account for this
442 * variation.
443 */
444 if (fw_err->revision == 0) {
445 /* record_identifier_guid not defined */
446 offset = offsetof(struct cper_sec_fw_err_rec_ref,
447 record_identifier_guid);
448 } else if (fw_err->revision == 1) {
449 /* record_identifier not defined */
450 offset = offsetof(struct cper_sec_fw_err_rec_ref,
451 record_identifier);
452 } else {
453 offset = sizeof(*fw_err);
454 }
455
456 buf += offset;
457 length -= offset;
458
459 print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, buf, length, true);
460}
461
462static void cper_print_tstamp(const char *pfx,
463 struct acpi_hest_generic_data_v300 *gdata)
464{
465 __u8 hour, min, sec, day, mon, year, century, *timestamp;
466
467 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
468 timestamp = (__u8 *)&(gdata->time_stamp);
469 sec = bcd2bin(timestamp[0]);
470 min = bcd2bin(timestamp[1]);
471 hour = bcd2bin(timestamp[2]);
472 day = bcd2bin(timestamp[4]);
473 mon = bcd2bin(timestamp[5]);
474 year = bcd2bin(timestamp[6]);
475 century = bcd2bin(timestamp[7]);
476
477 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
478 (timestamp[3] & 0x1 ? "precise " : "imprecise "),
479 century, year, mon, day, hour, min, sec);
480 }
481}
482
483static void
484cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
485 int sec_no)
486{
487 guid_t *sec_type = (guid_t *)gdata->section_type;
488 __u16 severity;
489 char newpfx[64];
490
491 if (acpi_hest_get_version(gdata) >= 3)
492 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
493
494 severity = gdata->error_severity;
495 printk("%s""Error %d, type: %s\n", pfx, sec_no,
496 cper_severity_str(severity));
497 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
498 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
499 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
500 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
501
502 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
503 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
504 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
505
506 printk("%s""section_type: general processor error\n", newpfx);
507 if (gdata->error_data_length >= sizeof(*proc_err))
508 cper_print_proc_generic(newpfx, proc_err);
509 else
510 goto err_section_too_small;
511 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
512 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
513
514 printk("%s""section_type: memory error\n", newpfx);
515 if (gdata->error_data_length >=
516 sizeof(struct cper_sec_mem_err_old))
517 cper_print_mem(newpfx, mem_err,
518 gdata->error_data_length);
519 else
520 goto err_section_too_small;
521 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
522 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
523
524 printk("%s""section_type: PCIe error\n", newpfx);
525 if (gdata->error_data_length >= sizeof(*pcie))
526 cper_print_pcie(newpfx, pcie, gdata);
527 else
528 goto err_section_too_small;
529#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
530 } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
531 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
532
533 printk("%ssection_type: ARM processor error\n", newpfx);
534 if (gdata->error_data_length >= sizeof(*arm_err))
535 cper_print_proc_arm(newpfx, arm_err);
536 else
537 goto err_section_too_small;
538#endif
539#if defined(CONFIG_UEFI_CPER_X86)
540 } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
541 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
542
543 printk("%ssection_type: IA32/X64 processor error\n", newpfx);
544 if (gdata->error_data_length >= sizeof(*ia_err))
545 cper_print_proc_ia(newpfx, ia_err);
546 else
547 goto err_section_too_small;
548#endif
549 } else if (guid_equal(sec_type, &CPER_SEC_FW_ERR_REC_REF)) {
550 struct cper_sec_fw_err_rec_ref *fw_err = acpi_hest_get_payload(gdata);
551
552 printk("%ssection_type: Firmware Error Record Reference\n",
553 newpfx);
554 /* The minimal FW Error Record contains 16 bytes */
555 if (gdata->error_data_length >= SZ_16)
556 cper_print_fw_err(newpfx, gdata, fw_err);
557 else
558 goto err_section_too_small;
559 } else {
560 const void *err = acpi_hest_get_payload(gdata);
561
562 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
563 printk("%ssection length: %#x\n", newpfx,
564 gdata->error_data_length);
565 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
566 gdata->error_data_length, true);
567 }
568
569 return;
570
571err_section_too_small:
572 pr_err(FW_WARN "error section length is too small\n");
573}
574
575void cper_estatus_print(const char *pfx,
576 const struct acpi_hest_generic_status *estatus)
577{
578 struct acpi_hest_generic_data *gdata;
579 int sec_no = 0;
580 char newpfx[64];
581 __u16 severity;
582
583 severity = estatus->error_severity;
584 if (severity == CPER_SEV_CORRECTED)
585 printk("%s%s\n", pfx,
586 "It has been corrected by h/w "
587 "and requires no further action");
588 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
589 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
590
591 apei_estatus_for_each_section(estatus, gdata) {
592 cper_estatus_print_section(newpfx, gdata, sec_no);
593 sec_no++;
594 }
595}
596EXPORT_SYMBOL_GPL(cper_estatus_print);
597
598int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
599{
600 if (estatus->data_length &&
601 estatus->data_length < sizeof(struct acpi_hest_generic_data))
602 return -EINVAL;
603 if (estatus->raw_data_length &&
604 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
605 return -EINVAL;
606
607 return 0;
608}
609EXPORT_SYMBOL_GPL(cper_estatus_check_header);
610
611int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
612{
613 struct acpi_hest_generic_data *gdata;
614 unsigned int data_len, record_size;
615 int rc;
616
617 rc = cper_estatus_check_header(estatus);
618 if (rc)
619 return rc;
620
621 data_len = estatus->data_length;
622
623 apei_estatus_for_each_section(estatus, gdata) {
624 if (sizeof(struct acpi_hest_generic_data) > data_len)
625 return -EINVAL;
626
627 record_size = acpi_hest_get_record_size(gdata);
628 if (record_size > data_len)
629 return -EINVAL;
630
631 data_len -= record_size;
632 }
633 if (data_len)
634 return -EINVAL;
635
636 return 0;
637}
638EXPORT_SYMBOL_GPL(cper_estatus_check);
1/*
2 * UEFI Common Platform Error Record (CPER) support
3 *
4 * Copyright (C) 2010, Intel Corp.
5 * Author: Huang Ying <ying.huang@intel.com>
6 *
7 * CPER is the format used to describe platform hardware error by
8 * various tables, such as ERST, BERT and HEST etc.
9 *
10 * For more information about CPER, please refer to Appendix N of UEFI
11 * Specification version 2.4.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License version
15 * 2 as published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/time.h>
30#include <linux/cper.h>
31#include <linux/dmi.h>
32#include <linux/acpi.h>
33#include <linux/pci.h>
34#include <linux/aer.h>
35
36#define INDENT_SP " "
37
/*
 * Scratch buffer of CPER_REC_LEN bytes.  cper_mem_err_unpack() and
 * cper_print_mem() hand it to cper_mem_err_location() and
 * cper_dimm_err_location() to be filled, then print its contents.
 * NOTE(review): no locking is visible in this file - confirm that the
 * callers are serialized.
 */
static char rcd_decode_str[CPER_REC_LEN];
39
40/*
41 * CPER record ID need to be unique even after reboot, because record
42 * ID is used as index for ERST storage, while CPER records from
43 * multiple boot may co-exist in ERST.
44 */
45u64 cper_next_record_id(void)
46{
47 static atomic64_t seq;
48
49 if (!atomic64_read(&seq))
50 atomic64_set(&seq, ((u64)get_seconds()) << 32);
51
52 return atomic64_inc_return(&seq);
53}
54EXPORT_SYMBOL_GPL(cper_next_record_id);
55
/* Severity names, indexed by the CPER severity value. */
static const char * const severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};

/*
 * cper_severity_str - translate a severity value into its name
 * @severity: index into severity_strs
 *
 * Return: the matching name, or "unknown" if @severity is out of range.
 */
const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(severity_strs))
		return "unknown";

	return severity_strs[severity];
}
EXPORT_SYMBOL_GPL(cper_severity_str);
69
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs.
 * If the output would be longer than 80 characters, multiple lines are
 * printed, with @pfx printed at the beginning of each line.  Entries of
 * @strs that are NULL are skipped.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/* Never shift by the width of @bits or more: that is undefined. */
	for (i = 0; i < strs_size && i < 8 * sizeof(bits); i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		/*
		 * scnprintf() returns the number of characters actually
		 * stored.  snprintf() returns the length the output would
		 * have had, which can move len past the end of buf and make
		 * the next buf+len / sizeof(buf)-len pair invalid.
		 */
		if (!len)
			len = scnprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += scnprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
106
/* Processor type names, indexed by the proc_type field. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};

/* Instruction set names, indexed by the proc_isa field. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};

/* Processor error type names, indexed by bit position. */
static const char * const proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
138
139static void cper_print_proc_generic(const char *pfx,
140 const struct cper_sec_proc_generic *proc)
141{
142 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
143 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
144 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
145 proc_type_strs[proc->proc_type] : "unknown");
146 if (proc->validation_bits & CPER_PROC_VALID_ISA)
147 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
148 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
149 proc_isa_strs[proc->proc_isa] : "unknown");
150 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
151 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
152 cper_print_bits(pfx, proc->proc_error_type,
153 proc_error_type_strs,
154 ARRAY_SIZE(proc_error_type_strs));
155 }
156 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
157 printk("%s""operation: %d, %s\n", pfx, proc->operation,
158 proc->operation < ARRAY_SIZE(proc_op_strs) ?
159 proc_op_strs[proc->operation] : "unknown");
160 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
161 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
162 cper_print_bits(pfx, proc->flags, proc_flag_strs,
163 ARRAY_SIZE(proc_flag_strs));
164 }
165 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
166 printk("%s""level: %d\n", pfx, proc->level);
167 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
168 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
169 if (proc->validation_bits & CPER_PROC_VALID_ID)
170 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
171 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
172 printk("%s""target_address: 0x%016llx\n",
173 pfx, proc->target_addr);
174 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
175 printk("%s""requestor_id: 0x%016llx\n",
176 pfx, proc->requestor_id);
177 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
178 printk("%s""responder_id: 0x%016llx\n",
179 pfx, proc->responder_id);
180 if (proc->validation_bits & CPER_PROC_VALID_IP)
181 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
182}
183
/* Memory error type names, indexed by the error_type field. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};

/*
 * cper_mem_err_type_str - translate a memory error type into its name
 * @etype: index into mem_err_type_strs
 *
 * Return: the matching name, or "unknown" if @etype is out of range.
 */
const char *cper_mem_err_type_str(unsigned int etype)
{
	if (etype >= ARRAY_SIZE(mem_err_type_strs))
		return "unknown";

	return mem_err_type_strs[etype];
}
EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
209
210static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
211{
212 u32 len, n;
213
214 if (!msg)
215 return 0;
216
217 n = 0;
218 len = CPER_REC_LEN - 1;
219 if (mem->validation_bits & CPER_MEM_VALID_NODE)
220 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
221 if (mem->validation_bits & CPER_MEM_VALID_CARD)
222 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
223 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
224 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
225 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
226 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
227 if (mem->validation_bits & CPER_MEM_VALID_BANK)
228 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
229 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
230 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
231 if (mem->validation_bits & CPER_MEM_VALID_ROW)
232 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
233 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
234 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
235 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
236 n += scnprintf(msg + n, len - n, "bit_position: %d ",
237 mem->bit_pos);
238 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
239 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
240 mem->requestor_id);
241 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
242 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
243 mem->responder_id);
244 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
245 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
246 mem->target_id);
247
248 msg[n] = '\0';
249 return n;
250}
251
252static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
253{
254 u32 len, n;
255 const char *bank = NULL, *device = NULL;
256
257 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
258 return 0;
259
260 n = 0;
261 len = CPER_REC_LEN - 1;
262 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
263 if (bank && device)
264 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
265 else
266 n = snprintf(msg, len,
267 "DIMM location: not present. DMI handle: 0x%.4x ",
268 mem->mem_dev_handle);
269
270 msg[n] = '\0';
271 return n;
272}
273
274void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
275 struct cper_mem_err_compact *cmem)
276{
277 cmem->validation_bits = mem->validation_bits;
278 cmem->node = mem->node;
279 cmem->card = mem->card;
280 cmem->module = mem->module;
281 cmem->bank = mem->bank;
282 cmem->device = mem->device;
283 cmem->row = mem->row;
284 cmem->column = mem->column;
285 cmem->bit_pos = mem->bit_pos;
286 cmem->requestor_id = mem->requestor_id;
287 cmem->responder_id = mem->responder_id;
288 cmem->target_id = mem->target_id;
289 cmem->rank = mem->rank;
290 cmem->mem_array_handle = mem->mem_array_handle;
291 cmem->mem_dev_handle = mem->mem_dev_handle;
292}
293
294const char *cper_mem_err_unpack(struct trace_seq *p,
295 struct cper_mem_err_compact *cmem)
296{
297 const char *ret = trace_seq_buffer_ptr(p);
298
299 if (cper_mem_err_location(cmem, rcd_decode_str))
300 trace_seq_printf(p, "%s", rcd_decode_str);
301 if (cper_dimm_err_location(cmem, rcd_decode_str))
302 trace_seq_printf(p, "%s", rcd_decode_str);
303 trace_seq_putc(p, '\0');
304
305 return ret;
306}
307
308static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
309 int len)
310{
311 struct cper_mem_err_compact cmem;
312
313 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
314 if (len == sizeof(struct cper_sec_mem_err_old) &&
315 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
316 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
317 return;
318 }
319 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
320 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
321 if (mem->validation_bits & CPER_MEM_VALID_PA)
322 printk("%s""physical_address: 0x%016llx\n",
323 pfx, mem->physical_addr);
324 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
325 printk("%s""physical_address_mask: 0x%016llx\n",
326 pfx, mem->physical_addr_mask);
327 cper_mem_err_pack(mem, &cmem);
328 if (cper_mem_err_location(&cmem, rcd_decode_str))
329 printk("%s%s\n", pfx, rcd_decode_str);
330 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
331 u8 etype = mem->error_type;
332 printk("%s""error_type: %d, %s\n", pfx, etype,
333 cper_mem_err_type_str(etype));
334 }
335 if (cper_dimm_err_location(&cmem, rcd_decode_str))
336 printk("%s%s\n", pfx, rcd_decode_str);
337}
338
/*
 * Human-readable names for the PCIe port type, indexed by the port_type
 * value of a PCIe error section.  Values past the end of the table are
 * reported as "unknown" by the caller.
 */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
352
353static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
354 const struct acpi_hest_generic_data *gdata)
355{
356 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
357 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
358 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
359 pcie_port_type_strs[pcie->port_type] : "unknown");
360 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
361 printk("%s""version: %d.%d\n", pfx,
362 pcie->version.major, pcie->version.minor);
363 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
364 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
365 pcie->command, pcie->status);
366 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
367 const __u8 *p;
368 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
369 pcie->device_id.segment, pcie->device_id.bus,
370 pcie->device_id.device, pcie->device_id.function);
371 printk("%s""slot: %d\n", pfx,
372 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
373 printk("%s""secondary_bus: 0x%02x\n", pfx,
374 pcie->device_id.secondary_bus);
375 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
376 pcie->device_id.vendor_id, pcie->device_id.device_id);
377 p = pcie->device_id.class_code;
378 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
379 }
380 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
381 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
382 pcie->serial_number.lower, pcie->serial_number.upper);
383 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
384 printk(
385 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
386 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
387}
388
389static void cper_estatus_print_section(
390 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
391{
392 uuid_le *sec_type = (uuid_le *)gdata->section_type;
393 __u16 severity;
394 char newpfx[64];
395
396 severity = gdata->error_severity;
397 printk("%s""Error %d, type: %s\n", pfx, sec_no,
398 cper_severity_str(severity));
399 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
400 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
401 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
402 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
403
404 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
405 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
406 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
407 printk("%s""section_type: general processor error\n", newpfx);
408 if (gdata->error_data_length >= sizeof(*proc_err))
409 cper_print_proc_generic(newpfx, proc_err);
410 else
411 goto err_section_too_small;
412 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
413 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
414 printk("%s""section_type: memory error\n", newpfx);
415 if (gdata->error_data_length >=
416 sizeof(struct cper_sec_mem_err_old))
417 cper_print_mem(newpfx, mem_err,
418 gdata->error_data_length);
419 else
420 goto err_section_too_small;
421 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
422 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
423 printk("%s""section_type: PCIe error\n", newpfx);
424 if (gdata->error_data_length >= sizeof(*pcie))
425 cper_print_pcie(newpfx, pcie, gdata);
426 else
427 goto err_section_too_small;
428 } else
429 printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
430
431 return;
432
433err_section_too_small:
434 pr_err(FW_WARN "error section length is too small\n");
435}
436
437void cper_estatus_print(const char *pfx,
438 const struct acpi_hest_generic_status *estatus)
439{
440 struct acpi_hest_generic_data *gdata;
441 unsigned int data_len, gedata_len;
442 int sec_no = 0;
443 char newpfx[64];
444 __u16 severity;
445
446 severity = estatus->error_severity;
447 if (severity == CPER_SEV_CORRECTED)
448 printk("%s%s\n", pfx,
449 "It has been corrected by h/w "
450 "and requires no further action");
451 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
452 data_len = estatus->data_length;
453 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
454 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
455 while (data_len >= sizeof(*gdata)) {
456 gedata_len = gdata->error_data_length;
457 cper_estatus_print_section(newpfx, gdata, sec_no);
458 data_len -= gedata_len + sizeof(*gdata);
459 gdata = (void *)(gdata + 1) + gedata_len;
460 sec_no++;
461 }
462}
463EXPORT_SYMBOL_GPL(cper_estatus_print);
464
465int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
466{
467 if (estatus->data_length &&
468 estatus->data_length < sizeof(struct acpi_hest_generic_data))
469 return -EINVAL;
470 if (estatus->raw_data_length &&
471 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
472 return -EINVAL;
473
474 return 0;
475}
476EXPORT_SYMBOL_GPL(cper_estatus_check_header);
477
478int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
479{
480 struct acpi_hest_generic_data *gdata;
481 unsigned int data_len, gedata_len;
482 int rc;
483
484 rc = cper_estatus_check_header(estatus);
485 if (rc)
486 return rc;
487 data_len = estatus->data_length;
488 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
489 while (data_len >= sizeof(*gdata)) {
490 gedata_len = gdata->error_data_length;
491 if (gedata_len > data_len - sizeof(*gdata))
492 return -EINVAL;
493 data_len -= gedata_len + sizeof(*gdata);
494 gdata = (void *)(gdata + 1) + gedata_len;
495 }
496 if (data_len)
497 return -EINVAL;
498
499 return 0;
500}
501EXPORT_SYMBOL_GPL(cper_estatus_check);