Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Ampere Computing SoC's SMpro Error Monitoring Driver
  4 *
  5 * Copyright (c) 2022, Ampere Computing LLC
  6 *
  7 */
  8
  9#include <linux/i2c.h>
 10#include <linux/mod_devicetable.h>
 11#include <linux/module.h>
 12#include <linux/platform_device.h>
 13#include <linux/regmap.h>
 14
 15/* GPI RAS Error Registers */
 16#define GPI_RAS_ERR		0x7E
 17
 18/* Core and L2C Error Registers */
 19#define CORE_CE_ERR_CNT		0x80
 20#define CORE_CE_ERR_LEN		0x81
 21#define CORE_CE_ERR_DATA	0x82
 22#define CORE_UE_ERR_CNT		0x83
 23#define CORE_UE_ERR_LEN		0x84
 24#define CORE_UE_ERR_DATA	0x85
 25
 26/* Memory Error Registers */
 27#define MEM_CE_ERR_CNT		0x90
 28#define MEM_CE_ERR_LEN		0x91
 29#define MEM_CE_ERR_DATA		0x92
 30#define MEM_UE_ERR_CNT		0x93
 31#define MEM_UE_ERR_LEN		0x94
 32#define MEM_UE_ERR_DATA		0x95
 33
 34/* RAS Error/Warning Registers */
 35#define ERR_SMPRO_TYPE		0xA0
 36#define ERR_PMPRO_TYPE		0xA1
 37#define ERR_SMPRO_INFO_LO	0xA2
 38#define ERR_SMPRO_INFO_HI	0xA3
 39#define ERR_SMPRO_DATA_LO	0xA4
 40#define ERR_SMPRO_DATA_HI	0xA5
 41#define WARN_SMPRO_INFO_LO	0xAA
 42#define WARN_SMPRO_INFO_HI	0xAB
 43#define ERR_PMPRO_INFO_LO	0xA6
 44#define ERR_PMPRO_INFO_HI	0xA7
 45#define ERR_PMPRO_DATA_LO	0xA8
 46#define ERR_PMPRO_DATA_HI	0xA9
 47#define WARN_PMPRO_INFO_LO	0xAC
 48#define WARN_PMPRO_INFO_HI	0xAD
 49
 50/* PCIE Error Registers */
 51#define PCIE_CE_ERR_CNT		0xC0
 52#define PCIE_CE_ERR_LEN		0xC1
 53#define PCIE_CE_ERR_DATA	0xC2
 54#define PCIE_UE_ERR_CNT		0xC3
 55#define PCIE_UE_ERR_LEN		0xC4
 56#define PCIE_UE_ERR_DATA	0xC5
 57
 58/* Other Error Registers */
 59#define OTHER_CE_ERR_CNT	0xD0
 60#define OTHER_CE_ERR_LEN	0xD1
 61#define OTHER_CE_ERR_DATA	0xD2
 62#define OTHER_UE_ERR_CNT	0xD8
 63#define OTHER_UE_ERR_LEN	0xD9
 64#define OTHER_UE_ERR_DATA	0xDA
 65
 66/* Event Data Registers */
 67#define VRD_WARN_FAULT_EVENT_DATA	0x78
 68#define VRD_HOT_EVENT_DATA		0x79
 69#define DIMM_HOT_EVENT_DATA		0x7A
 70
 71#define MAX_READ_BLOCK_LENGTH	48
 72
 73#define RAS_SMPRO_ERR		0
 74#define RAS_PMPRO_ERR		1
 75
 76enum RAS_48BYTES_ERR_TYPES {
 77	CORE_CE_ERR,
 78	CORE_UE_ERR,
 79	MEM_CE_ERR,
 80	MEM_UE_ERR,
 81	PCIE_CE_ERR,
 82	PCIE_UE_ERR,
 83	OTHER_CE_ERR,
 84	OTHER_UE_ERR,
 85	NUM_48BYTES_ERR_TYPE,
 86};
 87
 88struct smpro_error_hdr {
 89	u8 count;	/* Number of the RAS errors */
 90	u8 len;		/* Number of data bytes */
 91	u8 data;	/* Start of 48-byte data */
 92	u8 max_cnt;	/* Max num of errors */
 93};
 94
 95/*
 96 * Included Address of registers to get Count, Length of data and Data
 97 * of the 48 bytes error data
 98 */
 99static struct smpro_error_hdr smpro_error_table[] = {
100	[CORE_CE_ERR] = {
101		.count = CORE_CE_ERR_CNT,
102		.len = CORE_CE_ERR_LEN,
103		.data = CORE_CE_ERR_DATA,
104		.max_cnt = 32
105	},
106	[CORE_UE_ERR] = {
107		.count = CORE_UE_ERR_CNT,
108		.len = CORE_UE_ERR_LEN,
109		.data = CORE_UE_ERR_DATA,
110		.max_cnt = 32
111	},
112	[MEM_CE_ERR] = {
113		.count = MEM_CE_ERR_CNT,
114		.len = MEM_CE_ERR_LEN,
115		.data = MEM_CE_ERR_DATA,
116		.max_cnt = 16
117	},
118	[MEM_UE_ERR] = {
119		.count = MEM_UE_ERR_CNT,
120		.len = MEM_UE_ERR_LEN,
121		.data = MEM_UE_ERR_DATA,
122		.max_cnt = 16
123	},
124	[PCIE_CE_ERR] = {
125		.count = PCIE_CE_ERR_CNT,
126		.len = PCIE_CE_ERR_LEN,
127		.data = PCIE_CE_ERR_DATA,
128		.max_cnt = 96
129	},
130	[PCIE_UE_ERR] = {
131		.count = PCIE_UE_ERR_CNT,
132		.len = PCIE_UE_ERR_LEN,
133		.data = PCIE_UE_ERR_DATA,
134		.max_cnt = 96
135	},
136	[OTHER_CE_ERR] = {
137		.count = OTHER_CE_ERR_CNT,
138		.len = OTHER_CE_ERR_LEN,
139		.data = OTHER_CE_ERR_DATA,
140		.max_cnt = 8
141	},
142	[OTHER_UE_ERR] = {
143		.count = OTHER_UE_ERR_CNT,
144		.len = OTHER_UE_ERR_LEN,
145		.data = OTHER_UE_ERR_DATA,
146		.max_cnt = 8
147	},
148};
149
150/*
151 * List of SCP registers which are used to get
152 * one type of RAS Internal errors.
153 */
154struct smpro_int_error_hdr {
155	u8 type;
156	u8 info_l;
157	u8 info_h;
158	u8 data_l;
159	u8 data_h;
160	u8 warn_l;
161	u8 warn_h;
162};
163
164static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = {
165	[RAS_SMPRO_ERR] = {
166		.type = ERR_SMPRO_TYPE,
167		.info_l = ERR_SMPRO_INFO_LO,
168		.info_h = ERR_SMPRO_INFO_HI,
169		.data_l = ERR_SMPRO_DATA_LO,
170		.data_h = ERR_SMPRO_DATA_HI,
171		.warn_l = WARN_SMPRO_INFO_LO,
172		.warn_h = WARN_SMPRO_INFO_HI,
173	},
174	[RAS_PMPRO_ERR] = {
175		.type = ERR_PMPRO_TYPE,
176		.info_l = ERR_PMPRO_INFO_LO,
177		.info_h = ERR_PMPRO_INFO_HI,
178		.data_l = ERR_PMPRO_DATA_LO,
179		.data_h = ERR_PMPRO_DATA_HI,
180		.warn_l = WARN_PMPRO_INFO_LO,
181		.warn_h = WARN_PMPRO_INFO_HI,
182	},
183};
184
/**
 * struct smpro_errmon - per-device driver state
 * @regmap: register map looked up from the parent device at probe time;
 *          used for every register access in this driver
 */
struct smpro_errmon {
	struct regmap *regmap;
};
188
/*
 * Event sources exposed through sysfs. Each value is an index into
 * smpro_event_table[]; NUM_EVENTS_TYPE is the number of sources.
 */
enum EVENT_TYPES {
	VRD_WARN_FAULT_EVENT = 0,
	VRD_HOT_EVENT,
	DIMM_HOT_EVENT,
	NUM_EVENTS_TYPE,
};
195
196/* Included Address of event source and data registers */
197static u8 smpro_event_table[NUM_EVENTS_TYPE] = {
198	VRD_WARN_FAULT_EVENT_DATA,
199	VRD_HOT_EVENT_DATA,
200	DIMM_HOT_EVENT_DATA,
201};
202
203static ssize_t smpro_event_data_read(struct device *dev,
204				     struct device_attribute *da, char *buf,
205				     int channel)
206{
207	struct smpro_errmon *errmon = dev_get_drvdata(dev);
208	s32 event_data;
209	int ret;
210
211	ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data);
212	if (ret)
213		return ret;
214	/* Clear event after read */
215	if (event_data != 0)
216		regmap_write(errmon->regmap, smpro_event_table[channel], event_data);
217
218	return sysfs_emit(buf, "%04x\n", event_data);
219}
220
221static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da,
222					char *buf, int channel)
223{
224	struct smpro_errmon *errmon = dev_get_drvdata(dev);
225	struct smpro_error_hdr *err_info;
226	s32 err_count;
227	int ret;
228
229	err_info = &smpro_error_table[channel];
230
231	ret = regmap_read(errmon->regmap, err_info->count, &err_count);
232	if (ret)
233		return ret;
234
235	/* Bit 8 indicates the overflow status */
236	return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 1 : 0);
237}
238
239static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da,
240				     char *buf, int channel)
241{
242	struct smpro_errmon *errmon = dev_get_drvdata(dev);
243	unsigned char err_data[MAX_READ_BLOCK_LENGTH];
244	struct smpro_error_hdr *err_info;
245	s32 err_count, err_length;
246	int ret;
247
248	err_info = &smpro_error_table[channel];
249
250	ret = regmap_read(errmon->regmap, err_info->count, &err_count);
251	/* Error count is the low byte */
252	err_count &= 0xff;
253	if (ret || !err_count || err_count > err_info->max_cnt)
254		return ret;
255
256	ret = regmap_read(errmon->regmap, err_info->len, &err_length);
257	if (ret || err_length <= 0)
258		return ret;
259
260	if (err_length > MAX_READ_BLOCK_LENGTH)
261		err_length = MAX_READ_BLOCK_LENGTH;
262
263	memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH);
264	ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length);
265	if (ret < 0)
266		return ret;
267
268	/* clear the error */
269	ret = regmap_write(errmon->regmap, err_info->count, 0x100);
270	if (ret)
271		return ret;
272	/*
273	 * The output of Core/Memory/PCIe/Others UE/CE errors follows the format
274	 * specified in section 5.8.1 CE/UE Error Data record in
275	 * Altra SOC BMC Interface specification.
276	 */
277	return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data);
278}
279
280/*
281 * Output format:
282 * <4-byte hex value of error info><4-byte hex value of error extensive data>
283 * Where:
284 *   + error info : The error information
285 *   + error data : Extensive data (32 bits)
286 * Reference to section 5.10 RAS Internal Error Register Definition in
287 * Altra SOC BMC Interface specification
288 */
289static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da,
290				       char *buf, int channel)
291{
292	struct smpro_errmon *errmon = dev_get_drvdata(dev);
293	struct smpro_int_error_hdr *err_info;
294	unsigned int err[4] = { 0 };
295	unsigned int err_type;
296	unsigned int val;
297	int ret;
298
299	/* read error status */
300	ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
301	if (ret)
302		return ret;
303
304	if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
305	    (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
306		return 0;
307
308	err_info = &list_smpro_int_error_hdr[channel];
309	ret = regmap_read(errmon->regmap, err_info->type, &val);
310	if (ret)
311		return ret;
312
313	err_type = (val & BIT(1)) ? BIT(1) :
314		   (val & BIT(2)) ? BIT(2) : 0;
315
316	if (!err_type)
317		return 0;
318
319	ret = regmap_read(errmon->regmap, err_info->info_l, err + 1);
320	if (ret)
321		return ret;
322
323	ret = regmap_read(errmon->regmap, err_info->info_h, err);
324	if (ret)
325		return ret;
326
327	if (err_type & BIT(2)) {
328		/* Error with data type */
329		ret = regmap_read(errmon->regmap, err_info->data_l, err + 3);
330		if (ret)
331			return ret;
332
333		ret = regmap_read(errmon->regmap, err_info->data_h, err + 2);
334		if (ret)
335			return ret;
336	}
337
338	/* clear the read errors */
339	ret = regmap_write(errmon->regmap, err_info->type, err_type);
340	if (ret)
341		return ret;
342
343	return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err);
344}
345
346/*
347 * Output format:
348 * <4-byte hex value of warining info>
349 * Reference to section 5.10 RAS Internal Error Register Definition in
350 * Altra SOC BMC Interface specification
351 */
352static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da,
353					char *buf, int channel)
354{
355	struct smpro_errmon *errmon = dev_get_drvdata(dev);
356	struct smpro_int_error_hdr *err_info;
357	unsigned int warn[2] = { 0 };
358	unsigned int val;
359	int ret;
360
361	/* read error status */
362	ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
363	if (ret)
364		return ret;
365
366	if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
367	    (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
368		return 0;
369
370	err_info = &list_smpro_int_error_hdr[channel];
371	ret = regmap_read(errmon->regmap, err_info->type, &val);
372	if (ret)
373		return ret;
374
375	if (!(val & BIT(0)))
376		return 0;
377
378	ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1);
379	if (ret)
380		return ret;
381
382	ret = regmap_read(errmon->regmap, err_info->warn_h, warn);
383	if (ret)
384		return ret;
385
386	/* clear the warning */
387	ret = regmap_write(errmon->regmap, err_info->type, BIT(0));
388	if (ret)
389		return ret;
390
391	return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn);
392}
393
394#define ERROR_OVERFLOW_RO(_error, _index) \
395	static ssize_t overflow_##_error##_show(struct device *dev,            \
396						struct device_attribute *da,   \
397						char *buf)                     \
398	{                                                                      \
399		return smpro_overflow_data_read(dev, da, buf, _index);         \
400	}                                                                      \
401	static DEVICE_ATTR_RO(overflow_##_error)
402
403ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR);
404ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR);
405ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR);
406ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR);
407ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR);
408ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR);
409ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR);
410ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR);
411
412#define ERROR_RO(_error, _index) \
413	static ssize_t error_##_error##_show(struct device *dev,            \
414					     struct device_attribute *da,   \
415					     char *buf)                     \
416	{                                                                   \
417		return smpro_error_data_read(dev, da, buf, _index);         \
418	}                                                                   \
419	static DEVICE_ATTR_RO(error_##_error)
420
421ERROR_RO(core_ce, CORE_CE_ERR);
422ERROR_RO(core_ue, CORE_UE_ERR);
423ERROR_RO(mem_ce, MEM_CE_ERR);
424ERROR_RO(mem_ue, MEM_UE_ERR);
425ERROR_RO(pcie_ce, PCIE_CE_ERR);
426ERROR_RO(pcie_ue, PCIE_UE_ERR);
427ERROR_RO(other_ce, OTHER_CE_ERR);
428ERROR_RO(other_ue, OTHER_UE_ERR);
429
430static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
431{
432	return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR);
433}
434static DEVICE_ATTR_RO(error_smpro);
435
436static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
437{
438	return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR);
439}
440static DEVICE_ATTR_RO(error_pmpro);
441
442static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
443{
444	return smpro_internal_warn_read(dev, da, buf, RAS_SMPRO_ERR);
445}
446static DEVICE_ATTR_RO(warn_smpro);
447
448static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
449{
450	return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR);
451}
452static DEVICE_ATTR_RO(warn_pmpro);
453
454#define EVENT_RO(_event, _index) \
455	static ssize_t event_##_event##_show(struct device *dev,            \
456					     struct device_attribute *da,   \
457					     char *buf)                     \
458	{                                                                   \
459		return smpro_event_data_read(dev, da, buf, _index);         \
460	}                                                                   \
461	static DEVICE_ATTR_RO(event_##_event)
462
463EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT);
464EVENT_RO(vrd_hot, VRD_HOT_EVENT);
465EVENT_RO(dimm_hot, DIMM_HOT_EVENT);
466
467static struct attribute *smpro_errmon_attrs[] = {
468	&dev_attr_overflow_core_ce.attr,
469	&dev_attr_overflow_core_ue.attr,
470	&dev_attr_overflow_mem_ce.attr,
471	&dev_attr_overflow_mem_ue.attr,
472	&dev_attr_overflow_pcie_ce.attr,
473	&dev_attr_overflow_pcie_ue.attr,
474	&dev_attr_overflow_other_ce.attr,
475	&dev_attr_overflow_other_ue.attr,
476	&dev_attr_error_core_ce.attr,
477	&dev_attr_error_core_ue.attr,
478	&dev_attr_error_mem_ce.attr,
479	&dev_attr_error_mem_ue.attr,
480	&dev_attr_error_pcie_ce.attr,
481	&dev_attr_error_pcie_ue.attr,
482	&dev_attr_error_other_ce.attr,
483	&dev_attr_error_other_ue.attr,
484	&dev_attr_error_smpro.attr,
485	&dev_attr_error_pmpro.attr,
486	&dev_attr_warn_smpro.attr,
487	&dev_attr_warn_pmpro.attr,
488	&dev_attr_event_vrd_warn_fault.attr,
489	&dev_attr_event_vrd_hot.attr,
490	&dev_attr_event_dimm_hot.attr,
491	NULL
492};
493
494ATTRIBUTE_GROUPS(smpro_errmon);
495
496static int smpro_errmon_probe(struct platform_device *pdev)
497{
498	struct smpro_errmon *errmon;
499
500	errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL);
501	if (!errmon)
502		return -ENOMEM;
503
504	platform_set_drvdata(pdev, errmon);
505
506	errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL);
507	if (!errmon->regmap)
508		return -ENODEV;
509
510	return 0;
511}
512
513static struct platform_driver smpro_errmon_driver = {
514	.probe          = smpro_errmon_probe,
515	.driver = {
516		.name   = "smpro-errmon",
517		.dev_groups = smpro_errmon_groups,
518	},
519};
520
521module_platform_driver(smpro_errmon_driver);
522
523MODULE_AUTHOR("Tung Nguyen <tung.nguyen@amperecomputing.com>");
524MODULE_AUTHOR("Thinh Pham <thinh.pham@amperecomputing.com>");
525MODULE_AUTHOR("Hoang Nguyen <hnguyen@amperecomputing.com>");
526MODULE_AUTHOR("Thu Nguyen <thu@os.amperecomputing.com>");
527MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>");
528MODULE_DESCRIPTION("Ampere Altra SMpro driver");
529MODULE_LICENSE("GPL");