Linux Audio

Check our new training course

Loading...
v6.9.4
  1/*
  2 * Copyright 2020 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 20 * OTHER DEALINGS IN THE SOFTWARE.
 21 *
 22 */
 23#include "amdgpu.h"
 24#include "sdma/sdma_4_4_0_offset.h"
 25#include "sdma/sdma_4_4_0_sh_mask.h"
 26#include "soc15.h"
 27#include "amdgpu_ras.h"
 28
/*
 * Displacements of SDMA instances 1-4, added on top of the SDMA0 base by
 * sdma_v4_4_get_reg_offset() to reach the same register in another instance.
 */
#define SDMA1_REG_OFFSET 0x600
#define SDMA2_REG_OFFSET 0x1cda0
#define SDMA3_REG_OFFSET 0x1d1a0
#define SDMA4_REG_OFFSET 0x1d5a0
 33
 34/* helper function that allow only use sdma0 register offset
 35 * to calculate register offset for all the sdma instances */
 36static uint32_t sdma_v4_4_get_reg_offset(struct amdgpu_device *adev,
 37					 uint32_t instance,
 38					 uint32_t offset)
 39{
 40	uint32_t sdma_base = adev->reg_offset[SDMA0_HWIP][0][0];
 41
 42	switch (instance) {
 43	case 0:
 44		return (sdma_base + offset);
 45	case 1:
 46		return (sdma_base + SDMA1_REG_OFFSET + offset);
 47	case 2:
 48		return (sdma_base + SDMA2_REG_OFFSET + offset);
 49	case 3:
 50		return (sdma_base + SDMA3_REG_OFFSET + offset);
 51	case 4:
 52		return (sdma_base + SDMA4_REG_OFFSET + offset);
 53	default:
 54		break;
 55	}
 56	return 0;
 57}
 58
 59static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
 60	{ "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 61	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
 62	0, 0,
 63	},
 64	{ "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 65	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
 66	0, 0,
 67	},
 68	{ "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 69	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
 70	0, 0,
 71	},
 72	{ "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 73	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
 74	0, 0,
 75	},
 76	{ "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 77	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
 78	0, 0,
 79	},
 80	{ "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 81	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
 82	0, 0,
 83	},
 84	{ "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 85	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
 86	0, 0,
 87	},
 88	{ "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 89	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
 90	0, 0,
 91	},
 92	{ "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 93	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
 94	0, 0,
 95	},
 96	{ "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 97	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
 98	0, 0,
 99	},
100	{ "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
101	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
102	0, 0,
103	},
104	{ "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
105	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
106	0, 0,
107	},
108	{ "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
109	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
110	0, 0,
111	},
112	{ "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
113	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
114	0, 0,
115	},
116	{ "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
117	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
118	0, 0,
119	},
120	{ "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
121	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
122	0, 0,
123	},
124	{ "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
125	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UCODE_BUF_SED),
126	0, 0,
127	},
128	{ "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
129	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_RB_CMD_BUF_SED),
130	0, 0,
131	},
132	{ "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
133	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_IB_CMD_BUF_SED),
134	0, 0,
135	},
136	{ "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
137	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RD_FIFO_SED),
138	0, 0,
139	},
140	{ "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
141	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
142	0, 0,
143	},
144	{ "SDMA_UTCL1_WR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
145	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_WR_FIFO_SED),
146	0, 0,
147	},
148	{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
149	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
150	0, 0,
151	},
152	{ "SDMA_SPLIT_DATA_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
153	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_SPLIT_DATA_BUF_SED),
154	0, 0,
155	},
156	{ "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
157	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
158	0, 0,
159	},
160	{ "SDMA_MC_RDRET_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
161	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
162	0, 0,
163	},
164};
165
166static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
167					  uint32_t reg_offset,
168					  uint32_t value,
169					  uint32_t instance,
170					  uint32_t *sec_count)
171{
172	uint32_t i;
173	uint32_t sec_cnt;
174
175	/* double bits error (multiple bits) error detection is not supported */
176	for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) {
177		if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset)
178			continue;
179
180		/* the SDMA_EDC_COUNTER register in each sdma instance
181		 * shares the same sed shift_mask
182		 * */
183		sec_cnt = (value &
184			sdma_v4_4_ras_fields[i].sec_count_mask) >>
185			sdma_v4_4_ras_fields[i].sec_count_shift;
186		if (sec_cnt) {
187			dev_info(adev->dev, "Detected %s in SDMA%d, SED %d\n",
188				 sdma_v4_4_ras_fields[i].name,
189				 instance, sec_cnt);
190			*sec_count += sec_cnt;
191		}
192	}
193}
194
195static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
196					   uint32_t instance,
197					   void *ras_error_status)
198{
199	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
200	uint32_t sec_count = 0;
201	uint32_t reg_value = 0;
202	uint32_t reg_offset = 0;
203
204	reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER);
205	reg_value = RREG32(reg_offset);
206	/* double bit error is not supported */
207	if (reg_value)
208		sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value,
209					      instance, &sec_count);
210
211	reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2);
212	reg_value = RREG32(reg_offset);
213	/* double bit error is not supported */
214	if (reg_value)
215		sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value,
216					      instance, &sec_count);
217
218	/*
219	 * err_data->ue_count should be initialized to 0
220	 * before calling into this function
221	 *
222	 * SDMA RAS supports single bit uncorrectable error detection.
223	 * So, increment uncorrectable error count.
224	 */
225	err_data->ue_count += sec_count;
226
227	/*
228	 * SDMA RAS does not support correctable errors.
229	 * Set ce count to 0.
230	 */
231	err_data->ce_count = 0;
232
233	return 0;
234};
235
236static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
237{
238	int i;
239	uint32_t reg_offset;
240
241	/* write 0 to EDC_COUNTER reg to clear sdma edc counters */
242	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
243		for (i = 0; i < adev->sdma.num_instances; i++) {
244			reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER);
245			WREG32(reg_offset, 0);
246			reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER2);
247			WREG32(reg_offset, 0);
248		}
249	}
250}
251
252static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
253{
254	int i = 0;
255
256	for (i = 0; i < adev->sdma.num_instances; i++) {
257		if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
258			dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
259			return;
260		}
261	}
262
263}
264
/* RAS hardware callbacks for SDMA v4.4: error-count query and counter reset. */
const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
	.query_ras_error_count = sdma_v4_4_query_ras_error_count,
	.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
};
269
/* SDMA v4.4 RAS descriptor; its RAS block is wired to sdma_v4_4_ras_hw_ops. */
struct amdgpu_sdma_ras sdma_v4_4_ras = {
	.ras_block = {
		.hw_ops = &sdma_v4_4_ras_hw_ops,
	},
};
v6.8
  1/*
  2 * Copyright 2020 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 20 * OTHER DEALINGS IN THE SOFTWARE.
 21 *
 22 */
 23#include "amdgpu.h"
 24#include "sdma/sdma_4_4_0_offset.h"
 25#include "sdma/sdma_4_4_0_sh_mask.h"
 26#include "soc15.h"
 27#include "amdgpu_ras.h"
 28
/*
 * Displacements of SDMA instances 1-4, added on top of the SDMA0 base by
 * sdma_v4_4_get_reg_offset() to reach the same register in another instance.
 */
#define SDMA1_REG_OFFSET 0x600
#define SDMA2_REG_OFFSET 0x1cda0
#define SDMA3_REG_OFFSET 0x1d1a0
#define SDMA4_REG_OFFSET 0x1d5a0
 33
 34/* helper function that allow only use sdma0 register offset
 35 * to calculate register offset for all the sdma instances */
 36static uint32_t sdma_v4_4_get_reg_offset(struct amdgpu_device *adev,
 37					 uint32_t instance,
 38					 uint32_t offset)
 39{
 40	uint32_t sdma_base = adev->reg_offset[SDMA0_HWIP][0][0];
 41
 42	switch (instance) {
 43	case 0:
 44		return (sdma_base + offset);
 45	case 1:
 46		return (sdma_base + SDMA1_REG_OFFSET + offset);
 47	case 2:
 48		return (sdma_base + SDMA2_REG_OFFSET + offset);
 49	case 3:
 50		return (sdma_base + SDMA3_REG_OFFSET + offset);
 51	case 4:
 52		return (sdma_base + SDMA4_REG_OFFSET + offset);
 53	default:
 54		break;
 55	}
 56	return 0;
 57}
 58
 59static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = {
 60	{ "SDMA_MBANK_DATA_BUF0_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 61	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF0_SED),
 62	0, 0,
 63	},
 64	{ "SDMA_MBANK_DATA_BUF1_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 65	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF1_SED),
 66	0, 0,
 67	},
 68	{ "SDMA_MBANK_DATA_BUF2_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 69	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF2_SED),
 70	0, 0,
 71	},
 72	{ "SDMA_MBANK_DATA_BUF3_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 73	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF3_SED),
 74	0, 0,
 75	},
 76	{ "SDMA_MBANK_DATA_BUF4_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 77	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF4_SED),
 78	0, 0,
 79	},
 80	{ "SDMA_MBANK_DATA_BUF5_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 81	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF5_SED),
 82	0, 0,
 83	},
 84	{ "SDMA_MBANK_DATA_BUF6_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 85	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF6_SED),
 86	0, 0,
 87	},
 88	{ "SDMA_MBANK_DATA_BUF7_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 89	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF7_SED),
 90	0, 0,
 91	},
 92	{ "SDMA_MBANK_DATA_BUF8_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 93	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF8_SED),
 94	0, 0,
 95	},
 96	{ "SDMA_MBANK_DATA_BUF9_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
 97	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF9_SED),
 98	0, 0,
 99	},
100	{ "SDMA_MBANK_DATA_BUF10_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
101	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF10_SED),
102	0, 0,
103	},
104	{ "SDMA_MBANK_DATA_BUF11_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
105	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF11_SED),
106	0, 0,
107	},
108	{ "SDMA_MBANK_DATA_BUF12_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
109	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF12_SED),
110	0, 0,
111	},
112	{ "SDMA_MBANK_DATA_BUF13_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
113	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF13_SED),
114	0, 0,
115	},
116	{ "SDMA_MBANK_DATA_BUF14_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
117	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF14_SED),
118	0, 0,
119	},
120	{ "SDMA_MBANK_DATA_BUF15_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER),
121	SOC15_REG_FIELD(SDMA0_EDC_COUNTER, SDMA_MBANK_DATA_BUF15_SED),
122	0, 0,
123	},
124	{ "SDMA_UCODE_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
125	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UCODE_BUF_SED),
126	0, 0,
127	},
128	{ "SDMA_RB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
129	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_RB_CMD_BUF_SED),
130	0, 0,
131	},
132	{ "SDMA_IB_CMD_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
133	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_IB_CMD_BUF_SED),
134	0, 0,
135	},
136	{ "SDMA_UTCL1_RD_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
137	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RD_FIFO_SED),
138	0, 0,
139	},
140	{ "SDMA_UTCL1_RDBST_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
141	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED),
142	0, 0,
143	},
144	{ "SDMA_UTCL1_WR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
145	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_WR_FIFO_SED),
146	0, 0,
147	},
148	{ "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
149	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED),
150	0, 0,
151	},
152	{ "SDMA_SPLIT_DATA_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
153	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_SPLIT_DATA_BUF_SED),
154	0, 0,
155	},
156	{ "SDMA_MC_WR_ADDR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
157	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
158	0, 0,
159	},
160	{ "SDMA_MC_RDRET_BUF_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2),
161	SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_MC_WR_ADDR_FIFO_SED),
162	0, 0,
163	},
164};
165
166static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
167					  uint32_t reg_offset,
168					  uint32_t value,
169					  uint32_t instance,
170					  uint32_t *sec_count)
171{
172	uint32_t i;
173	uint32_t sec_cnt;
174
175	/* double bits error (multiple bits) error detection is not supported */
176	for (i = 0; i < ARRAY_SIZE(sdma_v4_4_ras_fields); i++) {
177		if (sdma_v4_4_ras_fields[i].reg_offset != reg_offset)
178			continue;
179
180		/* the SDMA_EDC_COUNTER register in each sdma instance
181		 * shares the same sed shift_mask
182		 * */
183		sec_cnt = (value &
184			sdma_v4_4_ras_fields[i].sec_count_mask) >>
185			sdma_v4_4_ras_fields[i].sec_count_shift;
186		if (sec_cnt) {
187			dev_info(adev->dev, "Detected %s in SDMA%d, SED %d\n",
188				 sdma_v4_4_ras_fields[i].name,
189				 instance, sec_cnt);
190			*sec_count += sec_cnt;
191		}
192	}
193}
194
195static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
196					   uint32_t instance,
197					   void *ras_error_status)
198{
199	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
200	uint32_t sec_count = 0;
201	uint32_t reg_value = 0;
202	uint32_t reg_offset = 0;
203
204	reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER);
205	reg_value = RREG32(reg_offset);
206	/* double bit error is not supported */
207	if (reg_value)
208		sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER, reg_value,
209					      instance, &sec_count);
210
211	reg_offset = sdma_v4_4_get_reg_offset(adev, instance, regSDMA0_EDC_COUNTER2);
212	reg_value = RREG32(reg_offset);
213	/* double bit error is not supported */
214	if (reg_value)
215		sdma_v4_4_get_ras_error_count(adev, regSDMA0_EDC_COUNTER2, reg_value,
216					      instance, &sec_count);
217
218	/*
219	 * err_data->ue_count should be initialized to 0
220	 * before calling into this function
221	 *
222	 * SDMA RAS supports single bit uncorrectable error detection.
223	 * So, increment uncorrectable error count.
224	 */
225	err_data->ue_count += sec_count;
226
227	/*
228	 * SDMA RAS does not support correctable errors.
229	 * Set ce count to 0.
230	 */
231	err_data->ce_count = 0;
232
233	return 0;
234};
235
236static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
237{
238	int i;
239	uint32_t reg_offset;
240
241	/* write 0 to EDC_COUNTER reg to clear sdma edc counters */
242	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
243		for (i = 0; i < adev->sdma.num_instances; i++) {
244			reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER);
245			WREG32(reg_offset, 0);
246			reg_offset = sdma_v4_4_get_reg_offset(adev, i, regSDMA0_EDC_COUNTER2);
247			WREG32(reg_offset, 0);
248		}
249	}
250}
251
252static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,  void *ras_error_status)
253{
254	int i = 0;
255
256	for (i = 0; i < adev->sdma.num_instances; i++) {
257		if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) {
258			dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i);
259			return;
260		}
261	}
262
263}
264
/* RAS hardware callbacks for SDMA v4.4: error-count query and counter reset. */
const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
	.query_ras_error_count = sdma_v4_4_query_ras_error_count,
	.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
};
269
/* SDMA v4.4 RAS descriptor; its RAS block is wired to sdma_v4_4_ras_hw_ops. */
struct amdgpu_sdma_ras sdma_v4_4_ras = {
	.ras_block = {
		.hw_ops = &sdma_v4_4_ras_hw_ops,
	},
};