// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * AMD Address Translation Library
 *
 * umc.c : Unified Memory Controller (UMC) topology helpers
 *
 * Copyright (c) 2023, Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
 */

#include "internal.h"

/*
 * MI300 has a fixed, model-specific mapping between a UMC instance and
 * its related Data Fabric Coherent Station instance.
 *
 * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the
 * UMC instance within a Node. Use this to find the appropriate Coherent
 * Station ID.
 *
 * Redundant bits were removed from the map below.
 */
static const u16 umc_coh_st_map[32] = {
	0x393, 0x293, 0x193, 0x093,
	0x392, 0x292, 0x192, 0x092,
	0x391, 0x291, 0x191, 0x091,
	0x390, 0x290, 0x190, 0x090,
	0x793, 0x693, 0x593, 0x493,
	0x792, 0x692, 0x592, 0x492,
	0x791, 0x691, 0x591, 0x491,
	0x790, 0x690, 0x590, 0x490,
};

#define UMC_ID_MI300 GENMASK(23, 12)
static u8 get_coh_st_inst_id_mi300(struct atl_err *err)
{
	u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid);
	u8 i;

	for (i = 0; i < ARRAY_SIZE(umc_coh_st_map); i++) {
		if (umc_id == umc_coh_st_map[i])
			break;
	}

	WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map));

	return i;
}
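
/*
 * Worked example (hypothetical IPID value, for illustration only): if
 * MCA_IPID[23:12] reads as 0x193, the lookup above matches
 * umc_coh_st_map[2], so the error is attributed to Coherent Station
 * instance 2. An unrecognized value falls through the loop and trips the
 * WARN_ON_ONCE() above.
 */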

/* XOR the bits in @val. */
static u16 bitwise_xor_bits(u16 val)
{
	u16 tmp = 0;
	u8 i;

	for (i = 0; i < 16; i++)
		tmp ^= (val >> i) & 0x1;

	return tmp;
}
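
/*
 * In other words, bitwise_xor_bits() returns the parity of @val: 1 if an
 * odd number of bits are set, 0 otherwise. For example, 0xB (0b1011) has
 * three set bits, so bitwise_xor_bits(0xB) == 1. The same result could be
 * computed as hweight16(val) & 1.
 */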

struct xor_bits {
	bool	xor_enable;
	u16	col_xor;
	u32	row_xor;
};

#define NUM_BANK_BITS	4
#define NUM_COL_BITS	5
#define NUM_SID_BITS	2

static struct {
	/* UMC::CH::AddrHashBank */
	struct xor_bits	bank[NUM_BANK_BITS];

	/* UMC::CH::AddrHashPC */
	struct xor_bits	pc;

	/* UMC::CH::AddrHashPC2 */
	u8		bank_xor;
} addr_hash;

static struct {
	u8 bank[NUM_BANK_BITS];
	u8 col[NUM_COL_BITS];
	u8 sid[NUM_SID_BITS];
	u8 num_row_lo;
	u8 num_row_hi;
	u8 row_lo;
	u8 row_hi;
	u8 pc;
} bit_shifts;

#define MI300_UMC_CH_BASE	0x90000
#define MI300_ADDR_CFG		(MI300_UMC_CH_BASE + 0x30)
#define MI300_ADDR_SEL		(MI300_UMC_CH_BASE + 0x40)
#define MI300_COL_SEL_LO	(MI300_UMC_CH_BASE + 0x50)
#define MI300_ADDR_SEL_2	(MI300_UMC_CH_BASE + 0xA4)
#define MI300_ADDR_HASH_BANK0	(MI300_UMC_CH_BASE + 0xC8)
#define MI300_ADDR_HASH_PC	(MI300_UMC_CH_BASE + 0xE0)
#define MI300_ADDR_HASH_PC2	(MI300_UMC_CH_BASE + 0xE4)

#define ADDR_HASH_XOR_EN	BIT(0)
#define ADDR_HASH_COL_XOR	GENMASK(13, 1)
#define ADDR_HASH_ROW_XOR	GENMASK(31, 14)
#define ADDR_HASH_BANK_XOR	GENMASK(5, 0)

#define ADDR_CFG_NUM_ROW_LO	GENMASK(11, 8)
#define ADDR_CFG_NUM_ROW_HI	GENMASK(15, 12)

#define ADDR_SEL_BANK0		GENMASK(3, 0)
#define ADDR_SEL_BANK1		GENMASK(7, 4)
#define ADDR_SEL_BANK2		GENMASK(11, 8)
#define ADDR_SEL_BANK3		GENMASK(15, 12)
#define ADDR_SEL_BANK4		GENMASK(20, 16)
#define ADDR_SEL_ROW_LO		GENMASK(27, 24)
#define ADDR_SEL_ROW_HI		GENMASK(31, 28)

#define COL_SEL_LO_COL0		GENMASK(3, 0)
#define COL_SEL_LO_COL1		GENMASK(7, 4)
#define COL_SEL_LO_COL2		GENMASK(11, 8)
#define COL_SEL_LO_COL3		GENMASK(15, 12)
#define COL_SEL_LO_COL4		GENMASK(19, 16)

#define ADDR_SEL_2_BANK5	GENMASK(4, 0)
#define ADDR_SEL_2_CHAN		GENMASK(15, 12)

/*
 * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used
 * for hashing.
 *
 * Also, read UMC::CH::Addr{Cfg,Sel,Sel2} and UMC::CH::ColSelLo registers to
 * get the values needed to reconstruct the normalized address. Apply additional
 * offsets to the raw register values, as needed.
 *
 * Do this during module init, since the values will not change during run time.
 *
 * These registers are instantiated for each UMC across each AMD Node.
 * However, they should be identically programmed due to the fixed hardware
 * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a
 * single global structure for simplicity.
 */
int get_umc_info_mi300(void)
{
	u32 temp;
	int ret;
	u8 i;

	for (i = 0; i < NUM_BANK_BITS; i++) {
		ret = amd_smn_read(0, MI300_ADDR_HASH_BANK0 + (i * 4), &temp);
		if (ret)
			return ret;

		addr_hash.bank[i].xor_enable = FIELD_GET(ADDR_HASH_XOR_EN,  temp);
		addr_hash.bank[i].col_xor    = FIELD_GET(ADDR_HASH_COL_XOR, temp);
		addr_hash.bank[i].row_xor    = FIELD_GET(ADDR_HASH_ROW_XOR, temp);
	}

	ret = amd_smn_read(0, MI300_ADDR_HASH_PC, &temp);
	if (ret)
		return ret;

	addr_hash.pc.xor_enable = FIELD_GET(ADDR_HASH_XOR_EN,  temp);
	addr_hash.pc.col_xor    = FIELD_GET(ADDR_HASH_COL_XOR, temp);
	addr_hash.pc.row_xor    = FIELD_GET(ADDR_HASH_ROW_XOR, temp);

	ret = amd_smn_read(0, MI300_ADDR_HASH_PC2, &temp);
	if (ret)
		return ret;

	addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp);

	ret = amd_smn_read(0, MI300_ADDR_CFG, &temp);
	if (ret)
		return ret;

	bit_shifts.num_row_hi = FIELD_GET(ADDR_CFG_NUM_ROW_HI, temp);
	bit_shifts.num_row_lo = 10 + FIELD_GET(ADDR_CFG_NUM_ROW_LO, temp);

	ret = amd_smn_read(0, MI300_ADDR_SEL, &temp);
	if (ret)
		return ret;

	bit_shifts.bank[0] = 5 + FIELD_GET(ADDR_SEL_BANK0, temp);
	bit_shifts.bank[1] = 5 + FIELD_GET(ADDR_SEL_BANK1, temp);
	bit_shifts.bank[2] = 5 + FIELD_GET(ADDR_SEL_BANK2, temp);
	bit_shifts.bank[3] = 5 + FIELD_GET(ADDR_SEL_BANK3, temp);
	/* Use BankBit4 for the SID0 position. */
	bit_shifts.sid[0]  = 5 + FIELD_GET(ADDR_SEL_BANK4, temp);
	bit_shifts.row_lo  = 12 + FIELD_GET(ADDR_SEL_ROW_LO, temp);
	bit_shifts.row_hi  = 24 + FIELD_GET(ADDR_SEL_ROW_HI, temp);

	ret = amd_smn_read(0, MI300_COL_SEL_LO, &temp);
	if (ret)
		return ret;

	bit_shifts.col[0] = 2 + FIELD_GET(COL_SEL_LO_COL0, temp);
	bit_shifts.col[1] = 2 + FIELD_GET(COL_SEL_LO_COL1, temp);
	bit_shifts.col[2] = 2 + FIELD_GET(COL_SEL_LO_COL2, temp);
	bit_shifts.col[3] = 2 + FIELD_GET(COL_SEL_LO_COL3, temp);
	bit_shifts.col[4] = 2 + FIELD_GET(COL_SEL_LO_COL4, temp);

	ret = amd_smn_read(0, MI300_ADDR_SEL_2, &temp);
	if (ret)
		return ret;

	/* Use BankBit5 for the SID1 position. */
	bit_shifts.sid[1] = 5 + FIELD_GET(ADDR_SEL_2_BANK5, temp);
	bit_shifts.pc	  = 5 + FIELD_GET(ADDR_SEL_2_CHAN, temp);

	return 0;
}
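
/*
 * Worked example with a hypothetical AddrCfg value (for illustration
 * only): if UMC::CH::AddrCfg reads as 0x1400, then NumRowLo = 4 and
 * NumRowHi = 1, giving bit_shifts.num_row_lo = 10 + 4 = 14 and
 * bit_shifts.num_row_hi = 1 (15 row bits total). Likewise, an AddrSel
 * Bank0 field of 0x3 would place Bank[0] at normalized address bit
 * 5 + 3 = 8.
 */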

/*
 * MI300 systems report a DRAM address in MCA_ADDR for DRAM ECC errors. This must
 * be converted to the intermediate normalized address (NA) before translating to a
 * system physical address.
 *
 * The DRAM address includes bank, row, and column. Also included are bits for
 * pseudochannel (PC) and stack ID (SID).
 *
 * Abbreviations: (S)tack ID, (P)seudochannel, (R)ow, (B)ank, (C)olumn, (Z)ero
 *
 * The MCA address format is as follows:
 *	MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]}
 *
 * Additionally, the PC and Bank bits may be hashed. This must be accounted for before
 * reconstructing the normalized address.
 */
#define MI300_UMC_MCA_COL	GENMASK(5, 1)
#define MI300_UMC_MCA_BANK	GENMASK(9, 6)
#define MI300_UMC_MCA_ROW	GENMASK(24, 10)
#define MI300_UMC_MCA_PC	BIT(25)
#define MI300_UMC_MCA_SID	GENMASK(27, 26)
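
/*
 * Decoding example with a hypothetical MCA_ADDR value (for illustration
 * only): 0x0A048CD4 splits into C[4:0] = 0x0A, B[3:0] = 0x3,
 * R[14:0] = 0x123, P[0] = 1, S[1:0] = 0x2, with Z[0] = 0.
 */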

static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr)
{
	u16 i, col, row, bank, pc, sid;
	u32 temp;

	col  = FIELD_GET(MI300_UMC_MCA_COL,  addr);
	bank = FIELD_GET(MI300_UMC_MCA_BANK, addr);
	row  = FIELD_GET(MI300_UMC_MCA_ROW,  addr);
	pc   = FIELD_GET(MI300_UMC_MCA_PC,   addr);
	sid  = FIELD_GET(MI300_UMC_MCA_SID,  addr);

	/* Calculate hash for each Bank bit. */
	for (i = 0; i < NUM_BANK_BITS; i++) {
		if (!addr_hash.bank[i].xor_enable)
			continue;

		temp  = bitwise_xor_bits(col & addr_hash.bank[i].col_xor);
		temp ^= bitwise_xor_bits(row & addr_hash.bank[i].row_xor);
		bank ^= temp << i;
	}

	/* Calculate hash for PC bit. */
	if (addr_hash.pc.xor_enable) {
		temp  = bitwise_xor_bits(col  & addr_hash.pc.col_xor);
		temp ^= bitwise_xor_bits(row  & addr_hash.pc.row_xor);
		/* Bits SID[1:0] act as Bank[5:4] for PC hash, so apply them here. */
		temp ^= bitwise_xor_bits((bank | sid << NUM_BANK_BITS) & addr_hash.bank_xor);
		pc   ^= temp;
	}

	/* Reconstruct the normalized address starting with NA[4:0] = 0 */
	addr  = 0;

	/* Column bits */
	for (i = 0; i < NUM_COL_BITS; i++) {
		temp  = (col >> i) & 0x1;
		addr |= temp << bit_shifts.col[i];
	}

	/* Bank bits */
	for (i = 0; i < NUM_BANK_BITS; i++) {
		temp  = (bank >> i) & 0x1;
		addr |= temp << bit_shifts.bank[i];
	}

	/* Row lo bits */
	for (i = 0; i < bit_shifts.num_row_lo; i++) {
		temp  = (row >> i) & 0x1;
		addr |= temp << (i + bit_shifts.row_lo);
	}

	/* Row hi bits */
	for (i = 0; i < bit_shifts.num_row_hi; i++) {
		temp  = (row >> (i + bit_shifts.num_row_lo)) & 0x1;
		addr |= temp << (i + bit_shifts.row_hi);
	}

	/* PC bit */
	addr |= pc << bit_shifts.pc;

	/* SID bits */
	for (i = 0; i < NUM_SID_BITS; i++) {
		temp  = (sid >> i) & 0x1;
		addr |= temp << bit_shifts.sid[i];
	}

	pr_debug("Addr=0x%016lx", addr);
	pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid);

	return addr;
}
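
/*
 * Note on the row split above: the "lo" loop places R[num_row_lo - 1 : 0]
 * at NA bits starting from bit_shifts.row_lo, and the "hi" loop places the
 * remaining R bits starting from bit_shifts.row_hi. With the hypothetical
 * AddrCfg example earlier (num_row_lo = 14, num_row_hi = 1), R[13:0] lands
 * at NA[row_lo + 13 : row_lo] and R[14] lands at NA[row_hi].
 */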

/*
 * When a DRAM ECC error occurs on MI300 systems, it is recommended to retire
 * all memory within that DRAM row. This applies to the memory within a DRAM
 * bank.
 *
 * To find the memory addresses, loop through permutations of the DRAM column
 * bits and find the System Physical address of each. The column bits are used
 * to calculate the intermediate Normalized address, so all permutations should
 * be checked.
 *
 * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
 */
#define MI300_NUM_COL		BIT(HWEIGHT(MI300_UMC_MCA_COL))
static void retire_row_mi300(struct atl_err *a_err)
{
	unsigned long addr;
	struct page *p;
	u8 col;

	for (col = 0; col < MI300_NUM_COL; col++) {
		a_err->addr &= ~MI300_UMC_MCA_COL;
		a_err->addr |= FIELD_PREP(MI300_UMC_MCA_COL, col);

		addr = amd_convert_umc_mca_addr_to_sys_addr(a_err);
		if (IS_ERR_VALUE(addr))
			continue;

		addr = PHYS_PFN(addr);

		/*
		 * Skip invalid or already poisoned pages to avoid unnecessary
		 * error messages from memory_failure().
		 */
		p = pfn_to_online_page(addr);
		if (!p)
			continue;

		if (PageHWPoison(p))
			continue;

		memory_failure(addr, 0);
	}
}
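
/*
 * MI300_UMC_MCA_COL covers 5 bits, so MI300_NUM_COL is 2^5 = 32 and
 * retire_row_mi300() walks all 32 column permutations, poisoning up to
 * 32 pages for a single reported row.
 */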

void amd_retire_dram_row(struct atl_err *a_err)
{
	if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
		return retire_row_mi300(a_err);
}
EXPORT_SYMBOL_GPL(amd_retire_dram_row);

static unsigned long get_addr(unsigned long addr)
{
	if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
		return convert_dram_to_norm_addr_mi300(addr);

	return addr;
}

#define MCA_IPID_INST_ID_HI	GENMASK_ULL(47, 44)
static u8 get_die_id(struct atl_err *err)
{
	/*
	 * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this
	 * needs to be divided by 4 to get the internal Die ID.
	 */
	if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) {
		u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid);

		return node_id >> 2;
	}

	/*
	 * For CPUs, this is the AMD Node ID modulo the number
	 * of AMD Nodes per socket.
	 */
	return topology_amd_node_id(err->cpu) % topology_amd_nodes_per_pkg();
}
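
/*
 * For example (hypothetical InstanceIdHi value): a node_id of 5 read from
 * MCA_IPID[47:44] maps to internal Die ID 5 >> 2 = 1 on MI300.
 */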

#define UMC_CHANNEL_NUM	GENMASK(31, 20)
static u8 get_coh_st_inst_id(struct atl_err *err)
{
	if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
		return get_coh_st_inst_id_mi300(err);

	return FIELD_GET(UMC_CHANNEL_NUM, err->ipid);
}

unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
{
	u8 socket_id = topology_physical_package_id(err->cpu);
	u8 coh_st_inst_id = get_coh_st_inst_id(err);
	unsigned long addr = get_addr(err->addr);
	u8 die_id = get_die_id(err);
	unsigned long ret_addr;

	pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx",
		 socket_id, die_id, coh_st_inst_id, addr);

	ret_addr = prm_umc_norm_to_sys_addr(socket_id, err->ipid, addr);
	if (!IS_ERR_VALUE(ret_addr))
		return ret_addr;

	return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr);
}