Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.8.
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * AMD Address Translation Library
   4 *
   5 * denormalize.c : Functions to account for interleaving bits
   6 *
   7 * Copyright (c) 2023, Advanced Micro Devices, Inc.
   8 * All Rights Reserved.
   9 *
  10 * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
  11 */
  12
  13#include "internal.h"
  14
  15/*
  16 * Returns the Destination Fabric ID. This is the first (lowest)
  17 * COH_ST Fabric ID used within a DRAM Address map.
  18 */
  19static u16 get_dst_fabric_id(struct addr_ctx *ctx)
  20{
  21	switch (df_cfg.rev) {
  22	case DF2:	return FIELD_GET(DF2_DST_FABRIC_ID,	ctx->map.limit);
  23	case DF3:	return FIELD_GET(DF3_DST_FABRIC_ID,	ctx->map.limit);
  24	case DF3p5:	return FIELD_GET(DF3p5_DST_FABRIC_ID,	ctx->map.limit);
  25	case DF4:	return FIELD_GET(DF4_DST_FABRIC_ID,	ctx->map.ctl);
  26	case DF4p5:	return FIELD_GET(DF4p5_DST_FABRIC_ID,	ctx->map.ctl);
  27	default:
  28			atl_debug_on_bad_df_rev();
  29			return 0;
  30	}
  31}
  32
  33/*
  34 * Make a contiguous gap in address for N bits starting at bit P.
  35 *
  36 * Example:
  37 * address bits:		[20:0]
  38 * # of interleave bits    (n):	3
  39 * starting interleave bit (p):	8
  40 *
  41 * expanded address bits:	[20+n : n+p][n+p-1 : p][p-1 : 0]
  42 *				[23   :  11][10    : 8][7   : 0]
  43 */
  44static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx)
  45{
  46	return expand_bits(ctx->map.intlv_bit_pos,
  47			   ctx->map.total_intlv_bits,
  48			   ctx->ret_addr);
  49}
  50
  51/*
  52 * Make two gaps in address for N bits.
  53 * First gap is a single bit at bit P.
  54 * Second gap is the remaining N-1 bits at bit 12.
  55 *
  56 * Example:
  57 * address bits:		[20:0]
  58 * # of interleave bits    (n):	3
  59 * starting interleave bit (p):	8
  60 *
  61 * First gap
  62 * expanded address bits:	[20+1 : p+1][p][p-1 : 0]
  63 *				[21   :   9][8][7   : 0]
  64 *
  65 * Second gap uses result from first.
  66 *				r = n - 1; remaining interleave bits
  67 * expanded address bits:	[21+r : 12+r][12+r-1: 12][11 : 0]
  68 *				[23   :   14][13    : 12][11 : 0]
  69 */
  70static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx)
  71{
  72	/* Make a single space at the interleave bit. */
  73	u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr);
  74
  75	/* Done if there's only a single interleave bit. */
  76	if (ctx->map.total_intlv_bits <= 1)
  77		return denorm_addr;
  78
  79	/* Make spaces for the remaining interleave bits starting at bit 12. */
  80	return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr);
  81}
  82
  83/*
  84 * Make space for CS ID at bits [14:8] as follows:
  85 *
  86 * 8 channels	-> bits [10:8]
  87 * 16 channels	-> bits [11:8]
  88 * 32 channels	-> bits [14,11:8]
  89 *
  90 * 1 die	-> N/A
  91 * 2 dies	-> bit  [12]
  92 * 4 dies	-> bits [13:12]
  93 */
  94static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx)
  95{
  96	u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
  97	u64 denorm_addr;
  98
  99	if (ctx->map.intlv_bit_pos != 8) {
 100		pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
 101		return ~0ULL;
 102	}
 103
 104	/* Channel bits. Covers up to 4 bits at [11:8]. */
 105	denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr);
 106
 107	/* Die bits. Always starts at [12]. */
 108	denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr);
 109
 110	/* Additional channel bit at [14]. */
 111	if (num_intlv_bits > 4)
 112		denorm_addr = expand_bits(14, 1, denorm_addr);
 113
 114	return denorm_addr;
 115}
 116
 117/*
 118 * Take the current calculated address and shift enough bits in the middle
 119 * to make a gap where the interleave bits will be inserted.
 120 */
 121static u64 make_space_for_coh_st_id(struct addr_ctx *ctx)
 122{
 123	switch (ctx->map.intlv_mode) {
 124	case NOHASH_2CHAN:
 125	case NOHASH_4CHAN:
 126	case NOHASH_8CHAN:
 127	case NOHASH_16CHAN:
 128	case NOHASH_32CHAN:
 129	case DF2_2CHAN_HASH:
 130		return make_space_for_coh_st_id_at_intlv_bit(ctx);
 131
 132	case DF3_COD4_2CHAN_HASH:
 133	case DF3_COD2_4CHAN_HASH:
 134	case DF3_COD1_8CHAN_HASH:
 135	case DF4_NPS4_2CHAN_HASH:
 136	case DF4_NPS2_4CHAN_HASH:
 137	case DF4_NPS1_8CHAN_HASH:
 138	case DF4p5_NPS4_2CHAN_1K_HASH:
 139	case DF4p5_NPS4_2CHAN_2K_HASH:
 140	case DF4p5_NPS2_4CHAN_2K_HASH:
 141	case DF4p5_NPS1_8CHAN_2K_HASH:
 142	case DF4p5_NPS1_16CHAN_2K_HASH:
 143		return make_space_for_coh_st_id_split_2_1(ctx);
 144
 145	case MI3_HASH_8CHAN:
 146	case MI3_HASH_16CHAN:
 147	case MI3_HASH_32CHAN:
 148		return make_space_for_coh_st_id_mi300(ctx);
 149
 150	default:
 151		atl_debug_on_bad_intlv_mode(ctx);
 152		return ~0ULL;
 153	}
 154}
 155
 156static u16 get_coh_st_id_df2(struct addr_ctx *ctx)
 157{
 158	u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
 159	u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies);
 160	u8 num_intlv_bits;
 161	u16 coh_st_id, mask;
 162
 163	coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
 164
 165	/* Channel interleave bits */
 166	num_intlv_bits = order_base_2(ctx->map.num_intlv_chan);
 167	mask = GENMASK(num_intlv_bits - 1, 0);
 168	coh_st_id &= mask;
 169
 170	/* Die interleave bits */
 171	if (num_die_intlv_bits) {
 172		u16 die_bits;
 173
 174		mask = GENMASK(num_die_intlv_bits - 1, 0);
 175		die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask;
 176		die_bits >>= df_cfg.die_id_shift;
 177
 178		coh_st_id |= (die_bits & mask) << num_intlv_bits;
 179		num_intlv_bits += num_die_intlv_bits;
 180	}
 181
 182	/* Socket interleave bits */
 183	if (num_socket_intlv_bits) {
 184		u16 socket_bits;
 185
 186		mask = GENMASK(num_socket_intlv_bits - 1, 0);
 187		socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
 188		socket_bits >>= df_cfg.socket_id_shift;
 189
 190		coh_st_id |= (socket_bits & mask) << num_intlv_bits;
 191	}
 192
 193	return coh_st_id;
 194}
 195
 196static u16 get_coh_st_id_df4(struct addr_ctx *ctx)
 197{
 198	/*
 199	 * Start with the original component mask and the number of interleave
 200	 * bits for the channels in this map.
 201	 */
 202	u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
 203	u16 mask = df_cfg.component_id_mask;
 204
 205	u16 socket_bits;
 206
 207	/* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */
 208	u16 coh_st_id = ctx->coh_st_fabric_id & mask;
 209
 210	/*
 211	 * Subtract the "base" Destination Fabric ID.
 212	 * This accounts for systems with disabled Coherent Stations.
 213	 */
 214	coh_st_id -= get_dst_fabric_id(ctx) & mask;
 215
 216	/*
 217	 * Generate and use a new mask based on the number of bits
 218	 * needed for channel interleaving in this map.
 219	 */
 220	mask = GENMASK(num_intlv_bits - 1, 0);
 221	coh_st_id &= mask;
 222
 223	/* Done if socket interleaving is not enabled. */
 224	if (ctx->map.num_intlv_sockets <= 1)
 225		return coh_st_id;
 226
 227	/*
 228	 * Figure out how many bits are needed for the number of
 229	 * interleaved sockets. And shift the derived Coherent Station ID to account
 230	 * for these.
 231	 */
 232	num_intlv_bits = ilog2(ctx->map.num_intlv_sockets);
 233	coh_st_id <<= num_intlv_bits;
 234
 235	/* Generate a new mask for the socket interleaving bits. */
 236	mask = GENMASK(num_intlv_bits - 1, 0);
 237
 238	/* Get the socket interleave bits from the original Coherent Station Fabric ID. */
 239	socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift;
 240
 241	/* Apply the appropriate socket bits to the derived Coherent Station ID. */
 242	coh_st_id |= socket_bits & mask;
 243
 244	return coh_st_id;
 245}
 246
 247/*
 248 * MI300 hash has:
 249 * (C)hannel[3:0]	= coh_st_id[3:0]
 250 * (S)tack[0]		= coh_st_id[4]
 251 * (D)ie[1:0]		= coh_st_id[6:5]
 252 *
 253 * Hashed coh_st_id is swizzled so that Stack bit is at the end.
 254 * coh_st_id = SDDCCCC
 255 */
 256static u16 get_coh_st_id_mi300(struct addr_ctx *ctx)
 257{
 258	u8 channel_bits, die_bits, stack_bit;
 259	u16 die_id;
 260
 261	/* Subtract the "base" Destination Fabric ID. */
 262	ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx);
 263
 264	die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift;
 265
 266	channel_bits	= FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id);
 267	stack_bit	= FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6;
 268	die_bits	= die_id << 4;
 269
 270	return stack_bit | die_bits | channel_bits;
 271}
 272
 273/*
 274 * Derive the correct Coherent Station ID that represents the interleave bits
 275 * used within the system physical address. This accounts for the
 276 * interleave mode, number of interleaved channels/dies/sockets, and
 277 * other system/mode-specific bit swizzling.
 278 *
 279 * Returns:	Coherent Station ID on success.
 280 *		All bits set on error.
 281 */
 282static u16 calculate_coh_st_id(struct addr_ctx *ctx)
 283{
 284	switch (ctx->map.intlv_mode) {
 285	case NOHASH_2CHAN:
 286	case NOHASH_4CHAN:
 287	case NOHASH_8CHAN:
 288	case NOHASH_16CHAN:
 289	case NOHASH_32CHAN:
 290	case DF3_COD4_2CHAN_HASH:
 291	case DF3_COD2_4CHAN_HASH:
 292	case DF3_COD1_8CHAN_HASH:
 293	case DF2_2CHAN_HASH:
 294		return get_coh_st_id_df2(ctx);
 295
 296	case DF4_NPS4_2CHAN_HASH:
 297	case DF4_NPS2_4CHAN_HASH:
 298	case DF4_NPS1_8CHAN_HASH:
 299	case DF4p5_NPS4_2CHAN_1K_HASH:
 300	case DF4p5_NPS4_2CHAN_2K_HASH:
 301	case DF4p5_NPS2_4CHAN_2K_HASH:
 302	case DF4p5_NPS1_8CHAN_2K_HASH:
 303	case DF4p5_NPS1_16CHAN_2K_HASH:
 304		return get_coh_st_id_df4(ctx);
 305
 306	case MI3_HASH_8CHAN:
 307	case MI3_HASH_16CHAN:
 308	case MI3_HASH_32CHAN:
 309		return get_coh_st_id_mi300(ctx);
 310
 311	/* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */
 312	case DF4p5_NPS2_4CHAN_1K_HASH:
 313	case DF4p5_NPS1_8CHAN_1K_HASH:
 314	case DF4p5_NPS1_16CHAN_1K_HASH:
 315		return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx);
 316
 317	default:
 318		atl_debug_on_bad_intlv_mode(ctx);
 319		return ~0;
 320	}
 321}
 322
 323static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
 324{
 325	return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos);
 326}
 327
 328static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
 329{
 330	/* Insert coh_st_id[0] at the interleave bit. */
 331	denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos;
 332
 333	/* Insert coh_st_id[2:1] at bit 12. */
 334	denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11;
 335
 336	return denorm_addr;
 337}
 338
 339static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
 340{
 341	/* Insert coh_st_id[1:0] at bit 8. */
 342	denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8;
 343
 344	/*
 345	 * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3.
 346	 * Grab both because bit 3 will be clear if unused.
 347	 */
 348	denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10;
 349
 350	return denorm_addr;
 351}
 352
 353static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id)
 354{
 355	switch (ctx->map.intlv_mode) {
 356	case NOHASH_2CHAN:
 357	case NOHASH_4CHAN:
 358	case NOHASH_8CHAN:
 359	case NOHASH_16CHAN:
 360	case NOHASH_32CHAN:
 361	case MI3_HASH_8CHAN:
 362	case MI3_HASH_16CHAN:
 363	case MI3_HASH_32CHAN:
 364	case DF2_2CHAN_HASH:
 365		return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id);
 366
 367	case DF3_COD4_2CHAN_HASH:
 368	case DF3_COD2_4CHAN_HASH:
 369	case DF3_COD1_8CHAN_HASH:
 370	case DF4_NPS4_2CHAN_HASH:
 371	case DF4_NPS2_4CHAN_HASH:
 372	case DF4_NPS1_8CHAN_HASH:
 373	case DF4p5_NPS4_2CHAN_1K_HASH:
 374	case DF4p5_NPS4_2CHAN_2K_HASH:
 375	case DF4p5_NPS2_4CHAN_2K_HASH:
 376	case DF4p5_NPS1_8CHAN_2K_HASH:
 377	case DF4p5_NPS1_16CHAN_2K_HASH:
 378		return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id);
 379
 380	case DF4p5_NPS2_4CHAN_1K_HASH:
 381	case DF4p5_NPS1_8CHAN_1K_HASH:
 382	case DF4p5_NPS1_16CHAN_1K_HASH:
 383		return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id);
 384
 385	default:
 386		atl_debug_on_bad_intlv_mode(ctx);
 387		return ~0ULL;
 388	}
 389}
 390
 391/*
 392 * MI300 systems have a fixed, hardware-defined physical-to-logical
 393 * Coherent Station mapping. The Remap registers are not used.
 394 */
 395static const u16 phy_to_log_coh_st_map_mi300[] = {
 396	12, 13, 14, 15,
 397	 8,  9, 10, 11,
 398	 4,  5,  6,  7,
 399	 0,  1,  2,  3,
 400	28, 29, 30, 31,
 401	24, 25, 26, 27,
 402	20, 21, 22, 23,
 403	16, 17, 18, 19,
 404};
 405
 406static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx)
 407{
 408	if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) {
 409		atl_debug(ctx, "Instance ID out of range");
 410		return ~0;
 411	}
 412
 413	return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift);
 414}
 415
 416static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
 417{
 418	u16 component_id, log_fabric_id;
 419
 420	/* Start with the physical COH_ST Fabric ID. */
 421	u16 phys_fabric_id = ctx->coh_st_fabric_id;
 422
 423	if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
 424		return get_logical_coh_st_fabric_id_mi300(ctx);
 425
 426	/* Skip logical ID lookup if remapping is disabled. */
 427	if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) &&
 428	    ctx->map.intlv_mode != DF3_6CHAN)
 429		return phys_fabric_id;
 430
 431	/* Mask off the Node ID bits to get the "local" Component ID. */
 432	component_id = phys_fabric_id & df_cfg.component_id_mask;
 433
 434	/*
 435	 * Search the list of logical Component IDs for the one that
 436	 * matches this physical Component ID.
 437	 */
 438	for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) {
 439		if (ctx->map.remap_array[log_fabric_id] == component_id)
 440			break;
 441	}
 442
 443	if (log_fabric_id == MAX_COH_ST_CHANNELS)
 444		atl_debug(ctx, "COH_ST remap entry not found for 0x%x",
 445			  log_fabric_id);
 446
 447	/* Get the Node ID bits from the physical and apply to the logical. */
 448	return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
 449}
 450
 451static u16 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx,
 452							struct df4p5_denorm_ctx *denorm_ctx)
 453{
 454	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
 455	bool hash_pa8, hash_pa9, hash_pa12, hash_pa13;
 456	u64 cs_id = 0;
 457
 458	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K,  ctx->map.ctl);
 459	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M,   ctx->map.ctl);
 460	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G,   ctx->map.ctl);
 461	hash_ctl_1T	= FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
 462
 463	hash_pa8  = FIELD_GET(BIT_ULL(8),  denorm_ctx->current_spa);
 464	hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
 465	hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
 466	hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
 467	hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
 468	hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
 469
 470	hash_pa9  = FIELD_GET(BIT_ULL(9),  denorm_ctx->current_spa);
 471	hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
 472	hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
 473	hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
 474	hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
 475
 476	hash_pa12  = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
 477	hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
 478	hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
 479	hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
 480	hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
 481
 482	hash_pa13  = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
 483	hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
 484	hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
 485	hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
 486	hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
 487
 488	switch (ctx->map.intlv_mode) {
 489	case DF4p5_NPS0_24CHAN_1K_HASH:
 490		cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
 491		cs_id %= denorm_ctx->mod_value;
 492		cs_id <<= 2;
 493		cs_id |= (hash_pa9 | (hash_pa12 << 1));
 494		cs_id |= hash_pa8 << df_cfg.socket_id_shift;
 495		break;
 496
 497	case DF4p5_NPS0_24CHAN_2K_HASH:
 498		cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4;
 499		cs_id %= denorm_ctx->mod_value;
 500		cs_id <<= 2;
 501		cs_id |= (hash_pa12 | (hash_pa13 << 1));
 502		cs_id |= hash_pa8 << df_cfg.socket_id_shift;
 503		break;
 504
 505	case DF4p5_NPS1_12CHAN_1K_HASH:
 506		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
 507		cs_id %= denorm_ctx->mod_value;
 508		cs_id <<= 2;
 509		cs_id |= (hash_pa8 | (hash_pa9 << 1));
 510		break;
 511
 512	case DF4p5_NPS1_12CHAN_2K_HASH:
 513		cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
 514		cs_id %= denorm_ctx->mod_value;
 515		cs_id <<= 2;
 516		cs_id |= (hash_pa8 | (hash_pa12 << 1));
 517		break;
 518
 519	case DF4p5_NPS2_6CHAN_1K_HASH:
 520	case DF4p5_NPS1_10CHAN_1K_HASH:
 521		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
 522		cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1);
 523		cs_id %= denorm_ctx->mod_value;
 524		cs_id <<= 1;
 525		cs_id |= hash_pa8;
 526		break;
 527
 528	case DF4p5_NPS2_6CHAN_2K_HASH:
 529	case DF4p5_NPS1_10CHAN_2K_HASH:
 530		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
 531		cs_id %= denorm_ctx->mod_value;
 532		cs_id <<= 1;
 533		cs_id |= hash_pa8;
 534		break;
 535
 536	case DF4p5_NPS4_3CHAN_1K_HASH:
 537	case DF4p5_NPS2_5CHAN_1K_HASH:
 538		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
 539		cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa);
 540		cs_id %= denorm_ctx->mod_value;
 541		break;
 542
 543	case DF4p5_NPS4_3CHAN_2K_HASH:
 544	case DF4p5_NPS2_5CHAN_2K_HASH:
 545		cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
 546		cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1;
 547		cs_id %= denorm_ctx->mod_value;
 548		break;
 549
 550	default:
 551		atl_debug_on_bad_intlv_mode(ctx);
 552		return 0;
 553	}
 554
 555	if (cs_id > 0xffff) {
 556		atl_debug(ctx, "Translation error: Resulting cs_id larger than u16\n");
 557		return 0;
 558	}
 559
 560	return cs_id;
 561}
 562
 563static int denorm_addr_common(struct addr_ctx *ctx)
 564{
 565	u64 denorm_addr;
 566	u16 coh_st_id;
 567
 568	/*
 569	 * Convert the original physical COH_ST Fabric ID to a logical value.
 570	 * This is required for non-power-of-two and other interleaving modes.
 571	 */
 572	ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx);
 573
 574	denorm_addr = make_space_for_coh_st_id(ctx);
 575	coh_st_id = calculate_coh_st_id(ctx);
 576	ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id);
 577	return 0;
 578}
 579
 580static int denorm_addr_df3_6chan(struct addr_ctx *ctx)
 581{
 582	u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask;
 583	u8 total_intlv_bits = ctx->map.total_intlv_bits;
 584	u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos;
 585	u64 msb_intlv_bits, temp_addr_a, temp_addr_b;
 586	u8 np2_bits = ctx->map.np2_bits;
 587
 588	if (ctx->map.intlv_mode != DF3_6CHAN)
 589		return -EINVAL;
 590
 591	/*
 592	 * 'np2_bits' holds the number of bits needed to cover the
 593	 * amount of memory (rounded up) in this map using 64K chunks.
 594	 *
 595	 * Example:
 596	 * Total memory in map:			6GB
 597	 * Rounded up to next power-of-2:	8GB
 598	 * Number of 64K chunks:		0x20000
 599	 * np2_bits = log2(# of chunks):	17
 600	 *
 601	 * Get the two most-significant interleave bits from the
 602	 * input address based on the following:
 603	 *
 604	 * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits]
 605	 */
 606	low_bit = 14 + np2_bits - total_intlv_bits;
 607	msb_intlv_bits = ctx->ret_addr >> low_bit;
 608	msb_intlv_bits &= 0x3;
 609
 610	/*
 611	 * If MSB are 11b, then logical COH_ST ID is 6 or 7.
 612	 * Need to adjust based on the mod3 result.
 613	 */
 614	if (msb_intlv_bits == 3) {
 615		u8 addr_mod, phys_addr_msb, msb_coh_st_id;
 616
 617		/* Get the remaining interleave bits from the input address. */
 618		temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr;
 619		temp_addr_b >>= intlv_bit;
 620
 621		/* Calculate the logical COH_ST offset based on mod3. */
 622		addr_mod = temp_addr_b % 3;
 623
 624		/* Get COH_ST ID bits [2:1]. */
 625		msb_coh_st_id = (coh_st_id >> 1) & 0x3;
 626
 627		/* Get the bit that starts the physical address bits. */
 628		phys_addr_msb = (intlv_bit + np2_bits + 1);
 629		phys_addr_msb &= BIT(0);
 630		phys_addr_msb++;
 631		phys_addr_msb *= 3 - addr_mod + msb_coh_st_id;
 632		phys_addr_msb %= 3;
 633
 634		/* Move the physical address MSB to the correct place. */
 635		temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit);
 636
 637		/* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */
 638		coh_st_id &= BIT(0);
 639		coh_st_id |= GENMASK(2, 1);
 640	} else {
 641		temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr;
 642		temp_addr_b >>= intlv_bit;
 643	}
 644
 645	temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr;
 646	temp_addr_b <<= intlv_bit + total_intlv_bits;
 647
 648	ctx->ret_addr = temp_addr_a | temp_addr_b;
 649	ctx->ret_addr |= coh_st_id << intlv_bit;
 650	return 0;
 651}
 652
 653static int denorm_addr_df4_np2(struct addr_ctx *ctx)
 654{
 655	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
 656	u16 group, group_offset, log_coh_st_offset;
 657	unsigned int mod_value, shift_value;
 658	u16 mask = df_cfg.component_id_mask;
 659	u64 temp_addr_a, temp_addr_b;
 660	bool hash_pa8, hashed_bit;
 661
 662	switch (ctx->map.intlv_mode) {
 663	case DF4_NPS4_3CHAN_HASH:
 664		mod_value	= 3;
 665		shift_value	= 13;
 666		break;
 667	case DF4_NPS2_6CHAN_HASH:
 668		mod_value	= 3;
 669		shift_value	= 12;
 670		break;
 671	case DF4_NPS1_12CHAN_HASH:
 672		mod_value	= 3;
 673		shift_value	= 11;
 674		break;
 675	case DF4_NPS2_5CHAN_HASH:
 676		mod_value	= 5;
 677		shift_value	= 13;
 678		break;
 679	case DF4_NPS1_10CHAN_HASH:
 680		mod_value	= 5;
 681		shift_value	= 12;
 682		break;
 683	default:
 684		atl_debug_on_bad_intlv_mode(ctx);
 685		return -EINVAL;
 686	};
 687
 688	if (ctx->map.num_intlv_sockets == 1) {
 689		hash_pa8	= BIT_ULL(shift_value) & ctx->ret_addr;
 690		temp_addr_a	= remove_bits(shift_value, shift_value, ctx->ret_addr);
 691	} else {
 692		hash_pa8	= ctx->coh_st_fabric_id & df_cfg.socket_id_mask;
 693		temp_addr_a	= ctx->ret_addr;
 694	}
 695
 696	/* Make a gap for the real bit [8]. */
 697	temp_addr_a = expand_bits(8, 1, temp_addr_a);
 698
 699	/* Make an additional gap for bits [13:12], as appropriate.*/
 700	if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH ||
 701	    ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) {
 702		temp_addr_a = expand_bits(13, 1, temp_addr_a);
 703	} else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) {
 704		temp_addr_a = expand_bits(12, 2, temp_addr_a);
 705	}
 706
 707	/* Keep bits [13:0]. */
 708	temp_addr_a &= GENMASK_ULL(13, 0);
 709
 710	/* Get the appropriate high bits. */
 711	shift_value += 1 - ilog2(ctx->map.num_intlv_sockets);
 712	temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr;
 713	temp_addr_b >>= shift_value;
 714	temp_addr_b *= mod_value;
 715
 716	/*
 717	 * Coherent Stations are divided into groups.
 718	 *
 719	 * Multiples of 3 (mod3) are divided into quadrants.
 720	 * e.g. NP4_3CHAN ->	[0, 1, 2] [6, 7, 8]
 721	 *			[3, 4, 5] [9, 10, 11]
 722	 *
 723	 * Multiples of 5 (mod5) are divided into sides.
 724	 * e.g. NP2_5CHAN ->	[0, 1, 2, 3, 4] [5, 6, 7, 8, 9]
 725	 */
 726
 727	 /*
 728	  * Calculate the logical offset for the COH_ST within its DRAM Address map.
 729	  * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then
 730	  *	 log_coh_st_offset = 8 - 5 = 3
 731	  */
 732	log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask);
 733
 734	/*
 735	 * Figure out the group number.
 736	 *
 737	 * Following above example,
 738	 * log_coh_st_offset = 3
 739	 * mod_value = 5
 740	 * group = 3 / 5 = 0
 741	 */
 742	group = log_coh_st_offset / mod_value;
 743
 744	/*
 745	 * Figure out the offset within the group.
 746	 *
 747	 * Following above example,
 748	 * log_coh_st_offset = 3
 749	 * mod_value = 5
 750	 * group_offset = 3 % 5 = 3
 751	 */
 752	group_offset = log_coh_st_offset % mod_value;
 753
 754	/* Adjust group_offset if the hashed bit [8] is set. */
 755	if (hash_pa8) {
 756		if (!group_offset)
 757			group_offset = mod_value - 1;
 758		else
 759			group_offset--;
 760	}
 761
 762	/* Add in the group offset to the high bits. */
 763	temp_addr_b += group_offset;
 764
 765	/* Shift the high bits to the proper starting position. */
 766	temp_addr_b <<= 14;
 767
 768	/* Combine the high and low bits together. */
 769	ctx->ret_addr = temp_addr_a | temp_addr_b;
 770
 771	/* Account for hashing here instead of in dehash_address(). */
 772	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
 773	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
 774	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
 775
 776	hashed_bit = !!hash_pa8;
 777	hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr);
 778	hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k;
 779	hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M;
 780	hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G;
 781
 782	ctx->ret_addr |= hashed_bit << 8;
 783
 784	/* Done for 3 and 5 channel. */
 785	if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH ||
 786	    ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH)
 787		return 0;
 788
 789	/* Select the proper 'group' bit to use for Bit 13. */
 790	if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH)
 791		hashed_bit = !!(group & BIT(1));
 792	else
 793		hashed_bit = group & BIT(0);
 794
 795	hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k;
 796	hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M;
 797	hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G;
 798
 799	ctx->ret_addr |= hashed_bit << 13;
 800
 801	/* Done for 6 and 10 channel. */
 802	if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH)
 803		return 0;
 804
 805	hashed_bit = group & BIT(0);
 806	hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k;
 807	hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M;
 808	hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G;
 809
 810	ctx->ret_addr |= hashed_bit << 12;
 811	return 0;
 812}
 813
 814static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx,
 815				    u64 addr)
 816{
 817	u64 temp_addr_a = 0, temp_addr_b = 0;
 818
 819	switch (ctx->map.intlv_mode) {
 820	case DF4p5_NPS0_24CHAN_1K_HASH:
 821	case DF4p5_NPS1_12CHAN_1K_HASH:
 822	case DF4p5_NPS2_6CHAN_1K_HASH:
 823	case DF4p5_NPS4_3CHAN_1K_HASH:
 824	case DF4p5_NPS1_10CHAN_1K_HASH:
 825	case DF4p5_NPS2_5CHAN_1K_HASH:
 826		temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8;
 827		break;
 828
 829	case DF4p5_NPS0_24CHAN_2K_HASH:
 830	case DF4p5_NPS1_12CHAN_2K_HASH:
 831	case DF4p5_NPS2_6CHAN_2K_HASH:
 832	case DF4p5_NPS4_3CHAN_2K_HASH:
 833	case DF4p5_NPS1_10CHAN_2K_HASH:
 834	case DF4p5_NPS2_5CHAN_2K_HASH:
 835		temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8;
 836		break;
 837
 838	default:
 839		atl_debug_on_bad_intlv_mode(ctx);
 840		return 0;
 841	}
 842
 843	switch (ctx->map.intlv_mode) {
 844	case DF4p5_NPS0_24CHAN_1K_HASH:
 845		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
 846		temp_addr_b <<= 10;
 847		break;
 848
 849	case DF4p5_NPS0_24CHAN_2K_HASH:
 850		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value;
 851		temp_addr_b <<= 11;
 852		break;
 853
 854	case DF4p5_NPS1_12CHAN_1K_HASH:
 855		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
 856		temp_addr_b <<= 10;
 857		break;
 858
 859	case DF4p5_NPS1_12CHAN_2K_HASH:
 860		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
 861		temp_addr_b <<= 11;
 862		break;
 863
 864	case DF4p5_NPS2_6CHAN_1K_HASH:
 865	case DF4p5_NPS1_10CHAN_1K_HASH:
 866		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
 867		temp_addr_b |= FIELD_GET(BIT_ULL(9), addr);
 868		temp_addr_b /= denorm_ctx->mod_value;
 869		temp_addr_b <<= 10;
 870		break;
 871
 872	case DF4p5_NPS2_6CHAN_2K_HASH:
 873	case DF4p5_NPS1_10CHAN_2K_HASH:
 874		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
 875		temp_addr_b <<= 11;
 876		break;
 877
 878	case DF4p5_NPS4_3CHAN_1K_HASH:
 879	case DF4p5_NPS2_5CHAN_1K_HASH:
 880		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2;
 881		temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr);
 882		temp_addr_b /= denorm_ctx->mod_value;
 883		temp_addr_b <<= 10;
 884		break;
 885
 886	case DF4p5_NPS4_3CHAN_2K_HASH:
 887	case DF4p5_NPS2_5CHAN_2K_HASH:
 888		temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
 889		temp_addr_b |= FIELD_GET(BIT_ULL(8), addr);
 890		temp_addr_b /= denorm_ctx->mod_value;
 891		temp_addr_b <<= 11;
 892		break;
 893
 894	default:
 895		atl_debug_on_bad_intlv_mode(ctx);
 896		return 0;
 897	}
 898
 899	return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b;
 900}
 901
 902static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx,
 903					      struct df4p5_denorm_ctx *denorm_ctx)
 904{
 905	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit;
 906
 907	if (!denorm_ctx->rehash_vector)
 908		return;
 909
 910	hash_ctl_64k	= FIELD_GET(DF4_HASH_CTL_64K,  ctx->map.ctl);
 911	hash_ctl_2M	= FIELD_GET(DF4_HASH_CTL_2M,   ctx->map.ctl);
 912	hash_ctl_1G	= FIELD_GET(DF4_HASH_CTL_1G,   ctx->map.ctl);
 913	hash_ctl_1T	= FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
 914
 915	if (denorm_ctx->rehash_vector & BIT_ULL(8)) {
 916		hashed_bit  = FIELD_GET(BIT_ULL(8),  denorm_ctx->current_spa);
 917		hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
 918		hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
 919		hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
 920		hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
 921		hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
 922
 923		if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit)
 924			denorm_ctx->current_spa ^= BIT_ULL(8);
 925	}
 926
 927	if (denorm_ctx->rehash_vector & BIT_ULL(9)) {
 928		hashed_bit  = FIELD_GET(BIT_ULL(9),  denorm_ctx->current_spa);
 929		hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
 930		hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
 931		hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
 932		hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
 933
 934		if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit)
 935			denorm_ctx->current_spa ^= BIT_ULL(9);
 936	}
 937
 938	if (denorm_ctx->rehash_vector & BIT_ULL(12)) {
 939		hashed_bit  = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
 940		hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
 941		hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
 942		hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
 943		hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
 944
 945		if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit)
 946			denorm_ctx->current_spa ^= BIT_ULL(12);
 947	}
 948
 949	if (denorm_ctx->rehash_vector & BIT_ULL(13)) {
 950		hashed_bit  = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
 951		hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
 952		hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
 953		hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
 954		hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
 955
 956		if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit)
 957			denorm_ctx->current_spa ^= BIT_ULL(13);
 958	}
 959}
 960
 961static bool match_logical_coh_st_fabric_id(struct addr_ctx *ctx,
 962					   struct df4p5_denorm_ctx *denorm_ctx)
 963{
 964	/*
 965	 * The logical CS fabric ID of the permutation must be calculated from the
 966	 * current SPA with the base and with the MMIO hole.
 967	 */
 968	u16 id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx);
 969
 970	atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n");
 971	atl_debug(ctx, "  calculated fabric id         = 0x%x\n", id);
 972	atl_debug(ctx, "  expected fabric id           = 0x%x\n", denorm_ctx->coh_st_fabric_id);
 973
 974	return denorm_ctx->coh_st_fabric_id == id;
 975}
 976
 977static bool match_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
 978{
 979	u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa);
 980
 981	/*
 982	 * The normalized address must be calculated with the current SPA without
 983	 * the base and without the MMIO hole.
 984	 */
 985	addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr);
 986
 987	atl_debug(ctx, "Checking calculated normalized address:\n");
 988	atl_debug(ctx, "  calculated normalized addr = 0x%016llx\n", addr);
 989	atl_debug(ctx, "  expected normalized addr   = 0x%016llx\n", ctx->ret_addr);
 990
 991	return addr == ctx->ret_addr;
 992}
 993
 994static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
 995{
 996	u64 test_perm, temp_addr, denorm_addr, num_perms;
 997	unsigned int dropped_remainder;
 998
 999	denorm_ctx->div_addr *= denorm_ctx->mod_value;
1000
1001	/*
1002	 * The high order bits of num_permutations represent the permutations
1003	 * of the dropped remainder. This will be either 0-3 or 0-5 depending
1004	 * on the interleave mode. The low order bits represent the
1005	 * permutations of other "lost" bits which will be any combination of
1006	 * 1, 2, or 3 bits depending on the interleave mode.
1007	 */
1008	num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift;
1009
1010	for (test_perm = 0; test_perm < num_perms; test_perm++) {
1011		denorm_addr = denorm_ctx->base_denorm_addr;
1012		dropped_remainder = test_perm >> denorm_ctx->perm_shift;
1013		temp_addr = denorm_ctx->div_addr + dropped_remainder;
1014
1015		switch (ctx->map.intlv_mode) {
1016		case DF4p5_NPS0_24CHAN_2K_HASH:
1017			denorm_addr |= temp_addr << 14;
1018			break;
1019
1020		case DF4p5_NPS0_24CHAN_1K_HASH:
1021		case DF4p5_NPS1_12CHAN_2K_HASH:
1022			denorm_addr |= temp_addr << 13;
1023			break;
1024
1025		case DF4p5_NPS1_12CHAN_1K_HASH:
1026		case DF4p5_NPS2_6CHAN_2K_HASH:
1027		case DF4p5_NPS1_10CHAN_2K_HASH:
1028			denorm_addr |= temp_addr << 12;
1029			break;
1030
1031		case DF4p5_NPS2_6CHAN_1K_HASH:
1032		case DF4p5_NPS1_10CHAN_1K_HASH:
1033			denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9;
1034			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
1035			break;
1036
1037		case DF4p5_NPS4_3CHAN_1K_HASH:
1038		case DF4p5_NPS2_5CHAN_1K_HASH:
1039			denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8;
1040			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12;
1041			break;
1042
1043		case DF4p5_NPS4_3CHAN_2K_HASH:
1044		case DF4p5_NPS2_5CHAN_2K_HASH:
1045			denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8;
1046			denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
1047			break;
1048
1049		default:
1050			atl_debug_on_bad_intlv_mode(ctx);
1051			return -EINVAL;
1052		}
1053
1054		switch (ctx->map.intlv_mode) {
1055		case DF4p5_NPS0_24CHAN_1K_HASH:
1056			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1057			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
1058			denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12;
1059			break;
1060
1061		case DF4p5_NPS0_24CHAN_2K_HASH:
1062			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1063			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
1064			denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13;
1065			break;
1066
1067		case DF4p5_NPS1_12CHAN_2K_HASH:
1068			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1069			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
1070			break;
1071
1072		case DF4p5_NPS1_12CHAN_1K_HASH:
1073		case DF4p5_NPS4_3CHAN_1K_HASH:
1074		case DF4p5_NPS2_5CHAN_1K_HASH:
1075			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1076			denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
1077			break;
1078
1079		case DF4p5_NPS2_6CHAN_1K_HASH:
1080		case DF4p5_NPS2_6CHAN_2K_HASH:
1081		case DF4p5_NPS4_3CHAN_2K_HASH:
1082		case DF4p5_NPS1_10CHAN_1K_HASH:
1083		case DF4p5_NPS1_10CHAN_2K_HASH:
1084		case DF4p5_NPS2_5CHAN_2K_HASH:
1085			denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
1086			break;
1087
1088		default:
1089			atl_debug_on_bad_intlv_mode(ctx);
1090			return -EINVAL;
1091		}
1092
1093		denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr);
1094		recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx);
1095
1096		atl_debug(ctx, "Checking potential system physical address 0x%016llx\n",
1097			  denorm_ctx->current_spa);
1098
1099		if (!match_logical_coh_st_fabric_id(ctx, denorm_ctx))
1100			continue;
1101
1102		if (!match_norm_addr(ctx, denorm_ctx))
1103			continue;
1104
1105		if (denorm_ctx->resolved_spa == INVALID_SPA ||
1106		    denorm_ctx->current_spa > denorm_ctx->resolved_spa)
1107			denorm_ctx->resolved_spa = denorm_ctx->current_spa;
1108	}
1109
1110	if (denorm_ctx->resolved_spa == INVALID_SPA) {
1111		atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n",
1112			  ctx->ret_addr);
1113		return -EINVAL;
1114	}
1115
1116	/* Return the resolved SPA without the base, without the MMIO hole */
1117	ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa);
1118
1119	return 0;
1120}
1121
1122static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
1123{
1124	denorm_ctx->current_spa = INVALID_SPA;
1125	denorm_ctx->resolved_spa = INVALID_SPA;
1126
1127	switch (ctx->map.intlv_mode) {
1128	case DF4p5_NPS0_24CHAN_1K_HASH:
1129		denorm_ctx->perm_shift    = 3;
1130		denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12);
1131		break;
1132
1133	case DF4p5_NPS0_24CHAN_2K_HASH:
1134		denorm_ctx->perm_shift    = 3;
1135		denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13);
1136		break;
1137
1138	case DF4p5_NPS1_12CHAN_1K_HASH:
1139		denorm_ctx->perm_shift    = 2;
1140		denorm_ctx->rehash_vector = BIT(8);
1141		break;
1142
1143	case DF4p5_NPS1_12CHAN_2K_HASH:
1144		denorm_ctx->perm_shift    = 2;
1145		denorm_ctx->rehash_vector = BIT(8) | BIT(12);
1146		break;
1147
1148	case DF4p5_NPS2_6CHAN_1K_HASH:
1149	case DF4p5_NPS2_6CHAN_2K_HASH:
1150	case DF4p5_NPS1_10CHAN_1K_HASH:
1151	case DF4p5_NPS1_10CHAN_2K_HASH:
1152		denorm_ctx->perm_shift    = 1;
1153		denorm_ctx->rehash_vector = BIT(8);
1154		break;
1155
1156	case DF4p5_NPS4_3CHAN_1K_HASH:
1157	case DF4p5_NPS2_5CHAN_1K_HASH:
1158		denorm_ctx->perm_shift    = 2;
1159		denorm_ctx->rehash_vector = 0;
1160		break;
1161
1162	case DF4p5_NPS4_3CHAN_2K_HASH:
1163	case DF4p5_NPS2_5CHAN_2K_HASH:
1164		denorm_ctx->perm_shift    = 1;
1165		denorm_ctx->rehash_vector = 0;
1166		break;
1167
1168	default:
1169		atl_debug_on_bad_intlv_mode(ctx);
1170		return -EINVAL;
1171	}
1172
1173	denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr);
1174
1175	switch (ctx->map.intlv_mode) {
1176	case DF4p5_NPS0_24CHAN_1K_HASH:
1177	case DF4p5_NPS1_12CHAN_1K_HASH:
1178	case DF4p5_NPS2_6CHAN_1K_HASH:
1179	case DF4p5_NPS4_3CHAN_1K_HASH:
1180	case DF4p5_NPS1_10CHAN_1K_HASH:
1181	case DF4p5_NPS2_5CHAN_1K_HASH:
1182		denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10;
1183		denorm_ctx->div_addr          = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr);
1184		break;
1185
1186	case DF4p5_NPS0_24CHAN_2K_HASH:
1187	case DF4p5_NPS1_12CHAN_2K_HASH:
1188	case DF4p5_NPS2_6CHAN_2K_HASH:
1189	case DF4p5_NPS4_3CHAN_2K_HASH:
1190	case DF4p5_NPS1_10CHAN_2K_HASH:
1191	case DF4p5_NPS2_5CHAN_2K_HASH:
1192		denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9;
1193		denorm_ctx->div_addr          = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr);
1194		break;
1195
1196	default:
1197		atl_debug_on_bad_intlv_mode(ctx);
1198		return -EINVAL;
1199	}
1200
1201	if (ctx->map.num_intlv_chan % 3 == 0)
1202		denorm_ctx->mod_value = 3;
1203	else
1204		denorm_ctx->mod_value = 5;
1205
1206	denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx);
1207
1208	atl_debug(ctx, "Initialized df4p5_denorm_ctx:");
1209	atl_debug(ctx, "  mod_value         = %d", denorm_ctx->mod_value);
1210	atl_debug(ctx, "  perm_shift        = %d", denorm_ctx->perm_shift);
1211	atl_debug(ctx, "  rehash_vector     = 0x%x", denorm_ctx->rehash_vector);
1212	atl_debug(ctx, "  base_denorm_addr  = 0x%016llx", denorm_ctx->base_denorm_addr);
1213	atl_debug(ctx, "  div_addr          = 0x%016llx", denorm_ctx->div_addr);
1214	atl_debug(ctx, "  coh_st_fabric_id  = 0x%x", denorm_ctx->coh_st_fabric_id);
1215
1216	return 0;
1217}
1218
1219/*
1220 * For DF 4.5, parts of the physical address can be directly pulled from the
1221 * normalized address. The exact bits will differ between interleave modes, but
1222 * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of
1223 * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system
1224 * physical address.
1225 *
1226 * In this case, there is no way to reconstruct the missing bits (bits 8, 9,
1227 * and 12) from the normalized address. Additionally, when bits [63:13] are
1228 * divided by 3, the remainder is dropped. Determine the proper combination of
1229 * "lost" bits and dropped remainder by iterating through each possible
1230 * permutation of these bits and then normalizing the generated system physical
1231 * addresses. If the normalized address matches the address we are trying to
1232 * translate, then we have found the correct permutation of bits.
1233 */
1234static int denorm_addr_df4p5_np2(struct addr_ctx *ctx)
1235{
1236	struct df4p5_denorm_ctx denorm_ctx;
1237	int ret = 0;
1238
1239	memset(&denorm_ctx, 0, sizeof(denorm_ctx));
1240
1241	atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr);
1242
1243	ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx);
1244	if (ret)
1245		return ret;
1246
1247	return check_permutations(ctx, &denorm_ctx);
1248}
1249
1250int denormalize_address(struct addr_ctx *ctx)
1251{
1252	switch (ctx->map.intlv_mode) {
1253	case NONE:
1254		return 0;
1255	case DF4_NPS4_3CHAN_HASH:
1256	case DF4_NPS2_6CHAN_HASH:
1257	case DF4_NPS1_12CHAN_HASH:
1258	case DF4_NPS2_5CHAN_HASH:
1259	case DF4_NPS1_10CHAN_HASH:
1260		return denorm_addr_df4_np2(ctx);
1261	case DF4p5_NPS0_24CHAN_1K_HASH:
1262	case DF4p5_NPS4_3CHAN_1K_HASH:
1263	case DF4p5_NPS2_6CHAN_1K_HASH:
1264	case DF4p5_NPS1_12CHAN_1K_HASH:
1265	case DF4p5_NPS2_5CHAN_1K_HASH:
1266	case DF4p5_NPS1_10CHAN_1K_HASH:
1267	case DF4p5_NPS4_3CHAN_2K_HASH:
1268	case DF4p5_NPS2_6CHAN_2K_HASH:
1269	case DF4p5_NPS1_12CHAN_2K_HASH:
1270	case DF4p5_NPS0_24CHAN_2K_HASH:
1271	case DF4p5_NPS2_5CHAN_2K_HASH:
1272	case DF4p5_NPS1_10CHAN_2K_HASH:
1273		return denorm_addr_df4p5_np2(ctx);
1274	case DF3_6CHAN:
1275		return denorm_addr_df3_6chan(ctx);
1276	default:
1277		return denorm_addr_common(ctx);
1278	}
1279}