   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "soc15.h"
  33#include "soc15d.h"
  34#include "amdgpu_atomfirmware.h"
  35#include "amdgpu_pm.h"
  36
  37#include "gc/gc_9_0_offset.h"
  38#include "gc/gc_9_0_sh_mask.h"
  39
  40#include "vega10_enum.h"
  41
  42#include "soc15_common.h"
  43#include "clearstate_gfx9.h"
  44#include "v9_structs.h"
  45
  46#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
  47
  48#include "amdgpu_ras.h"
  49
  50#include "gfx_v9_4.h"
  51#include "gfx_v9_0.h"
  52#include "gfx_v9_4_2.h"
  53
  54#include "asic_reg/pwr/pwr_10_0_offset.h"
  55#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
  56#include "asic_reg/gc/gc_9_0_default.h"
  57
  58#define GFX9_NUM_GFX_RINGS     1
  59#define GFX9_MEC_HPD_SIZE 4096
  60#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  61#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
  62
  63#define mmGCEA_PROBE_MAP                        0x070c
  64#define mmGCEA_PROBE_MAP_BASE_IDX               0
  65
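/*
 * The MODULE_FIRMWARE() entries below record every microcode image this GFX
 * block may request at runtime (CE, PFP, ME, MEC, MEC2 and RLC, one set per
 * ASIC), so userspace tooling such as modinfo and initramfs generators can
 * discover and bundle the files before the driver calls request_firmware().
 */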
  66MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
  67MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
  68MODULE_FIRMWARE("amdgpu/vega10_me.bin");
  69MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
  70MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
  71MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
  72
  73MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
  74MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
  75MODULE_FIRMWARE("amdgpu/vega12_me.bin");
  76MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
  77MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
  78MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
  79
  80MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
  81MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
  82MODULE_FIRMWARE("amdgpu/vega20_me.bin");
  83MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
  84MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
  85MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
  86
  87MODULE_FIRMWARE("amdgpu/raven_ce.bin");
  88MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
  89MODULE_FIRMWARE("amdgpu/raven_me.bin");
  90MODULE_FIRMWARE("amdgpu/raven_mec.bin");
  91MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
  92MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
  93
  94MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
  95MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
  96MODULE_FIRMWARE("amdgpu/picasso_me.bin");
  97MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
  98MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
  99MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
 100MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
 101
 102MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
 103MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
 104MODULE_FIRMWARE("amdgpu/raven2_me.bin");
 105MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
 106MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
 107MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
 108MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
 109
 110MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
 111MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
 112
 113MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
 114MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
 115MODULE_FIRMWARE("amdgpu/renoir_me.bin");
 116MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
 117MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
 118
 119MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
 120MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
 121MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
 122MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
 123MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
 124MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
 125
 126MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
 127MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
 128MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
 129
 130#define mmTCP_CHAN_STEER_0_ARCT								0x0b03
 131#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
 132#define mmTCP_CHAN_STEER_1_ARCT								0x0b04
 133#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
 134#define mmTCP_CHAN_STEER_2_ARCT								0x0b09
 135#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
 136#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
 137#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
 138#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
 139#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
 140#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
 141#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
 142
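/*
 * RAS (Reliability, Availability, Serviceability) sub-block indices for GFX.
 * The enum below mirrors the numbering the RAS TA (trusted application)
 * expects when injecting an error into a specific sub-block; the
 * *_INDEX_START/*_INDEX_END markers bracket the range belonging to each
 * hardware unit (CPC, CPF, CPG, GDS, SQ, SQC, TA, TCC, TCP, TD, EA, UTC...).
 */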
 143enum ta_ras_gfx_subblock {
 144	/*CPC*/
 145	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
 146	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
 147	TA_RAS_BLOCK__GFX_CPC_UCODE,
 148	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
 149	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
 150	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
 151	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
 152	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
 153	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 154	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 155	/* CPF*/
 156	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 157	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 158	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
 159	TA_RAS_BLOCK__GFX_CPF_TAG,
 160	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
 161	/* CPG*/
 162	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 163	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 164	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
 165	TA_RAS_BLOCK__GFX_CPG_TAG,
 166	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
 167	/* GDS*/
 168	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 169	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 170	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
 171	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
 172	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
 173	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 174	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 175	/* SPI*/
 176	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
 177	/* SQ*/
 178	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 179	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 180	TA_RAS_BLOCK__GFX_SQ_LDS_D,
 181	TA_RAS_BLOCK__GFX_SQ_LDS_I,
 182	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
 183	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
 184	/* SQC (3 ranges)*/
 185	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 186	/* SQC range 0*/
 187	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 188	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
 189		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
 190	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
 191	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
 192	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
 193	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
 194	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
 195	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 196	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
 197		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 198	/* SQC range 1*/
 199	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 200	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
 201		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 202	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
 203	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
 204	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
 205	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
 206	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
 207	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
 208	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
 209	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 210	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
 211		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 212	/* SQC range 2*/
 213	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 214	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
 215		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 216	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
 217	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
 218	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
 219	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
 220	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
 221	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
 222	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
 223	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 224	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
 225		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 226	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
 227	/* TA*/
 228	TA_RAS_BLOCK__GFX_TA_INDEX_START,
 229	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
 230	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
 231	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
 232	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
 233	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 234	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 235	/* TCA*/
 236	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 237	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 238	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 239	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 240	/* TCC (5 sub-ranges)*/
 241	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 242	/* TCC range 0*/
 243	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 244	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
 245	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
 246	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
 247	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
 248	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
 249	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
 250	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
 251	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 252	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 253	/* TCC range 1*/
 254	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 255	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 256	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 257	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
 258		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 259	/* TCC range 2*/
 260	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 261	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 262	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
 263	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
 264	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
 265	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
 266	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
 267	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
 268	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 269	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
 270		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 271	/* TCC range 3*/
 272	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 273	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 274	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 275	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
 276		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 277	/* TCC range 4*/
 278	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 279	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
 280		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 281	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 282	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
 283		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 284	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
 285	/* TCI*/
 286	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
 287	/* TCP*/
 288	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 289	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 290	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
 291	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
 292	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
 293	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
 294	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
 295	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 296	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 297	/* TD*/
 298	TA_RAS_BLOCK__GFX_TD_INDEX_START,
 299	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
 300	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
 301	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 302	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 303	/* EA (3 sub-ranges)*/
 304	TA_RAS_BLOCK__GFX_EA_INDEX_START,
 305	/* EA range 0*/
 306	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
 307	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
 308	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
 309	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
 310	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
 311	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
 312	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
 313	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
 314	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 315	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 316	/* EA range 1*/
 317	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 318	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 319	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
 320	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
 321	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
 322	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
 323	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
 324	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 325	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 326	/* EA range 2*/
 327	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 328	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 329	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
 330	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
 331	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 332	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 333	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
 334	/* UTC VM L2 bank*/
 335	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
 336	/* UTC VM walker*/
 337	TA_RAS_BLOCK__UTC_VML2_WALKER,
 338	/* UTC ATC L2 2MB cache*/
 339	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
 340	/* UTC ATC L2 4KB cache*/
 341	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
 342	TA_RAS_BLOCK__GFX_MAX
 343};
 344
 345struct ras_gfx_subblock {
 346	unsigned char *name;
 347	int ta_subblock;
 348	int hw_supported_error_type;
 349	int sw_supported_error_type;
 350};
 351
 352#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
 353	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
 354		#subblock,                                                     \
 355		TA_RAS_BLOCK__##subblock,                                      \
 356		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
 357		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
 358	}
 359
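/*
 * Worked example, derived from the macro above, for the first entry of the
 * table below:
 *
 *   AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1)
 *
 * expands to a designated initializer at [AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH]
 * with name "GFX_CPC_SCRATCH", ta_subblock TA_RAS_BLOCK__GFX_CPC_SCRATCH,
 * hw_supported_error_type = 0 | (1 << 1) | (1 << 2) | (1 << 3) = 0xe and
 * sw_supported_error_type = (1 << 1) | (0 << 3) | 0 | (1 << 2) = 0x6; each of
 * the eight 0/1 arguments is one supported-error-type flag packed into the
 * two bitmasks.
 */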
 360static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
 361	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
 362	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
 363	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 364	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 365	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 366	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 367	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 368	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 369	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 370	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 371	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
 372	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
 373	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
 374	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
 375	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 376	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
 377	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
 378			     0),
 379	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
 380			     0),
 381	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 382	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 383	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 384	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
 385	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
 386	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 387	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
 388	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 389			     0, 0),
 390	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 391			     0),
 392	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 393			     0, 0),
 394	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
 395			     0),
 396	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 397			     0, 0),
 398	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 399			     0),
 400	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 401			     1),
 402	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 403			     0, 0, 0),
 404	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 405			     0),
 406	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 407			     0),
 408	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 409			     0),
 410	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 411			     0),
 412	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 413			     0),
 414	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 415			     0, 0),
 416	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 417			     0),
 418	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 419			     0),
 420	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 421			     0, 0, 0),
 422	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 423			     0),
 424	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 425			     0),
 426	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 427			     0),
 428	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 429			     0),
 430	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 431			     0),
 432	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 433			     0, 0),
 434	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 435			     0),
 436	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
 437	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 438	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 439	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 440	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 441	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
 442	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 443	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
 444	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
 445			     1),
 446	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
 447			     1),
 448	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
 449			     1),
 450	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
 451			     0),
 452	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
 453			     0),
 454	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 455	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 456	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
 457	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
 458	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
 459	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
 460	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 461	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
 462	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
 463	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 464	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
 465	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
 466			     0),
 467	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 468	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
 469			     0),
 470	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
 471			     0, 0),
 472	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
 473			     0),
 474	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 475	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
 476	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
 477	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 478	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 479	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 480	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
 481	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
 482	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
 483	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
 484	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 485	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
 486	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 487	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 488	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 489	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 490	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 491	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 492	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 493	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 494	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 495	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 496	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 497	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 498	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 499	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 500	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 501	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 502	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 503	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 504	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
 505	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
 506	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
 507	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
 508};
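/*
 * The table above is indexed by the AMDGPU_RAS_BLOCK__GFX_* enumerators, so
 * ras_gfx_subblocks[subblock] yields the matching RAS TA sub-block id plus
 * the bitmasks of error types that hardware and software support for it;
 * gfx_v9_0_ras_error_inject() consults it before triggering an injection.
 */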
 509
 510static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 511{
 512	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 513	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
 514	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 515	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 516	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 517	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 518	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 519	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 520	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 521	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
 522	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
 523	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 524	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 525	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 526	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 527	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 528	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
 529	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
 530	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
 531	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 532};
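/*
 * Each SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) entry in
 * these golden-settings tables is consumed by
 * soc15_program_register_sequence(), which (roughly) reads the register,
 * clears the bits in and_mask, ORs in (or_mask & and_mask) and writes the
 * result back; an and_mask of 0xffffffff means or_mask is written verbatim.
 * Only the masked fields are therefore moved away from hardware defaults.
 */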
 533
 534static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 535{
 536	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
 537	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 538	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 539	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 540	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 541	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 542	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
 543	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 544	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
 545	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 546	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 547	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 548	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 549	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 550	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 551	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
 552	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
 553	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 554};
 555
 556static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
 557{
 558	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
 559	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 560	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 561	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
 562	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
 563	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
 564	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
 565	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
 566	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
 567	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
 568	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
 569};
 570
 571static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 572{
 573	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 574	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 575	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 576	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 577	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 578	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 579	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 580	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 581	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 582	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 583	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 584	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 585	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 586	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 587	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 588	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 589	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 590	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 591	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 592	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
 593	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
 594	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
 595	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
 596	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 597};
 598
 599static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
 600{
 601	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 602	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
 603	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
 604	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
 605	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
 606	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 607	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
 608};
 609
 610static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
 611{
 612	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
 613	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 614	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 615	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
 616	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
 617	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
 618	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
 619	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
 620	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 621	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 622	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
 623	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
 624	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
 625	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 626	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 627	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 628	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
 629	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 630	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 631};
 632
 633static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
 634{
 635	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 636	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 637	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 638	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
 639	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
 640	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 641	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 642	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 643	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 644	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 645	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 646	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
 647};
 648
 649static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 650{
 651	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
 652	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
 653	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 654};
 655
 656static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
 657{
 658	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 659	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 660	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 661	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 662	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 663	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 664	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 665	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 666	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 667	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 668	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 669	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 670	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 671	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 672	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 673	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 674};
 675
 676static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 677{
 678	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
 679	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 680	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 681	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
 682	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
 683	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 684	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
 685	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 686	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
 687	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 688	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
 689	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
 690	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 691};
 692
 693static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
 694{
 695	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 696	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
 697	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
 698	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
 699	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
 700	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
 701	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
 702	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
 703	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
 704	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
 705	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
 706};
 707
 708static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
 709	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
 710	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
 711};
 712
 713static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
 714{
 715	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 716	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 717	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 718	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 719	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 720	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 721	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 722	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 723};
 724
 725static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
 726{
 727	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 728	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 729	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 730	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 731	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 732	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 733	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 734	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 735};
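/*
 * The two tables above hold offsets relative to the _0 register of each
 * bank, so index i selects the RLC_SRM_INDEX_CNTL_ADDR_i/DATA_i pair.  A
 * minimal usage sketch (illustrative only; addr[]/data[] are placeholder
 * arrays and the real call sites may differ):
 *
 *   for (i = 0; i < ARRAY_SIZE(GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS); i++) {
 *       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) +
 *              GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], addr[i]);
 *       WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) +
 *              GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], data[i]);
 *   }
 */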
 736
 737static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
 738{
 739	static void *scratch_reg0;
 740	static void *scratch_reg1;
 741	static void *scratch_reg2;
 742	static void *scratch_reg3;
 743	static void *spare_int;
 744	static uint32_t grbm_cntl;
 745	static uint32_t grbm_idx;
 746
 747	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
 748	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
 749	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
 750	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
 751	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
 752
 753	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
 754	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
 755
 756	if (amdgpu_sriov_runtime(adev)) {
  757		pr_err("RLCG register write should not be issued during SR-IOV runtime\n");
 758		return;
 759	}
 760
 761	if (offset == grbm_cntl || offset == grbm_idx) {
 762		if (offset  == grbm_cntl)
 763			writel(v, scratch_reg2);
 764		else if (offset == grbm_idx)
 765			writel(v, scratch_reg3);
 766
 767		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
 768	} else {
 769		uint32_t i = 0;
 770		uint32_t retries = 50000;
 771
 772		writel(v, scratch_reg0);
 773		writel(offset | 0x80000000, scratch_reg1);
 774		writel(1, spare_int);
 775		for (i = 0; i < retries; i++) {
 776			u32 tmp;
 777
 778			tmp = readl(scratch_reg1);
 779			if (!(tmp & 0x80000000))
 780				break;
 781
 782			udelay(10);
 783		}
 784		if (i >= retries)
  785			pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);
 786	}
 787
 788}
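/*
 * Summary of the RLCG write path above: under SR-IOV full access, writes to
 * GRBM_GFX_CNTL/GRBM_GFX_INDEX are shadowed in SCRATCH_REG2/SCRATCH_REG3 and
 * also issued directly through MMIO, while any other register is handed off
 * to the RLC by placing the value in SCRATCH_REG0 and the offset (with bit 31
 * set as a request flag) in SCRATCH_REG1, ringing RLC_SPARE_INT, and polling
 * SCRATCH_REG1 until the RLC clears bit 31 or 50000 * 10 us (~500 ms) elapse.
 */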
 789
 790static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset,
 791			       u32 v, u32 acc_flags, u32 hwip)
 792{
 793	if ((acc_flags & AMDGPU_REGS_RLC) &&
 794	    amdgpu_sriov_fullaccess(adev)) {
 795		gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);
 796
 797		return;
 798	}
 799
 800	if (acc_flags & AMDGPU_REGS_NO_KIQ)
 801		WREG32_NO_KIQ(offset, v);
 802	else
 803		WREG32(offset, v);
 804}
 805
 806#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 807#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 808#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
 809#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
 810
 811static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 812static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 813static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
 814static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 815static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 816				struct amdgpu_cu_info *cu_info);
 817static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 818static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 819static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 820static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 821					  void *ras_error_status);
 822static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 823				     void *inject_if);
 824static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
 825
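/*
 * KIQ (Kernel Interface Queue) helpers.  Each function below assembles one
 * PM4 packet on the KIQ ring so that the CP firmware, rather than the driver,
 * performs the operation: advertising the available compute queue bitmap,
 * mapping and unmapping compute queues, querying a queue's status and
 * invalidating TLBs for a given PASID.
 */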
 826static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
 827				uint64_t queue_mask)
 828{
 829	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
 830	amdgpu_ring_write(kiq_ring,
 831		PACKET3_SET_RESOURCES_VMID_MASK(0) |
  832		/* vmid_mask:0 queue_type:0 (KIQ) */
 833		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
 834	amdgpu_ring_write(kiq_ring,
 835			lower_32_bits(queue_mask));	/* queue mask lo */
 836	amdgpu_ring_write(kiq_ring,
 837			upper_32_bits(queue_mask));	/* queue mask hi */
 838	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
 839	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
 840	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
 841	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
 842}
 843
 844static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
 845				 struct amdgpu_ring *ring)
 846{
 847	struct amdgpu_device *adev = kiq_ring->adev;
 848	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
 849	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 850	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 851
 852	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
 853	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
 854	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 855			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
 856			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
 857			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
 858			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
 859			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
 860			 /*queue_type: normal compute queue */
 861			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
 862			 /* alloc format: all_on_one_pipe */
 863			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
 864			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
 865			 /* num_queues: must be 1 */
 866			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
 867	amdgpu_ring_write(kiq_ring,
 868			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
 869	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
 870	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
 871	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
 872	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
 873}
 874
 875static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
 876				   struct amdgpu_ring *ring,
 877				   enum amdgpu_unmap_queues_action action,
 878				   u64 gpu_addr, u64 seq)
 879{
 880	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 881
 882	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
 883	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 884			  PACKET3_UNMAP_QUEUES_ACTION(action) |
 885			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
 886			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
 887			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
 888	amdgpu_ring_write(kiq_ring,
 889			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 890
 891	if (action == PREEMPT_QUEUES_NO_UNMAP) {
 892		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
 893		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
 894		amdgpu_ring_write(kiq_ring, seq);
 895	} else {
 896		amdgpu_ring_write(kiq_ring, 0);
 897		amdgpu_ring_write(kiq_ring, 0);
 898		amdgpu_ring_write(kiq_ring, 0);
 899	}
 900}
 901
 902static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
 903				   struct amdgpu_ring *ring,
 904				   u64 addr,
 905				   u64 seq)
 906{
 907	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 908
 909	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
 910	amdgpu_ring_write(kiq_ring,
 911			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
 912			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
 913			  PACKET3_QUERY_STATUS_COMMAND(2));
 914	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 915	amdgpu_ring_write(kiq_ring,
 916			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
 917			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
 918	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
 919	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
 920	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
 921	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 922}
 923
 924static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
 925				uint16_t pasid, uint32_t flush_type,
 926				bool all_hub)
 927{
 928	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
 929	amdgpu_ring_write(kiq_ring,
 930			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
 931			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
 932			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
 933			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
 934}
 935
 936static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
 937	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
 938	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
 939	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
 940	.kiq_query_status = gfx_v9_0_kiq_query_status,
 941	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
 942	.set_resources_size = 8,
 943	.map_queues_size = 7,
 944	.unmap_queues_size = 6,
 945	.query_status_size = 7,
 946	.invalidate_tlbs_size = 2,
 947};
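/*
 * The *_size fields above are the packet lengths in dwords and match what the
 * emit helpers write (e.g. map_queues_size = 7: the PACKET3_MAP_QUEUES header
 * plus six payload dwords), so callers can reserve enough ring space with
 * amdgpu_ring_alloc() before emitting the packets.
 */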
 948
 949static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
 950{
 951	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
 952}
 953
 954static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 955{
 956	switch (adev->asic_type) {
 957	case CHIP_VEGA10:
 958		soc15_program_register_sequence(adev,
 959						golden_settings_gc_9_0,
 960						ARRAY_SIZE(golden_settings_gc_9_0));
 961		soc15_program_register_sequence(adev,
 962						golden_settings_gc_9_0_vg10,
 963						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 964		break;
 965	case CHIP_VEGA12:
 966		soc15_program_register_sequence(adev,
 967						golden_settings_gc_9_2_1,
 968						ARRAY_SIZE(golden_settings_gc_9_2_1));
 969		soc15_program_register_sequence(adev,
 970						golden_settings_gc_9_2_1_vg12,
 971						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 972		break;
 973	case CHIP_VEGA20:
 974		soc15_program_register_sequence(adev,
 975						golden_settings_gc_9_0,
 976						ARRAY_SIZE(golden_settings_gc_9_0));
 977		soc15_program_register_sequence(adev,
 978						golden_settings_gc_9_0_vg20,
 979						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
 980		break;
 981	case CHIP_ARCTURUS:
 982		soc15_program_register_sequence(adev,
 983						golden_settings_gc_9_4_1_arct,
 984						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
 985		break;
 986	case CHIP_RAVEN:
 987		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
 988						ARRAY_SIZE(golden_settings_gc_9_1));
 989		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
 990			soc15_program_register_sequence(adev,
 991							golden_settings_gc_9_1_rv2,
 992							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
 993		else
 994			soc15_program_register_sequence(adev,
 995							golden_settings_gc_9_1_rv1,
 996							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
 997		break;
  998	case CHIP_RENOIR:
 999		soc15_program_register_sequence(adev,
1000						golden_settings_gc_9_1_rn,
1001						ARRAY_SIZE(golden_settings_gc_9_1_rn));
 1002		return; /* Renoir does not need the common golden settings */
1003	case CHIP_ALDEBARAN:
1004		gfx_v9_4_2_init_golden_registers(adev,
1005						 adev->smuio.funcs->get_die_id(adev));
1006		break;
1007	default:
1008		break;
1009	}
1010
1011	if ((adev->asic_type != CHIP_ARCTURUS) &&
1012	    (adev->asic_type != CHIP_ALDEBARAN))
1013		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1014						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1015}
1016
1017static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
1018{
1019	adev->gfx.scratch.num_reg = 8;
1020	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1021	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
1022}
1023
1024static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1025				       bool wc, uint32_t reg, uint32_t val)
1026{
1027	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1028	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1029				WRITE_DATA_DST_SEL(0) |
1030				(wc ? WR_CONFIRM : 0));
1031	amdgpu_ring_write(ring, reg);
1032	amdgpu_ring_write(ring, 0);
1033	amdgpu_ring_write(ring, val);
1034}
1035
1036static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1037				  int mem_space, int opt, uint32_t addr0,
1038				  uint32_t addr1, uint32_t ref, uint32_t mask,
1039				  uint32_t inv)
1040{
1041	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1042	amdgpu_ring_write(ring,
1043				 /* memory (1) or register (0) */
1044				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1045				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1046				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1047				 WAIT_REG_MEM_ENGINE(eng_sel)));
1048
1049	if (mem_space)
1050		BUG_ON(addr0 & 0x3); /* Dword align */
1051	amdgpu_ring_write(ring, addr0);
1052	amdgpu_ring_write(ring, addr1);
1053	amdgpu_ring_write(ring, ref);
1054	amdgpu_ring_write(ring, mask);
1055	amdgpu_ring_write(ring, inv); /* poll interval */
1056}
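/*
 * gfx_v9_0_wait_reg_mem() emits one WAIT_REG_MEM packet: mem_space selects
 * polling a memory address (1) or a register (0), opt selects the operation,
 * FUNCTION(3) means "wait until equal", and the final dword is the poll
 * interval.  An illustrative call for waiting until a register reads back a
 * value (reg/val/mask are placeholders):
 *
 *   gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 */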
1057
1058static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1059{
1060	struct amdgpu_device *adev = ring->adev;
1061	uint32_t scratch;
1062	uint32_t tmp = 0;
1063	unsigned i;
1064	int r;
1065
1066	r = amdgpu_gfx_scratch_get(adev, &scratch);
1067	if (r)
1068		return r;
1069
1070	WREG32(scratch, 0xCAFEDEAD);
1071	r = amdgpu_ring_alloc(ring, 3);
1072	if (r)
1073		goto error_free_scratch;
1074
1075	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1076	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1077	amdgpu_ring_write(ring, 0xDEADBEEF);
1078	amdgpu_ring_commit(ring);
1079
1080	for (i = 0; i < adev->usec_timeout; i++) {
1081		tmp = RREG32(scratch);
1082		if (tmp == 0xDEADBEEF)
1083			break;
1084		udelay(1);
1085	}
1086
1087	if (i >= adev->usec_timeout)
1088		r = -ETIMEDOUT;
1089
1090error_free_scratch:
1091	amdgpu_gfx_scratch_free(adev, scratch);
1092	return r;
1093}
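/*
 * The ring test above is the usual scratch-register smoke test: seed a
 * scratch register with 0xCAFEDEAD, emit a SET_UCONFIG_REG packet through the
 * ring that writes 0xDEADBEEF to it, then poll for up to adev->usec_timeout
 * microseconds until the new value reads back (or fail with -ETIMEDOUT).
 */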
1094
1095static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1096{
1097	struct amdgpu_device *adev = ring->adev;
1098	struct amdgpu_ib ib;
1099	struct dma_fence *f = NULL;
1100
1101	unsigned index;
1102	uint64_t gpu_addr;
1103	uint32_t tmp;
1104	long r;
1105
1106	r = amdgpu_device_wb_get(adev, &index);
1107	if (r)
1108		return r;
1109
1110	gpu_addr = adev->wb.gpu_addr + (index * 4);
1111	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1112	memset(&ib, 0, sizeof(ib));
1113	r = amdgpu_ib_get(adev, NULL, 16,
1114					AMDGPU_IB_POOL_DIRECT, &ib);
1115	if (r)
1116		goto err1;
1117
1118	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1119	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1120	ib.ptr[2] = lower_32_bits(gpu_addr);
1121	ib.ptr[3] = upper_32_bits(gpu_addr);
1122	ib.ptr[4] = 0xDEADBEEF;
1123	ib.length_dw = 5;
1124
1125	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1126	if (r)
1127		goto err2;
1128
1129	r = dma_fence_wait_timeout(f, false, timeout);
1130	if (r == 0) {
1131		r = -ETIMEDOUT;
1132		goto err2;
1133	} else if (r < 0) {
1134		goto err2;
1135	}
1136
1137	tmp = adev->wb.wb[index];
1138	if (tmp == 0xDEADBEEF)
1139		r = 0;
1140	else
1141		r = -EINVAL;
1142
1143err2:
1144	amdgpu_ib_free(adev, &ib, NULL);
1145	dma_fence_put(f);
1146err1:
1147	amdgpu_device_wb_free(adev, index);
1148	return r;
1149}
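/*
 * The IB test above exercises the indirect-buffer path instead: a WRITE_DATA
 * packet inside a small IB stores 0xDEADBEEF into a writeback (WB) slot in
 * system memory, and the test waits on the scheduler fence rather than
 * busy-polling a register before checking the slot's contents.
 */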
1150
1151
1152static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1153{
1154	release_firmware(adev->gfx.pfp_fw);
1155	adev->gfx.pfp_fw = NULL;
1156	release_firmware(adev->gfx.me_fw);
1157	adev->gfx.me_fw = NULL;
1158	release_firmware(adev->gfx.ce_fw);
1159	adev->gfx.ce_fw = NULL;
1160	release_firmware(adev->gfx.rlc_fw);
1161	adev->gfx.rlc_fw = NULL;
1162	release_firmware(adev->gfx.mec_fw);
1163	adev->gfx.mec_fw = NULL;
1164	release_firmware(adev->gfx.mec2_fw);
1165	adev->gfx.mec2_fw = NULL;
1166
1167	kfree(adev->gfx.rlc.register_list_format);
1168}
1169
1170static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1171{
1172	const struct rlc_firmware_header_v2_1 *rlc_hdr;
1173
1174	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1175	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1176	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1177	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1178	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1179	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1180	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1181	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1182	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1183	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1184	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1185	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1186	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1187	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1188			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1189}
1190
1191static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1192{
1193	adev->gfx.me_fw_write_wait = false;
1194	adev->gfx.mec_fw_write_wait = false;
1195
1196	if ((adev->asic_type != CHIP_ARCTURUS) &&
1197	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1198	    (adev->gfx.mec_feature_version < 46) ||
1199	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
1200	    (adev->gfx.pfp_feature_version < 46)))
1201		DRM_WARN_ONCE("CP firmware version too old, please update!");
1202
1203	switch (adev->asic_type) {
1204	case CHIP_VEGA10:
1205		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1206		    (adev->gfx.me_feature_version >= 42) &&
1207		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1208		    (adev->gfx.pfp_feature_version >= 42))
1209			adev->gfx.me_fw_write_wait = true;
1210
1211		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1212		    (adev->gfx.mec_feature_version >= 42))
1213			adev->gfx.mec_fw_write_wait = true;
1214		break;
1215	case CHIP_VEGA12:
1216		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1217		    (adev->gfx.me_feature_version >= 44) &&
1218		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1219		    (adev->gfx.pfp_feature_version >= 44))
1220			adev->gfx.me_fw_write_wait = true;
1221
1222		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1223		    (adev->gfx.mec_feature_version >= 44))
1224			adev->gfx.mec_fw_write_wait = true;
1225		break;
1226	case CHIP_VEGA20:
1227		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1228		    (adev->gfx.me_feature_version >= 44) &&
1229		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1230		    (adev->gfx.pfp_feature_version >= 44))
1231			adev->gfx.me_fw_write_wait = true;
1232
1233		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1234		    (adev->gfx.mec_feature_version >= 44))
1235			adev->gfx.mec_fw_write_wait = true;
1236		break;
1237	case CHIP_RAVEN:
1238		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1239		    (adev->gfx.me_feature_version >= 42) &&
1240		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1241		    (adev->gfx.pfp_feature_version >= 42))
1242			adev->gfx.me_fw_write_wait = true;
1243
1244		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1245		    (adev->gfx.mec_feature_version >= 42))
1246			adev->gfx.mec_fw_write_wait = true;
1247		break;
1248	default:
1249		adev->gfx.me_fw_write_wait = true;
1250		adev->gfx.mec_fw_write_wait = true;
1251		break;
1252	}
1253}
1254
1255struct amdgpu_gfxoff_quirk {
1256	u16 chip_vendor;
1257	u16 chip_device;
1258	u16 subsys_vendor;
1259	u16 subsys_device;
1260	u8 revision;
1261};
1262
1263static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1264	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1265	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1266	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1267	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1268	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1269	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1270	{ 0, 0, 0, 0, 0 },
1271};
1272
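/*
 * Return true when the device matches an entry in amdgpu_gfxoff_quirk_list
 * (PCI vendor/device, subsystem IDs and revision), i.e. a board on which
 * GFXOFF is known to be unstable and should be disabled.
 */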
1273static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1274{
1275	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1276
1277	while (p && p->chip_device != 0) {
1278		if (pdev->vendor == p->chip_vendor &&
1279		    pdev->device == p->chip_device &&
1280		    pdev->subsystem_vendor == p->subsys_vendor &&
1281		    pdev->subsystem_device == p->subsys_device &&
1282		    pdev->revision == p->revision) {
1283			return true;
1284		}
1285		++p;
1286	}
1287	return false;
1288}
1289
1290static bool is_raven_kicker(struct amdgpu_device *adev)
1291{
1292	if (adev->pm.fw_version >= 0x41e2b)
1293		return true;
1294	else
1295		return false;
1296}
1297
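/*
 * Return true on Renoir when the ME firmware is new enough (version >=
 * 0xa5, feature version >= 52) for the enlarged CP doorbell range to be
 * used.
 */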
1298static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1299{
1300	if ((adev->asic_type == CHIP_RENOIR) &&
1301	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1302	    (adev->gfx.me_feature_version >= 52))
1303		return true;
1304	else
1305		return false;
1306}
1307
1308static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1309{
1310	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1311		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1312
1313	switch (adev->asic_type) {
1314	case CHIP_VEGA10:
1315	case CHIP_VEGA12:
1316	case CHIP_VEGA20:
1317		break;
1318	case CHIP_RAVEN:
1319		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1320		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1321		    ((!is_raven_kicker(adev) &&
1322		      adev->gfx.rlc_fw_version < 531) ||
1323		     (adev->gfx.rlc_feature_version < 1) ||
1324		     !adev->gfx.rlc.is_rlc_v2_1))
1325			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1326
1327		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1328			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1329				AMD_PG_SUPPORT_CP |
1330				AMD_PG_SUPPORT_RLC_SMU_HS;
1331		break;
1332	case CHIP_RENOIR:
1333		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1334			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1335				AMD_PG_SUPPORT_CP |
1336				AMD_PG_SUPPORT_RLC_SMU_HS;
1337		break;
1338	default:
1339		break;
1340	}
1341}
1342
1343static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1344					  const char *chip_name)
1345{
1346	char fw_name[30];
1347	int err;
1348	struct amdgpu_firmware_info *info = NULL;
1349	const struct common_firmware_header *header = NULL;
1350	const struct gfx_firmware_header_v1_0 *cp_hdr;
1351
1352	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1353	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1354	if (err)
1355		goto out;
1356	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1357	if (err)
1358		goto out;
1359	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1360	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1361	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1362
1363	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1364	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1365	if (err)
1366		goto out;
1367	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1368	if (err)
1369		goto out;
1370	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1371	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1372	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1373
1374	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1375	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1376	if (err)
1377		goto out;
1378	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1379	if (err)
1380		goto out;
1381	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1382	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1383	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1384
1385	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1386		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1387		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1388		info->fw = adev->gfx.pfp_fw;
1389		header = (const struct common_firmware_header *)info->fw->data;
1390		adev->firmware.fw_size +=
1391			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1392
1393		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1394		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1395		info->fw = adev->gfx.me_fw;
1396		header = (const struct common_firmware_header *)info->fw->data;
1397		adev->firmware.fw_size +=
1398			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1399
1400		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1401		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1402		info->fw = adev->gfx.ce_fw;
1403		header = (const struct common_firmware_header *)info->fw->data;
1404		adev->firmware.fw_size +=
1405			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1406	}
1407
1408out:
1409	if (err) {
1410		dev_err(adev->dev,
1411			"gfx9: Failed to load firmware \"%s\"\n",
1412			fw_name);
1413		release_firmware(adev->gfx.pfp_fw);
1414		adev->gfx.pfp_fw = NULL;
1415		release_firmware(adev->gfx.me_fw);
1416		adev->gfx.me_fw = NULL;
1417		release_firmware(adev->gfx.ce_fw);
1418		adev->gfx.ce_fw = NULL;
1419	}
1420	return err;
1421}
1422
1423static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1424					  const char *chip_name)
1425{
1426	char fw_name[30];
1427	int err;
1428	struct amdgpu_firmware_info *info = NULL;
1429	const struct common_firmware_header *header = NULL;
1430	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1431	unsigned int *tmp = NULL;
1432	unsigned int i = 0;
1433	uint16_t version_major;
1434	uint16_t version_minor;
1435	uint32_t smu_version;
1436
1437	/*
1438	 * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
1439	 * instead of picasso_rlc.bin.
1440	 * Detection:
1441	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1442	 *          or revision >= 0xD8 && revision <= 0xDF
1443	 * otherwise it is PCO FP5
1444	 */
1445	if (!strcmp(chip_name, "picasso") &&
1446		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1447		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1448		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1449	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1450		(smu_version >= 0x41e2b))
1451		/*
1452		 * SMC is loaded by SBIOS on APUs, so the SMU version can be read directly.
1453		 */
1454		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1455	else
1456		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1457	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1458	if (err)
1459		goto out;
1460	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
1461	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1462
1463	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1464	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1465	if (version_major == 2 && version_minor == 1)
1466		adev->gfx.rlc.is_rlc_v2_1 = true;
1467
1468	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1469	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1470	adev->gfx.rlc.save_and_restore_offset =
1471			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1472	adev->gfx.rlc.clear_state_descriptor_offset =
1473			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1474	adev->gfx.rlc.avail_scratch_ram_locations =
1475			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1476	adev->gfx.rlc.reg_restore_list_size =
1477			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1478	adev->gfx.rlc.reg_list_format_start =
1479			le32_to_cpu(rlc_hdr->reg_list_format_start);
1480	adev->gfx.rlc.reg_list_format_separate_start =
1481			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1482	adev->gfx.rlc.starting_offsets_start =
1483			le32_to_cpu(rlc_hdr->starting_offsets_start);
1484	adev->gfx.rlc.reg_list_format_size_bytes =
1485			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1486	adev->gfx.rlc.reg_list_size_bytes =
1487			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1488	adev->gfx.rlc.register_list_format =
1489			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1490				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1491	if (!adev->gfx.rlc.register_list_format) {
1492		err = -ENOMEM;
1493		goto out;
1494	}
1495
1496	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1497			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1498	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1499		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1500
1501	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1502
1503	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1504			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1505	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1506		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1507
1508	if (adev->gfx.rlc.is_rlc_v2_1)
1509		gfx_v9_0_init_rlc_ext_microcode(adev);
1510
1511	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1512		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1513		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1514		info->fw = adev->gfx.rlc_fw;
1515		header = (const struct common_firmware_header *)info->fw->data;
1516		adev->firmware.fw_size +=
1517			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1518
1519		if (adev->gfx.rlc.is_rlc_v2_1 &&
1520		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1521		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1522		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1523			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1524			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1525			info->fw = adev->gfx.rlc_fw;
1526			adev->firmware.fw_size +=
1527				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1528
1529			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1530			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1531			info->fw = adev->gfx.rlc_fw;
1532			adev->firmware.fw_size +=
1533				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1534
1535			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1536			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1537			info->fw = adev->gfx.rlc_fw;
1538			adev->firmware.fw_size +=
1539				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1540		}
1541	}
1542
1543out:
1544	if (err) {
1545		dev_err(adev->dev,
1546			"gfx9: Failed to load firmware \"%s\"\n",
1547			fw_name);
1548		release_firmware(adev->gfx.rlc_fw);
1549		adev->gfx.rlc_fw = NULL;
1550	}
1551	return err;
1552}
1553
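/*
 * Aldebaran, Arcturus and Renoir do not ship a separate MEC2 firmware
 * binary; MEC2 reuses the MEC firmware there, so return false for those
 * ASICs.
 */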
1554static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1555{
1556	if (adev->asic_type == CHIP_ALDEBARAN ||
1557	    adev->asic_type == CHIP_ARCTURUS ||
1558	    adev->asic_type == CHIP_RENOIR)
1559		return false;
1560
1561	return true;
1562}
1563
1564static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1565					  const char *chip_name)
1566{
1567	char fw_name[30];
1568	int err;
1569	struct amdgpu_firmware_info *info = NULL;
1570	const struct common_firmware_header *header = NULL;
1571	const struct gfx_firmware_header_v1_0 *cp_hdr;
1572
1573	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1574	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1575	if (err)
1576		goto out;
1577	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1578	if (err)
1579		goto out;
1580	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1581	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1582	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1583
1584
1585	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1586		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1587		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1588		if (!err) {
1589			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1590			if (err)
1591				goto out;
1592			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1593			adev->gfx.mec2_fw->data;
1594			adev->gfx.mec2_fw_version =
1595			le32_to_cpu(cp_hdr->header.ucode_version);
1596			adev->gfx.mec2_feature_version =
1597			le32_to_cpu(cp_hdr->ucode_feature_version);
1598		} else {
1599			err = 0;
1600			adev->gfx.mec2_fw = NULL;
1601		}
1602	} else {
1603		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1604		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1605	}
1606
1607	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1608		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1609		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1610		info->fw = adev->gfx.mec_fw;
1611		header = (const struct common_firmware_header *)info->fw->data;
1612		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1613		adev->firmware.fw_size +=
1614			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1615
1616		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1617		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1618		info->fw = adev->gfx.mec_fw;
1619		adev->firmware.fw_size +=
1620			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1621
1622		if (adev->gfx.mec2_fw) {
1623			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1624			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1625			info->fw = adev->gfx.mec2_fw;
1626			header = (const struct common_firmware_header *)info->fw->data;
1627			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1628			adev->firmware.fw_size +=
1629				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1630
1631			/* TODO: Determine if MEC2 JT FW loading can be removed
1632			 * for all GFX V9 ASICs and above. */
1633			if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1634				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1635				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1636				info->fw = adev->gfx.mec2_fw;
1637				adev->firmware.fw_size +=
1638					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1639					PAGE_SIZE);
1640			}
1641		}
1642	}
1643
1644out:
1645	gfx_v9_0_check_if_need_gfxoff(adev);
1646	gfx_v9_0_check_fw_write_wait(adev);
1647	if (err) {
1648		dev_err(adev->dev,
1649			"gfx9: Failed to load firmware \"%s\"\n",
1650			fw_name);
1651		release_firmware(adev->gfx.mec_fw);
1652		adev->gfx.mec_fw = NULL;
1653		release_firmware(adev->gfx.mec2_fw);
1654		adev->gfx.mec2_fw = NULL;
1655	}
1656	return err;
1657}
1658
1659static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1660{
1661	const char *chip_name;
1662	int r;
1663
1664	DRM_DEBUG("\n");
1665
1666	switch (adev->asic_type) {
1667	case CHIP_VEGA10:
1668		chip_name = "vega10";
1669		break;
1670	case CHIP_VEGA12:
1671		chip_name = "vega12";
1672		break;
1673	case CHIP_VEGA20:
1674		chip_name = "vega20";
1675		break;
1676	case CHIP_RAVEN:
1677		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1678			chip_name = "raven2";
1679		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1680			chip_name = "picasso";
1681		else
1682			chip_name = "raven";
1683		break;
1684	case CHIP_ARCTURUS:
1685		chip_name = "arcturus";
1686		break;
1687	case CHIP_RENOIR:
1688		if (adev->apu_flags & AMD_APU_IS_RENOIR)
1689			chip_name = "renoir";
1690		else
1691			chip_name = "green_sardine";
1692		break;
1693	case CHIP_ALDEBARAN:
1694		chip_name = "aldebaran";
1695		break;
1696	default:
1697		BUG();
1698	}
1699
1700	/* No CPG in Arcturus */
1701	if (adev->gfx.num_gfx_rings) {
1702		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1703		if (r)
1704			return r;
1705	}
1706
1707	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1708	if (r)
1709		return r;
1710
1711	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1712	if (r)
1713		return r;
1714
1715	return r;
1716}
1717
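/*
 * Clear-state buffer size in dwords: PREAMBLE begin (2) + CONTEXT_CONTROL
 * (3) + one SET_CONTEXT_REG packet per extent (2 + reg_count) + PREAMBLE
 * end (2) + CLEAR_STATE (2).
 */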
1718static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1719{
1720	u32 count = 0;
1721	const struct cs_section_def *sect = NULL;
1722	const struct cs_extent_def *ext = NULL;
1723
1724	/* begin clear state */
1725	count += 2;
1726	/* context control state */
1727	count += 3;
1728
1729	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1730		for (ext = sect->section; ext->extent != NULL; ++ext) {
1731			if (sect->id == SECT_CONTEXT)
1732				count += 2 + ext->reg_count;
1733			else
1734				return 0;
1735		}
1736	}
1737
1738	/* end clear state */
1739	count += 2;
1740	/* clear state */
1741	count += 2;
1742
1743	return count;
1744}
1745
1746static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1747				    volatile u32 *buffer)
1748{
1749	u32 count = 0, i;
1750	const struct cs_section_def *sect = NULL;
1751	const struct cs_extent_def *ext = NULL;
1752
1753	if (adev->gfx.rlc.cs_data == NULL)
1754		return;
1755	if (buffer == NULL)
1756		return;
1757
1758	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1759	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1760
1761	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1762	buffer[count++] = cpu_to_le32(0x80000000);
1763	buffer[count++] = cpu_to_le32(0x80000000);
1764
1765	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1766		for (ext = sect->section; ext->extent != NULL; ++ext) {
1767			if (sect->id == SECT_CONTEXT) {
1768				buffer[count++] =
1769					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1770				buffer[count++] = cpu_to_le32(ext->reg_index -
1771						PACKET3_SET_CONTEXT_REG_START);
1772				for (i = 0; i < ext->reg_count; i++)
1773					buffer[count++] = cpu_to_le32(ext->extent[i]);
1774			} else {
1775				return;
1776			}
1777		}
1778	}
1779
1780	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1781	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1782
1783	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1784	buffer[count++] = cpu_to_le32(0);
1785}
1786
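/*
 * Build the per-SE/SH always-on CU bitmaps (4 CUs on APUs, 8 on Vega12,
 * 12 otherwise), program them into RLC_PG_ALWAYS_ON_CU_MASK and
 * RLC_LB_ALWAYS_ACTIVE_CU_MASK, and cache the result in
 * cu_info->ao_cu_bitmap.
 */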
1787static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1788{
1789	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1790	uint32_t pg_always_on_cu_num = 2;
1791	uint32_t always_on_cu_num;
1792	uint32_t i, j, k;
1793	uint32_t mask, cu_bitmap, counter;
1794
1795	if (adev->flags & AMD_IS_APU)
1796		always_on_cu_num = 4;
1797	else if (adev->asic_type == CHIP_VEGA12)
1798		always_on_cu_num = 8;
1799	else
1800		always_on_cu_num = 12;
1801
1802	mutex_lock(&adev->grbm_idx_mutex);
1803	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1804		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1805			mask = 1;
1806			cu_bitmap = 0;
1807			counter = 0;
1808			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1809
1810			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1811				if (cu_info->bitmap[i][j] & mask) {
1812					if (counter == pg_always_on_cu_num)
1813						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1814					if (counter < always_on_cu_num)
1815						cu_bitmap |= mask;
1816					else
1817						break;
1818					counter++;
1819				}
1820				mask <<= 1;
1821			}
1822
1823			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1824			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1825		}
1826	}
1827	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1828	mutex_unlock(&adev->grbm_idx_mutex);
1829}
1830
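/*
 * Program the RLC load-balancing (LBPW) thresholds, counters and CU masks
 * for Raven; the Vega20 variant below differs only in the threshold and
 * counter values.
 */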
1831static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1832{
1833	uint32_t data;
1834
1835	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1836	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1837	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1838	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1839	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1840
1841	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1842	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1843
1844	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1845	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1846
1847	mutex_lock(&adev->grbm_idx_mutex);
1848	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1849	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1850	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1851
1852	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1853	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1854	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1855	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1856	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1857
1858	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1859	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1860	data &= 0x0000FFFF;
1861	data |= 0x00C00000;
1862	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1863
1864	/*
1865	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1866	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1867	 */
1868
1869	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1870	 * but used for RLC_LB_CNTL configuration */
1871	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1872	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1873	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1874	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1875	mutex_unlock(&adev->grbm_idx_mutex);
1876
1877	gfx_v9_0_init_always_on_cu_mask(adev);
1878}
1879
1880static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1881{
1882	uint32_t data;
1883
1884	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1885	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1886	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1887	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1888	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1889
1890	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1891	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1892
1893	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1894	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1895
1896	mutex_lock(&adev->grbm_idx_mutex);
1897	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1898	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1899	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1900
1901	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1902	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1903	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1904	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1905	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1906
1907	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1908	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1909	data &= 0x0000FFFF;
1910	data |= 0x00C00000;
1911	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1912
1913	/*
1914	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1915	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1916	 */
1917
1918	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1919	 * but used for RLC_LB_CNTL configuration */
1920	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1921	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1922	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1923	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1924	mutex_unlock(&adev->grbm_idx_mutex);
1925
1926	gfx_v9_0_init_always_on_cu_mask(adev);
1927}
1928
1929static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1930{
1931	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1932}
1933
1934static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1935{
1936	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1937		return 5;
1938	else
1939		return 4;
1940}
1941
1942static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1943{
1944	const struct cs_section_def *cs_data;
1945	int r;
1946
1947	adev->gfx.rlc.cs_data = gfx9_cs_data;
1948
1949	cs_data = adev->gfx.rlc.cs_data;
1950
1951	if (cs_data) {
1952		/* init clear state block */
1953		r = amdgpu_gfx_rlc_init_csb(adev);
1954		if (r)
1955			return r;
1956	}
1957
1958	if (adev->flags & AMD_IS_APU) {
1959		/* TODO: double check the cp_table_size for RV */
1960		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1961		r = amdgpu_gfx_rlc_init_cpt(adev);
1962		if (r)
1963			return r;
1964	}
1965
1966	switch (adev->asic_type) {
1967	case CHIP_RAVEN:
1968		gfx_v9_0_init_lbpw(adev);
1969		break;
1970	case CHIP_VEGA20:
1971		gfx_v9_4_init_lbpw(adev);
1972		break;
1973	default:
1974		break;
1975	}
1976
1977	/* init spm vmid with 0xf */
1978	if (adev->gfx.rlc.funcs->update_spm_vmid)
1979		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1980
1981	return 0;
1982}
1983
1984static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1985{
1986	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1987	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1988}
1989
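/*
 * Allocate the MEC HPD EOP buffer in VRAM (one GFX9_MEC_HPD_SIZE slot per
 * compute ring) and copy the MEC microcode into a GTT buffer object for
 * the CP to fetch.
 */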
1990static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1991{
1992	int r;
1993	u32 *hpd;
1994	const __le32 *fw_data;
1995	unsigned fw_size;
1996	u32 *fw;
1997	size_t mec_hpd_size;
1998
1999	const struct gfx_firmware_header_v1_0 *mec_hdr;
2000
2001	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
2002
2003	/* take ownership of the relevant compute queues */
2004	amdgpu_gfx_compute_queue_acquire(adev);
2005	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
2006	if (mec_hpd_size) {
2007		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
2008					      AMDGPU_GEM_DOMAIN_VRAM,
2009					      &adev->gfx.mec.hpd_eop_obj,
2010					      &adev->gfx.mec.hpd_eop_gpu_addr,
2011					      (void **)&hpd);
2012		if (r) {
2013			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
2014			gfx_v9_0_mec_fini(adev);
2015			return r;
2016		}
2017
2018		memset(hpd, 0, mec_hpd_size);
2019
2020		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
2021		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
2022	}
2023
2024	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2025
2026	fw_data = (const __le32 *)
2027		(adev->gfx.mec_fw->data +
2028		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2029	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
2030
2031	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
2032				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2033				      &adev->gfx.mec.mec_fw_obj,
2034				      &adev->gfx.mec.mec_fw_gpu_addr,
2035				      (void **)&fw);
2036	if (r) {
2037		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
2038		gfx_v9_0_mec_fini(adev);
2039		return r;
2040	}
2041
2042	memcpy(fw, fw_data, fw_size);
2043
2044	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2045	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2046
2047	return 0;
2048}
2049
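/*
 * Wave debug helpers: read wave-context registers indirectly through
 * SQ_IND_INDEX / SQ_IND_DATA for a given SIMD/wave (and thread, for
 * register ranges such as SGPRs and VGPRs).
 */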
2050static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2051{
2052	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2053		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2054		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2055		(address << SQ_IND_INDEX__INDEX__SHIFT) |
2056		(SQ_IND_INDEX__FORCE_READ_MASK));
2057	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2058}
2059
2060static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2061			   uint32_t wave, uint32_t thread,
2062			   uint32_t regno, uint32_t num, uint32_t *out)
2063{
2064	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2065		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2066		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2067		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
2068		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2069		(SQ_IND_INDEX__FORCE_READ_MASK) |
2070		(SQ_IND_INDEX__AUTO_INCR_MASK));
2071	while (num--)
2072		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2073}
2074
2075static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2076{
2077	/* type 1 wave data */
2078	dst[(*no_fields)++] = 1;
2079	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2080	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2081	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2082	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2083	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2084	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2085	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2086	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2087	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2088	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2089	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2090	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2091	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2092	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2093}
2094
2095static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2096				     uint32_t wave, uint32_t start,
2097				     uint32_t size, uint32_t *dst)
2098{
2099	wave_read_regs(
2100		adev, simd, wave, 0,
2101		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2102}
2103
2104static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2105				     uint32_t wave, uint32_t thread,
2106				     uint32_t start, uint32_t size,
2107				     uint32_t *dst)
2108{
2109	wave_read_regs(
2110		adev, simd, wave, thread,
2111		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2112}
2113
2114static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2115				  u32 me, u32 pipe, u32 q, u32 vm)
2116{
2117	soc15_grbm_select(adev, me, pipe, q, vm);
2118}
2119
2120static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2121        .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2122        .select_se_sh = &gfx_v9_0_select_se_sh,
2123        .read_wave_data = &gfx_v9_0_read_wave_data,
2124        .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2125        .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2126        .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2127};
2128
2129static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = {
2130	.ras_late_init = amdgpu_gfx_ras_late_init,
2131	.ras_fini = amdgpu_gfx_ras_fini,
2132	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2133	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2134	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2135};
2136
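/*
 * Per-ASIC gfx configuration: set the FIFO sizes, pick or read the golden
 * GB_ADDR_CONFIG value and decode its fields (pipes, banks, RBs, SEs,
 * pipe interleave size) into adev->gfx.config.
 */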
2137static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2138{
2139	u32 gb_addr_config;
2140	int err;
2141
2142	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2143
2144	switch (adev->asic_type) {
2145	case CHIP_VEGA10:
2146		adev->gfx.config.max_hw_contexts = 8;
2147		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2148		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2149		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2150		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2151		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2152		break;
2153	case CHIP_VEGA12:
2154		adev->gfx.config.max_hw_contexts = 8;
2155		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2156		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2157		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2158		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2159		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2160		DRM_INFO("fix gfx.config for vega12\n");
2161		break;
2162	case CHIP_VEGA20:
2163		adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs;
2164		adev->gfx.config.max_hw_contexts = 8;
2165		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2166		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2167		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2168		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2169		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2170		gb_addr_config &= ~0xf3e777ff;
2171		gb_addr_config |= 0x22014042;
2172		/* check vbios table if gpu info is not available */
2173		err = amdgpu_atomfirmware_get_gfx_info(adev);
2174		if (err)
2175			return err;
2176		break;
2177	case CHIP_RAVEN:
2178		adev->gfx.config.max_hw_contexts = 8;
2179		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2180		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2181		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2182		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2183		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2184			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2185		else
2186			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2187		break;
2188	case CHIP_ARCTURUS:
2189		adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs;
2190		adev->gfx.config.max_hw_contexts = 8;
2191		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2192		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2193		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2194		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2195		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2196		gb_addr_config &= ~0xf3e777ff;
2197		gb_addr_config |= 0x22014042;
2198		break;
2199	case CHIP_RENOIR:
2200		adev->gfx.config.max_hw_contexts = 8;
2201		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2202		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2203		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2204		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2205		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2206		gb_addr_config &= ~0xf3e777ff;
2207		gb_addr_config |= 0x22010042;
2208		break;
2209	case CHIP_ALDEBARAN:
2210		adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs;
2211		adev->gfx.config.max_hw_contexts = 8;
2212		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2213		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2214		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2215		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2216		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2217		gb_addr_config &= ~0xf3e777ff;
2218		gb_addr_config |= 0x22014042;
2219		/* check vbios table if gpu info is not available */
2220		err = amdgpu_atomfirmware_get_gfx_info(adev);
2221		if (err)
2222			return err;
2223		break;
2224	default:
2225		BUG();
2226		break;
2227	}
2228
2229	adev->gfx.config.gb_addr_config = gb_addr_config;
2230
2231	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2232			REG_GET_FIELD(
2233					adev->gfx.config.gb_addr_config,
2234					GB_ADDR_CONFIG,
2235					NUM_PIPES);
2236
2237	adev->gfx.config.max_tile_pipes =
2238		adev->gfx.config.gb_addr_config_fields.num_pipes;
2239
2240	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2241			REG_GET_FIELD(
2242					adev->gfx.config.gb_addr_config,
2243					GB_ADDR_CONFIG,
2244					NUM_BANKS);
2245	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2246			REG_GET_FIELD(
2247					adev->gfx.config.gb_addr_config,
2248					GB_ADDR_CONFIG,
2249					MAX_COMPRESSED_FRAGS);
2250	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2251			REG_GET_FIELD(
2252					adev->gfx.config.gb_addr_config,
2253					GB_ADDR_CONFIG,
2254					NUM_RB_PER_SE);
2255	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2256			REG_GET_FIELD(
2257					adev->gfx.config.gb_addr_config,
2258					GB_ADDR_CONFIG,
2259					NUM_SHADER_ENGINES);
2260	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2261			REG_GET_FIELD(
2262					adev->gfx.config.gb_addr_config,
2263					GB_ADDR_CONFIG,
2264					PIPE_INTERLEAVE_SIZE));
2265
2266	return 0;
2267}
2268
2269static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2270				      int mec, int pipe, int queue)
2271{
2272	unsigned irq_type;
2273	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2274	unsigned int hw_prio;
2275
2278	/* mec0 is me1 */
2279	ring->me = mec + 1;
2280	ring->pipe = pipe;
2281	ring->queue = queue;
2282
2283	ring->ring_obj = NULL;
2284	ring->use_doorbell = true;
2285	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2286	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2287				+ (ring_id * GFX9_MEC_HPD_SIZE);
2288	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2289
2290	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2291		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2292		+ ring->pipe;
2293	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2294			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2295	/* type-2 packets are deprecated on MEC, use type-3 instead */
2296	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2297				hw_prio, NULL);
2298}
2299
2300static int gfx_v9_0_sw_init(void *handle)
2301{
2302	int i, j, k, r, ring_id;
2303	struct amdgpu_ring *ring;
2304	struct amdgpu_kiq *kiq;
2305	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2306
2307	switch (adev->asic_type) {
2308	case CHIP_VEGA10:
2309	case CHIP_VEGA12:
2310	case CHIP_VEGA20:
2311	case CHIP_RAVEN:
2312	case CHIP_ARCTURUS:
2313	case CHIP_RENOIR:
2314	case CHIP_ALDEBARAN:
2315		adev->gfx.mec.num_mec = 2;
2316		break;
2317	default:
2318		adev->gfx.mec.num_mec = 1;
2319		break;
2320	}
2321
2322	adev->gfx.mec.num_pipe_per_mec = 4;
2323	adev->gfx.mec.num_queue_per_pipe = 8;
2324
2325	/* EOP Event */
2326	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2327	if (r)
2328		return r;
2329
2330	/* Privileged reg */
2331	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2332			      &adev->gfx.priv_reg_irq);
2333	if (r)
2334		return r;
2335
2336	/* Privileged inst */
2337	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2338			      &adev->gfx.priv_inst_irq);
2339	if (r)
2340		return r;
2341
2342	/* ECC error */
2343	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2344			      &adev->gfx.cp_ecc_error_irq);
2345	if (r)
2346		return r;
2347
2348	/* FUE error */
2349	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2350			      &adev->gfx.cp_ecc_error_irq);
2351	if (r)
2352		return r;
2353
2354	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2355
2356	gfx_v9_0_scratch_init(adev);
2357
2358	r = gfx_v9_0_init_microcode(adev);
2359	if (r) {
2360		DRM_ERROR("Failed to load gfx firmware!\n");
2361		return r;
2362	}
2363
2364	r = adev->gfx.rlc.funcs->init(adev);
2365	if (r) {
2366		DRM_ERROR("Failed to init rlc BOs!\n");
2367		return r;
2368	}
2369
2370	r = gfx_v9_0_mec_init(adev);
2371	if (r) {
2372		DRM_ERROR("Failed to init MEC BOs!\n");
2373		return r;
2374	}
2375
2376	/* set up the gfx ring */
2377	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2378		ring = &adev->gfx.gfx_ring[i];
2379		ring->ring_obj = NULL;
2380		if (!i)
2381			sprintf(ring->name, "gfx");
2382		else
2383			sprintf(ring->name, "gfx_%d", i);
2384		ring->use_doorbell = true;
2385		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2386		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2387				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2388				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2389		if (r)
2390			return r;
2391	}
2392
2393	/* set up the compute queues - allocate horizontally across pipes */
2394	ring_id = 0;
2395	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2396		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2397			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2398				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2399					continue;
2400
2401				r = gfx_v9_0_compute_ring_init(adev,
2402							       ring_id,
2403							       i, k, j);
2404				if (r)
2405					return r;
2406
2407				ring_id++;
2408			}
2409		}
2410	}
2411
2412	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2413	if (r) {
2414		DRM_ERROR("Failed to init KIQ BOs!\n");
2415		return r;
2416	}
2417
2418	kiq = &adev->gfx.kiq;
2419	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2420	if (r)
2421		return r;
2422
2423	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2424	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2425	if (r)
2426		return r;
2427
2428	adev->gfx.ce_ram_size = 0x8000;
2429
2430	r = gfx_v9_0_gpu_early_init(adev);
2431	if (r)
2432		return r;
2433
2434	return 0;
2435}
2436
2437
2438static int gfx_v9_0_sw_fini(void *handle)
2439{
2440	int i;
2441	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2442
2443	if (adev->gfx.ras_funcs &&
2444	    adev->gfx.ras_funcs->ras_fini)
2445		adev->gfx.ras_funcs->ras_fini(adev);
2446
2447	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2448		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2449	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2450		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2451
2452	amdgpu_gfx_mqd_sw_fini(adev);
2453	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2454	amdgpu_gfx_kiq_fini(adev);
2455
2456	gfx_v9_0_mec_fini(adev);
2457	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2458	if (adev->flags & AMD_IS_APU) {
2459		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2460				&adev->gfx.rlc.cp_table_gpu_addr,
2461				(void **)&adev->gfx.rlc.cp_table_ptr);
2462	}
2463	gfx_v9_0_free_microcode(adev);
2464
2465	return 0;
2466}
2467
2468
2469static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2470{
2471	/* TODO */
2472}
2473
2474void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2475			   u32 instance)
2476{
2477	u32 data;
2478
2479	if (instance == 0xffffffff)
2480		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2481	else
2482		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2483
2484	if (se_num == 0xffffffff)
2485		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2486	else
2487		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2488
2489	if (sh_num == 0xffffffff)
2490		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2491	else
2492		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2493
2494	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2495}
2496
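/*
 * Return the bitmap of active render backends for the currently selected
 * SE/SH: the disable bits from CC/GC_USER_RB_BACKEND_DISABLE are inverted
 * against the per-SH RB mask.
 */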
2497static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2498{
2499	u32 data, mask;
2500
2501	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2502	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2503
2504	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2505	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2506
2507	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2508					 adev->gfx.config.max_sh_per_se);
2509
2510	return (~data) & mask;
2511}
2512
2513static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2514{
2515	int i, j;
2516	u32 data;
2517	u32 active_rbs = 0;
2518	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2519					adev->gfx.config.max_sh_per_se;
2520
2521	mutex_lock(&adev->grbm_idx_mutex);
2522	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2523		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2524			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2525			data = gfx_v9_0_get_rb_active_bitmap(adev);
2526			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2527					       rb_bitmap_width_per_sh);
2528		}
2529	}
2530	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2531	mutex_unlock(&adev->grbm_idx_mutex);
2532
2533	adev->gfx.config.backend_enable_mask = active_rbs;
2534	adev->gfx.config.num_rbs = hweight32(active_rbs);
2535}
2536
2537#define DEFAULT_SH_MEM_BASES	(0x6000)
2538static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2539{
2540	int i;
2541	uint32_t sh_mem_config;
2542	uint32_t sh_mem_bases;
2543
2544	/*
2545	 * Configure apertures:
2546	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2547	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2548	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2549	 */
2550	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2551
2552	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2553			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2554			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2555
2556	mutex_lock(&adev->srbm_mutex);
2557	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2558		soc15_grbm_select(adev, 0, 0, 0, i);
2559		/* CP and shaders */
2560		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2561		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2562	}
2563	soc15_grbm_select(adev, 0, 0, 0, 0);
2564	mutex_unlock(&adev->srbm_mutex);
2565
2566	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2567	   access. These should be enabled by FW for target VMIDs. */
2568	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2569		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2570		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2571		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2572		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2573	}
2574}
2575
2576static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2577{
2578	int vmid;
2579
2580	/*
2581	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2582	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2583	 * the driver can enable them for graphics. VMID0 should maintain
2584	 * access so that HWS firmware can save/restore entries.
2585	 */
2586	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2587		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2588		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2589		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2590		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2591	}
2592}
2593
2594static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2595{
2596	uint32_t tmp;
2597
2598	switch (adev->asic_type) {
2599	case CHIP_ARCTURUS:
2600		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2601		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2602					DISABLE_BARRIER_WAITCNT, 1);
2603		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2604		break;
2605	default:
2606		break;
2607	}
2608}
2609
2610static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2611{
2612	u32 tmp;
2613	int i;
2614
2615	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2616
2617	gfx_v9_0_tiling_mode_table_init(adev);
2618
2619	gfx_v9_0_setup_rb(adev);
2620	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2621	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2622
2623	/* XXX SH_MEM regs */
2624	/* where to put LDS, scratch, GPUVM in FSA64 space */
2625	mutex_lock(&adev->srbm_mutex);
2626	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2627		soc15_grbm_select(adev, 0, 0, 0, i);
2628		/* CP and shaders */
2629		if (i == 0) {
2630			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2631					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2632			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2633					    !!adev->gmc.noretry);
2634			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2635			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2636		} else {
2637			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2638					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2639			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2640					    !!adev->gmc.noretry);
2641			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2642			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2643				(adev->gmc.private_aperture_start >> 48));
2644			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2645				(adev->gmc.shared_aperture_start >> 48));
2646			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2647		}
2648	}
2649	soc15_grbm_select(adev, 0, 0, 0, 0);
2650
2651	mutex_unlock(&adev->srbm_mutex);
2652
2653	gfx_v9_0_init_compute_vmid(adev);
2654	gfx_v9_0_init_gds_vmid(adev);
2655	gfx_v9_0_init_sq_config(adev);
2656}
2657
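/*
 * Poll RLC_SERDES_CU_MASTER_BUSY for every SE/SH and then the non-CU
 * master busy bits, giving the RLC serdes up to adev->usec_timeout usecs
 * to go idle.
 */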
2658static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2659{
2660	u32 i, j, k;
2661	u32 mask;
2662
2663	mutex_lock(&adev->grbm_idx_mutex);
2664	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2665		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2666			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2667			for (k = 0; k < adev->usec_timeout; k++) {
2668				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2669					break;
2670				udelay(1);
2671			}
2672			if (k == adev->usec_timeout) {
2673				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2674						      0xffffffff, 0xffffffff);
2675				mutex_unlock(&adev->grbm_idx_mutex);
2676				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2677					 i, j);
2678				return;
2679			}
2680		}
2681	}
2682	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2683	mutex_unlock(&adev->grbm_idx_mutex);
2684
2685	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2686		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2687		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2688		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2689	for (k = 0; k < adev->usec_timeout; k++) {
2690		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2691			break;
2692		udelay(1);
2693	}
2694}
2695
2696static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2697					       bool enable)
2698{
2699	u32 tmp;
2700
2701	/* These interrupts should be enabled to drive DS clock */
2702
2703	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2704
2705	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2706	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2707	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2708	if (adev->gfx.num_gfx_rings)
2709		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2710
2711	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2712}
2713
2714static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2715{
2716	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2717	/* csib */
2718	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2719			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2720	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2721			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2722	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2723			adev->gfx.rlc.clear_state_size);
2724}
2725
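/*
 * Walk the indirect portion of the RLC register list format, recording
 * where each indirect block starts and collecting the set of unique
 * indirect register offsets it references.
 */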
2726static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2727				int indirect_offset,
2728				int list_size,
2729				int *unique_indirect_regs,
2730				int unique_indirect_reg_count,
2731				int *indirect_start_offsets,
2732				int *indirect_start_offsets_count,
2733				int max_start_offsets_count)
2734{
2735	int idx;
2736
2737	for (; indirect_offset < list_size; indirect_offset++) {
2738		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2739		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2740		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2741
2742		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2743			indirect_offset += 2;
2744
2745			/* look for the matching index */
2746			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2747				if (unique_indirect_regs[idx] ==
2748					register_list_format[indirect_offset] ||
2749					!unique_indirect_regs[idx])
2750					break;
2751			}
2752
2753			BUG_ON(idx >= unique_indirect_reg_count);
2754
2755			if (!unique_indirect_regs[idx])
2756				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2757
2758			indirect_offset++;
2759		}
2760	}
2761}
2762
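/*
 * Program the RLC save/restore machine: write the register_restore table
 * into SRM ARAM, load the direct and indirect register list format into
 * GPM scratch, and set up the index control address/data pairs for the
 * unique indirect registers.
 */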
2763static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2764{
2765	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2766	int unique_indirect_reg_count = 0;
2767
2768	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2769	int indirect_start_offsets_count = 0;
2770
2771	int list_size = 0;
2772	int i = 0, j = 0;
2773	u32 tmp = 0;
2774
2775	u32 *register_list_format =
2776		kmemdup(adev->gfx.rlc.register_list_format,
2777			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2778	if (!register_list_format)
2779		return -ENOMEM;
2780
2781	/* setup unique_indirect_regs array and indirect_start_offsets array */
2782	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2783	gfx_v9_1_parse_ind_reg_list(register_list_format,
2784				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2785				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2786				    unique_indirect_regs,
2787				    unique_indirect_reg_count,
2788				    indirect_start_offsets,
2789				    &indirect_start_offsets_count,
2790				    ARRAY_SIZE(indirect_start_offsets));
2791
2792	/* enable auto inc in case it is disabled */
2793	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2794	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2795	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2796
2797	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2798	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2799		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2800	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2801		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2802			adev->gfx.rlc.register_restore[i]);
2803
2804	/* load indirect register */
2805	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2806		adev->gfx.rlc.reg_list_format_start);
2807
2808	/* direct register portion */
2809	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2810		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2811			register_list_format[i]);
2812
2813	/* indirect register portion */
2814	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2815		if (register_list_format[i] == 0xFFFFFFFF) {
2816			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2817			continue;
2818		}
2819
2820		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2821		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2822
2823		for (j = 0; j < unique_indirect_reg_count; j++) {
2824			if (register_list_format[i] == unique_indirect_regs[j]) {
2825				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2826				break;
2827			}
2828		}
2829
2830		BUG_ON(j >= unique_indirect_reg_count);
2831
2832		i++;
2833	}
2834
2835	/* set save/restore list size */
2836	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2837	list_size = list_size >> 1;
2838	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2839		adev->gfx.rlc.reg_restore_list_size);
2840	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2841
2842	/* write the starting offsets to RLC scratch ram */
2843	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2844		adev->gfx.rlc.starting_offsets_start);
2845	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2846		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2847		       indirect_start_offsets[i]);
2848
2849	/* load unique indirect regs*/
2850	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2851		if (unique_indirect_regs[i] != 0) {
2852			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2853			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2854			       unique_indirect_regs[i] & 0x3FFFF);
2855
2856			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2857			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2858			       unique_indirect_regs[i] >> 20);
2859		}
2860	}
2861
2862	kfree(register_list_format);
2863	return 0;
2864}
2865
2866static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2867{
2868	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2869}
2870
2871static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2872					     bool enable)
2873{
2874	uint32_t data = 0;
2875	uint32_t default_data = 0;
2876
2877	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2878	if (enable) {
2879		/* enable GFXIP control over CGPG */
2880		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2881		if (default_data != data)
2882			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2883
2884		/* update status */
2885		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2886		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2887		if (default_data != data)
2888			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2889	} else {
2890		/* restore GFXIP control over CGPG */
2891		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2892		if (default_data != data)
2893			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2894	}
2895}
2896
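/*
 * Static power-gating setup, applied only when GFX PG/SMG/DMG is supported:
 * programs the CP idle poll count, the RLC power up/down, command-propagate,
 * SERDES and CGCG delays, and the auto-PG idle threshold, then hands CGPG
 * control to the GFX IP on ASICs other than Renoir.
 */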
2897static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2898{
2899	uint32_t data = 0;
2900
2901	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2902			      AMD_PG_SUPPORT_GFX_SMG |
2903			      AMD_PG_SUPPORT_GFX_DMG)) {
2904		/* init IDLE_POLL_COUNT = 0x60 */
2905		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2906		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2907		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2908		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2909
2910		/* init RLC PG Delay */
2911		data = 0;
2912		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2913		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2914		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2915		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2916		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2917
2918		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2919		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2920		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2921		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2922
2923		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2924		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2925		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2926		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2927
2928		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2929		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2930
2931		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2932		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2933		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2934		if (adev->asic_type != CHIP_RENOIR)
2935			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2936	}
2937}
2938
2939static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2940						bool enable)
2941{
2942	uint32_t data = 0;
2943	uint32_t default_data = 0;
2944
2945	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2946	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2947			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2948			     enable ? 1 : 0);
2949	if (default_data != data)
2950		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2951}
2952
2953static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2954						bool enable)
2955{
2956	uint32_t data = 0;
2957	uint32_t default_data = 0;
2958
2959	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2960	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2961			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2962			     enable ? 1 : 0);
2963	if (default_data != data)
2964		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2965}
2966
2967static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2968					bool enable)
2969{
2970	uint32_t data = 0;
2971	uint32_t default_data = 0;
2972
2973	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2974	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2975			     CP_PG_DISABLE,
2976			     enable ? 0 : 1);
2977	if (default_data != data)
2978		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2979}
2980
2981static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2982						bool enable)
2983{
2984	uint32_t data, default_data;
2985
2986	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2987	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2988			     GFX_POWER_GATING_ENABLE,
2989			     enable ? 1 : 0);
2990	if (default_data != data)
2991		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2992}
2993
2994static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2995						bool enable)
2996{
2997	uint32_t data, default_data;
2998
2999	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3000	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3001			     GFX_PIPELINE_PG_ENABLE,
3002			     enable ? 1 : 0);
3003	if (default_data != data)
3004		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3005
3006	if (!enable)
3007		/* read any GFX register to wake up GFX */
3008		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3009}
3010
3011static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3012						       bool enable)
3013{
3014	uint32_t data, default_data;
3015
3016	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3017	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3018			     STATIC_PER_CU_PG_ENABLE,
3019			     enable ? 1 : 0);
3020	if (default_data != data)
3021		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3022}
3023
3024static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3025						bool enable)
3026{
3027	uint32_t data, default_data;
3028
3029	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3030	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3031			     DYN_PER_CU_PG_ENABLE,
3032			     enable ? 1 : 0);
3033	if (default_data != data)
3034		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3035}
3036
3037static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3038{
3039	gfx_v9_0_init_csb(adev);
3040
3041	/*
3042	 * The RLC save/restore list is available since RLC v2_1
3043	 * and is required by the gfxoff feature.
3044	 */
3045	if (adev->gfx.rlc.is_rlc_v2_1) {
3046		if (adev->asic_type == CHIP_VEGA12 ||
3047		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3048			gfx_v9_1_init_rlc_save_restore_list(adev);
3049		gfx_v9_0_enable_save_restore_machine(adev);
3050	}
3051
3052	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3053			      AMD_PG_SUPPORT_GFX_SMG |
3054			      AMD_PG_SUPPORT_GFX_DMG |
3055			      AMD_PG_SUPPORT_CP |
3056			      AMD_PG_SUPPORT_GDS |
3057			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3058		WREG32(mmRLC_JUMP_TABLE_RESTORE,
3059		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
3060		gfx_v9_0_init_gfx_power_gating(adev);
3061	}
3062}
3063
3064static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3065{
3066	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3067	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3068	gfx_v9_0_wait_for_rlc_serdes(adev);
3069}
3070
3071static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3072{
3073	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3074	udelay(50);
3075	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3076	udelay(50);
3077}
3078
3079static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3080{
3081#ifdef AMDGPU_RLC_DEBUG_RETRY
3082	u32 rlc_ucode_ver;
3083#endif
3084
3085	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3086	udelay(50);
3087
3088	/* on APUs such as carrizo, the cp interrupt is enabled after cp init */
3089	if (!(adev->flags & AMD_IS_APU)) {
3090		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3091		udelay(50);
3092	}
3093
3094#ifdef AMDGPU_RLC_DEBUG_RETRY
3095	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3096	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3097	if (rlc_ucode_ver == 0x108) {
3098		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3099				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3100		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3101		 * default is 0x9C4 to create a 100us interval */
3102		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3103		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3104		 * to disable the page fault retry interrupts, default is
3105		 * 0x100 (256) */
3106		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3107	}
3108#endif
3109}
3110
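/*
 * Legacy (non-PSP) RLC microcode load: stream the ucode dwords through
 * RLC_GPM_UCODE_ADDR/DATA starting at RLCG_UCODE_LOADING_START_ADDRESS and
 * finish by writing the firmware version to the address register.
 */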
3111static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3112{
3113	const struct rlc_firmware_header_v2_0 *hdr;
3114	const __le32 *fw_data;
3115	unsigned i, fw_size;
3116
3117	if (!adev->gfx.rlc_fw)
3118		return -EINVAL;
3119
3120	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3121	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3122
3123	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3124			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3125	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3126
3127	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3128			RLCG_UCODE_LOADING_START_ADDRESS);
3129	for (i = 0; i < fw_size; i++)
3130		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3131	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3132
3133	return 0;
3134}
3135
3136static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3137{
3138	int r;
3139
3140	if (amdgpu_sriov_vf(adev)) {
3141		gfx_v9_0_init_csb(adev);
3142		return 0;
3143	}
3144
3145	adev->gfx.rlc.funcs->stop(adev);
3146
3147	/* disable CG */
3148	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3149
3150	gfx_v9_0_init_pg(adev);
3151
3152	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3153		/* legacy rlc firmware loading */
3154		r = gfx_v9_0_rlc_load_microcode(adev);
3155		if (r)
3156			return r;
3157	}
3158
3159	switch (adev->asic_type) {
3160	case CHIP_RAVEN:
3161		if (amdgpu_lbpw == 0)
3162			gfx_v9_0_enable_lbpw(adev, false);
3163		else
3164			gfx_v9_0_enable_lbpw(adev, true);
3165		break;
3166	case CHIP_VEGA20:
3167		if (amdgpu_lbpw > 0)
3168			gfx_v9_0_enable_lbpw(adev, true);
3169		else
3170			gfx_v9_0_enable_lbpw(adev, false);
3171		break;
3172	default:
3173		break;
3174	}
3175
3176	adev->gfx.rlc.funcs->start(adev);
3177
3178	return 0;
3179}
3180
3181static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3182{
3183	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3184
3185	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3186	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3187	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3188	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3189	udelay(50);
3190}
3191
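/*
 * Legacy (non-PSP) CP gfx microcode load: halt the gfx CP, then stream the
 * PFP, CE and ME firmware images through their respective UCODE/RAM
 * address-data register pairs and record the firmware versions.
 */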
3192static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3193{
3194	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3195	const struct gfx_firmware_header_v1_0 *ce_hdr;
3196	const struct gfx_firmware_header_v1_0 *me_hdr;
3197	const __le32 *fw_data;
3198	unsigned i, fw_size;
3199
3200	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3201		return -EINVAL;
3202
3203	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3204		adev->gfx.pfp_fw->data;
3205	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3206		adev->gfx.ce_fw->data;
3207	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3208		adev->gfx.me_fw->data;
3209
3210	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3211	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3212	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3213
3214	gfx_v9_0_cp_gfx_enable(adev, false);
3215
3216	/* PFP */
3217	fw_data = (const __le32 *)
3218		(adev->gfx.pfp_fw->data +
3219		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3220	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3221	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3222	for (i = 0; i < fw_size; i++)
3223		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3224	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3225
3226	/* CE */
3227	fw_data = (const __le32 *)
3228		(adev->gfx.ce_fw->data +
3229		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3230	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3231	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3232	for (i = 0; i < fw_size; i++)
3233		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3234	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3235
3236	/* ME */
3237	fw_data = (const __le32 *)
3238		(adev->gfx.me_fw->data +
3239		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3240	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3241	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3242	for (i = 0; i < fw_size; i++)
3243		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3244	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3245
3246	return 0;
3247}
3248
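/*
 * Initial CP gfx bring-up: program CP_MAX_CONTEXT/CP_DEVICE_ID, un-halt the
 * CP and emit the clear-state preamble (context control, the gfx9_cs_data
 * context registers and the CE partition bases) on the first gfx ring.
 */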
3249static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3250{
3251	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3252	const struct cs_section_def *sect = NULL;
3253	const struct cs_extent_def *ext = NULL;
3254	int r, i, tmp;
3255
3256	/* init the CP */
3257	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3258	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3259
3260	gfx_v9_0_cp_gfx_enable(adev, true);
3261
3262	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3263	if (r) {
3264		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3265		return r;
3266	}
3267
3268	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3269	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3270
3271	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3272	amdgpu_ring_write(ring, 0x80000000);
3273	amdgpu_ring_write(ring, 0x80000000);
3274
3275	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3276		for (ext = sect->section; ext->extent != NULL; ++ext) {
3277			if (sect->id == SECT_CONTEXT) {
3278				amdgpu_ring_write(ring,
3279				       PACKET3(PACKET3_SET_CONTEXT_REG,
3280					       ext->reg_count));
3281				amdgpu_ring_write(ring,
3282				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3283				for (i = 0; i < ext->reg_count; i++)
3284					amdgpu_ring_write(ring, ext->extent[i]);
3285			}
3286		}
3287	}
3288
3289	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3290	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3291
3292	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3293	amdgpu_ring_write(ring, 0);
3294
3295	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3296	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3297	amdgpu_ring_write(ring, 0x8000);
3298	amdgpu_ring_write(ring, 0x8000);
3299
3300	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3301	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3302		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3303	amdgpu_ring_write(ring, tmp);
3304	amdgpu_ring_write(ring, 0);
3305
3306	amdgpu_ring_commit(ring);
3307
3308	return 0;
3309}
3310
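/*
 * Resume the gfx ring buffer: program ring 0's size, rptr/wptr writeback
 * addresses, base address and doorbell range, then run
 * gfx_v9_0_cp_gfx_start() and mark the ring schedulable.
 */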
3311static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3312{
3313	struct amdgpu_ring *ring;
3314	u32 tmp;
3315	u32 rb_bufsz;
3316	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3317
3318	/* Set the write pointer delay */
3319	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3320
3321	/* set the RB to use vmid 0 */
3322	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3323
3324	/* Set ring buffer size */
3325	ring = &adev->gfx.gfx_ring[0];
3326	rb_bufsz = order_base_2(ring->ring_size / 8);
3327	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3328	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3329#ifdef __BIG_ENDIAN
3330	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3331#endif
3332	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3333
3334	/* Initialize the ring buffer's write pointers */
3335	ring->wptr = 0;
3336	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3337	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3338
3339	/* set the wb address whether it's enabled or not */
3340	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3341	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3342	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3343
3344	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3345	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3346	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3347
3348	mdelay(1);
3349	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3350
3351	rb_addr = ring->gpu_addr >> 8;
3352	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3353	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3354
3355	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3356	if (ring->use_doorbell) {
3357		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3358				    DOORBELL_OFFSET, ring->doorbell_index);
3359		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3360				    DOORBELL_EN, 1);
3361	} else {
3362		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3363	}
3364	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3365
3366	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3367			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3368	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3369
3370	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3371		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3372
3373
3374	/* start the ring */
3375	gfx_v9_0_cp_gfx_start(adev);
3376	ring->sched.ready = true;
3377
3378	return 0;
3379}
3380
3381static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3382{
3383	if (enable) {
3384		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3385	} else {
3386		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3387			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3388		adev->gfx.kiq.ring.sched.ready = false;
3389	}
3390	udelay(50);
3391}
3392
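/*
 * Legacy (non-PSP) MEC microcode load: point the CPC instruction cache at
 * the firmware copy in mec_fw_gpu_addr and write the MEC1 jump table
 * through CP_MEC_ME1_UCODE_ADDR/DATA.
 */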
3393static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3394{
3395	const struct gfx_firmware_header_v1_0 *mec_hdr;
3396	const __le32 *fw_data;
3397	unsigned i;
3398	u32 tmp;
3399
3400	if (!adev->gfx.mec_fw)
3401		return -EINVAL;
3402
3403	gfx_v9_0_cp_compute_enable(adev, false);
3404
3405	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3406	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3407
3408	fw_data = (const __le32 *)
3409		(adev->gfx.mec_fw->data +
3410		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3411	tmp = 0;
3412	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3413	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3414	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3415
3416	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3417		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3418	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3419		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3420
3421	/* MEC1 */
3422	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3423			 mec_hdr->jt_offset);
3424	for (i = 0; i < mec_hdr->jt_size; i++)
3425		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3426			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3427
3428	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3429			adev->gfx.mec_fw_version);
3430	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3431
3432	return 0;
3433}
3434
3435/* KIQ functions */
3436static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3437{
3438	uint32_t tmp;
3439	struct amdgpu_device *adev = ring->adev;
3440
3441	/* tell RLC which queue is the KIQ queue */
3442	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3443	tmp &= 0xffffff00;
3444	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3445	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3446	tmp |= 0x80;
3447	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3448}
3449
3450static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3451{
3452	struct amdgpu_device *adev = ring->adev;
3453
3454	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3455		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3456			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3457			mqd->cp_hqd_queue_priority =
3458				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3459		}
3460	}
3461}
3462
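/*
 * Fill the memory queue descriptor (MQD) for a compute or KIQ ring: static
 * thread management masks, EOP buffer, doorbell control, MQD and HQD base
 * addresses, PQ control, writeback addresses and queue priority.  Only the
 * KIQ marks cp_hqd_active here; compute queues are activated later through
 * map_queues packets.
 */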
3463static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3464{
3465	struct amdgpu_device *adev = ring->adev;
3466	struct v9_mqd *mqd = ring->mqd_ptr;
3467	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3468	uint32_t tmp;
3469
3470	mqd->header = 0xC0310800;
3471	mqd->compute_pipelinestat_enable = 0x00000001;
3472	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3473	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3474	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3475	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3476	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3477	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3478	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3479	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3480	mqd->compute_misc_reserved = 0x00000003;
3481
3482	mqd->dynamic_cu_mask_addr_lo =
3483		lower_32_bits(ring->mqd_gpu_addr
3484			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3485	mqd->dynamic_cu_mask_addr_hi =
3486		upper_32_bits(ring->mqd_gpu_addr
3487			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3488
3489	eop_base_addr = ring->eop_gpu_addr >> 8;
3490	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3491	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3492
3493	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3494	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3495	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3496			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3497
3498	mqd->cp_hqd_eop_control = tmp;
3499
3500	/* enable doorbell? */
3501	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3502
3503	if (ring->use_doorbell) {
3504		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3505				    DOORBELL_OFFSET, ring->doorbell_index);
3506		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3507				    DOORBELL_EN, 1);
3508		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3509				    DOORBELL_SOURCE, 0);
3510		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3511				    DOORBELL_HIT, 0);
3512	} else {
3513		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3514					 DOORBELL_EN, 0);
3515	}
3516
3517	mqd->cp_hqd_pq_doorbell_control = tmp;
3518
3519	/* disable the queue if it's active */
3520	ring->wptr = 0;
3521	mqd->cp_hqd_dequeue_request = 0;
3522	mqd->cp_hqd_pq_rptr = 0;
3523	mqd->cp_hqd_pq_wptr_lo = 0;
3524	mqd->cp_hqd_pq_wptr_hi = 0;
3525
3526	/* set the pointer to the MQD */
3527	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3528	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3529
3530	/* set MQD vmid to 0 */
3531	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3532	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3533	mqd->cp_mqd_control = tmp;
3534
3535	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3536	hqd_gpu_addr = ring->gpu_addr >> 8;
3537	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3538	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3539
3540	/* set up the HQD, this is similar to CP_RB0_CNTL */
3541	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3542	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3543			    (order_base_2(ring->ring_size / 4) - 1));
3544	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3545			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3546#ifdef __BIG_ENDIAN
3547	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3548#endif
3549	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3550	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3551	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3552	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3553	mqd->cp_hqd_pq_control = tmp;
3554
3555	/* set the wb address whether it's enabled or not */
3556	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3557	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3558	mqd->cp_hqd_pq_rptr_report_addr_hi =
3559		upper_32_bits(wb_gpu_addr) & 0xffff;
3560
3561	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3562	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3563	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3564	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3565
3566	tmp = 0;
3567	/* enable the doorbell if requested */
3568	if (ring->use_doorbell) {
3569		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3570		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3571				DOORBELL_OFFSET, ring->doorbell_index);
3572
3573		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3574					 DOORBELL_EN, 1);
3575		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3576					 DOORBELL_SOURCE, 0);
3577		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3578					 DOORBELL_HIT, 0);
3579	}
3580
3581	mqd->cp_hqd_pq_doorbell_control = tmp;
3582
3583	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3584	ring->wptr = 0;
3585	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3586
3587	/* set the vmid for the queue */
3588	mqd->cp_hqd_vmid = 0;
3589
3590	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3591	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3592	mqd->cp_hqd_persistent_state = tmp;
3593
3594	/* set MIN_IB_AVAIL_SIZE */
3595	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3596	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3597	mqd->cp_hqd_ib_control = tmp;
3598
3599	/* set static priority for a queue/ring */
3600	gfx_v9_0_mqd_set_priority(ring, mqd);
3601	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3602
3603	/* the map_queues packet doesn't need to activate the queue,
3604	 * so only the kiq needs to set this field.
3605	 */
3606	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3607		mqd->cp_hqd_active = 1;
3608
3609	return 0;
3610}
3611
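/*
 * Commit the prepared MQD into the CP_HQD_* registers of the queue that the
 * caller has selected with soc15_grbm_select() (under srbm_mutex): drain any
 * active HQD first, then program the EOP, MQD/PQ bases, doorbell range and
 * finally activate the queue.
 */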
3612static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3613{
3614	struct amdgpu_device *adev = ring->adev;
3615	struct v9_mqd *mqd = ring->mqd_ptr;
3616	int j;
3617
3618	/* disable wptr polling */
3619	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3620
3621	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3622	       mqd->cp_hqd_eop_base_addr_lo);
3623	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3624	       mqd->cp_hqd_eop_base_addr_hi);
3625
3626	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3627	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3628	       mqd->cp_hqd_eop_control);
3629
3630	/* enable doorbell? */
3631	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3632	       mqd->cp_hqd_pq_doorbell_control);
3633
3634	/* disable the queue if it's active */
3635	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3636		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3637		for (j = 0; j < adev->usec_timeout; j++) {
3638			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3639				break;
3640			udelay(1);
3641		}
3642		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3643		       mqd->cp_hqd_dequeue_request);
3644		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3645		       mqd->cp_hqd_pq_rptr);
3646		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3647		       mqd->cp_hqd_pq_wptr_lo);
3648		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3649		       mqd->cp_hqd_pq_wptr_hi);
3650	}
3651
3652	/* set the pointer to the MQD */
3653	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3654	       mqd->cp_mqd_base_addr_lo);
3655	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3656	       mqd->cp_mqd_base_addr_hi);
3657
3658	/* set MQD vmid to 0 */
3659	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3660	       mqd->cp_mqd_control);
3661
3662	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3663	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3664	       mqd->cp_hqd_pq_base_lo);
3665	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3666	       mqd->cp_hqd_pq_base_hi);
3667
3668	/* set up the HQD, this is similar to CP_RB0_CNTL */
3669	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3670	       mqd->cp_hqd_pq_control);
3671
3672	/* set the wb address whether it's enabled or not */
3673	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3674				mqd->cp_hqd_pq_rptr_report_addr_lo);
3675	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3676				mqd->cp_hqd_pq_rptr_report_addr_hi);
3677
3678	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3679	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3680	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3681	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3682	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3683
3684	/* enable the doorbell if requested */
3685	if (ring->use_doorbell) {
3686		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3687					(adev->doorbell_index.kiq * 2) << 2);
3688		/* If GC has entered CGPG, ringing doorbell > first page
3689		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3690		 * work around this issue. This change has to align with the
3691		 * firmware update.
3692		 */
3693		if (check_if_enlarge_doorbell_range(adev))
3694			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3695					(adev->doorbell.size - 4));
3696		else
3697			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3698					(adev->doorbell_index.userqueue_end * 2) << 2);
3699	}
3700
3701	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3702	       mqd->cp_hqd_pq_doorbell_control);
3703
3704	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3705	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3706	       mqd->cp_hqd_pq_wptr_lo);
3707	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3708	       mqd->cp_hqd_pq_wptr_hi);
3709
3710	/* set the vmid for the queue */
3711	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3712
3713	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3714	       mqd->cp_hqd_persistent_state);
3715
3716	/* activate the queue */
3717	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3718	       mqd->cp_hqd_active);
3719
3720	if (ring->use_doorbell)
3721		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3722
3723	return 0;
3724}
3725
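/*
 * Tear down the currently selected HQD: request a dequeue, wait for the
 * queue to go inactive (forcing CP_HQD_ACTIVE to 0 on timeout) and clear the
 * IQ timer, IB control, persistent state, doorbell control and PQ pointers.
 */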
3726static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3727{
3728	struct amdgpu_device *adev = ring->adev;
3729	int j;
3730
3731	/* disable the queue if it's active */
3732	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3733
3734		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3735
3736		for (j = 0; j < adev->usec_timeout; j++) {
3737			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3738				break;
3739			udelay(1);
3740		}
3741
3742		if (j == adev->usec_timeout) {
3743			DRM_DEBUG("KIQ dequeue request failed.\n");
3744
3745			/* Manual disable if dequeue request times out */
3746			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3747		}
3748
3749		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3750		      0);
3751	}
3752
3753	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3754	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3755	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3756	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3757	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3758	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3759	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3760	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3761
3762	return 0;
3763}
3764
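/*
 * Initialize the KIQ queue: on GPU reset with a valid MQD backup the MQD is
 * restored and only the HQD registers are reprogrammed; otherwise the MQD is
 * built from scratch, committed to the hardware and backed up.
 */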
3765static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3766{
3767	struct amdgpu_device *adev = ring->adev;
3768	struct v9_mqd *mqd = ring->mqd_ptr;
3769	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3770	struct v9_mqd *tmp_mqd;
3771
3772	gfx_v9_0_kiq_setting(ring);
3773
3774	/* The GPU could be in a bad state during probe if the driver triggered
3775	 * a reset after loading the SMU; in that case the mqd has not been
3776	 * initialized and the driver needs to re-init it.
3777	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3778	 */
3779	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3780	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3781		/* for GPU_RESET case, reset MQD to a clean status */
3782		if (adev->gfx.mec.mqd_backup[mqd_idx])
3783			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3784
3785		/* reset ring buffer */
3786		ring->wptr = 0;
3787		amdgpu_ring_clear_ring(ring);
3788
3789		mutex_lock(&adev->srbm_mutex);
3790		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3791		gfx_v9_0_kiq_init_register(ring);
3792		soc15_grbm_select(adev, 0, 0, 0, 0);
3793		mutex_unlock(&adev->srbm_mutex);
3794	} else {
3795		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3796		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3797		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3798		mutex_lock(&adev->srbm_mutex);
3799		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3800		gfx_v9_0_mqd_init(ring);
3801		gfx_v9_0_kiq_init_register(ring);
3802		soc15_grbm_select(adev, 0, 0, 0, 0);
3803		mutex_unlock(&adev->srbm_mutex);
3804
3805		if (adev->gfx.mec.mqd_backup[mqd_idx])
3806			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3807	}
3808
3809	return 0;
3810}
3811
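/*
 * Initialize a compute (KCQ) MQD in host memory.  On first init the MQD is
 * built from scratch and backed up; on GPU reset it is restored from the
 * backup and the ring buffer is cleared.  Unlike the KIQ, the queue itself
 * is mapped later through amdgpu_gfx_enable_kcq().
 */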
3812static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3813{
3814	struct amdgpu_device *adev = ring->adev;
3815	struct v9_mqd *mqd = ring->mqd_ptr;
3816	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3817	struct v9_mqd *tmp_mqd;
3818
3819	/* Same as the kiq init above: the driver needs to re-init the mqd if
3820	 * mqd->cp_hqd_pq_control has not been initialized before.
3821	 */
3822	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3823
3824	if (!tmp_mqd->cp_hqd_pq_control ||
3825	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3826		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3827		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3828		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3829		mutex_lock(&adev->srbm_mutex);
3830		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3831		gfx_v9_0_mqd_init(ring);
3832		soc15_grbm_select(adev, 0, 0, 0, 0);
3833		mutex_unlock(&adev->srbm_mutex);
3834
3835		if (adev->gfx.mec.mqd_backup[mqd_idx])
3836			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3837	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3838		/* reset MQD to a clean status */
3839		if (adev->gfx.mec.mqd_backup[mqd_idx])
3840			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3841
3842		/* reset ring buffer */
3843		ring->wptr = 0;
3844		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3845		amdgpu_ring_clear_ring(ring);
3846	} else {
3847		amdgpu_ring_clear_ring(ring);
3848	}
3849
3850	return 0;
3851}
3852
3853static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3854{
3855	struct amdgpu_ring *ring;
3856	int r;
3857
3858	ring = &adev->gfx.kiq.ring;
3859
3860	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3861	if (unlikely(r != 0))
3862		return r;
3863
3864	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3865	if (unlikely(r != 0))
3866		return r;
3867
3868	gfx_v9_0_kiq_init_queue(ring);
3869	amdgpu_bo_kunmap(ring->mqd_obj);
3870	ring->mqd_ptr = NULL;
3871	amdgpu_bo_unreserve(ring->mqd_obj);
3872	ring->sched.ready = true;
3873	return 0;
3874}
3875
3876static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3877{
3878	struct amdgpu_ring *ring = NULL;
3879	int r = 0, i;
3880
3881	gfx_v9_0_cp_compute_enable(adev, true);
3882
3883	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3884		ring = &adev->gfx.compute_ring[i];
3885
3886		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3887		if (unlikely(r != 0))
3888			goto done;
3889		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3890		if (!r) {
3891			r = gfx_v9_0_kcq_init_queue(ring);
3892			amdgpu_bo_kunmap(ring->mqd_obj);
3893			ring->mqd_ptr = NULL;
3894		}
3895		amdgpu_bo_unreserve(ring->mqd_obj);
3896		if (r)
3897			goto done;
3898	}
3899
3900	r = amdgpu_gfx_enable_kcq(adev);
3901done:
3902	return r;
3903}
3904
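/*
 * Full CP bring-up order: load gfx and compute microcode on the legacy
 * (non-PSP) path, resume the KIQ first (it is needed to map the compute
 * queues via amdgpu_gfx_enable_kcq()), then the gfx ring, then the compute
 * queues, and finally run the ring tests.
 */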
3905static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3906{
3907	int r, i;
3908	struct amdgpu_ring *ring;
3909
3910	if (!(adev->flags & AMD_IS_APU))
3911		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3912
3913	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3914		if (adev->gfx.num_gfx_rings) {
3915			/* legacy firmware loading */
3916			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3917			if (r)
3918				return r;
3919		}
3920
3921		r = gfx_v9_0_cp_compute_load_microcode(adev);
3922		if (r)
3923			return r;
3924	}
3925
3926	r = gfx_v9_0_kiq_resume(adev);
3927	if (r)
3928		return r;
3929
3930	if (adev->gfx.num_gfx_rings) {
3931		r = gfx_v9_0_cp_gfx_resume(adev);
3932		if (r)
3933			return r;
3934	}
3935
3936	r = gfx_v9_0_kcq_resume(adev);
3937	if (r)
3938		return r;
3939
3940	if (adev->gfx.num_gfx_rings) {
3941		ring = &adev->gfx.gfx_ring[0];
3942		r = amdgpu_ring_test_helper(ring);
3943		if (r)
3944			return r;
3945	}
3946
3947	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3948		ring = &adev->gfx.compute_ring[i];
3949		amdgpu_ring_test_helper(ring);
3950	}
3951
3952	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3953
3954	return 0;
3955}
3956
3957static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3958{
3959	u32 tmp;
3960
3961	if (adev->asic_type != CHIP_ARCTURUS &&
3962	    adev->asic_type != CHIP_ALDEBARAN)
3963		return;
3964
3965	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3966	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3967				adev->df.hash_status.hash_64k);
3968	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3969				adev->df.hash_status.hash_2m);
3970	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3971				adev->df.hash_status.hash_1g);
3972	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3973}
3974
3975static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3976{
3977	if (adev->gfx.num_gfx_rings)
3978		gfx_v9_0_cp_gfx_enable(adev, enable);
3979	gfx_v9_0_cp_compute_enable(adev, enable);
3980}
3981
3982static int gfx_v9_0_hw_init(void *handle)
3983{
3984	int r;
3985	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3986
3987	if (!amdgpu_sriov_vf(adev))
3988		gfx_v9_0_init_golden_registers(adev);
3989
3990	gfx_v9_0_constants_init(adev);
3991
3992	gfx_v9_0_init_tcp_config(adev);
3993
3994	r = adev->gfx.rlc.funcs->resume(adev);
3995	if (r)
3996		return r;
3997
3998	r = gfx_v9_0_cp_resume(adev);
3999	if (r)
4000		return r;
4001
4002	if (adev->asic_type == CHIP_ALDEBARAN)
4003		gfx_v9_4_2_set_power_brake_sequence(adev);
4004
4005	return r;
4006}
4007
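/*
 * hw_fini/suspend path: release the CP interrupts, unmap the compute queues
 * (unless a RAS fatal error is in flight), quiesce the KIQ when fully
 * unbinding, then halt the CP and the RLC.  SR-IOV and A+A reset take the
 * shortened paths noted below.
 */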
4008static int gfx_v9_0_hw_fini(void *handle)
4009{
4010	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4011
4012	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4013	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4014	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4015
4016	/* DF freeze and kcq disable will fail on a RAS fatal error */
4017	if (!amdgpu_ras_intr_triggered())
4018		/* disable KCQ to avoid the CPC touching memory that is no longer valid */
4019		amdgpu_gfx_disable_kcq(adev);
4020
4021	if (amdgpu_sriov_vf(adev)) {
4022		gfx_v9_0_cp_gfx_enable(adev, false);
4023		/* must disable polling for SRIOV when hw is done, otherwise the
4024		 * CPC engine may keep fetching a WB address that is already
4025		 * invalid after sw has finished, triggering a DMAR read error
4026		 * on the hypervisor side.
4027		 */
4028		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4029		return 0;
4030	}
4031
4032	/* Use deinitialize sequence from CAIL when unbinding device from driver,
4033	 * otherwise KIQ hangs when binding back
4034	 */
4035	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4036		mutex_lock(&adev->srbm_mutex);
4037		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4038				adev->gfx.kiq.ring.pipe,
4039				adev->gfx.kiq.ring.queue, 0);
4040		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4041		soc15_grbm_select(adev, 0, 0, 0, 0);
4042		mutex_unlock(&adev->srbm_mutex);
4043	}
4044
4045	gfx_v9_0_cp_enable(adev, false);
4046
4047	/* Skip suspend with A+A reset */
4048	if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
4049		dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
4050		return 0;
4051	}
4052
4053	adev->gfx.rlc.funcs->stop(adev);
4054	return 0;
4055}
4056
4057static int gfx_v9_0_suspend(void *handle)
4058{
4059	return gfx_v9_0_hw_fini(handle);
4060}
4061
4062static int gfx_v9_0_resume(void *handle)
4063{
4064	return gfx_v9_0_hw_init(handle);
4065}
4066
4067static bool gfx_v9_0_is_idle(void *handle)
4068{
4069	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4070
4071	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4072				GRBM_STATUS, GUI_ACTIVE))
4073		return false;
4074	else
4075		return true;
4076}
4077
4078static int gfx_v9_0_wait_for_idle(void *handle)
4079{
4080	unsigned i;
4081	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4082
4083	for (i = 0; i < adev->usec_timeout; i++) {
4084		if (gfx_v9_0_is_idle(handle))
4085			return 0;
4086		udelay(1);
4087	}
4088	return -ETIMEDOUT;
4089}
4090
4091static int gfx_v9_0_soft_reset(void *handle)
4092{
4093	u32 grbm_soft_reset = 0;
4094	u32 tmp;
4095	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4096
4097	/* GRBM_STATUS */
4098	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4099	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4100		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4101		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4102		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4103		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4104		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4105		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4106						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4107		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4108						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4109	}
4110
4111	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4112		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4113						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4114	}
4115
4116	/* GRBM_STATUS2 */
4117	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4118	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4119		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4120						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4121
4122
4123	if (grbm_soft_reset) {
4124		/* stop the rlc */
4125		adev->gfx.rlc.funcs->stop(adev);
4126
4127		if (adev->gfx.num_gfx_rings)
4128			/* Disable GFX parsing/prefetching */
4129			gfx_v9_0_cp_gfx_enable(adev, false);
4130
4131		/* Disable MEC parsing/prefetching */
4132		gfx_v9_0_cp_compute_enable(adev, false);
4133
4134		if (grbm_soft_reset) {
4135			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4136			tmp |= grbm_soft_reset;
4137			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4138			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4139			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4140
4141			udelay(50);
4142
4143			tmp &= ~grbm_soft_reset;
4144			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4145			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4146		}
4147
4148		/* Wait a little for things to settle down */
4149		udelay(50);
4150	}
4151	return 0;
4152}
4153
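/*
 * Read the 64-bit GPU clock counter through the KIQ: emit a COPY_DATA packet
 * whose result lands in a writeback slot, then poll the fence (retrying up
 * to MAX_KIQ_REG_TRY times).  Used by gfx_v9_0_get_gpu_clock_counter() when
 * direct register reads are not usable (Vega10 under SR-IOV runtime).
 */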
4154static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4155{
4156	signed long r, cnt = 0;
4157	unsigned long flags;
4158	uint32_t seq, reg_val_offs = 0;
4159	uint64_t value = 0;
4160	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4161	struct amdgpu_ring *ring = &kiq->ring;
4162
4163	BUG_ON(!ring->funcs->emit_rreg);
4164
4165	spin_lock_irqsave(&kiq->ring_lock, flags);
4166	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4167		pr_err("critical bug! too many kiq readers\n");
4168		goto failed_unlock;
4169	}
4170	amdgpu_ring_alloc(ring, 32);
4171	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4172	amdgpu_ring_write(ring, 9 |	/* src: register*/
4173				(5 << 8) |	/* dst: memory */
4174				(1 << 16) |	/* count sel */
4175				(1 << 20));	/* write confirm */
4176	amdgpu_ring_write(ring, 0);
4177	amdgpu_ring_write(ring, 0);
4178	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4179				reg_val_offs * 4));
4180	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4181				reg_val_offs * 4));
4182	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4183	if (r)
4184		goto failed_undo;
4185
4186	amdgpu_ring_commit(ring);
4187	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4188
4189	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4190
4191	/* don't wait anymore for gpu reset case because this way may
4192	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4193	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4194	 * never return if we keep waiting in virt_kiq_rreg, which causes
4195	 * gpu_recover() to hang there.
4196	 *
4197	 * also don't wait anymore for IRQ context
4198	 */
4199	if (r < 1 && (amdgpu_in_reset(adev)))
4200		goto failed_kiq_read;
4201
4202	might_sleep();
4203	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4204		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4205		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4206	}
4207
4208	if (cnt > MAX_KIQ_REG_TRY)
4209		goto failed_kiq_read;
4210
4211	mb();
4212	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4213		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4214	amdgpu_device_wb_free(adev, reg_val_offs);
4215	return value;
4216
4217failed_undo:
4218	amdgpu_ring_undo(ring);
4219failed_unlock:
4220	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4221failed_kiq_read:
4222	if (reg_val_offs)
4223		amdgpu_device_wb_free(adev, reg_val_offs);
4224	pr_err("failed to read gpu clock\n");
4225	return ~0;
4226}
4227
4228static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4229{
4230	uint64_t clock;
4231
4232	amdgpu_gfx_off_ctrl(adev, false);
4233	mutex_lock(&adev->gfx.gpu_clock_mutex);
4234	if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) {
4235		clock = gfx_v9_0_kiq_read_clock(adev);
4236	} else {
4237		WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4238		clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4239			((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4240	}
4241	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4242	amdgpu_gfx_off_ctrl(adev, true);
4243	return clock;
4244}
4245
4246static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4247					  uint32_t vmid,
4248					  uint32_t gds_base, uint32_t gds_size,
4249					  uint32_t gws_base, uint32_t gws_size,
4250					  uint32_t oa_base, uint32_t oa_size)
4251{
4252	struct amdgpu_device *adev = ring->adev;
4253
4254	/* GDS Base */
4255	gfx_v9_0_write_data_to_reg(ring, 0, false,
4256				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4257				   gds_base);
4258
4259	/* GDS Size */
4260	gfx_v9_0_write_data_to_reg(ring, 0, false,
4261				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4262				   gds_size);
4263
4264	/* GWS */
4265	gfx_v9_0_write_data_to_reg(ring, 0, false,
4266				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4267				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4268
4269	/* OA */
4270	gfx_v9_0_write_data_to_reg(ring, 0, false,
4271				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4272				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4273}
4274
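/*
 * Hand-assembled GFX9 compute shader binaries and dispatch register settings
 * consumed by the GPR ECC/EDC workaround (see the note referencing
 * gfx_v9_0_do_edc_gpr_workarounds() below) to initialize all VGPRs and
 * SGPRs; the Arcturus variants cover that ASIC's additional registers.
 */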
4275static const u32 vgpr_init_compute_shader[] =
4276{
4277	0xb07c0000, 0xbe8000ff,
4278	0x000000f8, 0xbf110800,
4279	0x7e000280, 0x7e020280,
4280	0x7e040280, 0x7e060280,
4281	0x7e080280, 0x7e0a0280,
4282	0x7e0c0280, 0x7e0e0280,
4283	0x80808800, 0xbe803200,
4284	0xbf84fff5, 0xbf9c0000,
4285	0xd28c0001, 0x0001007f,
4286	0xd28d0001, 0x0002027e,
4287	0x10020288, 0xb8810904,
4288	0xb7814000, 0xd1196a01,
4289	0x00000301, 0xbe800087,
4290	0xbefc00c1, 0xd89c4000,
4291	0x00020201, 0xd89cc080,
4292	0x00040401, 0x320202ff,
4293	0x00000800, 0x80808100,
4294	0xbf84fff8, 0x7e020280,
4295	0xbf810000, 0x00000000,
4296};
4297
4298static const u32 sgpr_init_compute_shader[] =
4299{
4300	0xb07c0000, 0xbe8000ff,
4301	0x0000005f, 0xbee50080,
4302	0xbe812c65, 0xbe822c65,
4303	0xbe832c65, 0xbe842c65,
4304	0xbe852c65, 0xb77c0005,
4305	0x80808500, 0xbf84fff8,
4306	0xbe800080, 0xbf810000,
4307};
4308
4309static const u32 vgpr_init_compute_shader_arcturus[] = {
4310	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4311	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4312	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4313	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4314	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4315	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4316	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4317	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4318	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4319	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4320	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4321	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4322	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4323	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4324	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4325	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4326	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4327	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4328	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4329	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4330	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4331	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4332	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4333	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4334	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4335	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4336	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4337	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4338	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4339	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4340	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4341	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4342	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4343	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4344	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4345	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4346	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4347	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4348	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4349	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4350	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4351	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4352	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4353	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4354	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4355	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4356	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4357	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4358	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4359	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4360	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4361	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4362	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4363	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4364	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4365	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4366	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4367	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4368	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4369	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4370	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4371	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4372	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4373	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4374	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4375	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4376	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4377	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4378	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4379	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4380	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4381	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4382	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4383	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4384	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4385	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4386	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4387	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4388	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4389	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4390	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4391	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4392	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4393	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4394	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4395	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4396	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4397	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4398	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4399	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4400	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4401	0xbf84fff8, 0xbf810000,
4402};
4403
4404	/* When the register arrays below are changed, please update gpr_reg_size
4405	 * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds so they
4406	 * cover all gfx9 ASICs. */
4407static const struct soc15_reg_entry vgpr_init_regs[] = {
4408   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4409   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4410   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4411   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4412   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4413   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4414   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4415   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4416   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4417   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4418   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4419   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4420   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4421   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4422};
4423
4424static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4425   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4426   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4427   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4428   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4429   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4430   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4431   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4432   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4433   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4434   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4435   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4436   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4437   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4438   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4439};
4440
4441static const struct soc15_reg_entry sgpr1_init_regs[] = {
4442   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4443   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4444   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4445   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4446   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4447   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4448   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4449   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4450   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4451   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4452   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4453   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4454   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4455   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4456};
4457
4458static const struct soc15_reg_entry sgpr2_init_regs[] = {
4459   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4460   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4461   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4462   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4463   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4464   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4465   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4466   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4467   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4468   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4469   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4470   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4471   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4472   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4473};
4474
4475static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4476   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4477   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4478   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4479   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4480   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4481   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4482   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4483   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4484   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4485   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4486   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4487   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4488   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4489   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4490   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4491   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4492   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4493   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4494   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4495   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4496   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4497   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4498   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4499   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4500   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4501   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4502   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4503   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4504   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4505   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4506   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4507   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4508   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4509};
4510
4511static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4512{
4513	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4514	int i, r;
4515
4516	/* only supported when RAS is enabled */
4517	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4518		return 0;
4519
4520	r = amdgpu_ring_alloc(ring, 7);
4521	if (r) {
4522		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4523			ring->name, r);
4524		return r;
4525	}
4526
4527	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4528	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4529
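	/* Emit a CP DMA_DATA packet that writes zeroes over the entire GDS
	 * range mapped to VMID0, initializing GDS memory and its EDC state.
	 */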
4530	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4531	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4532				PACKET3_DMA_DATA_DST_SEL(1) |
4533				PACKET3_DMA_DATA_SRC_SEL(2) |
4534				PACKET3_DMA_DATA_ENGINE(0)));
4535	amdgpu_ring_write(ring, 0);
4536	amdgpu_ring_write(ring, 0);
4537	amdgpu_ring_write(ring, 0);
4538	amdgpu_ring_write(ring, 0);
4539	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4540				adev->gds.gds_size);
4541
4542	amdgpu_ring_commit(ring);
4543
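	/* Poll until the read pointer catches up with the write pointer,
	 * i.e. the CP has consumed the GDS clear packet.
	 */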
4544	for (i = 0; i < adev->usec_timeout; i++) {
4545		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4546			break;
4547		udelay(1);
4548	}
4549
4550	if (i >= adev->usec_timeout)
4551		r = -ETIMEDOUT;
4552
4553	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4554
4555	return r;
4556}
4557
4558static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4559{
4560	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4561	struct amdgpu_ib ib;
4562	struct dma_fence *f = NULL;
4563	int r, i;
4564	unsigned total_size, vgpr_offset, sgpr_offset;
4565	u64 gpu_addr;
4566
4567	int compute_dim_x = adev->gfx.config.max_shader_engines *
4568						adev->gfx.config.max_cu_per_sh *
4569						adev->gfx.config.max_sh_per_se;
4570	int sgpr_work_group_size = 5;
4571	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4572	int vgpr_init_shader_size;
4573	const u32 *vgpr_init_shader_ptr;
4574	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4575
4576	/* only supported when RAS is enabled */
4577	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4578		return 0;
4579
4580	/* bail if the compute ring is not ready */
4581	if (!ring->sched.ready)
4582		return 0;
4583
4584	if (adev->asic_type == CHIP_ARCTURUS) {
4585		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4586		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4587		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4588	} else {
4589		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4590		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4591		vgpr_init_regs_ptr = vgpr_init_regs;
4592	}
4593
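	/* Size the IB for three dispatches (VGPR, SGPR1, SGPR2): each needs
	 * 3 dwords per SET_SH_REG write, 4 dwords for COMPUTE_PGM_LO/HI,
	 * 5 dwords for DISPATCH_DIRECT and 2 dwords for the CS partial flush,
	 * followed by the two shader binaries.
	 */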
4594	total_size =
4595		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4596	total_size +=
4597		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4598	total_size +=
4599		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4600	total_size = ALIGN(total_size, 256);
4601	vgpr_offset = total_size;
4602	total_size += ALIGN(vgpr_init_shader_size, 256);
4603	sgpr_offset = total_size;
4604	total_size += sizeof(sgpr_init_compute_shader);
4605
4606	/* allocate an indirect buffer to put the commands in */
4607	memset(&ib, 0, sizeof(ib));
4608	r = amdgpu_ib_get(adev, NULL, total_size,
4609					AMDGPU_IB_POOL_DIRECT, &ib);
4610	if (r) {
4611		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4612		return r;
4613	}
4614
4615	/* load the compute shaders */
4616	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4617		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4618
4619	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4620		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4621
4622	/* init the ib length to 0 */
4623	ib.length_dw = 0;
4624
4625	/* VGPR */
4626	/* write the register state for the compute dispatch */
4627	for (i = 0; i < gpr_reg_size; i++) {
4628		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4629		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4630								- PACKET3_SET_SH_REG_START;
4631		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4632	}
4633	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4634	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4635	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4636	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4637							- PACKET3_SET_SH_REG_START;
4638	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4639	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4640
4641	/* write dispatch packet */
4642	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4643	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4644	ib.ptr[ib.length_dw++] = 1; /* y */
4645	ib.ptr[ib.length_dw++] = 1; /* z */
4646	ib.ptr[ib.length_dw++] =
4647		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4648
4649	/* write CS partial flush packet */
4650	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4651	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4652
4653	/* SGPR1 */
4654	/* write the register state for the compute dispatch */
4655	for (i = 0; i < gpr_reg_size; i++) {
4656		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4657		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4658								- PACKET3_SET_SH_REG_START;
4659		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4660	}
4661	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4662	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4663	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4664	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4665							- PACKET3_SET_SH_REG_START;
4666	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4667	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4668
4669	/* write dispatch packet */
4670	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4671	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4672	ib.ptr[ib.length_dw++] = 1; /* y */
4673	ib.ptr[ib.length_dw++] = 1; /* z */
4674	ib.ptr[ib.length_dw++] =
4675		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4676
4677	/* write CS partial flush packet */
4678	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4679	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4680
4681	/* SGPR2 */
4682	/* write the register state for the compute dispatch */
4683	for (i = 0; i < gpr_reg_size; i++) {
4684		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4685		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4686								- PACKET3_SET_SH_REG_START;
4687		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4688	}
4689	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4690	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4691	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4692	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4693							- PACKET3_SET_SH_REG_START;
4694	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4695	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4696
4697	/* write dispatch packet */
4698	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4699	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4700	ib.ptr[ib.length_dw++] = 1; /* y */
4701	ib.ptr[ib.length_dw++] = 1; /* z */
4702	ib.ptr[ib.length_dw++] =
4703		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4704
4705	/* write CS partial flush packet */
4706	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4707	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4708
4709	/* schedule the IB on the ring */
4710	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4711	if (r) {
4712		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4713		goto fail;
4714	}
4715
4716	/* wait for the GPU to finish processing the IB */
4717	r = dma_fence_wait(f, false);
4718	if (r) {
4719		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4720		goto fail;
4721	}
4722
4723fail:
4724	amdgpu_ib_free(adev, &ib, NULL);
4725	dma_fence_put(f);
4726
4727	return r;
4728}
4729
4730static int gfx_v9_0_early_init(void *handle)
4731{
4732	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4733
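	/* Arcturus and Aldebaran are compute-only ASICs and expose no GFX rings */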
4734	if (adev->asic_type == CHIP_ARCTURUS ||
4735	    adev->asic_type == CHIP_ALDEBARAN)
4736		adev->gfx.num_gfx_rings = 0;
4737	else
4738		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4739	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4740					  AMDGPU_MAX_COMPUTE_RINGS);
4741	gfx_v9_0_set_kiq_pm4_funcs(adev);
4742	gfx_v9_0_set_ring_funcs(adev);
4743	gfx_v9_0_set_irq_funcs(adev);
4744	gfx_v9_0_set_gds_init(adev);
4745	gfx_v9_0_set_rlc_funcs(adev);
4746
4747	return 0;
4748}
4749
4750static int gfx_v9_0_ecc_late_init(void *handle)
4751{
4752	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4753	int r;
4754
4755	/*
4756	 * Temporary workaround: on several cards the CP firmware fails to
4757	 * update the read pointer while CPDMA is clearing GDS during the
4758	 * suspend/resume sequence, so limit this operation to the cold
4759	 * boot sequence.
4760	 */
4761	if ((!adev->in_suspend) &&
4762	    (adev->gds.gds_size)) {
4763		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4764		if (r)
4765			return r;
4766	}
4767
4768	/* requires IBs so do in late init after IB pool is initialized */
4769	if (adev->asic_type == CHIP_ALDEBARAN)
4770		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4771	else
4772		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4773
4774	if (r)
4775		return r;
4776
4777	if (adev->gfx.ras_funcs &&
4778	    adev->gfx.ras_funcs->ras_late_init) {
4779		r = adev->gfx.ras_funcs->ras_late_init(adev);
4780		if (r)
4781			return r;
4782	}
4783
4784	if (adev->gfx.ras_funcs &&
4785	    adev->gfx.ras_funcs->enable_watchdog_timer)
4786		adev->gfx.ras_funcs->enable_watchdog_timer(adev);
4787
4788	return 0;
4789}
4790
4791static int gfx_v9_0_late_init(void *handle)
4792{
4793	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4794	int r;
4795
4796	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4797	if (r)
4798		return r;
4799
4800	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4801	if (r)
4802		return r;
4803
4804	r = gfx_v9_0_ecc_late_init(handle);
4805	if (r)
4806		return r;
4807
4808	return 0;
4809}
4810
4811static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4812{
4813	uint32_t rlc_setting;
4814
4815	/* if RLC is not enabled, do nothing */
4816	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4817	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4818		return false;
4819
4820	return true;
4821}
4822
4823static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4824{
4825	uint32_t data;
4826	unsigned i;
4827
4828	data = RLC_SAFE_MODE__CMD_MASK;
4829	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4830	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4831
4832	/* wait for RLC_SAFE_MODE */
4833	for (i = 0; i < adev->usec_timeout; i++) {
4834		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4835			break;
4836		udelay(1);
4837	}
4838}
4839
4840static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4841{
4842	uint32_t data;
4843
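	/* CMD with the MESSAGE field left at 0 requests leaving safe mode */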
4844	data = RLC_SAFE_MODE__CMD_MASK;
4845	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4846}
4847
4848static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4849						bool enable)
4850{
4851	amdgpu_gfx_rlc_enter_safe_mode(adev);
4852
4853	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4854		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4855		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4856			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4857	} else {
4858		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4859		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4860			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4861	}
4862
4863	amdgpu_gfx_rlc_exit_safe_mode(adev);
4864}
4865
4866static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4867						bool enable)
4868{
4869	/* TODO: double check if we need to perform under safe mode */
4870	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4871
4872	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4873		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4874	else
4875		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4876
4877	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4878		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4879	else
4880		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4881
4882	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4883}
4884
4885static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4886						      bool enable)
4887{
4888	uint32_t data, def;
4889
4890	amdgpu_gfx_rlc_enter_safe_mode(adev);
4891
4892	/* It is disabled by HW by default */
4893	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4894		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4895		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4896
4897		if (adev->asic_type != CHIP_VEGA12)
4898			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4899
4900		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4901			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4902			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4903
4904		/* only for Vega10 & Raven1 */
4905		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4906
4907		if (def != data)
4908			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4909
4910		/* MGLS is a global flag to control all MGLS in GFX */
4911		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4912			/* 2 - RLC memory Light sleep */
4913			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4914				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4915				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4916				if (def != data)
4917					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4918			}
4919			/* 3 - CP memory Light sleep */
4920			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4921				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4922				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4923				if (def != data)
4924					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4925			}
4926		}
4927	} else {
4928		/* 1 - MGCG_OVERRIDE */
4929		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4930
4931		if (adev->asic_type != CHIP_VEGA12)
4932			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4933
4934		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4935			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4936			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4937			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4938
4939		if (def != data)
4940			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4941
4942		/* 2 - disable MGLS in RLC */
4943		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4944		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4945			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4946			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4947		}
4948
4949		/* 3 - disable MGLS in CP */
4950		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4951		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4952			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4953			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4954		}
4955	}
4956
4957	amdgpu_gfx_rlc_exit_safe_mode(adev);
4958}
4959
4960static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4961					   bool enable)
4962{
4963	uint32_t data, def;
4964
4965	if (!adev->gfx.num_gfx_rings)
4966		return;
4967
4968	amdgpu_gfx_rlc_enter_safe_mode(adev);
4969
4970	/* Enable 3D CGCG/CGLS */
4971	if (enable) {
4972		/* write cmd to clear cgcg/cgls ov */
4973		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4974		/* unset CGCG override */
4975		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4976		/* update CGCG and CGLS override bits */
4977		if (def != data)
4978			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4979
4980		/* enable 3Dcgcg FSM(0x0000363f) */
4981		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4982
4983		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4984			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4985				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4986		else
4987			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4988
4989		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4990			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4991				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4992		if (def != data)
4993			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4994
4995		/* set IDLE_POLL_COUNT(0x00900100) */
4996		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4997		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4998			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4999		if (def != data)
5000			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5001	} else {
5002		/* Disable CGCG/CGLS */
5003		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5004		/* disable cgcg, cgls should be disabled */
5005		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5006			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5007		/* disable cgcg and cgls in FSM */
5008		if (def != data)
5009			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5010	}
5011
5012	amdgpu_gfx_rlc_exit_safe_mode(adev);
5013}
5014
5015static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5016						      bool enable)
5017{
5018	uint32_t def, data;
5019
5020	amdgpu_gfx_rlc_enter_safe_mode(adev);
5021
5022	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5023		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5024		/* unset CGCG override */
5025		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5026		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5027			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5028		else
5029			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5030		/* update CGCG and CGLS override bits */
5031		if (def != data)
5032			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5033
5034		/* enable cgcg FSM(0x0000363F) */
5035		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5036
5037		if (adev->asic_type == CHIP_ARCTURUS)
5038			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5039				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5040		else
5041			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5042				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5043		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5044			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5045				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5046		if (def != data)
5047			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5048
5049		/* set IDLE_POLL_COUNT(0x00900100) */
5050		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5051		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5052			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5053		if (def != data)
5054			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5055	} else {
5056		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5057		/* reset CGCG/CGLS bits */
5058		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5059		/* disable cgcg and cgls in FSM */
5060		if (def != data)
5061			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5062	}
5063
5064	amdgpu_gfx_rlc_exit_safe_mode(adev);
5065}
5066
5067static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5068					    bool enable)
5069{
5070	if (enable) {
5071		/* CGCG/CGLS should be enabled after MGCG/MGLS
5072		 * ===  MGCG + MGLS ===
5073		 */
5074		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5075		/* ===  CGCG /CGLS for GFX 3D Only === */
5076		gfx_v9_0_update_3d_clock_gating(adev, enable);
5077		/* ===  CGCG + CGLS === */
5078		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5079	} else {
5080		/* CGCG/CGLS should be disabled before MGCG/MGLS
5081		 * ===  CGCG + CGLS ===
5082		 */
5083		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5084		/* ===  CGCG /CGLS for GFX 3D Only === */
5085		gfx_v9_0_update_3d_clock_gating(adev, enable);
5086		/* ===  MGCG + MGLS === */
5087		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5088	}
5089	return 0;
5090}
5091
5092static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5093{
5094	u32 reg, data;
5095
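	/* with a single VF (pp_one_vf) the register is accessed directly,
	 * bypassing the KIQ
	 */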
5096	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5097	if (amdgpu_sriov_is_pp_one_vf(adev))
5098		data = RREG32_NO_KIQ(reg);
5099	else
5100		data = RREG32(reg);
5101
5102	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5103	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5104
5105	if (amdgpu_sriov_is_pp_one_vf(adev))
5106		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5107	else
5108		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5109}
5110
5111static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5112					uint32_t offset,
5113					struct soc15_reg_rlcg *entries, int arr_size)
5114{
5115	int i;
5116	uint32_t reg;
5117
5118	if (!entries)
5119		return false;
5120
5121	for (i = 0; i < arr_size; i++) {
5122		const struct soc15_reg_rlcg *entry;
5123
5124		entry = &entries[i];
5125		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5126		if (offset == reg)
5127			return true;
5128	}
5129
5130	return false;
5131}
5132
5133static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5134{
5135	return gfx_v9_0_check_rlcg_range(adev, offset,
5136					(void *)rlcg_access_gc_9_0,
5137					ARRAY_SIZE(rlcg_access_gc_9_0));
5138}
5139
5140static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5141	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5142	.set_safe_mode = gfx_v9_0_set_safe_mode,
5143	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5144	.init = gfx_v9_0_rlc_init,
5145	.get_csb_size = gfx_v9_0_get_csb_size,
5146	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5147	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5148	.resume = gfx_v9_0_rlc_resume,
5149	.stop = gfx_v9_0_rlc_stop,
5150	.reset = gfx_v9_0_rlc_reset,
5151	.start = gfx_v9_0_rlc_start,
5152	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5153	.rlcg_wreg = gfx_v9_0_rlcg_wreg,
5154	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5155};
5156
5157static int gfx_v9_0_set_powergating_state(void *handle,
5158					  enum amd_powergating_state state)
5159{
5160	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5161	bool enable = (state == AMD_PG_STATE_GATE);
5162
5163	switch (adev->asic_type) {
5164	case CHIP_RAVEN:
5165	case CHIP_RENOIR:
5166		if (!enable)
5167			amdgpu_gfx_off_ctrl(adev, false);
5168
5169		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5170			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5171			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5172		} else {
5173			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5174			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5175		}
5176
5177		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5178			gfx_v9_0_enable_cp_power_gating(adev, true);
5179		else
5180			gfx_v9_0_enable_cp_power_gating(adev, false);
5181
5182		/* update gfx cgpg state */
5183		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5184
5185		/* update mgcg state */
5186		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5187
5188		if (enable)
5189			amdgpu_gfx_off_ctrl(adev, true);
5190		break;
5191	case CHIP_VEGA12:
5192		amdgpu_gfx_off_ctrl(adev, enable);
5193		break;
5194	default:
5195		break;
5196	}
5197
5198	return 0;
5199}
5200
5201static int gfx_v9_0_set_clockgating_state(void *handle,
5202					  enum amd_clockgating_state state)
5203{
5204	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5205
5206	if (amdgpu_sriov_vf(adev))
5207		return 0;
5208
5209	switch (adev->asic_type) {
5210	case CHIP_VEGA10:
5211	case CHIP_VEGA12:
5212	case CHIP_VEGA20:
5213	case CHIP_RAVEN:
5214	case CHIP_ARCTURUS:
5215	case CHIP_RENOIR:
5216	case CHIP_ALDEBARAN:
5217		gfx_v9_0_update_gfx_clock_gating(adev,
5218						 state == AMD_CG_STATE_GATE);
5219		break;
5220	default:
5221		break;
5222	}
5223	return 0;
5224}
5225
5226static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5227{
5228	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5229	int data;
5230
5231	if (amdgpu_sriov_vf(adev))
5232		*flags = 0;
5233
5234	/* AMD_CG_SUPPORT_GFX_MGCG */
5235	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5236	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5237		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5238
5239	/* AMD_CG_SUPPORT_GFX_CGCG */
5240	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5241	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5242		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5243
5244	/* AMD_CG_SUPPORT_GFX_CGLS */
5245	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5246		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5247
5248	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5249	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5250	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5251		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5252
5253	/* AMD_CG_SUPPORT_GFX_CP_LS */
5254	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5255	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5256		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5257
5258	if (adev->asic_type != CHIP_ARCTURUS) {
5259		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5260		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5261		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5262			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5263
5264		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5265		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5266			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5267	}
5268}
5269
5270static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5271{
5272	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5273}
5274
5275static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5276{
5277	struct amdgpu_device *adev = ring->adev;
5278	u64 wptr;
5279
5280	/* XXX check if swapping is necessary on BE */
5281	if (ring->use_doorbell) {
5282		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5283	} else {
5284		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5285		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5286	}
5287
5288	return wptr;
5289}
5290
5291static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5292{
5293	struct amdgpu_device *adev = ring->adev;
5294
5295	if (ring->use_doorbell) {
5296		/* XXX check if swapping is necessary on BE */
5297		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5298		WDOORBELL64(ring->doorbell_index, ring->wptr);
5299	} else {
5300		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5301		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5302	}
5303}
5304
5305static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5306{
5307	struct amdgpu_device *adev = ring->adev;
5308	u32 ref_and_mask, reg_mem_engine;
5309	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5310
5311	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5312		switch (ring->me) {
5313		case 1:
5314			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5315			break;
5316		case 2:
5317			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5318			break;
5319		default:
5320			return;
5321		}
5322		reg_mem_engine = 0;
5323	} else {
5324		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5325		reg_mem_engine = 1; /* pfp */
5326	}
5327
5328	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5329			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5330			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5331			      ref_and_mask, ref_and_mask, 0x20);
5332}
5333
5334static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5335					struct amdgpu_job *job,
5336					struct amdgpu_ib *ib,
5337					uint32_t flags)
5338{
5339	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5340	u32 header, control = 0;
5341
5342	if (ib->flags & AMDGPU_IB_FLAG_CE)
5343		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5344	else
5345		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5346
5347	control |= ib->length_dw | (vmid << 24);
5348
5349	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5350		control |= INDIRECT_BUFFER_PRE_ENB(1);
5351
5352		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5353			gfx_v9_0_ring_emit_de_meta(ring);
5354	}
5355
5356	amdgpu_ring_write(ring, header);
5357	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5358	amdgpu_ring_write(ring,
5359#ifdef __BIG_ENDIAN
5360		(2 << 0) |
5361#endif
5362		lower_32_bits(ib->gpu_addr));
5363	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5364	amdgpu_ring_write(ring, control);
5365}
5366
5367static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5368					  struct amdgpu_job *job,
5369					  struct amdgpu_ib *ib,
5370					  uint32_t flags)
5371{
5372	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5373	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5374
5375	/* Currently there is a high likelihood of a wave ID mismatch between
5376	 * ME and GDS, leading to a hw deadlock, because ME generates
5377	 * different wave IDs than the GDS expects. This situation happens
5378	 * randomly when at least 5 compute pipes use GDS ordered append.
5379	 * The wave IDs generated by ME are also wrong after suspend/resume.
5380	 * Those are probably bugs somewhere else in the kernel driver.
5381	 *
5382	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5383	 * GDS to 0 for this ring (me/pipe).
5384	 */
5385	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5386		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5387		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5388		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5389	}
5390
5391	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5392	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5393	amdgpu_ring_write(ring,
5394#ifdef __BIG_ENDIAN
5395				(2 << 0) |
5396#endif
5397				lower_32_bits(ib->gpu_addr));
5398	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5399	amdgpu_ring_write(ring, control);
5400}
5401
5402static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5403				     u64 seq, unsigned flags)
5404{
5405	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5406	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5407	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5408
5409	/* RELEASE_MEM - flush caches, send int */
5410	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5411	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5412					       EOP_TC_NC_ACTION_EN) :
5413					      (EOP_TCL1_ACTION_EN |
5414					       EOP_TC_ACTION_EN |
5415					       EOP_TC_WB_ACTION_EN |
5416					       EOP_TC_MD_ACTION_EN)) |
5417				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5418				 EVENT_INDEX(5)));
5419	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5420
5421	/*
5422	 * The address must be Qword aligned for a 64bit write, or Dword
5423	 * aligned if only the low 32bit data word is sent (data high discarded).
5424	 */
5425	if (write64bit)
5426		BUG_ON(addr & 0x7);
5427	else
5428		BUG_ON(addr & 0x3);
5429	amdgpu_ring_write(ring, lower_32_bits(addr));
5430	amdgpu_ring_write(ring, upper_32_bits(addr));
5431	amdgpu_ring_write(ring, lower_32_bits(seq));
5432	amdgpu_ring_write(ring, upper_32_bits(seq));
5433	amdgpu_ring_write(ring, 0);
5434}
5435
5436static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5437{
5438	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5439	uint32_t seq = ring->fence_drv.sync_seq;
5440	uint64_t addr = ring->fence_drv.gpu_addr;
5441
5442	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5443			      lower_32_bits(addr), upper_32_bits(addr),
5444			      seq, 0xffffffff, 4);
5445}
5446
5447static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5448					unsigned vmid, uint64_t pd_addr)
5449{
5450	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5451
5452	/* compute doesn't have PFP */
5453	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5454		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5455		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5456		amdgpu_ring_write(ring, 0x0);
5457	}
5458}
5459
5460static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5461{
5462	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5463}
5464
5465static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5466{
5467	u64 wptr;
5468
5469	/* XXX check if swapping is necessary on BE */
5470	if (ring->use_doorbell)
5471		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5472	else
5473		BUG();
5474	return wptr;
5475}
5476
5477static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5478{
5479	struct amdgpu_device *adev = ring->adev;
5480
5481	/* XXX check if swapping is necessary on BE */
5482	if (ring->use_doorbell) {
5483		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5484		WDOORBELL64(ring->doorbell_index, ring->wptr);
5485	} else {
5486		BUG(); /* only DOORBELL method supported on gfx9 now */
5487	}
5488}
5489
5490static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5491					 u64 seq, unsigned int flags)
5492{
5493	struct amdgpu_device *adev = ring->adev;
5494
5495	/* only 32 bits are allocated for each fence seq writeback address */
5496	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5497
5498	/* write fence seq to the "addr" */
5499	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5500	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5501				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5502	amdgpu_ring_write(ring, lower_32_bits(addr));
5503	amdgpu_ring_write(ring, upper_32_bits(addr));
5504	amdgpu_ring_write(ring, lower_32_bits(seq));
5505
5506	if (flags & AMDGPU_FENCE_FLAG_INT) {
5507		/* set register to trigger INT */
5508		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5509		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5510					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5511		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5512		amdgpu_ring_write(ring, 0);
5513		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5514	}
5515}
5516
5517static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5518{
5519	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5520	amdgpu_ring_write(ring, 0);
5521}
5522
5523static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5524{
5525	struct v9_ce_ib_state ce_payload = {0};
5526	uint64_t csa_addr;
5527	int cnt;
5528
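	/* packet count: 3 WRITE_DATA header dwords plus the CE payload, minus one */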
5529	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5530	csa_addr = amdgpu_csa_vaddr(ring->adev);
5531
5532	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5533	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5534				 WRITE_DATA_DST_SEL(8) |
5535				 WR_CONFIRM) |
5536				 WRITE_DATA_CACHE_POLICY(0));
5537	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5538	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5539	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5540}
5541
5542static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5543{
5544	struct v9_de_ib_state de_payload = {0};
5545	uint64_t csa_addr, gds_addr;
5546	int cnt;
5547
5548	csa_addr = amdgpu_csa_vaddr(ring->adev);
5549	gds_addr = csa_addr + 4096;
5550	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5551	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5552
5553	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5554	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5555	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5556				 WRITE_DATA_DST_SEL(8) |
5557				 WR_CONFIRM) |
5558				 WRITE_DATA_CACHE_POLICY(0));
5559	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5560	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5561	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5562}
5563
5564static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5565				   bool secure)
5566{
5567	uint32_t v = secure ? FRAME_TMZ : 0;
5568
5569	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5570	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5571}
5572
5573static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5574{
5575	uint32_t dw2 = 0;
5576
5577	if (amdgpu_sriov_vf(ring->adev))
5578		gfx_v9_0_ring_emit_ce_meta(ring);
5579
5580	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5581	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5582		/* set load_global_config & load_global_uconfig */
5583		dw2 |= 0x8001;
5584		/* set load_cs_sh_regs */
5585		dw2 |= 0x01000000;
5586		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5587		dw2 |= 0x10002;
5588
5589		/* set load_ce_ram if preamble presented */
5590		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5591			dw2 |= 0x10000000;
5592	} else {
5593		/* still load_ce_ram if this is the first time the preamble is
5594		 * presented, even though no context switch has happened.
5595		 */
5596		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5597			dw2 |= 0x10000000;
5598	}
5599
5600	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5601	amdgpu_ring_write(ring, dw2);
5602	amdgpu_ring_write(ring, 0);
5603}
5604
5605static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5606{
5607	unsigned ret;
5608	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5609	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5610	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5611	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5612	ret = ring->wptr & ring->buf_mask;
5613	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5614	return ret;
5615}
5616
5617static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5618{
5619	unsigned cur;
5620	BUG_ON(offset > ring->buf_mask);
5621	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5622
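	/* Patch the dummy dword emitted by init_cond_exec with the number of
	 * dwords written since then, handling a possible ring buffer wrap.
	 */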
5623	cur = (ring->wptr & ring->buf_mask) - 1;
5624	if (likely(cur > offset))
5625		ring->ring[offset] = cur - offset;
5626	else
5627		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5628}
5629
5630static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5631				    uint32_t reg_val_offs)
5632{
5633	struct amdgpu_device *adev = ring->adev;
5634
5635	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5636	amdgpu_ring_write(ring, 0 |	/* src: register*/
5637				(5 << 8) |	/* dst: memory */
5638				(1 << 20));	/* write confirm */
5639	amdgpu_ring_write(ring, reg);
5640	amdgpu_ring_write(ring, 0);
5641	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5642				reg_val_offs * 4));
5643	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5644				reg_val_offs * 4));
5645}
5646
5647static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5648				    uint32_t val)
5649{
5650	uint32_t cmd = 0;
5651
5652	switch (ring->funcs->type) {
5653	case AMDGPU_RING_TYPE_GFX:
5654		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5655		break;
5656	case AMDGPU_RING_TYPE_KIQ:
5657		cmd = (1 << 16); /* no inc addr */
5658		break;
5659	default:
5660		cmd = WR_CONFIRM;
5661		break;
5662	}
5663	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5664	amdgpu_ring_write(ring, cmd);
5665	amdgpu_ring_write(ring, reg);
5666	amdgpu_ring_write(ring, 0);
5667	amdgpu_ring_write(ring, val);
5668}
5669
5670static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5671					uint32_t val, uint32_t mask)
5672{
5673	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5674}
5675
5676static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5677						  uint32_t reg0, uint32_t reg1,
5678						  uint32_t ref, uint32_t mask)
5679{
5680	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5681	struct amdgpu_device *adev = ring->adev;
5682	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5683		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5684
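	/* CP firmware that supports it can do the register write plus wait in
	 * a single WAIT_REG_MEM packet; otherwise fall back to the generic
	 * write-then-wait helper.
	 */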
5685	if (fw_version_ok)
5686		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5687				      ref, mask, 0x20);
5688	else
5689		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5690							   ref, mask);
5691}
5692
5693static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5694{
5695	struct amdgpu_device *adev = ring->adev;
5696	uint32_t value = 0;
5697
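	/* build an SQ_CMD that targets only the waves belonging to the hung
	 * job's VMID, used for soft ring recovery
	 */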
5698	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5699	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5700	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5701	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5702	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5703}
5704
5705static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5706						 enum amdgpu_interrupt_state state)
5707{
5708	switch (state) {
5709	case AMDGPU_IRQ_STATE_DISABLE:
5710	case AMDGPU_IRQ_STATE_ENABLE:
5711		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5712			       TIME_STAMP_INT_ENABLE,
5713			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5714		break;
5715	default:
5716		break;
5717	}
5718}
5719
5720static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5721						     int me, int pipe,
5722						     enum amdgpu_interrupt_state state)
5723{
5724	u32 mec_int_cntl, mec_int_cntl_reg;
5725
5726	/*
5727	 * amdgpu controls only the first MEC. That's why this function only
5728	 * handles the setting of interrupts for this specific MEC. All other
5729	 * pipes' interrupts are set by amdkfd.
5730	 */
5731
5732	if (me == 1) {
5733		switch (pipe) {
5734		case 0:
5735			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5736			break;
5737		case 1:
5738			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5739			break;
5740		case 2:
5741			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5742			break;
5743		case 3:
5744			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5745			break;
5746		default:
5747			DRM_DEBUG("invalid pipe %d\n", pipe);
5748			return;
5749		}
5750	} else {
5751		DRM_DEBUG("invalid me %d\n", me);
5752		return;
5753	}
5754
5755	switch (state) {
5756	case AMDGPU_IRQ_STATE_DISABLE:
5757		mec_int_cntl = RREG32(mec_int_cntl_reg);
5758		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5759					     TIME_STAMP_INT_ENABLE, 0);
5760		WREG32(mec_int_cntl_reg, mec_int_cntl);
5761		break;
5762	case AMDGPU_IRQ_STATE_ENABLE:
5763		mec_int_cntl = RREG32(mec_int_cntl_reg);
5764		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5765					     TIME_STAMP_INT_ENABLE, 1);
5766		WREG32(mec_int_cntl_reg, mec_int_cntl);
5767		break;
5768	default:
5769		break;
5770	}
5771}
5772
5773static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5774					     struct amdgpu_irq_src *source,
5775					     unsigned type,
5776					     enum amdgpu_interrupt_state state)
5777{
5778	switch (state) {
5779	case AMDGPU_IRQ_STATE_DISABLE:
5780	case AMDGPU_IRQ_STATE_ENABLE:
5781		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5782			       PRIV_REG_INT_ENABLE,
5783			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5784		break;
5785	default:
5786		break;
5787	}
5788
5789	return 0;
5790}
5791
5792static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5793					      struct amdgpu_irq_src *source,
5794					      unsigned type,
5795					      enum amdgpu_interrupt_state state)
5796{
5797	switch (state) {
5798	case AMDGPU_IRQ_STATE_DISABLE:
5799	case AMDGPU_IRQ_STATE_ENABLE:
5800		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5801			       PRIV_INSTR_INT_ENABLE,
5802			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5803		break;
5804	default:
5805		break;
5806	}
5807
5808	return 0;
5809}
5810
5811#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5812	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5813			CP_ECC_ERROR_INT_ENABLE, 1)
5814
5815#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5816	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5817			CP_ECC_ERROR_INT_ENABLE, 0)
5818
5819static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5820					      struct amdgpu_irq_src *source,
5821					      unsigned type,
5822					      enum amdgpu_interrupt_state state)
5823{
5824	switch (state) {
5825	case AMDGPU_IRQ_STATE_DISABLE:
5826		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5827				CP_ECC_ERROR_INT_ENABLE, 0);
5828		DISABLE_ECC_ON_ME_PIPE(1, 0);
5829		DISABLE_ECC_ON_ME_PIPE(1, 1);
5830		DISABLE_ECC_ON_ME_PIPE(1, 2);
5831		DISABLE_ECC_ON_ME_PIPE(1, 3);
5832		break;
5833
5834	case AMDGPU_IRQ_STATE_ENABLE:
5835		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5836				CP_ECC_ERROR_INT_ENABLE, 1);
5837		ENABLE_ECC_ON_ME_PIPE(1, 0);
5838		ENABLE_ECC_ON_ME_PIPE(1, 1);
5839		ENABLE_ECC_ON_ME_PIPE(1, 2);
5840		ENABLE_ECC_ON_ME_PIPE(1, 3);
5841		break;
5842	default:
5843		break;
5844	}
5845
5846	return 0;
5847}
5848
5849
5850static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5851					    struct amdgpu_irq_src *src,
5852					    unsigned type,
5853					    enum amdgpu_interrupt_state state)
5854{
5855	switch (type) {
5856	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5857		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5858		break;
5859	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5860		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5861		break;
5862	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5863		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5864		break;
5865	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5866		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5867		break;
5868	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5869		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5870		break;
5871	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5872		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5873		break;
5874	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5875		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5876		break;
5877	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5878		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5879		break;
5880	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5881		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5882		break;
5883	default:
5884		break;
5885	}
5886	return 0;
5887}
5888
5889static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5890			    struct amdgpu_irq_src *source,
5891			    struct amdgpu_iv_entry *entry)
5892{
5893	int i;
5894	u8 me_id, pipe_id, queue_id;
5895	struct amdgpu_ring *ring;
5896
5897	DRM_DEBUG("IH: CP EOP\n");
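	/* ring_id encodes queue in bits [6:4], me in bits [3:2] and pipe in bits [1:0] */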
5898	me_id = (entry->ring_id & 0x0c) >> 2;
5899	pipe_id = (entry->ring_id & 0x03) >> 0;
5900	queue_id = (entry->ring_id & 0x70) >> 4;
5901
5902	switch (me_id) {
5903	case 0:
5904		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5905		break;
5906	case 1:
5907	case 2:
5908		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5909			ring = &adev->gfx.compute_ring[i];
5910			/* Per-queue interrupt is supported for MEC starting from VI.
5911			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5912			 */
5913			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5914				amdgpu_fence_process(ring);
5915		}
5916		break;
5917	}
5918	return 0;
5919}
5920
5921static void gfx_v9_0_fault(struct amdgpu_device *adev,
5922			   struct amdgpu_iv_entry *entry)
5923{
5924	u8 me_id, pipe_id, queue_id;
5925	struct amdgpu_ring *ring;
5926	int i;
5927
5928	me_id = (entry->ring_id & 0x0c) >> 2;
5929	pipe_id = (entry->ring_id & 0x03) >> 0;
5930	queue_id = (entry->ring_id & 0x70) >> 4;
5931
5932	switch (me_id) {
5933	case 0:
5934		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5935		break;
5936	case 1:
5937	case 2:
5938		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5939			ring = &adev->gfx.compute_ring[i];
5940			if (ring->me == me_id && ring->pipe == pipe_id &&
5941			    ring->queue == queue_id)
5942				drm_sched_fault(&ring->sched);
5943		}
5944		break;
5945	}
5946}
5947
5948static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5949				 struct amdgpu_irq_src *source,
5950				 struct amdgpu_iv_entry *entry)
5951{
5952	DRM_ERROR("Illegal register access in command stream\n");
5953	gfx_v9_0_fault(adev, entry);
5954	return 0;
5955}
5956
5957static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5958				  struct amdgpu_irq_src *source,
5959				  struct amdgpu_iv_entry *entry)
5960{
5961	DRM_ERROR("Illegal instruction in command stream\n");
5962	gfx_v9_0_fault(adev, entry);
5963	return 0;
5964}
5965
5966
5967static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5968	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5969	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5970	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5971	},
5972	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5973	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5974	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5975	},
5976	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5977	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5978	  0, 0
5979	},
5980	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5981	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5982	  0, 0
5983	},
5984	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5985	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
5986	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
5987	},
5988	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5989	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
5990	  0, 0
5991	},
5992	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
5993	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5994	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
5995	},
5996	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
5997	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
5998	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
5999	},
6000	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6001	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6002	  0, 0
6003	},
6004	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6005	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6006	  0, 0
6007	},
6008	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6009	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6010	  0, 0
6011	},
6012	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6013	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6014	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6015	},
6016	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6017	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6018	  0, 0
6019	},
6020	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6021	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6022	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6023	},
6024	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6025	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6026	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6027	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6028	},
6029	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6030	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6031	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6032	  0, 0
6033	},
6034	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6035	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6036	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6037	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6038	},
6039	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6040	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6041	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6042	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6043	},
6044	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6045	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6046	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6047	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6048	},
6049	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6050	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6051	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6052	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6053	},
6054	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6055	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6056	  0, 0
6057	},
6058	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6059	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6060	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6061	},
6062	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6063	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6064	  0, 0
6065	},
6066	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6067	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6068	  0, 0
6069	},
6070	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6071	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6072	  0, 0
6073	},
6074	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6075	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6076	  0, 0
6077	},
6078	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6079	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6080	  0, 0
6081	},
6082	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6083	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6084	  0, 0
6085	},
6086	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6087	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6088	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6089	},
6090	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6091	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6092	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6093	},
6094	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6095	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6096	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6097	},
6098	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6099	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6100	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6101	},
6102	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6103	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6104	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6105	},
6106	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6107	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6108	  0, 0
6109	},
6110	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6111	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6112	  0, 0
6113	},
6114	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6115	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6116	  0, 0
6117	},
6118	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6119	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6120	  0, 0
6121	},
6122	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6123	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6124	  0, 0
6125	},
6126	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6127	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6128	  0, 0
6129	},
6130	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6131	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6132	  0, 0
6133	},
6134	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6135	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6136	  0, 0
6137	},
6138	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6139	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6140	  0, 0
6141	},
6142	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6143	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6144	  0, 0
6145	},
6146	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6147	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6148	  0, 0
6149	},
6150	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6151	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6152	  0, 0
6153	},
6154	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6155	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6156	  0, 0
6157	},
6158	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6159	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6160	  0, 0
6161	},
6162	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6163	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6164	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6165	},
6166	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6167	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6168	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6169	},
6170	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6171	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6172	  0, 0
6173	},
6174	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6175	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6176	  0, 0
6177	},
6178	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6179	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6180	  0, 0
6181	},
6182	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6183	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6184	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6185	},
6186	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6187	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6188	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6189	},
6190	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6191	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6192	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6193	},
6194	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6195	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6196	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6197	},
6198	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6199	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6200	  0, 0
6201	},
6202	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6203	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6204	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6205	},
6206	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6207	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6208	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6209	},
6210	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6211	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6212	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6213	},
6214	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6215	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6216	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6217	},
6218	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6219	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6220	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6221	},
6222	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6223	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6224	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6225	},
6226	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6227	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6228	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6229	},
6230	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6231	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6232	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6233	},
6234	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6235	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6236	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6237	},
6238	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6239	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6240	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6241	},
6242	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6243	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6244	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6245	},
6246	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6247	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6248	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6249	},
6250	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6251	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6252	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6253	},
6254	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6255	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6256	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6257	},
6258	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6259	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6260	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6261	},
6262	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6263	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6264	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6265	},
6266	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6267	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6268	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6269	},
6270	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6271	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6272	  0, 0
6273	},
6274	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6275	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6276	  0, 0
6277	},
6278	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6279	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6280	  0, 0
6281	},
6282	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6283	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6284	  0, 0
6285	},
6286	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6287	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6288	  0, 0
6289	},
6290	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6291	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6292	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6293	},
6294	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6295	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6296	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6297	},
6298	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6299	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6300	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6301	},
6302	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6303	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6304	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6305	},
6306	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6307	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6308	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6309	},
6310	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6311	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6312	  0, 0
6313	},
6314	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6315	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6316	  0, 0
6317	},
6318	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6319	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6320	  0, 0
6321	},
6322	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6323	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6324	  0, 0
6325	},
6326	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6327	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6328	  0, 0
6329	},
6330	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6331	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6332	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6333	},
6334	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6335	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6336	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6337	},
6338	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6339	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6340	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6341	},
6342	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6343	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6344	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6345	},
6346	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6347	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6348	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6349	},
6350	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6351	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6352	  0, 0
6353	},
6354	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6355	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6356	  0, 0
6357	},
6358	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6359	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6360	  0, 0
6361	},
6362	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6363	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6364	  0, 0
6365	},
6366	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6367	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6368	  0, 0
6369	},
6370	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6371	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6372	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6373	},
6374	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6375	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6376	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6377	},
6378	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6379	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6380	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6381	},
6382	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6383	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6384	  0, 0
6385	},
6386	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6387	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6388	  0, 0
6389	},
6390	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6391	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6392	  0, 0
6393	},
6394	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6395	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6396	  0, 0
6397	},
6398	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6399	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6400	  0, 0
6401	},
6402	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6403	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6404	  0, 0
6405	}
6406};
6407
6408static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6409				     void *inject_if)
6410{
6411	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6412	int ret;
6413	struct ta_ras_trigger_error_input block_info = { 0 };
6414
6415	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6416		return -EINVAL;
6417
6418	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6419		return -EINVAL;
6420
6421	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6422		return -EPERM;
6423
6424	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6425	      info->head.type)) {
6426		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6427			ras_gfx_subblocks[info->head.sub_block_index].name,
6428			info->head.type);
6429		return -EPERM;
6430	}
6431
6432	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6433	      info->head.type)) {
6434		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6435			ras_gfx_subblocks[info->head.sub_block_index].name,
6436			info->head.type);
6437		return -EPERM;
6438	}
6439
6440	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6441	block_info.sub_block_index =
6442		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6443	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6444	block_info.address = info->address;
6445	block_info.value = info->value;
6446
6447	mutex_lock(&adev->grbm_idx_mutex);
6448	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6449	mutex_unlock(&adev->grbm_idx_mutex);
6450
6451	return ret;
6452}
6453
6454static const char *vml2_mems[] = {
6455	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6456	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6457	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6458	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6459	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6460	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6461	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6462	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6463	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6464	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6465	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6466	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6467	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6468	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6469	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6470	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6471};
6472
6473static const char *vml2_walker_mems[] = {
6474	"UTC_VML2_CACHE_PDE0_MEM0",
6475	"UTC_VML2_CACHE_PDE0_MEM1",
6476	"UTC_VML2_CACHE_PDE1_MEM0",
6477	"UTC_VML2_CACHE_PDE1_MEM1",
6478	"UTC_VML2_CACHE_PDE2_MEM0",
6479	"UTC_VML2_CACHE_PDE2_MEM1",
6480	"UTC_VML2_RDIF_LOG_FIFO",
6481};
6482
6483static const char *atc_l2_cache_2m_mems[] = {
6484	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6485	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6486	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6487	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6488};
6489
6490static const char *atc_l2_cache_4k_mems[] = {
6491	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6492	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6493	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6494	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6495	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6496	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6497	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6498	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6499	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6500	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6501	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6502	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6503	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6504	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6505	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6506	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6507	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6508	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6509	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6510	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6511	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6512	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6513	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6514	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6515	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6516	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6517	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6518	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6519	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6520	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6521	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6522	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6523};
6524
6525static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6526					 struct ras_err_data *err_data)
6527{
6528	uint32_t i, data;
6529	uint32_t sec_count, ded_count;
6530
6531	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6532	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6533	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6534	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6535	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6536	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6537	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6538	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6539
6540	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6541		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6542		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6543
6544		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6545		if (sec_count) {
6546			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6547				"SEC %d\n", i, vml2_mems[i], sec_count);
6548			err_data->ce_count += sec_count;
6549		}
6550
6551		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6552		if (ded_count) {
6553			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6554				"DED %d\n", i, vml2_mems[i], ded_count);
6555			err_data->ue_count += ded_count;
6556		}
6557	}
6558
6559	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6560		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6561		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6562
6563		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6564						SEC_COUNT);
6565		if (sec_count) {
6566			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6567				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6568			err_data->ce_count += sec_count;
6569		}
6570
6571		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6572						DED_COUNT);
6573		if (ded_count) {
6574			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6575				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6576			err_data->ue_count += ded_count;
6577		}
6578	}
6579
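	/* The ATC L2 EDC_CNT values are decoded with raw masks below:
	 * bits [14:13] (0x6000) hold the SEC count and bits [16:15]
	 * (0x18000) hold the DED count of the selected instance.
	 */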
6580	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6581		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6582		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6583
6584		sec_count = (data & 0x00006000L) >> 0xd;
6585		if (sec_count) {
6586			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6587				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6588				sec_count);
6589			err_data->ce_count += sec_count;
6590		}
6591	}
6592
6593	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6594		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6595		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6596
6597		sec_count = (data & 0x00006000L) >> 0xd;
6598		if (sec_count) {
6599			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6600				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6601				sec_count);
6602			err_data->ce_count += sec_count;
6603		}
6604
6605		ded_count = (data & 0x00018000L) >> 0xf;
6606		if (ded_count) {
6607			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6608				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6609				ded_count);
6610			err_data->ue_count += ded_count;
6611		}
6612	}
6613
6614	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6615	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6616	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6617	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6618
6619	return 0;
6620}
6621
6622static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6623	const struct soc15_reg_entry *reg,
6624	uint32_t se_id, uint32_t inst_id, uint32_t value,
6625	uint32_t *sec_count, uint32_t *ded_count)
6626{
6627	uint32_t i;
6628	uint32_t sec_cnt, ded_cnt;
6629
6630	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6631		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6632			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6633			gfx_v9_0_ras_fields[i].inst != reg->inst)
6634			continue;
6635
6636		sec_cnt = (value &
6637				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6638				gfx_v9_0_ras_fields[i].sec_count_shift;
6639		if (sec_cnt) {
6640			dev_info(adev->dev, "GFX SubBlock %s, "
6641				"Instance[%d][%d], SEC %d\n",
6642				gfx_v9_0_ras_fields[i].name,
6643				se_id, inst_id,
6644				sec_cnt);
6645			*sec_count += sec_cnt;
6646		}
6647
6648		ded_cnt = (value &
6649				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6650				gfx_v9_0_ras_fields[i].ded_count_shift;
6651		if (ded_cnt) {
6652			dev_info(adev->dev, "GFX SubBlock %s, "
6653				"Instance[%d][%d], DED %d\n",
6654				gfx_v9_0_ras_fields[i].name,
6655				se_id, inst_id,
6656				ded_cnt);
6657			*ded_count += ded_cnt;
6658		}
6659	}
6660
6661	return 0;
6662}
6663
6664static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6665{
6666	int i, j, k;
6667
6668	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6669		return;
6670
6671	/* read back registers to clear the counters */
6672	mutex_lock(&adev->grbm_idx_mutex);
6673	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6674		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6675			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6676				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6677				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6678			}
6679		}
6680	}
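	/* restore broadcast addressing (all SEs/SHs/instances) before
	 * releasing the mutex; 0xe0000000 sets the SE/SH/INSTANCE
	 * broadcast bits of GRBM_GFX_INDEX.
	 */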
6681	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6682	mutex_unlock(&adev->grbm_idx_mutex);
6683
6684	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6685	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6686	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6687	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6688	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6689	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6690	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6691	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6692
6693	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6694		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6695		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6696	}
6697
6698	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6699		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6700		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6701	}
6702
6703	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6704		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6705		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6706	}
6707
6708	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6709		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6710		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6711	}
6712
6713	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6714	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6715	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6716	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6717}
6718
6719static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6720					  void *ras_error_status)
6721{
6722	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6723	uint32_t sec_count = 0, ded_count = 0;
6724	uint32_t i, j, k;
6725	uint32_t reg_value;
6726
6727	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6728		return -EINVAL;
6729
6730	err_data->ue_count = 0;
6731	err_data->ce_count = 0;
6732
6733	mutex_lock(&adev->grbm_idx_mutex);
6734
6735	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6736		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6737			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6738				gfx_v9_0_select_se_sh(adev, j, 0, k);
6739				reg_value =
6740					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6741				if (reg_value)
6742					gfx_v9_0_ras_error_count(adev,
6743						&gfx_v9_0_edc_counter_regs[i],
6744						j, k, reg_value,
6745						&sec_count, &ded_count);
6746			}
6747		}
6748	}
6749
6750	err_data->ce_count += sec_count;
6751	err_data->ue_count += ded_count;
6752
6753	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6754	mutex_unlock(&adev->grbm_idx_mutex);
6755
6756	gfx_v9_0_query_utc_edc_status(adev, err_data);
6757
6758	return 0;
6759}
6760
6761static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6762{
6763	const unsigned int cp_coher_cntl =
6764			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6765			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6766			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6767			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6768			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6769
6770	/* ACQUIRE_MEM - make one or more surfaces valid for use by subsequent operations */
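	/* the packet below is 7 dwords long (one PACKET3 header plus six data
	 * dwords), matching the 7 accounted for gfx_v9_0_emit_mem_sync in
	 * emit_frame_size.
	 */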
6771	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6772	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6773	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6774	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6775	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6776	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6777	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6778}
6779
6780static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6781					uint32_t pipe, bool enable)
6782{
6783	struct amdgpu_device *adev = ring->adev;
6784	uint32_t val;
6785	uint32_t wcl_cs_reg;
6786
6787	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6788	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6789
6790	switch (pipe) {
6791	case 0:
6792		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6793		break;
6794	case 1:
6795		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6796		break;
6797	case 2:
6798		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6799		break;
6800	case 3:
6801		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6802		break;
6803	default:
6804		DRM_DEBUG("invalid pipe %d\n", pipe);
6805		return;
6806	}
6807
6808	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6809	}
6810
6811static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6812{
6813	struct amdgpu_device *adev = ring->adev;
6814	uint32_t val;
6815	int i;
6816
6818	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6819	 * limit the number of gfx waves. Setting the lower 5 bits makes sure
6820	 * gfx only gets around 25% of the GPU resources.
6821	 */
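	/* 0x1f out of the 7-bit maximum (0x7f = 127) is 31/127, i.e. roughly 24%. */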
6822	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6823	amdgpu_ring_emit_wreg(ring,
6824			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6825			      val);
6826
6827	/* Restrict waves for normal/low priority compute queues as well
6828	 * to get the best QoS for high priority compute jobs.
6829	 *
6830	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6831	 */
6832	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6833		if (i != ring->pipe)
6834			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6835
6836	}
6837}
6838
6839static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6840	.name = "gfx_v9_0",
6841	.early_init = gfx_v9_0_early_init,
6842	.late_init = gfx_v9_0_late_init,
6843	.sw_init = gfx_v9_0_sw_init,
6844	.sw_fini = gfx_v9_0_sw_fini,
6845	.hw_init = gfx_v9_0_hw_init,
6846	.hw_fini = gfx_v9_0_hw_fini,
6847	.suspend = gfx_v9_0_suspend,
6848	.resume = gfx_v9_0_resume,
6849	.is_idle = gfx_v9_0_is_idle,
6850	.wait_for_idle = gfx_v9_0_wait_for_idle,
6851	.soft_reset = gfx_v9_0_soft_reset,
6852	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6853	.set_powergating_state = gfx_v9_0_set_powergating_state,
6854	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6855};
6856
6857static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6858	.type = AMDGPU_RING_TYPE_GFX,
6859	.align_mask = 0xff,
6860	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6861	.support_64bit_ptrs = true,
6862	.vmhub = AMDGPU_GFXHUB_0,
6863	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6864	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6865	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6866	.emit_frame_size = /* 242 dwords maximum in total, assuming 16 IBs */
6867		5 +  /* COND_EXEC */
6868		7 +  /* PIPELINE_SYNC */
6869		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6870		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6871		2 + /* VM_FLUSH */
6872		8 +  /* FENCE for VM_FLUSH */
6873		20 + /* GDS switch */
6874		4 + /* double SWITCH_BUFFER,
6875		       the first COND_EXEC jumps to the place just
6876		       prior to this double SWITCH_BUFFER */
6877		5 + /* COND_EXEC */
6878		7 + /* HDP_flush */
6879		4 + /* VGT_flush */
6880		14 + /* CE_META */
6881		31 + /* DE_META */
6882		3 + /* CNTX_CTRL */
6883		5 + /* HDP_INVL */
6884		8 + 8 + /* FENCE x2 */
6885		2 + /* SWITCH_BUFFER */
6886		7, /* gfx_v9_0_emit_mem_sync */
6887	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6888	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6889	.emit_fence = gfx_v9_0_ring_emit_fence,
6890	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6891	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6892	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6893	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6894	.test_ring = gfx_v9_0_ring_test_ring,
6895	.test_ib = gfx_v9_0_ring_test_ib,
6896	.insert_nop = amdgpu_ring_insert_nop,
6897	.pad_ib = amdgpu_ring_generic_pad_ib,
6898	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6899	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6900	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6901	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6902	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6903	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6904	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6905	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6906	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6907	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6908};
6909
6910static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6911	.type = AMDGPU_RING_TYPE_COMPUTE,
6912	.align_mask = 0xff,
6913	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6914	.support_64bit_ptrs = true,
6915	.vmhub = AMDGPU_GFXHUB_0,
6916	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6917	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6918	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6919	.emit_frame_size =
6920		20 + /* gfx_v9_0_ring_emit_gds_switch */
6921		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6922		5 + /* hdp invalidate */
6923		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6924		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6925		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6926		2 + /* gfx_v9_0_ring_emit_vm_flush */
6927		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6928		7 + /* gfx_v9_0_emit_mem_sync */
6929		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6930		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6931	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6932	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6933	.emit_fence = gfx_v9_0_ring_emit_fence,
6934	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6935	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6936	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6937	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6938	.test_ring = gfx_v9_0_ring_test_ring,
6939	.test_ib = gfx_v9_0_ring_test_ib,
6940	.insert_nop = amdgpu_ring_insert_nop,
6941	.pad_ib = amdgpu_ring_generic_pad_ib,
6942	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6943	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6944	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6945	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6946	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
6947};
6948
6949static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6950	.type = AMDGPU_RING_TYPE_KIQ,
6951	.align_mask = 0xff,
6952	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6953	.support_64bit_ptrs = true,
6954	.vmhub = AMDGPU_GFXHUB_0,
6955	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6956	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6957	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6958	.emit_frame_size =
6959		20 + /* gfx_v9_0_ring_emit_gds_switch */
6960		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6961		5 + /* hdp invalidate */
6962		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6963		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6964		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6965		2 + /* gfx_v9_0_ring_emit_vm_flush */
6966		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6967	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6968	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6969	.test_ring = gfx_v9_0_ring_test_ring,
6970	.insert_nop = amdgpu_ring_insert_nop,
6971	.pad_ib = amdgpu_ring_generic_pad_ib,
6972	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6973	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6974	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6975	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6976};
6977
6978static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6979{
6980	int i;
6981
6982	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6983
6984	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6985		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6986
6987	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6988		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6989}
6990
6991static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6992	.set = gfx_v9_0_set_eop_interrupt_state,
6993	.process = gfx_v9_0_eop_irq,
6994};
6995
6996static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6997	.set = gfx_v9_0_set_priv_reg_fault_state,
6998	.process = gfx_v9_0_priv_reg_irq,
6999};
7000
7001static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7002	.set = gfx_v9_0_set_priv_inst_fault_state,
7003	.process = gfx_v9_0_priv_inst_irq,
7004};
7005
7006static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7007	.set = gfx_v9_0_set_cp_ecc_error_state,
7008	.process = amdgpu_gfx_cp_ecc_error_irq,
7009};
7010
7011
7012static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7013{
7014	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7015	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7016
7017	adev->gfx.priv_reg_irq.num_types = 1;
7018	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7019
7020	adev->gfx.priv_inst_irq.num_types = 1;
7021	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7022
7023	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7024	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7025}
7026
7027static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7028{
7029	switch (adev->asic_type) {
7030	case CHIP_VEGA10:
7031	case CHIP_VEGA12:
7032	case CHIP_VEGA20:
7033	case CHIP_RAVEN:
7034	case CHIP_ARCTURUS:
7035	case CHIP_RENOIR:
7036	case CHIP_ALDEBARAN:
7037		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7038		break;
7039	default:
7040		break;
7041	}
7042}
7043
7044static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7045{
7046	/* init asic gds info */
7047	switch (adev->asic_type) {
7048	case CHIP_VEGA10:
7049	case CHIP_VEGA12:
7050	case CHIP_VEGA20:
7051		adev->gds.gds_size = 0x10000;
7052		break;
7053	case CHIP_RAVEN:
7054	case CHIP_ARCTURUS:
7055		adev->gds.gds_size = 0x1000;
7056		break;
7057	case CHIP_ALDEBARAN:
7058		/* Aldebaran removed all the GDS internal memory;
7059		 * the kernel only supports GWS opcodes such as
7060		 * barrier and semaphore. */
7061		adev->gds.gds_size = 0;
7062		break;
7063	default:
7064		adev->gds.gds_size = 0x10000;
7065		break;
7066	}
7067
7068	switch (adev->asic_type) {
7069	case CHIP_VEGA10:
7070	case CHIP_VEGA20:
7071		adev->gds.gds_compute_max_wave_id = 0x7ff;
7072		break;
7073	case CHIP_VEGA12:
7074		adev->gds.gds_compute_max_wave_id = 0x27f;
7075		break;
7076	case CHIP_RAVEN:
7077		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7078			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7079		else
7080			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7081		break;
7082	case CHIP_ARCTURUS:
7083		adev->gds.gds_compute_max_wave_id = 0xfff;
7084		break;
7085	case CHIP_ALDEBARAN:
7086		/* deprecated for Aldebaran, no usage at all */
7087		adev->gds.gds_compute_max_wave_id = 0;
7088		break;
7089	default:
7090		/* this really depends on the chip */
7091		adev->gds.gds_compute_max_wave_id = 0x7ff;
7092		break;
7093	}
7094
7095	adev->gds.gws_size = 64;
7096	adev->gds.oa_size = 16;
7097}
7098
7099static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7100						 u32 bitmap)
7101{
7102	u32 data;
7103
7104	if (!bitmap)
7105		return;
7106
7107	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7108	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7109
7110	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7111}
7112
7113static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7114{
7115	u32 data, mask;
7116
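	/* Both registers report *inactive* CUs (the INACTIVE_CUS field);
	 * GC_USER_SHADER_ARRAY_CONFIG holds the user-requested mask written
	 * by gfx_v9_0_set_user_cu_inactive_bitmap(). OR them together, then
	 * invert and mask down to max_cu_per_sh to get the active-CU bitmap.
	 */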
7117	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7118	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7119
7120	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7121	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7122
7123	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7124
7125	return (~data) & mask;
7126}
7127
7128static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7129				 struct amdgpu_cu_info *cu_info)
7130{
7131	int i, j, k, counter, active_cu_number = 0;
7132	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7133	unsigned disable_masks[4 * 4];
7134
7135	if (!adev || !cu_info)
7136		return -EINVAL;
7137
7138	/*
7139	 * 16 comes from the 4*4 bitmap array size, which is enough to cover all gfx9 ASICs.
7140	 */
7141	if (adev->gfx.config.max_shader_engines *
7142		adev->gfx.config.max_sh_per_se > 16)
7143		return -EINVAL;
7144
7145	amdgpu_gfx_parse_disable_cu(disable_masks,
7146				    adev->gfx.config.max_shader_engines,
7147				    adev->gfx.config.max_sh_per_se);
7148
7149	mutex_lock(&adev->grbm_idx_mutex);
7150	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7151		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7152			mask = 1;
7153			ao_bitmap = 0;
7154			counter = 0;
7155			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
7156			gfx_v9_0_set_user_cu_inactive_bitmap(
7157				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7158			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7159
7160			/*
7161			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is
7162			 * a 4x4 array, which is usually suitable for Vega
7163			 * ASICs, which have a 4*2 SE/SH layout.
7164			 * But for Arcturus the SE/SH layout is changed to 8*1.
7165			 * To minimize the impact, we make it compatible
7166			 * with the current bitmap array as below:
7167			 *    SE4,SH0 --> bitmap[0][1]
7168			 *    SE5,SH0 --> bitmap[1][1]
7169			 *    SE6,SH0 --> bitmap[2][1]
7170			 *    SE7,SH0 --> bitmap[3][1]
7171			 */
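			/* e.g. i = 5, j = 0: bitmap[5 % 4][0 + 5 / 4] = bitmap[1][1],
			 * matching the SE5,SH0 entry above.
			 */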
7172			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
7173
7174			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7175				if (bitmap & mask) {
7176					if (counter < adev->gfx.config.max_cu_per_sh)
7177						ao_bitmap |= mask;
7178					counter++;
7179				}
7180				mask <<= 1;
7181			}
7182			active_cu_number += counter;
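			/* ao_cu_mask packs one 8-bit AO bitmap per SE/SH pair:
			 * SE0/SH0 -> bits 0-7, SE0/SH1 -> bits 8-15,
			 * SE1/SH0 -> bits 16-23, SE1/SH1 -> bits 24-31.
			 */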
7183			if (i < 2 && j < 2)
7184				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7185			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7186		}
7187	}
7188	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7189	mutex_unlock(&adev->grbm_idx_mutex);
7190
7191	cu_info->number = active_cu_number;
7192	cu_info->ao_cu_mask = ao_cu_mask;
7193	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7194
7195	return 0;
7196}
7197
7198const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7199{
7200	.type = AMD_IP_BLOCK_TYPE_GFX,
7201	.major = 9,
7202	.minor = 0,
7203	.rev = 0,
7204	.funcs = &gfx_v9_0_ip_funcs,
7205};