Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "soc15.h"
  33#include "soc15d.h"
  34#include "amdgpu_atomfirmware.h"
  35#include "amdgpu_pm.h"
  36
  37#include "gc/gc_9_0_offset.h"
  38#include "gc/gc_9_0_sh_mask.h"
  39
  40#include "vega10_enum.h"
  41#include "hdp/hdp_4_0_offset.h"
  42
  43#include "soc15_common.h"
  44#include "clearstate_gfx9.h"
  45#include "v9_structs.h"
  46
  47#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
  48
  49#include "amdgpu_ras.h"
  50
  51#define GFX9_NUM_GFX_RINGS     1
  52#define GFX9_MEC_HPD_SIZE 4096
  53#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  54#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
  55
  56#define mmPWR_MISC_CNTL_STATUS					0x0183
  57#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
  58#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
  59#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
  60#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
  61#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
  62
  63#define mmGCEA_PROBE_MAP                        0x070c
  64#define mmGCEA_PROBE_MAP_BASE_IDX               0
  65
  66MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
  67MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
  68MODULE_FIRMWARE("amdgpu/vega10_me.bin");
  69MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
  70MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
  71MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
  72
  73MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
  74MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
  75MODULE_FIRMWARE("amdgpu/vega12_me.bin");
  76MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
  77MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
  78MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
  79
  80MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
  81MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
  82MODULE_FIRMWARE("amdgpu/vega20_me.bin");
  83MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
  84MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
  85MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
  86
  87MODULE_FIRMWARE("amdgpu/raven_ce.bin");
  88MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
  89MODULE_FIRMWARE("amdgpu/raven_me.bin");
  90MODULE_FIRMWARE("amdgpu/raven_mec.bin");
  91MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
  92MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
  93
  94MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
  95MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
  96MODULE_FIRMWARE("amdgpu/picasso_me.bin");
  97MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
  98MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
  99MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
 100MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
 101
 102MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
 103MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
 104MODULE_FIRMWARE("amdgpu/raven2_me.bin");
 105MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
 106MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
 107MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
 108MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
 109
 110MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
 111MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
 112MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
 113
 114MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
 115MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
 116MODULE_FIRMWARE("amdgpu/renoir_me.bin");
 117MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
 118MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
 119MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
 120
 121#define mmTCP_CHAN_STEER_0_ARCT								0x0b03
 122#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
 123#define mmTCP_CHAN_STEER_1_ARCT								0x0b04
 124#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
 125#define mmTCP_CHAN_STEER_2_ARCT								0x0b09
 126#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
 127#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
 128#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
 129#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
 130#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
 131#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
 132#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
 133
 134enum ta_ras_gfx_subblock {
 135	/*CPC*/
 136	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
 137	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
 138	TA_RAS_BLOCK__GFX_CPC_UCODE,
 139	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
 140	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
 141	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
 142	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
 143	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
 144	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 145	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 146	/* CPF*/
 147	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 148	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 149	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
 150	TA_RAS_BLOCK__GFX_CPF_TAG,
 151	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
 152	/* CPG*/
 153	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 154	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 155	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
 156	TA_RAS_BLOCK__GFX_CPG_TAG,
 157	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
 158	/* GDS*/
 159	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 160	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 161	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
 162	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
 163	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
 164	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 165	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 166	/* SPI*/
 167	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
 168	/* SQ*/
 169	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 170	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 171	TA_RAS_BLOCK__GFX_SQ_LDS_D,
 172	TA_RAS_BLOCK__GFX_SQ_LDS_I,
 173	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
 174	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
 175	/* SQC (3 ranges)*/
 176	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 177	/* SQC range 0*/
 178	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 179	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
 180		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
 181	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
 182	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
 183	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
 184	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
 185	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
 186	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 187	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
 188		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 189	/* SQC range 1*/
 190	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 191	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
 192		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 193	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
 194	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
 195	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
 196	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
 197	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
 198	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
 199	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
 200	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 201	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
 202		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 203	/* SQC range 2*/
 204	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 205	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
 206		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 207	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
 208	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
 209	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
 210	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
 211	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
 212	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
 213	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
 214	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 215	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
 216		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 217	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
 218	/* TA*/
 219	TA_RAS_BLOCK__GFX_TA_INDEX_START,
 220	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
 221	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
 222	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
 223	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
 224	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 225	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 226	/* TCA*/
 227	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 228	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 229	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 230	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 231	/* TCC (5 sub-ranges)*/
 232	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 233	/* TCC range 0*/
 234	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 235	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
 236	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
 237	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
 238	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
 239	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
 240	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
 241	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
 242	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 243	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 244	/* TCC range 1*/
 245	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 246	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 247	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 248	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
 249		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 250	/* TCC range 2*/
 251	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 252	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 253	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
 254	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
 255	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
 256	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
 257	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
 258	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
 259	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 260	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
 261		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 262	/* TCC range 3*/
 263	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 264	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 265	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 266	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
 267		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 268	/* TCC range 4*/
 269	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 270	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
 271		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 272	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 273	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
 274		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 275	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
 276	/* TCI*/
 277	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
 278	/* TCP*/
 279	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 280	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 281	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
 282	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
 283	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
 284	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
 285	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
 286	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 287	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 288	/* TD*/
 289	TA_RAS_BLOCK__GFX_TD_INDEX_START,
 290	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
 291	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
 292	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 293	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 294	/* EA (3 sub-ranges)*/
 295	TA_RAS_BLOCK__GFX_EA_INDEX_START,
 296	/* EA range 0*/
 297	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
 298	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
 299	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
 300	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
 301	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
 302	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
 303	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
 304	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
 305	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 306	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 307	/* EA range 1*/
 308	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 309	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 310	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
 311	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
 312	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
 313	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
 314	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
 315	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 316	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 317	/* EA range 2*/
 318	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 319	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 320	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
 321	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
 322	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 323	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 324	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
 325	/* UTC VM L2 bank*/
 326	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
 327	/* UTC VM walker*/
 328	TA_RAS_BLOCK__UTC_VML2_WALKER,
 329	/* UTC ATC L2 2MB cache*/
 330	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
 331	/* UTC ATC L2 4KB cache*/
 332	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
 333	TA_RAS_BLOCK__GFX_MAX
 334};
 335
 336struct ras_gfx_subblock {
 337	unsigned char *name;
 338	int ta_subblock;
 339	int hw_supported_error_type;
 340	int sw_supported_error_type;
 341};
 342
 343#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
 344	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
 345		#subblock,                                                     \
 346		TA_RAS_BLOCK__##subblock,                                      \
 347		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
 348		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
 349	}
 350
 351static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
 352	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
 353	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
 354	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 355	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 356	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 357	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 358	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 359	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 360	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 361	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 362	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
 363	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
 364	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
 365	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
 366	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 367	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
 368	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
 369			     0),
 370	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
 371			     0),
 372	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 373	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 374	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 375	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
 376	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
 377	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 378	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
 379	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 380			     0, 0),
 381	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 382			     0),
 383	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 384			     0, 0),
 385	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
 386			     0),
 387	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 388			     0, 0),
 389	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 390			     0),
 391	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 392			     1),
 393	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 394			     0, 0, 0),
 395	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 396			     0),
 397	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 398			     0),
 399	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 400			     0),
 401	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 402			     0),
 403	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 404			     0),
 405	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 406			     0, 0),
 407	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 408			     0),
 409	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 410			     0),
 411	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 412			     0, 0, 0),
 413	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 414			     0),
 415	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 416			     0),
 417	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 418			     0),
 419	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 420			     0),
 421	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 422			     0),
 423	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 424			     0, 0),
 425	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 426			     0),
 427	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
 428	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 429	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 430	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 431	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 432	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
 433	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 434	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
 435	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
 436			     1),
 437	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
 438			     1),
 439	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
 440			     1),
 441	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
 442			     0),
 443	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
 444			     0),
 445	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 446	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 447	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
 448	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
 449	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
 450	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
 451	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 452	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
 453	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
 454	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 455	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
 456	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
 457			     0),
 458	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 459	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
 460			     0),
 461	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
 462			     0, 0),
 463	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
 464			     0),
 465	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 466	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
 467	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
 468	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 469	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 470	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 471	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
 472	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
 473	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
 474	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
 475	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 476	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
 477	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 478	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 479	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 480	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 481	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 482	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 483	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 484	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 485	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 486	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 487	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 488	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 489	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 490	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 491	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 492	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 493	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 494	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 495	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
 496	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
 497	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
 498	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
 499};
 500
 501static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 502{
 503	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 504	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
 505	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 506	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 507	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 508	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 509	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 510	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 511	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 512	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 513	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 514	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 515	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 516	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 517	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 518	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 519	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
 520	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
 521	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
 522	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
 523};
 524
 525static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 526{
 527	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
 528	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 529	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 530	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 531	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 532	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 533	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
 534	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 535	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
 536	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 537	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 538	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 539	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 540	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 541	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 542	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
 543	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
 544	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 545};
 546
 547static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
 548{
 549	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
 550	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 551	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 552	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
 553	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
 554	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
 555	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
 556	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
 557	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
 558	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
 559	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
 560};
 561
 562static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 563{
 564	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 565	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 566	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 567	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 568	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 569	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 570	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 571	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 572	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 573	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 574	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 575	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 576	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 577	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 578	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 579	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 580	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 581	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 582	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 583	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
 584	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
 585	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
 586	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
 587	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
 588};
 589
 590static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
 591{
 592	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 593	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
 594	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
 595	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
 596	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
 597	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 598	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
 599};
 600
 601static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
 602{
 603	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
 604	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 605	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 606	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
 607	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
 608	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
 609	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
 610	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
 611	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 612	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 613	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
 614	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
 615	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
 616	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 617	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 618	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 619	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
 620	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 621	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 622};
 623
 624static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
 625{
 626	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 627	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 628	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 629	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
 630	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
 631	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 632	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 633	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 634	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 635	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 636	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 637	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
 638};
 639
 640static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 641{
 642	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
 643	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
 644	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 645};
 646
 647static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
 648{
 649	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 650	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 651	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 652	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 653	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 654	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 655	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 656	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 657	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 658	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 659	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 660	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 661	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 662	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 663	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 664	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 665};
 666
 667static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 668{
 669	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
 670	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 671	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 672	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
 673	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
 674	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 675	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
 676	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 677	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
 678	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 679	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
 680	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
 681	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
 682};
 683
 684static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
 685{
 686	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 687	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
 688	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
 689	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
 690	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
 691	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
 692	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
 693	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
 694};
 695
 696static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
 697{
 698	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 699	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 700	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 701	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 702	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 703	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 704	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 705	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 706};
 707
 708static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
 709{
 710	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 711	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 712	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 713	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 714	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 715	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 716	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 717	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 718};
 719
 720#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 721#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 722#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
 723#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
 724
 725static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 726static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 727static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
 728static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 729static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 730                                 struct amdgpu_cu_info *cu_info);
 731static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 732static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
 733static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 734static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 735static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 736					  void *ras_error_status);
 737static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 738				     void *inject_if);
 739
 740static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 741{
 742	switch (adev->asic_type) {
 743	case CHIP_VEGA10:
 744		soc15_program_register_sequence(adev,
 745						golden_settings_gc_9_0,
 746						ARRAY_SIZE(golden_settings_gc_9_0));
 747		soc15_program_register_sequence(adev,
 748						golden_settings_gc_9_0_vg10,
 749						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 750		break;
 751	case CHIP_VEGA12:
 752		soc15_program_register_sequence(adev,
 753						golden_settings_gc_9_2_1,
 754						ARRAY_SIZE(golden_settings_gc_9_2_1));
 755		soc15_program_register_sequence(adev,
 756						golden_settings_gc_9_2_1_vg12,
 757						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 758		break;
 759	case CHIP_VEGA20:
 760		soc15_program_register_sequence(adev,
 761						golden_settings_gc_9_0,
 762						ARRAY_SIZE(golden_settings_gc_9_0));
 763		soc15_program_register_sequence(adev,
 764						golden_settings_gc_9_0_vg20,
 765						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
 766		break;
 767	case CHIP_ARCTURUS:
 768		soc15_program_register_sequence(adev,
 769						golden_settings_gc_9_4_1_arct,
 770						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
 771		break;
 772	case CHIP_RAVEN:
 773		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
 774						ARRAY_SIZE(golden_settings_gc_9_1));
 775		if (adev->rev_id >= 8)
 776			soc15_program_register_sequence(adev,
 777							golden_settings_gc_9_1_rv2,
 778							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
 779		else
 780			soc15_program_register_sequence(adev,
 781							golden_settings_gc_9_1_rv1,
 782							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
 783		break;
 784	 case CHIP_RENOIR:
 785		soc15_program_register_sequence(adev,
 786						golden_settings_gc_9_1_rn,
 787						ARRAY_SIZE(golden_settings_gc_9_1_rn));
 788		return; /* for renoir, don't need common goldensetting */
 789	default:
 790		break;
 791	}
 792
 793	if (adev->asic_type != CHIP_ARCTURUS)
 794		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
 795						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
 796}
 797
 798static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
 799{
 800	adev->gfx.scratch.num_reg = 8;
 801	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
 802	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 803}
 804
 805static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
 806				       bool wc, uint32_t reg, uint32_t val)
 807{
 808	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 809	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
 810				WRITE_DATA_DST_SEL(0) |
 811				(wc ? WR_CONFIRM : 0));
 812	amdgpu_ring_write(ring, reg);
 813	amdgpu_ring_write(ring, 0);
 814	amdgpu_ring_write(ring, val);
 815}
 816
 817static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
 818				  int mem_space, int opt, uint32_t addr0,
 819				  uint32_t addr1, uint32_t ref, uint32_t mask,
 820				  uint32_t inv)
 821{
 822	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 823	amdgpu_ring_write(ring,
 824				 /* memory (1) or register (0) */
 825				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
 826				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
 827				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
 828				 WAIT_REG_MEM_ENGINE(eng_sel)));
 829
 830	if (mem_space)
 831		BUG_ON(addr0 & 0x3); /* Dword align */
 832	amdgpu_ring_write(ring, addr0);
 833	amdgpu_ring_write(ring, addr1);
 834	amdgpu_ring_write(ring, ref);
 835	amdgpu_ring_write(ring, mask);
 836	amdgpu_ring_write(ring, inv); /* poll interval */
 837}
 838
 839static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 840{
 841	struct amdgpu_device *adev = ring->adev;
 842	uint32_t scratch;
 843	uint32_t tmp = 0;
 844	unsigned i;
 845	int r;
 846
 847	r = amdgpu_gfx_scratch_get(adev, &scratch);
 848	if (r)
 849		return r;
 850
 851	WREG32(scratch, 0xCAFEDEAD);
 852	r = amdgpu_ring_alloc(ring, 3);
 853	if (r)
 854		goto error_free_scratch;
 855
 856	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 857	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 858	amdgpu_ring_write(ring, 0xDEADBEEF);
 859	amdgpu_ring_commit(ring);
 860
 861	for (i = 0; i < adev->usec_timeout; i++) {
 862		tmp = RREG32(scratch);
 863		if (tmp == 0xDEADBEEF)
 864			break;
 865		udelay(1);
 866	}
 867
 868	if (i >= adev->usec_timeout)
 869		r = -ETIMEDOUT;
 870
 871error_free_scratch:
 872	amdgpu_gfx_scratch_free(adev, scratch);
 873	return r;
 874}
 875
 876static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 877{
 878	struct amdgpu_device *adev = ring->adev;
 879	struct amdgpu_ib ib;
 880	struct dma_fence *f = NULL;
 881
 882	unsigned index;
 883	uint64_t gpu_addr;
 884	uint32_t tmp;
 885	long r;
 886
 887	r = amdgpu_device_wb_get(adev, &index);
 888	if (r)
 889		return r;
 890
 891	gpu_addr = adev->wb.gpu_addr + (index * 4);
 892	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 893	memset(&ib, 0, sizeof(ib));
 894	r = amdgpu_ib_get(adev, NULL, 16, &ib);
 895	if (r)
 896		goto err1;
 897
 898	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 899	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 900	ib.ptr[2] = lower_32_bits(gpu_addr);
 901	ib.ptr[3] = upper_32_bits(gpu_addr);
 902	ib.ptr[4] = 0xDEADBEEF;
 903	ib.length_dw = 5;
 904
 905	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 906	if (r)
 907		goto err2;
 908
 909	r = dma_fence_wait_timeout(f, false, timeout);
 910	if (r == 0) {
 911		r = -ETIMEDOUT;
 912		goto err2;
 913	} else if (r < 0) {
 914		goto err2;
 915	}
 916
 917	tmp = adev->wb.wb[index];
 918	if (tmp == 0xDEADBEEF)
 919		r = 0;
 920	else
 921		r = -EINVAL;
 922
 923err2:
 924	amdgpu_ib_free(adev, &ib, NULL);
 925	dma_fence_put(f);
 926err1:
 927	amdgpu_device_wb_free(adev, index);
 928	return r;
 929}
 930
 931
 932static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
 933{
 934	release_firmware(adev->gfx.pfp_fw);
 935	adev->gfx.pfp_fw = NULL;
 936	release_firmware(adev->gfx.me_fw);
 937	adev->gfx.me_fw = NULL;
 938	release_firmware(adev->gfx.ce_fw);
 939	adev->gfx.ce_fw = NULL;
 940	release_firmware(adev->gfx.rlc_fw);
 941	adev->gfx.rlc_fw = NULL;
 942	release_firmware(adev->gfx.mec_fw);
 943	adev->gfx.mec_fw = NULL;
 944	release_firmware(adev->gfx.mec2_fw);
 945	adev->gfx.mec2_fw = NULL;
 946
 947	kfree(adev->gfx.rlc.register_list_format);
 948}
 949
 950static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 951{
 952	const struct rlc_firmware_header_v2_1 *rlc_hdr;
 953
 954	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
 955	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
 956	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
 957	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
 958	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
 959	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
 960	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
 961	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
 962	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
 963	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
 964	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
 965	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
 966	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
 967	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
 968			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
 969}
 970
 971static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
 972{
 973	adev->gfx.me_fw_write_wait = false;
 974	adev->gfx.mec_fw_write_wait = false;
 975
 976	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
 977	    (adev->gfx.mec_feature_version < 46) ||
 978	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
 979	    (adev->gfx.pfp_feature_version < 46))
 980		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
 981			      GRBM requires 1-cycle delay in cp firmware\n");
 982
 983	switch (adev->asic_type) {
 984	case CHIP_VEGA10:
 985		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
 986		    (adev->gfx.me_feature_version >= 42) &&
 987		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
 988		    (adev->gfx.pfp_feature_version >= 42))
 989			adev->gfx.me_fw_write_wait = true;
 990
 991		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
 992		    (adev->gfx.mec_feature_version >= 42))
 993			adev->gfx.mec_fw_write_wait = true;
 994		break;
 995	case CHIP_VEGA12:
 996		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
 997		    (adev->gfx.me_feature_version >= 44) &&
 998		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
 999		    (adev->gfx.pfp_feature_version >= 44))
1000			adev->gfx.me_fw_write_wait = true;
1001
1002		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1003		    (adev->gfx.mec_feature_version >= 44))
1004			adev->gfx.mec_fw_write_wait = true;
1005		break;
1006	case CHIP_VEGA20:
1007		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1008		    (adev->gfx.me_feature_version >= 44) &&
1009		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1010		    (adev->gfx.pfp_feature_version >= 44))
1011			adev->gfx.me_fw_write_wait = true;
1012
1013		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1014		    (adev->gfx.mec_feature_version >= 44))
1015			adev->gfx.mec_fw_write_wait = true;
1016		break;
1017	case CHIP_RAVEN:
1018		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1019		    (adev->gfx.me_feature_version >= 42) &&
1020		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1021		    (adev->gfx.pfp_feature_version >= 42))
1022			adev->gfx.me_fw_write_wait = true;
1023
1024		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1025		    (adev->gfx.mec_feature_version >= 42))
1026			adev->gfx.mec_fw_write_wait = true;
1027		break;
1028	default:
1029		break;
1030	}
1031}
1032
1033static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1034{
1035	switch (adev->asic_type) {
1036	case CHIP_VEGA10:
1037	case CHIP_VEGA12:
1038	case CHIP_VEGA20:
1039		break;
1040	case CHIP_RAVEN:
1041		/* Disable GFXOFF on original raven.  There are combinations
1042		 * of sbios and platforms that are not stable.
1043		 */
1044		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
1045			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1046		else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1047			 &&((adev->gfx.rlc_fw_version != 106 &&
1048			     adev->gfx.rlc_fw_version < 531) ||
1049			    (adev->gfx.rlc_fw_version == 53815) ||
1050			    (adev->gfx.rlc_feature_version < 1) ||
1051			    !adev->gfx.rlc.is_rlc_v2_1))
1052			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1053
1054		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1055			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1056				AMD_PG_SUPPORT_CP |
1057				AMD_PG_SUPPORT_RLC_SMU_HS;
1058		break;
1059	case CHIP_RENOIR:
1060		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1061			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1062				AMD_PG_SUPPORT_CP |
1063				AMD_PG_SUPPORT_RLC_SMU_HS;
1064		break;
1065	default:
1066		break;
1067	}
1068}
1069
1070static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1071					  const char *chip_name)
1072{
1073	char fw_name[30];
1074	int err;
1075	struct amdgpu_firmware_info *info = NULL;
1076	const struct common_firmware_header *header = NULL;
1077	const struct gfx_firmware_header_v1_0 *cp_hdr;
1078
1079	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1080	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1081	if (err)
1082		goto out;
1083	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1084	if (err)
1085		goto out;
1086	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1087	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1088	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1089
1090	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1091	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1092	if (err)
1093		goto out;
1094	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1095	if (err)
1096		goto out;
1097	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1098	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1099	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1100
1101	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1102	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1103	if (err)
1104		goto out;
1105	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1106	if (err)
1107		goto out;
1108	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1109	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1110	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1111
1112	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1113		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1114		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1115		info->fw = adev->gfx.pfp_fw;
1116		header = (const struct common_firmware_header *)info->fw->data;
1117		adev->firmware.fw_size +=
1118			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1119
1120		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1121		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1122		info->fw = adev->gfx.me_fw;
1123		header = (const struct common_firmware_header *)info->fw->data;
1124		adev->firmware.fw_size +=
1125			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1126
1127		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1128		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1129		info->fw = adev->gfx.ce_fw;
1130		header = (const struct common_firmware_header *)info->fw->data;
1131		adev->firmware.fw_size +=
1132			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1133	}
1134
1135out:
1136	if (err) {
1137		dev_err(adev->dev,
1138			"gfx9: Failed to load firmware \"%s\"\n",
1139			fw_name);
1140		release_firmware(adev->gfx.pfp_fw);
1141		adev->gfx.pfp_fw = NULL;
1142		release_firmware(adev->gfx.me_fw);
1143		adev->gfx.me_fw = NULL;
1144		release_firmware(adev->gfx.ce_fw);
1145		adev->gfx.ce_fw = NULL;
1146	}
1147	return err;
1148}
1149
1150static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1151					  const char *chip_name)
1152{
1153	char fw_name[30];
1154	int err;
1155	struct amdgpu_firmware_info *info = NULL;
1156	const struct common_firmware_header *header = NULL;
1157	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1158	unsigned int *tmp = NULL;
1159	unsigned int i = 0;
1160	uint16_t version_major;
1161	uint16_t version_minor;
1162	uint32_t smu_version;
1163
1164	/*
1165	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1166	 * instead of picasso_rlc.bin.
1167	 * Judgment method:
1168	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1169	 *          or revision >= 0xD8 && revision <= 0xDF
1170	 * otherwise is PCO FP5
1171	 */
1172	if (!strcmp(chip_name, "picasso") &&
1173		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1174		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1175		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1176	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1177		(smu_version >= 0x41e2b))
1178		/**
1179		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1180		*/
1181		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1182	else
1183		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1184	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1185	if (err)
1186		goto out;
1187	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1188	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1189
1190	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1191	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1192	if (version_major == 2 && version_minor == 1)
1193		adev->gfx.rlc.is_rlc_v2_1 = true;
1194
1195	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1196	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1197	adev->gfx.rlc.save_and_restore_offset =
1198			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1199	adev->gfx.rlc.clear_state_descriptor_offset =
1200			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1201	adev->gfx.rlc.avail_scratch_ram_locations =
1202			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1203	adev->gfx.rlc.reg_restore_list_size =
1204			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1205	adev->gfx.rlc.reg_list_format_start =
1206			le32_to_cpu(rlc_hdr->reg_list_format_start);
1207	adev->gfx.rlc.reg_list_format_separate_start =
1208			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1209	adev->gfx.rlc.starting_offsets_start =
1210			le32_to_cpu(rlc_hdr->starting_offsets_start);
1211	adev->gfx.rlc.reg_list_format_size_bytes =
1212			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1213	adev->gfx.rlc.reg_list_size_bytes =
1214			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1215	adev->gfx.rlc.register_list_format =
1216			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1217				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1218	if (!adev->gfx.rlc.register_list_format) {
1219		err = -ENOMEM;
1220		goto out;
1221	}
1222
1223	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1224			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1225	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1226		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1227
1228	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1229
1230	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1231			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1232	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1233		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1234
1235	if (adev->gfx.rlc.is_rlc_v2_1)
1236		gfx_v9_0_init_rlc_ext_microcode(adev);
1237
1238	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1239		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1240		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1241		info->fw = adev->gfx.rlc_fw;
1242		header = (const struct common_firmware_header *)info->fw->data;
1243		adev->firmware.fw_size +=
1244			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1245
1246		if (adev->gfx.rlc.is_rlc_v2_1 &&
1247		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1248		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1249		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1250			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1251			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1252			info->fw = adev->gfx.rlc_fw;
1253			adev->firmware.fw_size +=
1254				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1255
1256			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1257			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1258			info->fw = adev->gfx.rlc_fw;
1259			adev->firmware.fw_size +=
1260				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1261
1262			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1263			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1264			info->fw = adev->gfx.rlc_fw;
1265			adev->firmware.fw_size +=
1266				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1267		}
1268	}
1269
1270out:
1271	if (err) {
1272		dev_err(adev->dev,
1273			"gfx9: Failed to load firmware \"%s\"\n",
1274			fw_name);
1275		release_firmware(adev->gfx.rlc_fw);
1276		adev->gfx.rlc_fw = NULL;
1277	}
1278	return err;
1279}
1280
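/*
 * Request and validate the MEC (and optional MEC2) compute firmware and
 * register the MEC/MEC-JT ucode entries when PSP front-door loading is used.
 */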
1281static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1282					  const char *chip_name)
1283{
1284	char fw_name[30];
1285	int err;
1286	struct amdgpu_firmware_info *info = NULL;
1287	const struct common_firmware_header *header = NULL;
1288	const struct gfx_firmware_header_v1_0 *cp_hdr;
1289
1290	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1291	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1292	if (err)
1293		goto out;
1294	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1295	if (err)
1296		goto out;
1297	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1298	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1299	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1300
1301
1302	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1303	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1304	if (!err) {
1305		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1306		if (err)
1307			goto out;
1308		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1309				adev->gfx.mec2_fw->data;
1310		adev->gfx.mec2_fw_version =
1311				le32_to_cpu(cp_hdr->header.ucode_version);
1312		adev->gfx.mec2_feature_version =
1313				le32_to_cpu(cp_hdr->ucode_feature_version);
1314	} else {
1315		err = 0;
1316		adev->gfx.mec2_fw = NULL;
1317	}
1318
1319	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1320		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1321		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1322		info->fw = adev->gfx.mec_fw;
1323		header = (const struct common_firmware_header *)info->fw->data;
1324		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1325		adev->firmware.fw_size +=
1326			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1327
1328		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1329		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1330		info->fw = adev->gfx.mec_fw;
1331		adev->firmware.fw_size +=
1332			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1333
1334		if (adev->gfx.mec2_fw) {
1335			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1336			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1337			info->fw = adev->gfx.mec2_fw;
1338			header = (const struct common_firmware_header *)info->fw->data;
1339			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1340			adev->firmware.fw_size +=
1341				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1342
1343			/* TODO: Determine if MEC2 JT FW loading can be removed
1344			 * for all GFX v9 ASICs and above */
1345			if (adev->asic_type != CHIP_ARCTURUS) {
1346				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1347				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1348				info->fw = adev->gfx.mec2_fw;
1349				adev->firmware.fw_size +=
1350					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1351					PAGE_SIZE);
1352			}
1353		}
1354	}
1355
1356out:
1357	gfx_v9_0_check_if_need_gfxoff(adev);
1358	gfx_v9_0_check_fw_write_wait(adev);
1359	if (err) {
1360		dev_err(adev->dev,
1361			"gfx9: Failed to load firmware \"%s\"\n",
1362			fw_name);
1363		release_firmware(adev->gfx.mec_fw);
1364		adev->gfx.mec_fw = NULL;
1365		release_firmware(adev->gfx.mec2_fw);
1366		adev->gfx.mec2_fw = NULL;
1367	}
1368	return err;
1369}
1370
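/* Load all GFX microcode (CP gfx, RLC and CP compute) for the current ASIC. */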
1371static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1372{
1373	const char *chip_name;
1374	int r;
1375
1376	DRM_DEBUG("\n");
1377
1378	switch (adev->asic_type) {
1379	case CHIP_VEGA10:
1380		chip_name = "vega10";
1381		break;
1382	case CHIP_VEGA12:
1383		chip_name = "vega12";
1384		break;
1385	case CHIP_VEGA20:
1386		chip_name = "vega20";
1387		break;
1388	case CHIP_RAVEN:
1389		if (adev->rev_id >= 8)
1390			chip_name = "raven2";
1391		else if (adev->pdev->device == 0x15d8)
1392			chip_name = "picasso";
1393		else
1394			chip_name = "raven";
1395		break;
1396	case CHIP_ARCTURUS:
1397		chip_name = "arcturus";
1398		break;
1399	case CHIP_RENOIR:
1400		chip_name = "renoir";
1401		break;
1402	default:
1403		BUG();
1404	}
1405
1406	/* No CPG in Arcturus */
1407	if (adev->asic_type != CHIP_ARCTURUS) {
1408		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1409		if (r)
1410			return r;
1411	}
1412
1413	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1414	if (r)
1415		return r;
1416
1417	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1418	if (r)
1419		return r;
1420
1421	return r;
1422}
1423
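/* Return the number of dwords needed for the gfx9 clear-state buffer. */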
1424static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1425{
1426	u32 count = 0;
1427	const struct cs_section_def *sect = NULL;
1428	const struct cs_extent_def *ext = NULL;
1429
1430	/* begin clear state */
1431	count += 2;
1432	/* context control state */
1433	count += 3;
1434
1435	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1436		for (ext = sect->section; ext->extent != NULL; ++ext) {
1437			if (sect->id == SECT_CONTEXT)
1438				count += 2 + ext->reg_count;
1439			else
1440				return 0;
1441		}
1442	}
1443
1444	/* end clear state */
1445	count += 2;
1446	/* clear state */
1447	count += 2;
1448
1449	return count;
1450}
1451
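/*
 * Build the clear-state indirect buffer: preamble, context control, the
 * SECT_CONTEXT register ranges from the cs_data table, and the clear-state
 * trailer packets.
 */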
1452static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1453				    volatile u32 *buffer)
1454{
1455	u32 count = 0, i;
1456	const struct cs_section_def *sect = NULL;
1457	const struct cs_extent_def *ext = NULL;
1458
1459	if (adev->gfx.rlc.cs_data == NULL)
1460		return;
1461	if (buffer == NULL)
1462		return;
1463
1464	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1465	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1466
1467	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1468	buffer[count++] = cpu_to_le32(0x80000000);
1469	buffer[count++] = cpu_to_le32(0x80000000);
1470
1471	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1472		for (ext = sect->section; ext->extent != NULL; ++ext) {
1473			if (sect->id == SECT_CONTEXT) {
1474				buffer[count++] =
1475					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1476				buffer[count++] = cpu_to_le32(ext->reg_index -
1477						PACKET3_SET_CONTEXT_REG_START);
1478				for (i = 0; i < ext->reg_count; i++)
1479					buffer[count++] = cpu_to_le32(ext->extent[i]);
1480			} else {
1481				return;
1482			}
1483		}
1484	}
1485
1486	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1487	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1488
1489	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1490	buffer[count++] = cpu_to_le32(0);
1491}
1492
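/*
 * Walk the CU bitmap of every SE/SH and program the RLC always-on CU masks,
 * caching the result in cu_info->ao_cu_bitmap.
 */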
1493static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1494{
1495	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1496	uint32_t pg_always_on_cu_num = 2;
1497	uint32_t always_on_cu_num;
1498	uint32_t i, j, k;
1499	uint32_t mask, cu_bitmap, counter;
1500
1501	if (adev->flags & AMD_IS_APU)
1502		always_on_cu_num = 4;
1503	else if (adev->asic_type == CHIP_VEGA12)
1504		always_on_cu_num = 8;
1505	else
1506		always_on_cu_num = 12;
1507
1508	mutex_lock(&adev->grbm_idx_mutex);
1509	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1510		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1511			mask = 1;
1512			cu_bitmap = 0;
1513			counter = 0;
1514			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1515
1516			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1517				if (cu_info->bitmap[i][j] & mask) {
1518					if (counter == pg_always_on_cu_num)
1519						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1520					if (counter < always_on_cu_num)
1521						cu_bitmap |= mask;
1522					else
1523						break;
1524					counter++;
1525				}
1526				mask <<= 1;
1527			}
1528
1529			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1530			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1531		}
1532	}
1533	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1534	mutex_unlock(&adev->grbm_idx_mutex);
1535}
1536
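/* Program the RLC load-balancing (LBPW) thresholds and parameters for Raven. */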
1537static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1538{
1539	uint32_t data;
1540
1541	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1542	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1543	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1544	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1545	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1546
1547	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1548	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1549
1550	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1551	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1552
1553	mutex_lock(&adev->grbm_idx_mutex);
1554	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1555	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1556	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1557
1558	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1559	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1560	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1561	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1562	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1563
1564	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1565	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1566	data &= 0x0000FFFF;
1567	data |= 0x00C00000;
1568	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1569
1570	/*
1571	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1572	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1573	 */
1574
1575	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1576	 * but used for RLC_LB_CNTL configuration */
1577	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1578	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1579	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1580	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1581	mutex_unlock(&adev->grbm_idx_mutex);
1582
1583	gfx_v9_0_init_always_on_cu_mask(adev);
1584}
1585
1586static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1587{
1588	uint32_t data;
1589
1590	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1591	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1592	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1593	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1594	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1595
1596	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1597	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1598
1599	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1600	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1601
1602	mutex_lock(&adev->grbm_idx_mutex);
1603	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1604	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1605	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1606
1607	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1608	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1609	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1610	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1611	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1612
1613	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1614	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1615	data &= 0x0000FFFF;
1616	data |= 0x00C00000;
1617	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1618
1619	/*
1620	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1621	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1622	 */
1623
1624	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1625	 * but used for RLC_LB_CNTL configuration */
1626	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1627	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1628	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1629	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1630	mutex_unlock(&adev->grbm_idx_mutex);
1631
1632	gfx_v9_0_init_always_on_cu_mask(adev);
1633}
1634
1635static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1636{
1637	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1638}
1639
1640static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1641{
1642	return 5;
1643}
1644
1645static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1646{
1647	const struct cs_section_def *cs_data;
1648	int r;
1649
1650	adev->gfx.rlc.cs_data = gfx9_cs_data;
1651
1652	cs_data = adev->gfx.rlc.cs_data;
1653
1654	if (cs_data) {
1655		/* init clear state block */
1656		r = amdgpu_gfx_rlc_init_csb(adev);
1657		if (r)
1658			return r;
1659	}
1660
1661	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1662		/* TODO: double check the cp_table_size for RV */
1663		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1664		r = amdgpu_gfx_rlc_init_cpt(adev);
1665		if (r)
1666			return r;
1667	}
1668
1669	switch (adev->asic_type) {
1670	case CHIP_RAVEN:
1671		gfx_v9_0_init_lbpw(adev);
1672		break;
1673	case CHIP_VEGA20:
1674		gfx_v9_4_init_lbpw(adev);
1675		break;
1676	default:
1677		break;
1678	}
1679
1680	return 0;
1681}
1682
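/* Pin the clear-state BO in VRAM and record its GPU address for the CSB. */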
1683static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1684{
1685	int r;
1686
1687	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1688	if (unlikely(r != 0))
1689		return r;
1690
1691	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1692			AMDGPU_GEM_DOMAIN_VRAM);
1693	if (!r)
1694		adev->gfx.rlc.clear_state_gpu_addr =
1695			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1696
1697	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1698
1699	return r;
1700}
1701
1702static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1703{
1704	int r;
1705
1706	if (!adev->gfx.rlc.clear_state_obj)
1707		return;
1708
1709	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1710	if (likely(r == 0)) {
1711		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1712		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1713	}
1714}
1715
1716static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1717{
1718	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1719	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1720}
1721
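/*
 * Acquire the compute queues, allocate the MEC HPD EOP buffer in VRAM and
 * copy the MEC firmware into a GTT BO.
 */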
1722static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1723{
1724	int r;
1725	u32 *hpd;
1726	const __le32 *fw_data;
1727	unsigned fw_size;
1728	u32 *fw;
1729	size_t mec_hpd_size;
1730
1731	const struct gfx_firmware_header_v1_0 *mec_hdr;
1732
1733	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1734
1735	/* take ownership of the relevant compute queues */
1736	amdgpu_gfx_compute_queue_acquire(adev);
1737	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1738
1739	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1740				      AMDGPU_GEM_DOMAIN_VRAM,
1741				      &adev->gfx.mec.hpd_eop_obj,
1742				      &adev->gfx.mec.hpd_eop_gpu_addr,
1743				      (void **)&hpd);
1744	if (r) {
1745		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1746		gfx_v9_0_mec_fini(adev);
1747		return r;
1748	}
1749
1750	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1751
1752	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1753	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1754
1755	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1756
1757	fw_data = (const __le32 *)
1758		(adev->gfx.mec_fw->data +
1759		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1760	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1761
1762	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1763				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1764				      &adev->gfx.mec.mec_fw_obj,
1765				      &adev->gfx.mec.mec_fw_gpu_addr,
1766				      (void **)&fw);
1767	if (r) {
1768		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1769		gfx_v9_0_mec_fini(adev);
1770		return r;
1771	}
1772
1773	memcpy(fw, fw_data, fw_size);
1774
1775	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1776	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1777
1778	return 0;
1779}
1780
1781static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1782{
1783	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1784		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1785		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1786		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1787		(SQ_IND_INDEX__FORCE_READ_MASK));
1788	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1789}
1790
1791static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1792			   uint32_t wave, uint32_t thread,
1793			   uint32_t regno, uint32_t num, uint32_t *out)
1794{
1795	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1796		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1797		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1798		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1799		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1800		(SQ_IND_INDEX__FORCE_READ_MASK) |
1801		(SQ_IND_INDEX__AUTO_INCR_MASK));
1802	while (num--)
1803		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1804}
1805
1806static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1807{
1808	/* type 1 wave data */
1809	dst[(*no_fields)++] = 1;
1810	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1811	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1812	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1813	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1814	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1815	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1816	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1817	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1818	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1819	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1820	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1821	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1822	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1823	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1824}
1825
1826static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1827				     uint32_t wave, uint32_t start,
1828				     uint32_t size, uint32_t *dst)
1829{
1830	wave_read_regs(
1831		adev, simd, wave, 0,
1832		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1833}
1834
1835static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1836				     uint32_t wave, uint32_t thread,
1837				     uint32_t start, uint32_t size,
1838				     uint32_t *dst)
1839{
1840	wave_read_regs(
1841		adev, simd, wave, thread,
1842		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1843}
1844
1845static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1846				  u32 me, u32 pipe, u32 q, u32 vm)
1847{
1848	soc15_grbm_select(adev, me, pipe, q, vm);
1849}
1850
1851static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1852	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1853	.select_se_sh = &gfx_v9_0_select_se_sh,
1854	.read_wave_data = &gfx_v9_0_read_wave_data,
1855	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1856	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1857	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1858	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1859	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1860};
1861
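/*
 * Set the per-ASIC gfx configuration defaults and decode the GB_ADDR_CONFIG
 * value into its individual fields.
 */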
1862static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1863{
1864	u32 gb_addr_config;
1865	int err;
1866
1867	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1868
1869	switch (adev->asic_type) {
1870	case CHIP_VEGA10:
1871		adev->gfx.config.max_hw_contexts = 8;
1872		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1873		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1874		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1875		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1876		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1877		break;
1878	case CHIP_VEGA12:
1879		adev->gfx.config.max_hw_contexts = 8;
1880		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1881		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1882		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1883		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1884		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1885		DRM_INFO("fix gfx.config for vega12\n");
1886		break;
1887	case CHIP_VEGA20:
1888		adev->gfx.config.max_hw_contexts = 8;
1889		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1890		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1891		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1892		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1893		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1894		gb_addr_config &= ~0xf3e777ff;
1895		gb_addr_config |= 0x22014042;
1896		/* check vbios table if gpu info is not available */
1897		err = amdgpu_atomfirmware_get_gfx_info(adev);
1898		if (err)
1899			return err;
1900		break;
1901	case CHIP_RAVEN:
1902		adev->gfx.config.max_hw_contexts = 8;
1903		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1904		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1905		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1906		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1907		if (adev->rev_id >= 8)
1908			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1909		else
1910			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1911		break;
1912	case CHIP_ARCTURUS:
1913		adev->gfx.config.max_hw_contexts = 8;
1914		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1915		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1916		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1917		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1918		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1919		gb_addr_config &= ~0xf3e777ff;
1920		gb_addr_config |= 0x22014042;
1921		break;
1922	case CHIP_RENOIR:
1923		adev->gfx.config.max_hw_contexts = 8;
1924		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1925		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1926		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1927		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1928		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1929		gb_addr_config &= ~0xf3e777ff;
1930		gb_addr_config |= 0x22010042;
1931		break;
1932	default:
1933		BUG();
1934		break;
1935	}
1936
1937	adev->gfx.config.gb_addr_config = gb_addr_config;
1938
1939	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1940			REG_GET_FIELD(
1941					adev->gfx.config.gb_addr_config,
1942					GB_ADDR_CONFIG,
1943					NUM_PIPES);
1944
1945	adev->gfx.config.max_tile_pipes =
1946		adev->gfx.config.gb_addr_config_fields.num_pipes;
1947
1948	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1949			REG_GET_FIELD(
1950					adev->gfx.config.gb_addr_config,
1951					GB_ADDR_CONFIG,
1952					NUM_BANKS);
1953	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1954			REG_GET_FIELD(
1955					adev->gfx.config.gb_addr_config,
1956					GB_ADDR_CONFIG,
1957					MAX_COMPRESSED_FRAGS);
1958	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1959			REG_GET_FIELD(
1960					adev->gfx.config.gb_addr_config,
1961					GB_ADDR_CONFIG,
1962					NUM_RB_PER_SE);
1963	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1964			REG_GET_FIELD(
1965					adev->gfx.config.gb_addr_config,
1966					GB_ADDR_CONFIG,
1967					NUM_SHADER_ENGINES);
1968	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1969			REG_GET_FIELD(
1970					adev->gfx.config.gb_addr_config,
1971					GB_ADDR_CONFIG,
1972					PIPE_INTERLEAVE_SIZE));
1973
1974	return 0;
1975}
1976
1977static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1978				   struct amdgpu_ngg_buf *ngg_buf,
1979				   int size_se,
1980				   int default_size_se)
1981{
1982	int r;
1983
1984	if (size_se < 0) {
1985		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1986		return -EINVAL;
1987	}
1988	size_se = size_se ? size_se : default_size_se;
1989
1990	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1991	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1992				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1993				    &ngg_buf->bo,
1994				    &ngg_buf->gpu_addr,
1995				    NULL);
1996	if (r) {
1997		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1998		return r;
1999	}
2000	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
2001
2002	return r;
2003}
2004
2005static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
2006{
2007	int i;
2008
2009	for (i = 0; i < NGG_BUF_MAX; i++)
2010		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
2011				      &adev->gfx.ngg.buf[i].gpu_addr,
2012				      NULL);
2013
2014	memset(&adev->gfx.ngg.buf[0], 0,
2015			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
2016
2017	adev->gfx.ngg.init = false;
2018
2019	return 0;
2020}
2021
2022static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2023{
2024	int r;
2025
2026	if (!amdgpu_ngg || adev->gfx.ngg.init)
2027		return 0;
2028
2029	/* GDS reserved memory: 64-byte alignment */
2030	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2031	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2032	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2033	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2034
2035	/* Primitive Buffer */
2036	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2037				    amdgpu_prim_buf_per_se,
2038				    64 * 1024);
2039	if (r) {
2040		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2041		goto err;
2042	}
2043
2044	/* Position Buffer */
2045	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2046				    amdgpu_pos_buf_per_se,
2047				    256 * 1024);
2048	if (r) {
2049		dev_err(adev->dev, "Failed to create Position Buffer\n");
2050		goto err;
2051	}
2052
2053	/* Control Sideband */
2054	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2055				    amdgpu_cntl_sb_buf_per_se,
2056				    256);
2057	if (r) {
2058		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2059		goto err;
2060	}
2061
2062	/* Parameter Cache, not created by default */
2063	if (amdgpu_param_buf_per_se <= 0)
2064		goto out;
2065
2066	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2067				    amdgpu_param_buf_per_se,
2068				    512 * 1024);
2069	if (r) {
2070		dev_err(adev->dev, "Failed to create Parameter Cache\n");
2071		goto err;
2072	}
2073
2074out:
2075	adev->gfx.ngg.init = true;
2076	return 0;
2077err:
2078	gfx_v9_0_ngg_fini(adev);
2079	return r;
2080}
2081
2082static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2083{
2084	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2085	int r;
2086	u32 data, base;
2087
2088	if (!amdgpu_ngg)
2089		return 0;
2090
2091	/* Program buffer size */
2092	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2093			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2094	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2095			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
2096	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2097
2098	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2099			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2100	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2101			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2102	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2103
2104	/* Program buffer base address */
2105	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2106	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2107	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2108
2109	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2110	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2111	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2112
2113	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2114	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2115	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2116
2117	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2118	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2119	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2120
2121	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2122	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2123	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2124
2125	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2126	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2127	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2128
2129	/* Clear GDS reserved memory */
2130	r = amdgpu_ring_alloc(ring, 17);
2131	if (r) {
2132		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2133			  ring->name, r);
2134		return r;
2135	}
2136
2137	gfx_v9_0_write_data_to_reg(ring, 0, false,
2138				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2139			           (adev->gds.gds_size +
2140				    adev->gfx.ngg.gds_reserve_size));
2141
2142	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2143	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2144				PACKET3_DMA_DATA_DST_SEL(1) |
2145				PACKET3_DMA_DATA_SRC_SEL(2)));
2146	amdgpu_ring_write(ring, 0);
2147	amdgpu_ring_write(ring, 0);
2148	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2149	amdgpu_ring_write(ring, 0);
2150	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2151				adev->gfx.ngg.gds_reserve_size);
2152
2153	gfx_v9_0_write_data_to_reg(ring, 0, false,
2154				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2155
2156	amdgpu_ring_commit(ring);
2157
2158	return 0;
2159}
2160
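/*
 * Initialize one compute ring: set its MEC/pipe/queue, doorbell, EOP address
 * and EOP interrupt source, then call amdgpu_ring_init() for it.
 */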
2161static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2162				      int mec, int pipe, int queue)
2163{
2164	int r;
2165	unsigned irq_type;
2166	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2167
2170	/* mec0 is me1 */
2171	ring->me = mec + 1;
2172	ring->pipe = pipe;
2173	ring->queue = queue;
2174
2175	ring->ring_obj = NULL;
2176	ring->use_doorbell = true;
2177	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2178	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2179				+ (ring_id * GFX9_MEC_HPD_SIZE);
2180	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2181
2182	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2183		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2184		+ ring->pipe;
2185
2186	/* type-2 packets are deprecated on MEC, use type-3 instead */
2187	r = amdgpu_ring_init(adev, ring, 1024,
2188			     &adev->gfx.eop_irq, irq_type);
2189	if (r)
2190		return r;
2191
2192
2193	return 0;
2194}
2195
2196static int gfx_v9_0_sw_init(void *handle)
2197{
2198	int i, j, k, r, ring_id;
2199	struct amdgpu_ring *ring;
2200	struct amdgpu_kiq *kiq;
2201	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2202
2203	switch (adev->asic_type) {
2204	case CHIP_VEGA10:
2205	case CHIP_VEGA12:
2206	case CHIP_VEGA20:
2207	case CHIP_RAVEN:
2208	case CHIP_ARCTURUS:
2209	case CHIP_RENOIR:
2210		adev->gfx.mec.num_mec = 2;
2211		break;
2212	default:
2213		adev->gfx.mec.num_mec = 1;
2214		break;
2215	}
2216
2217	adev->gfx.mec.num_pipe_per_mec = 4;
2218	adev->gfx.mec.num_queue_per_pipe = 8;
2219
2220	/* EOP Event */
2221	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2222	if (r)
2223		return r;
2224
2225	/* Privileged reg */
2226	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2227			      &adev->gfx.priv_reg_irq);
2228	if (r)
2229		return r;
2230
2231	/* Privileged inst */
2232	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2233			      &adev->gfx.priv_inst_irq);
2234	if (r)
2235		return r;
2236
2237	/* ECC error */
2238	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2239			      &adev->gfx.cp_ecc_error_irq);
2240	if (r)
2241		return r;
2242
2243	/* FUE error */
2244	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2245			      &adev->gfx.cp_ecc_error_irq);
2246	if (r)
2247		return r;
2248
2249	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2250
2251	gfx_v9_0_scratch_init(adev);
2252
2253	r = gfx_v9_0_init_microcode(adev);
2254	if (r) {
2255		DRM_ERROR("Failed to load gfx firmware!\n");
2256		return r;
2257	}
2258
2259	r = adev->gfx.rlc.funcs->init(adev);
2260	if (r) {
2261		DRM_ERROR("Failed to init rlc BOs!\n");
2262		return r;
2263	}
2264
2265	r = gfx_v9_0_mec_init(adev);
2266	if (r) {
2267		DRM_ERROR("Failed to init MEC BOs!\n");
2268		return r;
2269	}
2270
2271	/* set up the gfx ring */
2272	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2273		ring = &adev->gfx.gfx_ring[i];
2274		ring->ring_obj = NULL;
2275		if (!i)
2276			sprintf(ring->name, "gfx");
2277		else
2278			sprintf(ring->name, "gfx_%d", i);
2279		ring->use_doorbell = true;
2280		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2281		r = amdgpu_ring_init(adev, ring, 1024,
2282				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2283		if (r)
2284			return r;
2285	}
2286
2287	/* set up the compute queues - allocate horizontally across pipes */
2288	ring_id = 0;
2289	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2290		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2291			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2292				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2293					continue;
2294
2295				r = gfx_v9_0_compute_ring_init(adev,
2296							       ring_id,
2297							       i, k, j);
2298				if (r)
2299					return r;
2300
2301				ring_id++;
2302			}
2303		}
2304	}
2305
2306	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2307	if (r) {
2308		DRM_ERROR("Failed to init KIQ BOs!\n");
2309		return r;
2310	}
2311
2312	kiq = &adev->gfx.kiq;
2313	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2314	if (r)
2315		return r;
2316
2317	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2318	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2319	if (r)
2320		return r;
2321
2322	adev->gfx.ce_ram_size = 0x8000;
2323
2324	r = gfx_v9_0_gpu_early_init(adev);
2325	if (r)
2326		return r;
2327
2328	r = gfx_v9_0_ngg_init(adev);
2329	if (r)
2330		return r;
2331
2332	return 0;
2333}
2334
2335
2336static int gfx_v9_0_sw_fini(void *handle)
2337{
2338	int i;
2339	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2340
2341	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2342			adev->gfx.ras_if) {
2343		struct ras_common_if *ras_if = adev->gfx.ras_if;
2344		struct ras_ih_if ih_info = {
2345			.head = *ras_if,
2346		};
2347
2348		amdgpu_ras_debugfs_remove(adev, ras_if);
2349		amdgpu_ras_sysfs_remove(adev, ras_if);
2350		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
2351		amdgpu_ras_feature_enable(adev, ras_if, 0);
2352		kfree(ras_if);
2353	}
2354
2355	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2356		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2357	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2358		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2359
2360	amdgpu_gfx_mqd_sw_fini(adev);
2361	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2362	amdgpu_gfx_kiq_fini(adev);
2363
2364	gfx_v9_0_mec_fini(adev);
2365	gfx_v9_0_ngg_fini(adev);
2366	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2367	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2368		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2369				&adev->gfx.rlc.cp_table_gpu_addr,
2370				(void **)&adev->gfx.rlc.cp_table_ptr);
2371	}
2372	gfx_v9_0_free_microcode(adev);
2373
2374	return 0;
2375}
2376
2377
2378static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2379{
2380	/* TODO */
2381}
2382
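/*
 * Select a shader engine / shader array / instance via GRBM_GFX_INDEX;
 * 0xffffffff selects broadcast mode for that field.
 */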
2383static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2384{
2385	u32 data;
2386
2387	if (instance == 0xffffffff)
2388		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2389	else
2390		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2391
2392	if (se_num == 0xffffffff)
2393		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2394	else
2395		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2396
2397	if (sh_num == 0xffffffff)
2398		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2399	else
2400		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2401
2402	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2403}
2404
2405static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2406{
2407	u32 data, mask;
2408
2409	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2410	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2411
2412	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2413	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2414
2415	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2416					 adev->gfx.config.max_sh_per_se);
2417
2418	return (~data) & mask;
2419}
2420
2421static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2422{
2423	int i, j;
2424	u32 data;
2425	u32 active_rbs = 0;
2426	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2427					adev->gfx.config.max_sh_per_se;
2428
2429	mutex_lock(&adev->grbm_idx_mutex);
2430	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2431		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2432			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2433			data = gfx_v9_0_get_rb_active_bitmap(adev);
2434			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2435					       rb_bitmap_width_per_sh);
2436		}
2437	}
2438	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2439	mutex_unlock(&adev->grbm_idx_mutex);
2440
2441	adev->gfx.config.backend_enable_mask = active_rbs;
2442	adev->gfx.config.num_rbs = hweight32(active_rbs);
2443}
2444
2445#define DEFAULT_SH_MEM_BASES	(0x6000)
2446#define FIRST_COMPUTE_VMID	(8)
2447#define LAST_COMPUTE_VMID	(16)
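/*
 * Program the SH_MEM apertures for the compute VMIDs (8..15) and clear their
 * GDS, GWS and OA allocations.
 */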
2448static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2449{
2450	int i;
2451	uint32_t sh_mem_config;
2452	uint32_t sh_mem_bases;
2453
2454	/*
2455	 * Configure apertures:
2456	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2457	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2458	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2459	 */
2460	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2461
2462	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2463			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2464			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2465
2466	mutex_lock(&adev->srbm_mutex);
2467	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2468		soc15_grbm_select(adev, 0, 0, 0, i);
2469		/* CP and shaders */
2470		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2471		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2472	}
2473	soc15_grbm_select(adev, 0, 0, 0, 0);
2474	mutex_unlock(&adev->srbm_mutex);
2475
2476	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2477	 * access. These should be enabled by FW for target VMIDs. */
2478	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2479		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2480		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2481		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2482		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2483	}
2484}
2485
2486static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2487{
2488	int vmid;
2489
2490	/*
2491	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2492	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2493	 * the driver can enable them for graphics. VMID0 should maintain
2494	 * access so that HWS firmware can save/restore entries.
2495	 */
2496	for (vmid = 1; vmid < 16; vmid++) {
2497		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2498		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2499		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2500		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2501	}
2502}
2503
2504static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2505{
2506	u32 tmp;
2507	int i;
2508
2509	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2510
2511	gfx_v9_0_tiling_mode_table_init(adev);
2512
2513	gfx_v9_0_setup_rb(adev);
2514	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2515	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2516
2517	/* XXX SH_MEM regs */
2518	/* where to put LDS, scratch, GPUVM in FSA64 space */
2519	mutex_lock(&adev->srbm_mutex);
2520	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2521		soc15_grbm_select(adev, 0, 0, 0, i);
2522		/* CP and shaders */
2523		if (i == 0) {
2524			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2525					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2526			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2527					    !!amdgpu_noretry);
2528			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2529			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2530		} else {
2531			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2532					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2533			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2534					    !!amdgpu_noretry);
2535			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2536			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2537				(adev->gmc.private_aperture_start >> 48));
2538			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2539				(adev->gmc.shared_aperture_start >> 48));
2540			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2541		}
2542	}
2543	soc15_grbm_select(adev, 0, 0, 0, 0);
2544
2545	mutex_unlock(&adev->srbm_mutex);
2546
2547	gfx_v9_0_init_compute_vmid(adev);
2548	gfx_v9_0_init_gds_vmid(adev);
2549}
2550
2551static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2552{
2553	u32 i, j, k;
2554	u32 mask;
2555
2556	mutex_lock(&adev->grbm_idx_mutex);
2557	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2558		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2559			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2560			for (k = 0; k < adev->usec_timeout; k++) {
2561				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2562					break;
2563				udelay(1);
2564			}
2565			if (k == adev->usec_timeout) {
2566				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2567						      0xffffffff, 0xffffffff);
2568				mutex_unlock(&adev->grbm_idx_mutex);
2569				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2570					 i, j);
2571				return;
2572			}
2573		}
2574	}
2575	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2576	mutex_unlock(&adev->grbm_idx_mutex);
2577
2578	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2579		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2580		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2581		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2582	for (k = 0; k < adev->usec_timeout; k++) {
2583		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2584			break;
2585		udelay(1);
2586	}
2587}
2588
2589static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2590					       bool enable)
2591{
2592	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2593
2594	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2595	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2596	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2597	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2598
2599	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2600}
2601
2602static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2603{
2604	/* csib */
2605	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2606			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2607	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2608			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2609	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2610			adev->gfx.rlc.clear_state_size);
2611}
2612
2613static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2614				int indirect_offset,
2615				int list_size,
2616				int *unique_indirect_regs,
2617				int unique_indirect_reg_count,
2618				int *indirect_start_offsets,
2619				int *indirect_start_offsets_count,
2620				int max_start_offsets_count)
2621{
2622	int idx;
2623
2624	for (; indirect_offset < list_size; indirect_offset++) {
2625		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2626		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2627		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2628
2629		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2630			indirect_offset += 2;
2631
2632			/* look for the matching index */
2633			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2634				if (unique_indirect_regs[idx] ==
2635					register_list_format[indirect_offset] ||
2636					!unique_indirect_regs[idx])
2637					break;
2638			}
2639
2640			BUG_ON(idx >= unique_indirect_reg_count);
2641
2642			if (!unique_indirect_regs[idx])
2643				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2644
2645			indirect_offset++;
2646		}
2647	}
2648}
2649
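/*
 * Program the RLC save/restore list: write the register restore table into
 * ARAM and the register list format, start offsets and unique indirect
 * registers into the RLC GPM scratch RAM.
 */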
2650static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2651{
2652	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2653	int unique_indirect_reg_count = 0;
2654
2655	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2656	int indirect_start_offsets_count = 0;
2657
2658	int list_size = 0;
2659	int i = 0, j = 0;
2660	u32 tmp = 0;
2661
2662	u32 *register_list_format =
2663		kmemdup(adev->gfx.rlc.register_list_format,
2664			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2665	if (!register_list_format)
2666		return -ENOMEM;
2667
2668	/* setup unique_indirect_regs array and indirect_start_offsets array */
2669	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2670	gfx_v9_1_parse_ind_reg_list(register_list_format,
2671				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2672				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2673				    unique_indirect_regs,
2674				    unique_indirect_reg_count,
2675				    indirect_start_offsets,
2676				    &indirect_start_offsets_count,
2677				    ARRAY_SIZE(indirect_start_offsets));
2678
2679	/* enable auto inc in case it is disabled */
2680	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2681	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2682	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2683
2684	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2685	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2686		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2687	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2688		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2689			adev->gfx.rlc.register_restore[i]);
2690
2691	/* load indirect register */
2692	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2693		adev->gfx.rlc.reg_list_format_start);
2694
2695	/* direct register portion */
2696	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2697		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2698			register_list_format[i]);
2699
2700	/* indirect register portion */
2701	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2702		if (register_list_format[i] == 0xFFFFFFFF) {
2703			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2704			continue;
2705		}
2706
2707		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2708		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2709
2710		for (j = 0; j < unique_indirect_reg_count; j++) {
2711			if (register_list_format[i] == unique_indirect_regs[j]) {
2712				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2713				break;
2714			}
2715		}
2716
2717		BUG_ON(j >= unique_indirect_reg_count);
2718
2719		i++;
2720	}
2721
2722	/* set save/restore list size */
2723	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2724	list_size = list_size >> 1;
2725	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2726		adev->gfx.rlc.reg_restore_list_size);
2727	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2728
2729	/* write the starting offsets to RLC scratch ram */
2730	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2731		adev->gfx.rlc.starting_offsets_start);
2732	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2733		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2734		       indirect_start_offsets[i]);
2735
2736	/* load unique indirect regs */
2737	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2738		if (unique_indirect_regs[i] != 0) {
2739			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2740			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2741			       unique_indirect_regs[i] & 0x3FFFF);
2742
2743			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2744			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2745			       unique_indirect_regs[i] >> 20);
2746		}
2747	}
2748
2749	kfree(register_list_format);
2750	return 0;
2751}
2752
2753static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2754{
2755	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2756}
2757
2758static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2759					     bool enable)
2760{
2761	uint32_t data = 0;
2762	uint32_t default_data = 0;
2763
2764	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2765	if (enable) {
2766		/* enable GFXIP control over CGPG */
2767		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2768		if (default_data != data)
2769			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2770
2771		/* update status */
2772		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2773		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2774		if (default_data != data)
2775			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2776	} else {
2777		/* restore GFXIP control over CGPG */
2778		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2779		if (default_data != data)
2780			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2781	}
2782}
2783
2784static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2785{
2786	uint32_t data = 0;
2787
2788	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2789			      AMD_PG_SUPPORT_GFX_SMG |
2790			      AMD_PG_SUPPORT_GFX_DMG)) {
2791		/* init IDLE_POLL_COUNT = 60 */
2792		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2793		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2794		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2795		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2796
2797		/* init RLC PG Delay */
2798		data = 0;
2799		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2800		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2801		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2802		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2803		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2804
2805		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2806		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2807		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2808		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2809
2810		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2811		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2812		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2813		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2814
2815		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2816		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2817
2818		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2819		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2820		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2821
2822		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2823	}
2824}
2825
2826static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2827						bool enable)
2828{
2829	uint32_t data = 0;
2830	uint32_t default_data = 0;
2831
2832	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2833	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2834			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2835			     enable ? 1 : 0);
2836	if (default_data != data)
2837		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2838}
2839
2840static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2841						bool enable)
2842{
2843	uint32_t data = 0;
2844	uint32_t default_data = 0;
2845
2846	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2847	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2848			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2849			     enable ? 1 : 0);
2850	if (default_data != data)
2851		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2852}
2853
2854static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2855					bool enable)
2856{
2857	uint32_t data = 0;
2858	uint32_t default_data = 0;
2859
2860	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2861	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2862			     CP_PG_DISABLE,
2863			     enable ? 0 : 1);
2864	if (default_data != data)
2865		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2866}
2867
2868static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2869						bool enable)
2870{
2871	uint32_t data, default_data;
2872
2873	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2874	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2875			     GFX_POWER_GATING_ENABLE,
2876			     enable ? 1 : 0);
2877	if (default_data != data)
2878		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2879}
2880
2881static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2882						bool enable)
2883{
2884	uint32_t data, default_data;
2885
2886	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2887	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2888			     GFX_PIPELINE_PG_ENABLE,
2889			     enable ? 1 : 0);
2890	if (default_data != data)
2891		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2892
2893	if (!enable)
2894		/* read any GFX register to wake up GFX */
2895		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2896}
2897
2898static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2899						       bool enable)
2900{
2901	uint32_t data, default_data;
2902
2903	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2904	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2905			     STATIC_PER_CU_PG_ENABLE,
2906			     enable ? 1 : 0);
2907	if (default_data != data)
2908		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2909}
2910
2911static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2912						bool enable)
2913{
2914	uint32_t data, default_data;
2915
2916	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2917	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2918			     DYN_PER_CU_PG_ENABLE,
2919			     enable ? 1 : 0);
2920	if (default_data != data)
2921		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2922}
2923
2924static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2925{
2926	gfx_v9_0_init_csb(adev);
2927
2928	/*
2929	 * The RLC save/restore list is supported since RLC v2_1 and is
2930	 * required by the gfxoff feature.
2931	 */
2932	if (adev->gfx.rlc.is_rlc_v2_1) {
2933		gfx_v9_1_init_rlc_save_restore_list(adev);
2934		gfx_v9_0_enable_save_restore_machine(adev);
2935	}
2936
2937	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2938			      AMD_PG_SUPPORT_GFX_SMG |
2939			      AMD_PG_SUPPORT_GFX_DMG |
2940			      AMD_PG_SUPPORT_CP |
2941			      AMD_PG_SUPPORT_GDS |
2942			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2943		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2944		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2945		gfx_v9_0_init_gfx_power_gating(adev);
2946	}
2947}
2948
2949void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2950{
2951	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2952	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2953	gfx_v9_0_wait_for_rlc_serdes(adev);
2954}
2955
2956static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2957{
2958	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2959	udelay(50);
2960	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2961	udelay(50);
2962}
2963
2964static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2965{
2966#ifdef AMDGPU_RLC_DEBUG_RETRY
2967	u32 rlc_ucode_ver;
2968#endif
2969
2970	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2971	udelay(50);
2972
2973	/* on APUs (e.g. carrizo) the CP interrupt is only enabled after the CP is initialized */
2974	if (!(adev->flags & AMD_IS_APU)) {
2975		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2976		udelay(50);
2977	}
2978
2979#ifdef AMDGPU_RLC_DEBUG_RETRY
2980	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2981	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2982	if (rlc_ucode_ver == 0x108) {
2983		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
2984				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2985		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2986		 * default is 0x9C4 to create a 100us interval */
2987		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2988		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2989		 * to disable the page fault retry interrupts, default is
2990		 * 0x100 (256) */
2991		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2992	}
2993#endif
2994}
2995
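/*
 * Legacy (non-PSP) RLC microcode load: the ucode image is streamed into the
 * RLC GPM instruction memory through the RLC_GPM_UCODE_ADDR/DATA register
 * pair, and the firmware version is written back to the address register
 * when done.
 */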
2996static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2997{
2998	const struct rlc_firmware_header_v2_0 *hdr;
2999	const __le32 *fw_data;
3000	unsigned i, fw_size;
3001
3002	if (!adev->gfx.rlc_fw)
3003		return -EINVAL;
3004
3005	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3006	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3007
3008	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3009			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3010	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3011
3012	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3013			RLCG_UCODE_LOADING_START_ADDRESS);
3014	for (i = 0; i < fw_size; i++)
3015		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3016	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3017
3018	return 0;
3019}
3020
3021static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3022{
3023	int r;
3024
3025	if (amdgpu_sriov_vf(adev)) {
3026		gfx_v9_0_init_csb(adev);
3027		return 0;
3028	}
3029
3030	adev->gfx.rlc.funcs->stop(adev);
3031
3032	/* disable CG */
3033	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3034
3035	gfx_v9_0_init_pg(adev);
3036
3037	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3038		/* legacy rlc firmware loading */
3039		r = gfx_v9_0_rlc_load_microcode(adev);
3040		if (r)
3041			return r;
3042	}
3043
3044	switch (adev->asic_type) {
3045	case CHIP_RAVEN:
3046		if (amdgpu_lbpw == 0)
3047			gfx_v9_0_enable_lbpw(adev, false);
3048		else
3049			gfx_v9_0_enable_lbpw(adev, true);
3050		break;
3051	case CHIP_VEGA20:
3052		if (amdgpu_lbpw > 0)
3053			gfx_v9_0_enable_lbpw(adev, true);
3054		else
3055			gfx_v9_0_enable_lbpw(adev, false);
3056		break;
3057	default:
3058		break;
3059	}
3060
3061	adev->gfx.rlc.funcs->start(adev);
3062
3063	return 0;
3064}
3065
3066static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3067{
3068	int i;
3069	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3070
3071	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3072	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3073	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3074	if (!enable) {
3075		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3076			adev->gfx.gfx_ring[i].sched.ready = false;
3077	}
3078	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3079	udelay(50);
3080}
3081
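/*
 * Legacy (non-PSP) CP gfx microcode load: halt the gfx CP, then stream the
 * PFP, CE and ME ucode images through their respective UCODE_ADDR/DATA
 * (or ME_RAM_WADDR/DATA) register pairs.
 */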
3082static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3083{
3084	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3085	const struct gfx_firmware_header_v1_0 *ce_hdr;
3086	const struct gfx_firmware_header_v1_0 *me_hdr;
3087	const __le32 *fw_data;
3088	unsigned i, fw_size;
3089
3090	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3091		return -EINVAL;
3092
3093	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3094		adev->gfx.pfp_fw->data;
3095	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3096		adev->gfx.ce_fw->data;
3097	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3098		adev->gfx.me_fw->data;
3099
3100	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3101	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3102	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3103
3104	gfx_v9_0_cp_gfx_enable(adev, false);
3105
3106	/* PFP */
3107	fw_data = (const __le32 *)
3108		(adev->gfx.pfp_fw->data +
3109		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3110	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3111	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3112	for (i = 0; i < fw_size; i++)
3113		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3114	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3115
3116	/* CE */
3117	fw_data = (const __le32 *)
3118		(adev->gfx.ce_fw->data +
3119		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3120	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3121	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3122	for (i = 0; i < fw_size; i++)
3123		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3124	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3125
3126	/* ME */
3127	fw_data = (const __le32 *)
3128		(adev->gfx.me_fw->data +
3129		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3130	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3131	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3132	for (i = 0; i < fw_size; i++)
3133		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3134	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3135
3136	return 0;
3137}
3138
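/*
 * Bring up the gfx CP and emit the initial state on the gfx ring: the
 * clear-state preamble, the context register values from gfx9_cs_data and
 * the CE partition base.
 */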
3139static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3140{
3141	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3142	const struct cs_section_def *sect = NULL;
3143	const struct cs_extent_def *ext = NULL;
3144	int r, i, tmp;
3145
3146	/* init the CP */
3147	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3148	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3149
3150	gfx_v9_0_cp_gfx_enable(adev, true);
3151
3152	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3153	if (r) {
3154		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3155		return r;
3156	}
3157
3158	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3159	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3160
3161	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3162	amdgpu_ring_write(ring, 0x80000000);
3163	amdgpu_ring_write(ring, 0x80000000);
3164
3165	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3166		for (ext = sect->section; ext->extent != NULL; ++ext) {
3167			if (sect->id == SECT_CONTEXT) {
3168				amdgpu_ring_write(ring,
3169				       PACKET3(PACKET3_SET_CONTEXT_REG,
3170					       ext->reg_count));
3171				amdgpu_ring_write(ring,
3172				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3173				for (i = 0; i < ext->reg_count; i++)
3174					amdgpu_ring_write(ring, ext->extent[i]);
3175			}
3176		}
3177	}
3178
3179	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3180	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3181
3182	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3183	amdgpu_ring_write(ring, 0);
3184
3185	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3186	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3187	amdgpu_ring_write(ring, 0x8000);
3188	amdgpu_ring_write(ring, 0x8000);
3189
3190	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3191	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3192		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3193	amdgpu_ring_write(ring, tmp);
3194	amdgpu_ring_write(ring, 0);
3195
3196	amdgpu_ring_commit(ring);
3197
3198	return 0;
3199}
3200
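/*
 * Program the gfx ring buffer (CP_RB0_*) registers: ring size, rptr/wptr
 * writeback addresses, ring base and doorbell range, then initialize the
 * ring contents via gfx_v9_0_cp_gfx_start().
 */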
3201static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3202{
3203	struct amdgpu_ring *ring;
3204	u32 tmp;
3205	u32 rb_bufsz;
3206	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3207
3208	/* Set the write pointer delay */
3209	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3210
3211	/* set the RB to use vmid 0 */
3212	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3213
3214	/* Set ring buffer size */
3215	ring = &adev->gfx.gfx_ring[0];
3216	rb_bufsz = order_base_2(ring->ring_size / 8);
3217	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3218	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3219#ifdef __BIG_ENDIAN
3220	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3221#endif
3222	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3223
3224	/* Initialize the ring buffer's write pointers */
3225	ring->wptr = 0;
3226	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3227	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3228
3229	/* set the wb address whether it's enabled or not */
3230	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3231	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3232	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3233
3234	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3235	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3236	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3237
3238	mdelay(1);
3239	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3240
3241	rb_addr = ring->gpu_addr >> 8;
3242	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3243	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3244
3245	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3246	if (ring->use_doorbell) {
3247		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3248				    DOORBELL_OFFSET, ring->doorbell_index);
3249		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3250				    DOORBELL_EN, 1);
3251	} else {
3252		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3253	}
3254	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3255
3256	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3257			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3258	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3259
3260	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3261		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3262
3263
3264	/* start the ring */
3265	gfx_v9_0_cp_gfx_start(adev);
3266	ring->sched.ready = true;
3267
3268	return 0;
3269}
3270
3271static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3272{
3273	int i;
3274
3275	if (enable) {
3276		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3277	} else {
3278		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3279			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3280		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3281			adev->gfx.compute_ring[i].sched.ready = false;
3282		adev->gfx.kiq.ring.sched.ready = false;
3283	}
3284	udelay(50);
3285}
3286
3287static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3288{
3289	const struct gfx_firmware_header_v1_0 *mec_hdr;
3290	const __le32 *fw_data;
3291	unsigned i;
3292	u32 tmp;
3293
3294	if (!adev->gfx.mec_fw)
3295		return -EINVAL;
3296
3297	gfx_v9_0_cp_compute_enable(adev, false);
3298
3299	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3300	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3301
3302	fw_data = (const __le32 *)
3303		(adev->gfx.mec_fw->data +
3304		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3305	tmp = 0;
3306	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3307	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3308	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3309
3310	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3311		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3312	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3313		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3314
3315	/* MEC1 */
3316	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3317			 mec_hdr->jt_offset);
3318	for (i = 0; i < mec_hdr->jt_size; i++)
3319		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3320			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3321
3322	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3323			adev->gfx.mec_fw_version);
3324	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run microcode different from MEC1 */
3325
3326	return 0;
3327}
3328
3329/* KIQ functions */
3330static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3331{
3332	uint32_t tmp;
3333	struct amdgpu_device *adev = ring->adev;
3334
3335	/* tell the RLC which queue is the KIQ */
3336	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3337	tmp &= 0xffffff00;
3338	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3339	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3340	tmp |= 0x80;
3341	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3342}
3343
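/*
 * Map all compute queues (KCQs) through the KIQ: one SET_RESOURCES packet
 * announcing the queue mask, followed by a MAP_QUEUES packet per compute
 * ring carrying its MQD and wptr writeback addresses.
 */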
3344static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3345{
3346	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3347	uint64_t queue_mask = 0;
3348	int r, i;
3349
3350	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3351		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3352			continue;
3353
3354		/* This situation may be hit in the future if a new HW
3355		 * generation exposes more than 64 queues. If so, the
3356		 * definition of queue_mask needs updating */
3357		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3358			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3359			break;
3360		}
3361
3362		queue_mask |= (1ull << i);
3363	}
3364
3365	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3366	if (r) {
3367		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3368		return r;
3369	}
3370
3371	/* set resources */
3372	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3373	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3374			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3375	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3376	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3377	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3378	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3379	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3380	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3381	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3382		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3383		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3384		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3385
3386		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3387		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3388		amdgpu_ring_write(kiq_ring,
3389				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3390				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3391				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3392				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3393				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3394				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3395				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3396				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3397				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3398		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3399		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3400		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3401		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3402		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3403	}
3404
3405	r = amdgpu_ring_test_helper(kiq_ring);
3406	if (r)
3407		DRM_ERROR("KCQ enable failed\n");
3408
3409	return r;
3410}
3411
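/*
 * Fill the memory queue descriptor (MQD) for a compute ring.  The MQD holds
 * the full CP_HQD_* register state (EOP buffer, queue base, doorbell,
 * rptr/wptr addresses) from which the hardware queue can be (re)programmed.
 */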
3412static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3413{
3414	struct amdgpu_device *adev = ring->adev;
3415	struct v9_mqd *mqd = ring->mqd_ptr;
3416	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3417	uint32_t tmp;
3418
3419	mqd->header = 0xC0310800;
3420	mqd->compute_pipelinestat_enable = 0x00000001;
3421	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3422	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3423	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3424	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3425	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3426	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3427	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3428	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3429	mqd->compute_misc_reserved = 0x00000003;
3430
3431	mqd->dynamic_cu_mask_addr_lo =
3432		lower_32_bits(ring->mqd_gpu_addr
3433			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3434	mqd->dynamic_cu_mask_addr_hi =
3435		upper_32_bits(ring->mqd_gpu_addr
3436			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3437
3438	eop_base_addr = ring->eop_gpu_addr >> 8;
3439	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3440	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3441
3442	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3443	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3444	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3445			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3446
3447	mqd->cp_hqd_eop_control = tmp;
3448
3449	/* enable doorbell? */
3450	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3451
3452	if (ring->use_doorbell) {
3453		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3454				    DOORBELL_OFFSET, ring->doorbell_index);
3455		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456				    DOORBELL_EN, 1);
3457		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458				    DOORBELL_SOURCE, 0);
3459		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3460				    DOORBELL_HIT, 0);
3461	} else {
3462		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3463					 DOORBELL_EN, 0);
3464	}
3465
3466	mqd->cp_hqd_pq_doorbell_control = tmp;
3467
3468	/* disable the queue if it's active */
3469	ring->wptr = 0;
3470	mqd->cp_hqd_dequeue_request = 0;
3471	mqd->cp_hqd_pq_rptr = 0;
3472	mqd->cp_hqd_pq_wptr_lo = 0;
3473	mqd->cp_hqd_pq_wptr_hi = 0;
3474
3475	/* set the pointer to the MQD */
3476	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3477	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3478
3479	/* set MQD vmid to 0 */
3480	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3481	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3482	mqd->cp_mqd_control = tmp;
3483
3484	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3485	hqd_gpu_addr = ring->gpu_addr >> 8;
3486	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3487	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3488
3489	/* set up the HQD, this is similar to CP_RB0_CNTL */
3490	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3491	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3492			    (order_base_2(ring->ring_size / 4) - 1));
3493	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3494			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3495#ifdef __BIG_ENDIAN
3496	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3497#endif
3498	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3499	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3500	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3501	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3502	mqd->cp_hqd_pq_control = tmp;
3503
3504	/* set the wb address whether it's enabled or not */
3505	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3506	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3507	mqd->cp_hqd_pq_rptr_report_addr_hi =
3508		upper_32_bits(wb_gpu_addr) & 0xffff;
3509
3510	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3511	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3512	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3513	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3514
3515	tmp = 0;
3516	/* enable the doorbell if requested */
3517	if (ring->use_doorbell) {
3518		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3519		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3520				DOORBELL_OFFSET, ring->doorbell_index);
3521
3522		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3523					 DOORBELL_EN, 1);
3524		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3525					 DOORBELL_SOURCE, 0);
3526		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3527					 DOORBELL_HIT, 0);
3528	}
3529
3530	mqd->cp_hqd_pq_doorbell_control = tmp;
3531
3532	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3533	ring->wptr = 0;
3534	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3535
3536	/* set the vmid for the queue */
3537	mqd->cp_hqd_vmid = 0;
3538
3539	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3540	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3541	mqd->cp_hqd_persistent_state = tmp;
3542
3543	/* set MIN_IB_AVAIL_SIZE */
3544	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3545	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3546	mqd->cp_hqd_ib_control = tmp;
3547
3548	/* activate the queue */
3549	mqd->cp_hqd_active = 1;
3550
3551	return 0;
3552}
3553
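/*
 * Program the HQD registers for the KIQ directly from its MQD.  Unlike the
 * regular KCQs, which are mapped by the KIQ itself, the KIQ queue is set up
 * by the driver; the caller selects the KIQ's me/pipe/queue through
 * soc15_grbm_select() while holding srbm_mutex.
 */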
3554static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3555{
3556	struct amdgpu_device *adev = ring->adev;
3557	struct v9_mqd *mqd = ring->mqd_ptr;
3558	int j;
3559
3560	/* disable wptr polling */
3561	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3562
3563	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3564	       mqd->cp_hqd_eop_base_addr_lo);
3565	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3566	       mqd->cp_hqd_eop_base_addr_hi);
3567
3568	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3569	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3570	       mqd->cp_hqd_eop_control);
3571
3572	/* enable doorbell? */
3573	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3574	       mqd->cp_hqd_pq_doorbell_control);
3575
3576	/* disable the queue if it's active */
3577	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3578		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3579		for (j = 0; j < adev->usec_timeout; j++) {
3580			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3581				break;
3582			udelay(1);
3583		}
3584		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3585		       mqd->cp_hqd_dequeue_request);
3586		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3587		       mqd->cp_hqd_pq_rptr);
3588		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3589		       mqd->cp_hqd_pq_wptr_lo);
3590		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3591		       mqd->cp_hqd_pq_wptr_hi);
3592	}
3593
3594	/* set the pointer to the MQD */
3595	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3596	       mqd->cp_mqd_base_addr_lo);
3597	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3598	       mqd->cp_mqd_base_addr_hi);
3599
3600	/* set MQD vmid to 0 */
3601	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3602	       mqd->cp_mqd_control);
3603
3604	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3605	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3606	       mqd->cp_hqd_pq_base_lo);
3607	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3608	       mqd->cp_hqd_pq_base_hi);
3609
3610	/* set up the HQD, this is similar to CP_RB0_CNTL */
3611	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3612	       mqd->cp_hqd_pq_control);
3613
3614	/* set the wb address whether it's enabled or not */
3615	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3616				mqd->cp_hqd_pq_rptr_report_addr_lo);
3617	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3618				mqd->cp_hqd_pq_rptr_report_addr_hi);
3619
3620	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3621	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3622	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3623	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3624	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3625
3626	/* enable the doorbell if requested */
3627	if (ring->use_doorbell) {
3628		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3629					(adev->doorbell_index.kiq * 2) << 2);
3630		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3631					(adev->doorbell_index.userqueue_end * 2) << 2);
3632	}
3633
3634	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3635	       mqd->cp_hqd_pq_doorbell_control);
3636
3637	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3638	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3639	       mqd->cp_hqd_pq_wptr_lo);
3640	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3641	       mqd->cp_hqd_pq_wptr_hi);
3642
3643	/* set the vmid for the queue */
3644	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3645
3646	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3647	       mqd->cp_hqd_persistent_state);
3648
3649	/* activate the queue */
3650	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3651	       mqd->cp_hqd_active);
3652
3653	if (ring->use_doorbell)
3654		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3655
3656	return 0;
3657}
3658
3659static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3660{
3661	struct amdgpu_device *adev = ring->adev;
3662	int j;
3663
3664	/* disable the queue if it's active */
3665	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3666
3667		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3668
3669		for (j = 0; j < adev->usec_timeout; j++) {
3670			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3671				break;
3672			udelay(1);
3673		}
3674
3675		if (j == adev->usec_timeout) {
3676			DRM_DEBUG("KIQ dequeue request failed.\n");
3677
3678			/* Manually disable the queue if the dequeue request times out */
3679			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3680		}
3681
3682		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
3684	}
3685
3686	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3687	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3688	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3689	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3690	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3691	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3692	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3693	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3694
3695	return 0;
3696}
3697
3698static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3699{
3700	struct amdgpu_device *adev = ring->adev;
3701	struct v9_mqd *mqd = ring->mqd_ptr;
3702	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3703
3704	gfx_v9_0_kiq_setting(ring);
3705
3706	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3707		/* reset MQD to a clean status */
3708		if (adev->gfx.mec.mqd_backup[mqd_idx])
3709			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3710
3711		/* reset ring buffer */
3712		ring->wptr = 0;
3713		amdgpu_ring_clear_ring(ring);
3714
3715		mutex_lock(&adev->srbm_mutex);
3716		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3717		gfx_v9_0_kiq_init_register(ring);
3718		soc15_grbm_select(adev, 0, 0, 0, 0);
3719		mutex_unlock(&adev->srbm_mutex);
3720	} else {
3721		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3722		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3723		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3724		mutex_lock(&adev->srbm_mutex);
3725		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3726		gfx_v9_0_mqd_init(ring);
3727		gfx_v9_0_kiq_init_register(ring);
3728		soc15_grbm_select(adev, 0, 0, 0, 0);
3729		mutex_unlock(&adev->srbm_mutex);
3730
3731		if (adev->gfx.mec.mqd_backup[mqd_idx])
3732			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3733	}
3734
3735	return 0;
3736}
3737
3738static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3739{
3740	struct amdgpu_device *adev = ring->adev;
3741	struct v9_mqd *mqd = ring->mqd_ptr;
3742	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3743
3744	if (!adev->in_gpu_reset && !adev->in_suspend) {
3745		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3746		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3747		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3748		mutex_lock(&adev->srbm_mutex);
3749		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3750		gfx_v9_0_mqd_init(ring);
3751		soc15_grbm_select(adev, 0, 0, 0, 0);
3752		mutex_unlock(&adev->srbm_mutex);
3753
3754		if (adev->gfx.mec.mqd_backup[mqd_idx])
3755			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3756	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3757		/* reset MQD to a clean status */
3758		if (adev->gfx.mec.mqd_backup[mqd_idx])
3759			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3760
3761		/* reset ring buffer */
3762		ring->wptr = 0;
3763		amdgpu_ring_clear_ring(ring);
3764	} else {
3765		amdgpu_ring_clear_ring(ring);
3766	}
3767
3768	return 0;
3769}
3770
3771static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3772{
3773	struct amdgpu_ring *ring;
3774	int r;
3775
3776	ring = &adev->gfx.kiq.ring;
3777
3778	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3779	if (unlikely(r != 0))
3780		return r;
3781
3782	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3783	if (unlikely(r != 0))
3784		return r;
3785
3786	gfx_v9_0_kiq_init_queue(ring);
3787	amdgpu_bo_kunmap(ring->mqd_obj);
3788	ring->mqd_ptr = NULL;
3789	amdgpu_bo_unreserve(ring->mqd_obj);
3790	ring->sched.ready = true;
3791	return 0;
3792}
3793
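/*
 * Resume all compute queues: initialize each ring's MQD (from scratch, or
 * from the backup copy on GPU reset), then have the KIQ map them via
 * gfx_v9_0_kiq_kcq_enable().
 */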
3794static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3795{
3796	struct amdgpu_ring *ring = NULL;
3797	int r = 0, i;
3798
3799	gfx_v9_0_cp_compute_enable(adev, true);
3800
3801	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3802		ring = &adev->gfx.compute_ring[i];
3803
3804		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3805		if (unlikely(r != 0))
3806			goto done;
3807		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3808		if (!r) {
3809			r = gfx_v9_0_kcq_init_queue(ring);
3810			amdgpu_bo_kunmap(ring->mqd_obj);
3811			ring->mqd_ptr = NULL;
3812		}
3813		amdgpu_bo_unreserve(ring->mqd_obj);
3814		if (r)
3815			goto done;
3816	}
3817
3818	r = gfx_v9_0_kiq_kcq_enable(adev);
3819done:
3820	return r;
3821}
3822
3823static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3824{
3825	int r, i;
3826	struct amdgpu_ring *ring;
3827
3828	if (!(adev->flags & AMD_IS_APU))
3829		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3830
3831	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3832		if (adev->asic_type != CHIP_ARCTURUS) {
3833			/* legacy firmware loading */
3834			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3835			if (r)
3836				return r;
3837		}
3838
3839		r = gfx_v9_0_cp_compute_load_microcode(adev);
3840		if (r)
3841			return r;
3842	}
3843
3844	r = gfx_v9_0_kiq_resume(adev);
3845	if (r)
3846		return r;
3847
3848	if (adev->asic_type != CHIP_ARCTURUS) {
3849		r = gfx_v9_0_cp_gfx_resume(adev);
3850		if (r)
3851			return r;
3852	}
3853
3854	r = gfx_v9_0_kcq_resume(adev);
3855	if (r)
3856		return r;
3857
3858	if (adev->asic_type != CHIP_ARCTURUS) {
3859		ring = &adev->gfx.gfx_ring[0];
3860		r = amdgpu_ring_test_helper(ring);
3861		if (r)
3862			return r;
3863	}
3864
3865	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3866		ring = &adev->gfx.compute_ring[i];
3867		amdgpu_ring_test_helper(ring);
3868	}
3869
3870	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3871
3872	return 0;
3873}
3874
3875static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3876{
3877	if (adev->asic_type != CHIP_ARCTURUS)
3878		gfx_v9_0_cp_gfx_enable(adev, enable);
3879	gfx_v9_0_cp_compute_enable(adev, enable);
3880}
3881
3882static int gfx_v9_0_hw_init(void *handle)
3883{
3884	int r;
3885	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3886
3887	if (!amdgpu_sriov_vf(adev))
3888		gfx_v9_0_init_golden_registers(adev);
3889
3890	gfx_v9_0_constants_init(adev);
3891
3892	r = gfx_v9_0_csb_vram_pin(adev);
3893	if (r)
3894		return r;
3895
3896	r = adev->gfx.rlc.funcs->resume(adev);
3897	if (r)
3898		return r;
3899
3900	r = gfx_v9_0_cp_resume(adev);
3901	if (r)
3902		return r;
3903
3904	if (adev->asic_type != CHIP_ARCTURUS) {
3905		r = gfx_v9_0_ngg_en(adev);
3906		if (r)
3907			return r;
3908	}
3909
3910	return r;
3911}
3912
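/*
 * Unmap all compute queues through the KIQ using UNMAP_QUEUES with the
 * RESET_QUEUES action.
 */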
3913static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3914{
3915	int r, i;
3916	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3917
3918	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3919	if (r)
3920		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3921
3922	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3923		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3924
3925		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3926		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3927						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3928						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3929						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3930						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3931		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3932		amdgpu_ring_write(kiq_ring, 0);
3933		amdgpu_ring_write(kiq_ring, 0);
3934		amdgpu_ring_write(kiq_ring, 0);
3935	}
3936	r = amdgpu_ring_test_helper(kiq_ring);
3937	if (r)
3938		DRM_ERROR("KCQ disable failed\n");
3939
3940	return r;
3941}
3942
3943static int gfx_v9_0_hw_fini(void *handle)
3944{
3945	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3946
3947	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3948	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3949	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3950
3951	/* disable KCQs to avoid the CPC touching memory that is no longer valid */
3952	gfx_v9_0_kcq_disable(adev);
3953
3954	if (amdgpu_sriov_vf(adev)) {
3955		gfx_v9_0_cp_gfx_enable(adev, false);
3956		/* wptr polling must be disabled for SRIOV once the hw is finished,
3957		 * otherwise the CPC engine may keep fetching a WB address that is
3958		 * no longer valid after sw teardown and trigger a DMAR read error
3959		 * on the hypervisor side.
3960		 */
3961		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3962		return 0;
3963	}
3964
3965	/* Use the deinitialization sequence from CAIL when unbinding the device
3966	 * from the driver, otherwise the KIQ hangs when the device is bound back.
3967	 */
3968	if (!adev->in_gpu_reset && !adev->in_suspend) {
3969		mutex_lock(&adev->srbm_mutex);
3970		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3971				adev->gfx.kiq.ring.pipe,
3972				adev->gfx.kiq.ring.queue, 0);
3973		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3974		soc15_grbm_select(adev, 0, 0, 0, 0);
3975		mutex_unlock(&adev->srbm_mutex);
3976	}
3977
3978	gfx_v9_0_cp_enable(adev, false);
3979	adev->gfx.rlc.funcs->stop(adev);
3980
3981	gfx_v9_0_csb_vram_unpin(adev);
3982
3983	return 0;
3984}
3985
3986static int gfx_v9_0_suspend(void *handle)
3987{
3988	return gfx_v9_0_hw_fini(handle);
3989}
3990
3991static int gfx_v9_0_resume(void *handle)
3992{
3993	return gfx_v9_0_hw_init(handle);
3994}
3995
3996static bool gfx_v9_0_is_idle(void *handle)
3997{
3998	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3999
4000	return !REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4001			      GRBM_STATUS, GUI_ACTIVE);
4005}
4006
4007static int gfx_v9_0_wait_for_idle(void *handle)
4008{
4009	unsigned i;
4010	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4011
4012	for (i = 0; i < adev->usec_timeout; i++) {
4013		if (gfx_v9_0_is_idle(handle))
4014			return 0;
4015		udelay(1);
4016	}
4017	return -ETIMEDOUT;
4018}
4019
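/*
 * Soft reset: check GRBM_STATUS/GRBM_STATUS2 for busy blocks, halt the RLC
 * and CP engines, then pulse the corresponding GRBM_SOFT_RESET bits.
 */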
4020static int gfx_v9_0_soft_reset(void *handle)
4021{
4022	u32 grbm_soft_reset = 0;
4023	u32 tmp;
4024	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4025
4026	/* GRBM_STATUS */
4027	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4028	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4029		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4030		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4031		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4032		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4033		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4034		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4035						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4036		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4037						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4038	}
4039
4040	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4041		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4042						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4043	}
4044
4045	/* GRBM_STATUS2 */
4046	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4047	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4048		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4049						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4050
4051
4052	if (grbm_soft_reset) {
4053		/* stop the rlc */
4054		adev->gfx.rlc.funcs->stop(adev);
4055
4056		if (adev->asic_type != CHIP_ARCTURUS)
4057			/* Disable GFX parsing/prefetching */
4058			gfx_v9_0_cp_gfx_enable(adev, false);
4059
4060		/* Disable MEC parsing/prefetching */
4061		gfx_v9_0_cp_compute_enable(adev, false);
4062
4063		if (grbm_soft_reset) {
4064			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4065			tmp |= grbm_soft_reset;
4066			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4067			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4068			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4069
4070			udelay(50);
4071
4072			tmp &= ~grbm_soft_reset;
4073			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4074			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4075		}
4076
4077		/* Wait a little for things to settle down */
4078		udelay(50);
4079	}
4080	return 0;
4081}
4082
4083static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4084{
4085	uint64_t clock;
4086
4087	mutex_lock(&adev->gfx.gpu_clock_mutex);
4088	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4089	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4090		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4091	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4092	return clock;
4093}
4094
4095static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4096					  uint32_t vmid,
4097					  uint32_t gds_base, uint32_t gds_size,
4098					  uint32_t gws_base, uint32_t gws_size,
4099					  uint32_t oa_base, uint32_t oa_size)
4100{
4101	struct amdgpu_device *adev = ring->adev;
4102
4103	/* GDS Base */
4104	gfx_v9_0_write_data_to_reg(ring, 0, false,
4105				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4106				   gds_base);
4107
4108	/* GDS Size */
4109	gfx_v9_0_write_data_to_reg(ring, 0, false,
4110				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4111				   gds_size);
4112
4113	/* GWS */
4114	gfx_v9_0_write_data_to_reg(ring, 0, false,
4115				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4116				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4117
4118	/* OA */
4119	gfx_v9_0_write_data_to_reg(ring, 0, false,
4120				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4121				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4122}
4123
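/*
 * Pre-assembled GFX9 compute shader binaries used by the EDC workarounds
 * below; they touch every VGPR/SGPR so the GPR ECC state gets initialized.
 */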
4124static const u32 vgpr_init_compute_shader[] =
4125{
4126	0xb07c0000, 0xbe8000ff,
4127	0x000000f8, 0xbf110800,
4128	0x7e000280, 0x7e020280,
4129	0x7e040280, 0x7e060280,
4130	0x7e080280, 0x7e0a0280,
4131	0x7e0c0280, 0x7e0e0280,
4132	0x80808800, 0xbe803200,
4133	0xbf84fff5, 0xbf9c0000,
4134	0xd28c0001, 0x0001007f,
4135	0xd28d0001, 0x0002027e,
4136	0x10020288, 0xb8810904,
4137	0xb7814000, 0xd1196a01,
4138	0x00000301, 0xbe800087,
4139	0xbefc00c1, 0xd89c4000,
4140	0x00020201, 0xd89cc080,
4141	0x00040401, 0x320202ff,
4142	0x00000800, 0x80808100,
4143	0xbf84fff8, 0x7e020280,
4144	0xbf810000, 0x00000000,
4145};
4146
4147static const u32 sgpr_init_compute_shader[] =
4148{
4149	0xb07c0000, 0xbe8000ff,
4150	0x0000005f, 0xbee50080,
4151	0xbe812c65, 0xbe822c65,
4152	0xbe832c65, 0xbe842c65,
4153	0xbe852c65, 0xb77c0005,
4154	0x80808500, 0xbf84fff8,
4155	0xbe800080, 0xbf810000,
4156};
4157
4158static const struct soc15_reg_entry vgpr_init_regs[] = {
4159   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4160   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4161   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4162   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4163   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4164   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4165   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4166   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4167   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4168   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4169};
4170
4171static const struct soc15_reg_entry sgpr_init_regs[] = {
4172   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4173   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4174   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4175   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4176   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4177   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4178   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4179   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4180   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4181   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4182};
4183
4184static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4185   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4186   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4187   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4188   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4189   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4190   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4191   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4192   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4193   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4194   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4195   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4196   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4197   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4198   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4199   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4200   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4201   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4202   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4203   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4204   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4205   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4206   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4207   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4208   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4209   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4210   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4211   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4212   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4213   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4214   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4215   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4216   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4217};
4218
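/*
 * GDS EDC workaround: DMA-fill the whole GDS aperture from the first
 * compute ring and wait for the ring to drain, initializing the GDS ECC
 * state.
 */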
4219static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4220{
4221	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4222	int i, r;
4223
4224	r = amdgpu_ring_alloc(ring, 7);
4225	if (r) {
4226		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4227			ring->name, r);
4228		return r;
4229	}
4230
4231	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4232	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4233
4234	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4235	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4236				PACKET3_DMA_DATA_DST_SEL(1) |
4237				PACKET3_DMA_DATA_SRC_SEL(2) |
4238				PACKET3_DMA_DATA_ENGINE(0)));
4239	amdgpu_ring_write(ring, 0);
4240	amdgpu_ring_write(ring, 0);
4241	amdgpu_ring_write(ring, 0);
4242	amdgpu_ring_write(ring, 0);
4243	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4244				adev->gds.gds_size);
4245
4246	amdgpu_ring_commit(ring);
4247
4248	for (i = 0; i < adev->usec_timeout; i++) {
4249		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4250			break;
4251		udelay(1);
4252	}
4253
4254	if (i >= adev->usec_timeout)
4255		r = -ETIMEDOUT;
4256
4257	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4258
4259	return r;
4260}
4261
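/*
 * GPR EDC workaround: build an IB that dispatches the vgpr/sgpr init
 * shaders above, submit it on the first compute ring, then read back all
 * SEC/DED counter registers to clear them.
 */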
4262static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4263{
4264	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4265	struct amdgpu_ib ib;
4266	struct dma_fence *f = NULL;
4267	int r, i, j, k;
4268	unsigned total_size, vgpr_offset, sgpr_offset;
4269	u64 gpu_addr;
4270
4271	/* only supported when RAS is enabled */
4272	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4273		return 0;
4274
4275	/* bail if the compute ring is not ready */
4276	if (!ring->sched.ready)
4277		return 0;
4278
4279	total_size =
4280		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4281	total_size +=
4282		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4283	total_size = ALIGN(total_size, 256);
4284	vgpr_offset = total_size;
4285	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4286	sgpr_offset = total_size;
4287	total_size += sizeof(sgpr_init_compute_shader);
4288
4289	/* allocate an indirect buffer to put the commands in */
4290	memset(&ib, 0, sizeof(ib));
4291	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4292	if (r) {
4293		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4294		return r;
4295	}
4296
4297	/* load the compute shaders */
4298	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4299		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4300
4301	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4302		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4303
4304	/* init the ib length to 0 */
4305	ib.length_dw = 0;
4306
4307	/* VGPR */
4308	/* write the register state for the compute dispatch */
4309	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4310		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4311		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4312								- PACKET3_SET_SH_REG_START;
4313		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4314	}
4315	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4316	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4317	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4318	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4319							- PACKET3_SET_SH_REG_START;
4320	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4321	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4322
4323	/* write dispatch packet */
4324	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4325	ib.ptr[ib.length_dw++] = 128; /* x */
4326	ib.ptr[ib.length_dw++] = 1; /* y */
4327	ib.ptr[ib.length_dw++] = 1; /* z */
4328	ib.ptr[ib.length_dw++] =
4329		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4330
4331	/* write CS partial flush packet */
4332	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4333	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4334
4335	/* SGPR */
4336	/* write the register state for the compute dispatch */
4337	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4338		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4339		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4340								- PACKET3_SET_SH_REG_START;
4341		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4342	}
4343	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4344	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4345	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4346	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4347							- PACKET3_SET_SH_REG_START;
4348	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4349	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4350
4351	/* write dispatch packet */
4352	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4353	ib.ptr[ib.length_dw++] = 128; /* x */
4354	ib.ptr[ib.length_dw++] = 1; /* y */
4355	ib.ptr[ib.length_dw++] = 1; /* z */
4356	ib.ptr[ib.length_dw++] =
4357		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4358
4359	/* write CS partial flush packet */
4360	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4361	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4362
4363	/* schedule the ib on the ring */
4364	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4365	if (r) {
4366		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4367		goto fail;
4368	}
4369
4370	/* wait for the GPU to finish processing the IB */
4371	r = dma_fence_wait(f, false);
4372	if (r) {
4373		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4374		goto fail;
4375	}
4376
4377	/* read back registers to clear the counters */
4378	mutex_lock(&adev->grbm_idx_mutex);
4379	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4380		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4381			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4382				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4383				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4384			}
4385		}
4386	}
4387	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4388	mutex_unlock(&adev->grbm_idx_mutex);
4389
4390fail:
4391	amdgpu_ib_free(adev, &ib, NULL);
4392	dma_fence_put(f);
4393
4394	return r;
4395}
4396
4397static int gfx_v9_0_early_init(void *handle)
4398{
4399	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4400
4401	if (adev->asic_type == CHIP_ARCTURUS)
4402		adev->gfx.num_gfx_rings = 0;
4403	else
4404		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4405	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4406	gfx_v9_0_set_ring_funcs(adev);
4407	gfx_v9_0_set_irq_funcs(adev);
4408	gfx_v9_0_set_gds_init(adev);
4409	gfx_v9_0_set_rlc_funcs(adev);
4410
4411	return 0;
4412}
4413
4414static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4415		struct ras_err_data *err_data,
4416		struct amdgpu_iv_entry *entry);
4417
4418static int gfx_v9_0_ecc_late_init(void *handle)
4419{
4420	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4421	struct ras_common_if **ras_if = &adev->gfx.ras_if;
4422	struct ras_ih_if ih_info = {
4423		.cb = gfx_v9_0_process_ras_data_cb,
4424	};
4425	struct ras_fs_if fs_info = {
4426		.sysfs_name = "gfx_err_count",
4427		.debugfs_name = "gfx_err_inject",
4428	};
4429	struct ras_common_if ras_block = {
4430		.block = AMDGPU_RAS_BLOCK__GFX,
4431		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4432		.sub_block_index = 0,
4433		.name = "gfx",
4434	};
4435	int r;
4436
4437	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4438		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4439		return 0;
4440	}
4441
4442	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4443	if (r)
4444		return r;
4445
4446	/* requires IBs so do in late init after IB pool is initialized */
4447	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4448	if (r)
4449		return r;
4450
4451	/* handle resume path. */
4452	if (*ras_if) {
4453		/* resend the RAS TA enable cmd during resume and
4454		 * prepare to handle failure.
4455		 */
4456		ih_info.head = **ras_if;
4457		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4458		if (r) {
4459			if (r == -EAGAIN) {
4460				/* request a gpu reset. will run again. */
4461				amdgpu_ras_request_reset_on_boot(adev,
4462						AMDGPU_RAS_BLOCK__GFX);
4463				return 0;
4464			}
4465			/* fail to enable ras, cleanup all. */
4466			goto irq;
4467		}
4468		/* enable successfully. continue. */
4469		goto resume;
4470	}
4471
4472	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4473	if (!*ras_if)
4474		return -ENOMEM;
4475
4476	**ras_if = ras_block;
4477
4478	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4479	if (r) {
4480		if (r == -EAGAIN) {
4481			amdgpu_ras_request_reset_on_boot(adev,
4482					AMDGPU_RAS_BLOCK__GFX);
4483			r = 0;
4484		}
4485		goto feature;
4486	}
4487
4488	ih_info.head = **ras_if;
4489	fs_info.head = **ras_if;
4490
4491	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4492	if (r)
4493		goto interrupt;
4494
4495	amdgpu_ras_debugfs_create(adev, &fs_info);
4496
4497	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4498	if (r)
4499		goto sysfs;
4500resume:
4501	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4502	if (r)
4503		goto irq;
4504
4505	return 0;
4506irq:
4507	amdgpu_ras_sysfs_remove(adev, *ras_if);
4508sysfs:
4509	amdgpu_ras_debugfs_remove(adev, *ras_if);
4510	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4511interrupt:
4512	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4513feature:
4514	kfree(*ras_if);
4515	*ras_if = NULL;
4516	return r;
4517}
4518
4519static int gfx_v9_0_late_init(void *handle)
4520{
4521	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4522	int r;
4523
4524	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4525	if (r)
4526		return r;
4527
4528	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4529	if (r)
4530		return r;
4531
4532	r = gfx_v9_0_ecc_late_init(handle);
4533	if (r)
4534		return r;
4535
4536	return 0;
4537}
4538
4539static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4540{
4541	uint32_t rlc_setting;
4542
4543	/* if RLC is not enabled, do nothing */
4544	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4545	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4546		return false;
4547
4548	return true;
4549}
4550
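/*
 * Request RLC safe mode: write CMD with MESSAGE=1 to RLC_SAFE_MODE, then
 * poll until the RLC clears the CMD bit to acknowledge the request.
 */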
4551static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4552{
4553	uint32_t data;
4554	unsigned i;
4555
4556	data = RLC_SAFE_MODE__CMD_MASK;
4557	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4558	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4559
4560	/* wait for RLC_SAFE_MODE */
4561	for (i = 0; i < adev->usec_timeout; i++) {
4562		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4563			break;
4564		udelay(1);
4565	}
4566}
4567
4568static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4569{
4570	uint32_t data;
4571
4572	data = RLC_SAFE_MODE__CMD_MASK;
4573	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4574}
4575
4576static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4577						bool enable)
4578{
4579	amdgpu_gfx_rlc_enter_safe_mode(adev);
4580
4581	if (is_support_sw_smu(adev) && !enable)
4582		smu_set_gfx_cgpg(&adev->smu, enable);
4583
4584	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4585		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4586		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4587			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4588	} else {
4589		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4590		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4591	}
4592
4593	amdgpu_gfx_rlc_exit_safe_mode(adev);
4594}
4595
4596static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4597						bool enable)
4598{
4599	/* TODO: double check if we need to perform under safe mode */
4600	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4601
4602	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4603		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4604	else
4605		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4606
4607	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4608		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4609	else
4610		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4611
4612	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4613}
4614
4615static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4616						      bool enable)
4617{
4618	uint32_t data, def;
4619
4620	amdgpu_gfx_rlc_enter_safe_mode(adev);
4621
4622	/* It is disabled by HW by default */
4623	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4624		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4625		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4626
4627		if (adev->asic_type != CHIP_VEGA12)
4628			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4629
4630		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4631			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4632			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4633
4634		/* only for Vega10 & Raven1 */
4635		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4636
4637		if (def != data)
4638			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4639
4640		/* MGLS is a global flag to control all MGLS in GFX */
4641		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4642			/* 2 - RLC memory Light sleep */
4643			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4644				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4645				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4646				if (def != data)
4647					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4648			}
4649			/* 3 - CP memory Light sleep */
4650			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4651				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4652				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4653				if (def != data)
4654					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4655			}
4656		}
4657	} else {
4658		/* 1 - MGCG_OVERRIDE */
4659		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4660
4661		if (adev->asic_type != CHIP_VEGA12)
4662			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4663
4664		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4665			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4666			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4667			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4668
4669		if (def != data)
4670			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4671
4672		/* 2 - disable MGLS in RLC */
4673		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4674		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4675			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4676			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4677		}
4678
4679		/* 3 - disable MGLS in CP */
4680		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4681		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4682			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4683			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4684		}
4685	}
4686
4687	amdgpu_gfx_rlc_exit_safe_mode(adev);
4688}
4689
4690static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4691					   bool enable)
4692{
4693	uint32_t data, def;
4694
4695	if (adev->asic_type == CHIP_ARCTURUS)
4696		return;
4697
4698	amdgpu_gfx_rlc_enter_safe_mode(adev);
4699
4700	/* Enable 3D CGCG/CGLS */
4701	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4702		/* write cmd to clear cgcg/cgls ov */
4703		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4704		/* unset CGCG override */
4705		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4706		/* update CGCG and CGLS override bits */
4707		if (def != data)
4708			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4709
4710		/* enable 3D CGCG FSM (0x0000363f) */
4711		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4712
4713		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4714			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4715		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4716			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4717				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4718		if (def != data)
4719			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4720
4721		/* set IDLE_POLL_COUNT(0x00900100) */
4722		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4723		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4724			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4725		if (def != data)
4726			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4727	} else {
4728		/* Disable CGCG/CGLS */
4729		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4730		/* disable cgcg, cgls should be disabled */
4731		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4732			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4733		/* disable cgcg and cgls in FSM */
4734		if (def != data)
4735			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4736	}
4737
4738	amdgpu_gfx_rlc_exit_safe_mode(adev);
4739}
4740
4741static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4742						      bool enable)
4743{
4744	uint32_t def, data;
4745
4746	amdgpu_gfx_rlc_enter_safe_mode(adev);
4747
4748	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4749		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4750		/* unset CGCG override */
4751		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4752		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4753			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4754		else
4755			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4756		/* update CGCG and CGLS override bits */
4757		if (def != data)
4758			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4759
4760		/* enable CGCG FSM (0x0000363F) */
4761		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4762
4763		if (adev->asic_type == CHIP_ARCTURUS)
4764			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4765				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4766		else
4767			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4768				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4769		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4770			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4771				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4772		if (def != data)
4773			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4774
4775		/* set IDLE_POLL_COUNT(0x00900100) */
4776		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4777		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4778			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4779		if (def != data)
4780			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4781	} else {
4782		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4783		/* reset CGCG/CGLS bits */
4784		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4785		/* disable cgcg and cgls in FSM */
4786		if (def != data)
4787			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4788	}
4789
4790	amdgpu_gfx_rlc_exit_safe_mode(adev);
4791}
4792
4793static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4794					    bool enable)
4795{
4796	if (enable) {
4797		/* CGCG/CGLS should be enabled after MGCG/MGLS
4798		 * ===  MGCG + MGLS ===
4799		 */
4800		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4801		/* ===  CGCG /CGLS for GFX 3D Only === */
4802		gfx_v9_0_update_3d_clock_gating(adev, enable);
4803		/* ===  CGCG + CGLS === */
4804		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4805	} else {
4806		/* CGCG/CGLS should be disabled before MGCG/MGLS
4807		 * ===  CGCG + CGLS ===
4808		 */
4809		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4810		/* ===  CGCG /CGLS for GFX 3D Only === */
4811		gfx_v9_0_update_3d_clock_gating(adev, enable);
4812		/* ===  MGCG + MGLS === */
4813		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4814	}
4815	return 0;
4816}
4817
4818static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4819	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4820	.set_safe_mode = gfx_v9_0_set_safe_mode,
4821	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4822	.init = gfx_v9_0_rlc_init,
4823	.get_csb_size = gfx_v9_0_get_csb_size,
4824	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4825	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4826	.resume = gfx_v9_0_rlc_resume,
4827	.stop = gfx_v9_0_rlc_stop,
4828	.reset = gfx_v9_0_rlc_reset,
4829	.start = gfx_v9_0_rlc_start
4830};
4831
4832static int gfx_v9_0_set_powergating_state(void *handle,
4833					  enum amd_powergating_state state)
4834{
4835	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4836	bool enable = (state == AMD_PG_STATE_GATE);
4837
4838	switch (adev->asic_type) {
4839	case CHIP_RAVEN:
4840	case CHIP_RENOIR:
4841		if (!enable) {
4842			amdgpu_gfx_off_ctrl(adev, false);
4843			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4844		}
4845		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4846			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4847			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4848		} else {
4849			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4850			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4851		}
4852
4853		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4854			gfx_v9_0_enable_cp_power_gating(adev, true);
4855		else
4856			gfx_v9_0_enable_cp_power_gating(adev, false);
4857
4858		/* update gfx cgpg state */
4859		if (is_support_sw_smu(adev) && enable)
4860			smu_set_gfx_cgpg(&adev->smu, enable);
4861		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4862
4863		/* update mgcg state */
4864		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4865
4866		if (enable)
4867			amdgpu_gfx_off_ctrl(adev, true);
4868		break;
4869	case CHIP_VEGA12:
4870		if (!enable) {
4871			amdgpu_gfx_off_ctrl(adev, false);
4872			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4873		} else {
4874			amdgpu_gfx_off_ctrl(adev, true);
4875		}
4876		break;
4877	default:
4878		break;
4879	}
4880
4881	return 0;
4882}
4883
4884static int gfx_v9_0_set_clockgating_state(void *handle,
4885					  enum amd_clockgating_state state)
4886{
4887	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888
4889	if (amdgpu_sriov_vf(adev))
4890		return 0;
4891
4892	switch (adev->asic_type) {
4893	case CHIP_VEGA10:
4894	case CHIP_VEGA12:
4895	case CHIP_VEGA20:
4896	case CHIP_RAVEN:
4897	case CHIP_ARCTURUS:
4898	case CHIP_RENOIR:
4899		gfx_v9_0_update_gfx_clock_gating(adev,
4900						 state == AMD_CG_STATE_GATE);
4901		break;
4902	default:
4903		break;
4904	}
4905	return 0;
4906}
4907
4908static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4909{
4910	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4911	int data;
4912
4913	if (amdgpu_sriov_vf(adev))
4914		*flags = 0;
4915
4916	/* AMD_CG_SUPPORT_GFX_MGCG */
4917	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4918	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4919		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4920
4921	/* AMD_CG_SUPPORT_GFX_CGCG */
4922	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4923	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4924		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4925
4926	/* AMD_CG_SUPPORT_GFX_CGLS */
4927	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4928		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4929
4930	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4931	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4932	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4933		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4934
4935	/* AMD_CG_SUPPORT_GFX_CP_LS */
4936	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4937	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4938		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4939
4940	if (adev->asic_type != CHIP_ARCTURUS) {
4941		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4942		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4943		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4944			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4945
4946		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4947		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4948			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4949	}
4950}
4951
4952static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4953{
4954	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4955}
4956
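/*
 * The gfx ring write pointer is 64-bit: read it from the writeback slot
 * when doorbells are in use, otherwise from CP_RB0_WPTR/CP_RB0_WPTR_HI.
 */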
4957static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4958{
4959	struct amdgpu_device *adev = ring->adev;
4960	u64 wptr;
4961
4962	/* XXX check if swapping is necessary on BE */
4963	if (ring->use_doorbell) {
4964		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4965	} else {
4966		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4967		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4968	}
4969
4970	return wptr;
4971}
4972
4973static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4974{
4975	struct amdgpu_device *adev = ring->adev;
4976
4977	if (ring->use_doorbell) {
4978		/* XXX check if swapping is necessary on BE */
4979		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4980		WDOORBELL64(ring->doorbell_index, ring->wptr);
4981	} else {
4982		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4983		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4984	}
4985}
4986
4987static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4988{
4989	struct amdgpu_device *adev = ring->adev;
4990	u32 ref_and_mask, reg_mem_engine;
4991	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4992
4993	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4994		switch (ring->me) {
4995		case 1:
4996			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4997			break;
4998		case 2:
4999			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5000			break;
5001		default:
5002			return;
5003		}
5004		reg_mem_engine = 0;
5005	} else {
5006		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5007		reg_mem_engine = 1; /* pfp */
5008	}
5009
5010	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5011			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
5012			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
5013			      ref_and_mask, ref_and_mask, 0x20);
5014}
5015
5016static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5017					struct amdgpu_job *job,
5018					struct amdgpu_ib *ib,
5019					uint32_t flags)
5020{
5021	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5022	u32 header, control = 0;
5023
5024	if (ib->flags & AMDGPU_IB_FLAG_CE)
5025		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5026	else
5027		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5028
5029	control |= ib->length_dw | (vmid << 24);
5030
5031	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5032		control |= INDIRECT_BUFFER_PRE_ENB(1);
5033
5034		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5035			gfx_v9_0_ring_emit_de_meta(ring);
5036	}
5037
5038	amdgpu_ring_write(ring, header);
5039	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5040	amdgpu_ring_write(ring,
5041#ifdef __BIG_ENDIAN
5042		(2 << 0) |
5043#endif
5044		lower_32_bits(ib->gpu_addr));
5045	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5046	amdgpu_ring_write(ring, control);
5047}
5048
5049static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5050					  struct amdgpu_job *job,
5051					  struct amdgpu_ib *ib,
5052					  uint32_t flags)
5053{
5054	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5055	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5056
5057	/* Currently there is a high probability of a wave ID mismatch
5058	 * between ME and GDS, leading to a hw deadlock, because ME generates
5059	 * different wave IDs than the GDS expects. This situation happens
5060	 * randomly when at least 5 compute pipes use GDS ordered append.
5061	 * The wave IDs generated by ME are also wrong after suspend/resume.
5062	 * Those are probably bugs somewhere else in the kernel driver.
5063	 *
5064	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5065	 * GDS to 0 for this ring (me/pipe).
5066	 */
5067	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5068		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5069		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5070		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5071	}
5072
5073	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5074	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5075	amdgpu_ring_write(ring,
5076#ifdef __BIG_ENDIAN
5077				(2 << 0) |
5078#endif
5079				lower_32_bits(ib->gpu_addr));
5080	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5081	amdgpu_ring_write(ring, control);
5082}
5083
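/*
 * Emit a RELEASE_MEM fence: flush/invalidate caches (TC writeback only if
 * AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), write the 32- or 64-bit sequence
 * value to @addr and optionally raise an interrupt, depending on @flags.
 */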
5084static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5085				     u64 seq, unsigned flags)
5086{
5087	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5088	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5089	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5090
5091	/* RELEASE_MEM - flush caches, send int */
5092	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5093	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5094					       EOP_TC_NC_ACTION_EN) :
5095					      (EOP_TCL1_ACTION_EN |
5096					       EOP_TC_ACTION_EN |
5097					       EOP_TC_WB_ACTION_EN |
5098					       EOP_TC_MD_ACTION_EN)) |
5099				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5100				 EVENT_INDEX(5)));
5101	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5102
5103	/*
5104	 * The address must be Qword aligned for a 64-bit write, or Dword
5105	 * aligned when only the low 32 bits are written (data high is discarded).
5106	 */
5107	if (write64bit)
5108		BUG_ON(addr & 0x7);
5109	else
5110		BUG_ON(addr & 0x3);
5111	amdgpu_ring_write(ring, lower_32_bits(addr));
5112	amdgpu_ring_write(ring, upper_32_bits(addr));
5113	amdgpu_ring_write(ring, lower_32_bits(seq));
5114	amdgpu_ring_write(ring, upper_32_bits(seq));
5115	amdgpu_ring_write(ring, 0);
5116}
5117
5118static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5119{
5120	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5121	uint32_t seq = ring->fence_drv.sync_seq;
5122	uint64_t addr = ring->fence_drv.gpu_addr;
5123
5124	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5125			      lower_32_bits(addr), upper_32_bits(addr),
5126			      seq, 0xffffffff, 4);
5127}
5128
5129static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5130					unsigned vmid, uint64_t pd_addr)
5131{
5132	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5133
5134	/* compute doesn't have PFP */
5135	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5136		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5137		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5138		amdgpu_ring_write(ring, 0x0);
5139	}
5140}
5141
5142static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5143{
5144	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5145}
5146
5147static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5148{
5149	u64 wptr;
5150
5151	/* XXX check if swapping is necessary on BE */
5152	if (ring->use_doorbell)
5153		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5154	else
5155		BUG();
5156	return wptr;
5157}
5158
5159static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5160					   bool acquire)
5161{
5162	struct amdgpu_device *adev = ring->adev;
5163	int pipe_num, tmp, reg;
5164	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5165
5166	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5167
5168	/* first me only has 2 entries, GFX and HP3D */
5169	if (ring->me > 0)
5170		pipe_num -= 2;
5171
5172	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5173	tmp = RREG32(reg);
5174	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5175	WREG32(reg, tmp);
5176}
5177
5178static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5179					    struct amdgpu_ring *ring,
5180					    bool acquire)
5181{
5182	int i, pipe;
5183	bool reserve;
5184	struct amdgpu_ring *iring;
5185
5186	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5187	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5188	if (acquire)
5189		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5190	else
5191		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5192
5193	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5194		/* Clear all reservations - everyone reacquires all resources */
5195		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5196			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5197						       true);
5198
5199		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5200			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5201						       true);
5202	} else {
5203		/* Lower all pipes without a current reservation */
5204		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5205			iring = &adev->gfx.gfx_ring[i];
5206			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5207							   iring->me,
5208							   iring->pipe,
5209							   0);
5210			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5211			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5212		}
5213
5214		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5215			iring = &adev->gfx.compute_ring[i];
5216			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5217							   iring->me,
5218							   iring->pipe,
5219							   0);
5220			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5221			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5222		}
5223	}
5224
5225	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5226}
5227
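/*
 * Program CP_HQD_PIPE_PRIORITY/CP_HQD_QUEUE_PRIORITY for this ring's HQD
 * under srbm_mutex: acquire raises the priorities (0x2/0xf), release
 * resets them to 0.
 */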
5228static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5229				      struct amdgpu_ring *ring,
5230				      bool acquire)
5231{
5232	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5233	uint32_t queue_priority = acquire ? 0xf : 0x0;
5234
5235	mutex_lock(&adev->srbm_mutex);
5236	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5237
5238	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5239	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5240
5241	soc15_grbm_select(adev, 0, 0, 0, 0);
5242	mutex_unlock(&adev->srbm_mutex);
5243}
5244
5245static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5246					       enum drm_sched_priority priority)
5247{
5248	struct amdgpu_device *adev = ring->adev;
5249	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5250
5251	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5252		return;
5253
5254	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5255	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5256}
5257
5258static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5259{
5260	struct amdgpu_device *adev = ring->adev;
5261
5262	/* XXX check if swapping is necessary on BE */
5263	if (ring->use_doorbell) {
5264		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5265		WDOORBELL64(ring->doorbell_index, ring->wptr);
5266	} else {
5267		BUG(); /* only DOORBELL method supported on gfx9 now */
5268	}
5269}
5270
5271static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5272					 u64 seq, unsigned int flags)
5273{
5274	struct amdgpu_device *adev = ring->adev;
5275
5276	/* we only allocate 32bit for each seq wb address */
5277	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5278
5279	/* write fence seq to the "addr" */
5280	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5281	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5282				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5283	amdgpu_ring_write(ring, lower_32_bits(addr));
5284	amdgpu_ring_write(ring, upper_32_bits(addr));
5285	amdgpu_ring_write(ring, lower_32_bits(seq));
5286
5287	if (flags & AMDGPU_FENCE_FLAG_INT) {
5288		/* set register to trigger INT */
5289		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5290		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5291					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5292		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5293		amdgpu_ring_write(ring, 0);
5294		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5295	}
5296}
5297
5298static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5299{
5300	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5301	amdgpu_ring_write(ring, 0);
5302}
5303
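/* Write a zeroed CE payload into the CSA with a WRITE_DATA packet. */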
5304static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5305{
5306	struct v9_ce_ib_state ce_payload = {0};
5307	uint64_t csa_addr;
5308	int cnt;
5309
5310	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5311	csa_addr = amdgpu_csa_vaddr(ring->adev);
5312
5313	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5314	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5315				 WRITE_DATA_DST_SEL(8) |
5316				 WR_CONFIRM) |
5317				 WRITE_DATA_CACHE_POLICY(0));
5318	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5319	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5320	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5321}
5322
5323static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5324{
5325	struct v9_de_ib_state de_payload = {0};
5326	uint64_t csa_addr, gds_addr;
5327	int cnt;
5328
5329	csa_addr = amdgpu_csa_vaddr(ring->adev);
5330	gds_addr = csa_addr + 4096;
5331	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5332	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5333
5334	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5335	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5336	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5337				 WRITE_DATA_DST_SEL(8) |
5338				 WR_CONFIRM) |
5339				 WRITE_DATA_CACHE_POLICY(0));
5340	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5341	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5342	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5343}
5344
5345static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5346{
5347	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5348	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5349}
5350
5351static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5352{
5353	uint32_t dw2 = 0;
5354
5355	if (amdgpu_sriov_vf(ring->adev))
5356		gfx_v9_0_ring_emit_ce_meta(ring);
5357
5358	gfx_v9_0_ring_emit_tmz(ring, true);
5359
5360	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5361	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5362		/* set load_global_config & load_global_uconfig */
5363		dw2 |= 0x8001;
5364		/* set load_cs_sh_regs */
5365		dw2 |= 0x01000000;
5366		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5367		dw2 |= 0x10002;
5368
5369		/* set load_ce_ram if preamble presented */
5370		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5371			dw2 |= 0x10000000;
5372	} else {
5373		/* still load_ce_ram if this is the first time the preamble is
5374		 * presented, even though no context switch happens.
5375		 */
5376		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5377			dw2 |= 0x10000000;
5378	}
5379
5380	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5381	amdgpu_ring_write(ring, dw2);
5382	amdgpu_ring_write(ring, 0);
5383}
5384
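/*
 * Emit a COND_EXEC packet with a dummy DW count and return its offset in
 * the ring so it can be patched later; the packet skips the following DWs
 * when *cond_exe_gpu_addr reads back as 0.
 */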
5385static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5386{
5387	unsigned ret;
5388	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5389	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5390	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5391	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5392	ret = ring->wptr & ring->buf_mask;
5393	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5394	return ret;
5395}
5396
5397static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5398{
5399	unsigned cur;
5400	BUG_ON(offset > ring->buf_mask);
5401	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5402
5403	cur = (ring->wptr & ring->buf_mask) - 1;
5404	if (likely(cur > offset))
5405		ring->ring[offset] = cur - offset;
5406	else
5407		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5408}
5409
5410static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5411{
5412	struct amdgpu_device *adev = ring->adev;
5413
5414	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5415	amdgpu_ring_write(ring, 0 |	/* src: register*/
5416				(5 << 8) |	/* dst: memory */
5417				(1 << 20));	/* write confirm */
5418	amdgpu_ring_write(ring, reg);
5419	amdgpu_ring_write(ring, 0);
5420	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5421				adev->virt.reg_val_offs * 4));
5422	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5423				adev->virt.reg_val_offs * 4));
5424}
5425
5426static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5427				    uint32_t val)
5428{
5429	uint32_t cmd = 0;
5430
5431	switch (ring->funcs->type) {
5432	case AMDGPU_RING_TYPE_GFX:
5433		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5434		break;
5435	case AMDGPU_RING_TYPE_KIQ:
5436		cmd = (1 << 16); /* no inc addr */
5437		break;
5438	default:
5439		cmd = WR_CONFIRM;
5440		break;
5441	}
5442	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5443	amdgpu_ring_write(ring, cmd);
5444	amdgpu_ring_write(ring, reg);
5445	amdgpu_ring_write(ring, 0);
5446	amdgpu_ring_write(ring, val);
5447}
5448
5449static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5450					uint32_t val, uint32_t mask)
5451{
5452	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5453}
5454
5455static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5456						  uint32_t reg0, uint32_t reg1,
5457						  uint32_t ref, uint32_t mask)
5458{
5459	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5460	struct amdgpu_device *adev = ring->adev;
5461	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5462		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5463
5464	if (fw_version_ok)
5465		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5466				      ref, mask, 0x20);
5467	else
5468		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5469							   ref, mask);
5470}
5471
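/*
 * Soft recovery: issue an SQ_CMD targeting only waves of @vmid
 * (CHECK_VMID set) to recover a hung ring without a full GPU reset.
 */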
5472static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5473{
5474	struct amdgpu_device *adev = ring->adev;
5475	uint32_t value = 0;
5476
5477	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5478	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5479	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5480	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5481	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5482}
5483
5484static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5485						 enum amdgpu_interrupt_state state)
5486{
5487	switch (state) {
5488	case AMDGPU_IRQ_STATE_DISABLE:
5489	case AMDGPU_IRQ_STATE_ENABLE:
5490		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5491			       TIME_STAMP_INT_ENABLE,
5492			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5493		break;
5494	default:
5495		break;
5496	}
5497}
5498
5499static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5500						     int me, int pipe,
5501						     enum amdgpu_interrupt_state state)
5502{
5503	u32 mec_int_cntl, mec_int_cntl_reg;
5504
5505	/*
5506	 * amdgpu controls only the first MEC. That's why this function only
5507	 * handles the setting of interrupts for this specific MEC. All other
5508	 * pipes' interrupts are set by amdkfd.
5509	 */
5510
5511	if (me == 1) {
5512		switch (pipe) {
5513		case 0:
5514			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5515			break;
5516		case 1:
5517			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5518			break;
5519		case 2:
5520			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5521			break;
5522		case 3:
5523			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5524			break;
5525		default:
5526			DRM_DEBUG("invalid pipe %d\n", pipe);
5527			return;
5528		}
5529	} else {
5530		DRM_DEBUG("invalid me %d\n", me);
5531		return;
5532	}
5533
5534	switch (state) {
5535	case AMDGPU_IRQ_STATE_DISABLE:
5536		mec_int_cntl = RREG32(mec_int_cntl_reg);
5537		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5538					     TIME_STAMP_INT_ENABLE, 0);
5539		WREG32(mec_int_cntl_reg, mec_int_cntl);
5540		break;
5541	case AMDGPU_IRQ_STATE_ENABLE:
5542		mec_int_cntl = RREG32(mec_int_cntl_reg);
5543		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5544					     TIME_STAMP_INT_ENABLE, 1);
5545		WREG32(mec_int_cntl_reg, mec_int_cntl);
5546		break;
5547	default:
5548		break;
5549	}
5550}
5551
5552static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5553					     struct amdgpu_irq_src *source,
5554					     unsigned type,
5555					     enum amdgpu_interrupt_state state)
5556{
5557	switch (state) {
5558	case AMDGPU_IRQ_STATE_DISABLE:
5559	case AMDGPU_IRQ_STATE_ENABLE:
5560		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5561			       PRIV_REG_INT_ENABLE,
5562			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5563		break;
5564	default:
5565		break;
5566	}
5567
5568	return 0;
5569}
5570
5571static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5572					      struct amdgpu_irq_src *source,
5573					      unsigned type,
5574					      enum amdgpu_interrupt_state state)
5575{
5576	switch (state) {
5577	case AMDGPU_IRQ_STATE_DISABLE:
5578	case AMDGPU_IRQ_STATE_ENABLE:
5579		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5580			       PRIV_INSTR_INT_ENABLE,
5581			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5582	default:
5583		break;
5584	}
5585
5586	return 0;
5587}
5588
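/*
 * Helpers to toggle CP_ECC_ERROR_INT_ENABLE in the per-pipe interrupt
 * control register of the given ME/pipe.
 */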
5589#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5590	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5591			CP_ECC_ERROR_INT_ENABLE, 1)
5592
5593#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5594	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5595			CP_ECC_ERROR_INT_ENABLE, 0)
5596
5597static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5598					      struct amdgpu_irq_src *source,
5599					      unsigned type,
5600					      enum amdgpu_interrupt_state state)
5601{
5602	switch (state) {
5603	case AMDGPU_IRQ_STATE_DISABLE:
5604		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5605				CP_ECC_ERROR_INT_ENABLE, 0);
5606		DISABLE_ECC_ON_ME_PIPE(1, 0);
5607		DISABLE_ECC_ON_ME_PIPE(1, 1);
5608		DISABLE_ECC_ON_ME_PIPE(1, 2);
5609		DISABLE_ECC_ON_ME_PIPE(1, 3);
5610		break;
5611
5612	case AMDGPU_IRQ_STATE_ENABLE:
5613		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5614				CP_ECC_ERROR_INT_ENABLE, 1);
5615		ENABLE_ECC_ON_ME_PIPE(1, 0);
5616		ENABLE_ECC_ON_ME_PIPE(1, 1);
5617		ENABLE_ECC_ON_ME_PIPE(1, 2);
5618		ENABLE_ECC_ON_ME_PIPE(1, 3);
5619		break;
5620	default:
5621		break;
5622	}
5623
5624	return 0;
5625}
5626
5627
5628static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5629					    struct amdgpu_irq_src *src,
5630					    unsigned type,
5631					    enum amdgpu_interrupt_state state)
5632{
5633	switch (type) {
5634	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5635		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5636		break;
5637	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5638		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5639		break;
5640	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5641		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5642		break;
5643	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5644		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5645		break;
5646	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5647		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5648		break;
5649	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5650		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5651		break;
5652	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5653		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5654		break;
5655	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5656		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5657		break;
5658	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5659		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5660		break;
5661	default:
5662		break;
5663	}
5664	return 0;
5665}
5666
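/*
 * EOP interrupt handler: decode me/pipe/queue from the IV ring_id
 * (bits [3:2], [1:0] and [6:4] respectively) and process fences on the
 * matching gfx or compute ring.
 */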
5667static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5668			    struct amdgpu_irq_src *source,
5669			    struct amdgpu_iv_entry *entry)
5670{
5671	int i;
5672	u8 me_id, pipe_id, queue_id;
5673	struct amdgpu_ring *ring;
5674
5675	DRM_DEBUG("IH: CP EOP\n");
5676	me_id = (entry->ring_id & 0x0c) >> 2;
5677	pipe_id = (entry->ring_id & 0x03) >> 0;
5678	queue_id = (entry->ring_id & 0x70) >> 4;
5679
5680	switch (me_id) {
5681	case 0:
5682		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5683		break;
5684	case 1:
5685	case 2:
5686		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5687			ring = &adev->gfx.compute_ring[i];
5688			/* Per-queue interrupt is supported for MEC starting from VI.
5689			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5690			 */
5691			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5692				amdgpu_fence_process(ring);
5693		}
5694		break;
5695	}
5696	return 0;
5697}
5698
5699static void gfx_v9_0_fault(struct amdgpu_device *adev,
5700			   struct amdgpu_iv_entry *entry)
5701{
5702	u8 me_id, pipe_id, queue_id;
5703	struct amdgpu_ring *ring;
5704	int i;
5705
5706	me_id = (entry->ring_id & 0x0c) >> 2;
5707	pipe_id = (entry->ring_id & 0x03) >> 0;
5708	queue_id = (entry->ring_id & 0x70) >> 4;
5709
5710	switch (me_id) {
5711	case 0:
5712		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5713		break;
5714	case 1:
5715	case 2:
5716		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5717			ring = &adev->gfx.compute_ring[i];
5718			if (ring->me == me_id && ring->pipe == pipe_id &&
5719			    ring->queue == queue_id)
5720				drm_sched_fault(&ring->sched);
5721		}
5722		break;
5723	}
5724}
5725
5726static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5727				 struct amdgpu_irq_src *source,
5728				 struct amdgpu_iv_entry *entry)
5729{
5730	DRM_ERROR("Illegal register access in command stream\n");
5731	gfx_v9_0_fault(adev, entry);
5732	return 0;
5733}
5734
5735static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5736				  struct amdgpu_irq_src *source,
5737				  struct amdgpu_iv_entry *entry)
5738{
5739	DRM_ERROR("Illegal instruction in command stream\n");
5740	gfx_v9_0_fault(adev, entry);
5741	return 0;
5742}
5743
5744static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5745		struct ras_err_data *err_data,
5746		struct amdgpu_iv_entry *entry)
5747{
5748	/* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5749	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5750	if (adev->gfx.funcs->query_ras_error_count)
5751		adev->gfx.funcs->query_ras_error_count(adev, err_data);
5752	amdgpu_ras_reset_gpu(adev, 0);
5753	return AMDGPU_RAS_SUCCESS;
5754}
5755
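/*
 * EDC/ECC counter registers: sec_count_mask and ded_count_mask select the
 * single-error-corrected (SEC) and double-error-detected (DED) count
 * fields of each register; num_instance and per_se_instance describe how
 * many instances have to be read and whether they are per shader engine.
 */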
5756static const struct {
5757	const char *name;
5758	uint32_t ip;
5759	uint32_t inst;
5760	uint32_t seg;
5761	uint32_t reg_offset;
5762	uint32_t per_se_instance;
5763	int32_t num_instance;
5764	uint32_t sec_count_mask;
5765	uint32_t ded_count_mask;
5766} gfx_ras_edc_regs[] = {
5767	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5768	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5769	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5770	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5771	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5772	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5773	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5774	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5775	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5776	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5777	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5778	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5779	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5780	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5781	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5782	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5783	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5784	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5785	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5786	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5787	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5788	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5789	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5790	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5791	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5792	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5793	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5794	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5795	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5796	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5797	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5798	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5799	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5800	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5801	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5802	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5803	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5804	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5805	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5806	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5807	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5808	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5809	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5810	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5811	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5812	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5813	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5814	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5815	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5816	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5817	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5818	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5819	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5820	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5821	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5822	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5823	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5824	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5825	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5826	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5827	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5828	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5829	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5830	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5831	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5832	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5833	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5834	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5835	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5836	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5837	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5838	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5839	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5840	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5841	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5842	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5843	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5844	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5845	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5846	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5847	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5848	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5849	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5850	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5851	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5852	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5853	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5854	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5855	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5856	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5857	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5858	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5859	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5860	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5861	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5862	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5863	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5864	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5865	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5866	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5867	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5868	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5869	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5870	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5871	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5872	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5873	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5874	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5875	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5876	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5877	  0 },
5878	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5879	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5880	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5881	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5882	  0 },
5883	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5884	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5885	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5886	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5887	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5888	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5889	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5890	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5891	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5892	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5893	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5894	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5895	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5896	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5897	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5898	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5899	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5900	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5901	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5902	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5903	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5904	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5905	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5906	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5907	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5908	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5909	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5910	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5911	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5912	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5913	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5914	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5915	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5916	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5917	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5918	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5919	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5920	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5921	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5922	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5923	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5924	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5925	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5926	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5927	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5928	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5929	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5930	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5931	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5932	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5933	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5934	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5935	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5936	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5937	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5938	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5939	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5940	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5941	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5942	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5943	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5944	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5945	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5946	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5947	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5948	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5949	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5950	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5951	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5952	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5953	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5954	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5955	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5956	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5957	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5958	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5959	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5960	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5961	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5962	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5963	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5964	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5965	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5966	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5967	  0 },
5968	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5969	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5970	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5971	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5972	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5973	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5974	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5975	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5976	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5977	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5978	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5979	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5980	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5981	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5982	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5983	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5984	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5985	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5986	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5987	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5988	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5989	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5990	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5991	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5992	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5993	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5994	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5995	  0 },
5996	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5997	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5998	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5999	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
6000	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
6001	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
6002	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
6003	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
6004	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
6005	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6006	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6007	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
6008	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6009	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6010	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
6011	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6012	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6013	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
6014	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6015	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6016	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
6017	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6018	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6019	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6020	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6021	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6022	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6023	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6024	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6025	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6026	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6027	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6028	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6029	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6030	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6031	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6032	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6033	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6034	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6035	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6036	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6037	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6038	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6039	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6040	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6041	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6042	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6043	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6044	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6045	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6046	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6047	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6048	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6049	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6050	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6051};
6052
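/*
 * Inject an error into the requested GFX sub-block through the PSP RAS TA
 * (Vega20 only). The sub-block must support the requested error type in
 * both hardware and the driver.
 */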
6053static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6054				     void *inject_if)
6055{
6056	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6057	int ret;
6058	struct ta_ras_trigger_error_input block_info = { 0 };
6059
6060	if (adev->asic_type != CHIP_VEGA20)
6061		return -EINVAL;
6062
6063	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6064		return -EINVAL;
6065
6066	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6067		return -EPERM;
6068
6069	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6070	      info->head.type)) {
6071		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6072			ras_gfx_subblocks[info->head.sub_block_index].name,
6073			info->head.type);
6074		return -EPERM;
6075	}
6076
6077	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6078	      info->head.type)) {
6079		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6080			ras_gfx_subblocks[info->head.sub_block_index].name,
6081			info->head.type);
6082		return -EPERM;
6083	}
6084
6085	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6086	block_info.sub_block_index =
6087		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6088	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6089	block_info.address = info->address;
6090	block_info.value = info->value;
6091
6092	mutex_lock(&adev->grbm_idx_mutex);
6093	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6094	mutex_unlock(&adev->grbm_idx_mutex);
6095
6096	return ret;
6097}
6098
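/*
 * Walk the gfx_ras_edc_regs table for every shader engine and instance,
 * read each EDC counter register, log any non-zero SEC (correctable) and
 * DED (uncorrectable) counters and update the ce/ue counts in
 * ras_error_status accordingly.
 */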
6099static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6100					  void *ras_error_status)
6101{
6102	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6103	uint32_t sec_count, ded_count;
6104	uint32_t i;
6105	uint32_t reg_value;
6106	uint32_t se_id, instance_id;
6107
6108	if (adev->asic_type != CHIP_VEGA20)
6109		return -EINVAL;
6110
6111	err_data->ue_count = 0;
6112	err_data->ce_count = 0;
6113
6114	mutex_lock(&adev->grbm_idx_mutex);
6115	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6116		for (instance_id = 0; instance_id < 256; instance_id++) {
6117			for (i = 0;
6118			     i < ARRAY_SIZE(gfx_ras_edc_regs);
6119			     i++) {
6120				if (se_id != 0 &&
6121				    !gfx_ras_edc_regs[i].per_se_instance)
6122					continue;
6123				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6124					continue;
6125
6126				gfx_v9_0_select_se_sh(adev, se_id, 0,
6127						      instance_id);
6128
6129				reg_value = RREG32(
6130					adev->reg_offset[gfx_ras_edc_regs[i].ip]
6131							[gfx_ras_edc_regs[i].inst]
6132							[gfx_ras_edc_regs[i].seg] +
6133					gfx_ras_edc_regs[i].reg_offset);
6134				sec_count = reg_value &
6135					    gfx_ras_edc_regs[i].sec_count_mask;
6136				ded_count = reg_value &
6137					    gfx_ras_edc_regs[i].ded_count_mask;
6138				if (sec_count) {
6139					DRM_INFO(
6140						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
6141						se_id, instance_id,
6142						gfx_ras_edc_regs[i].name,
6143						sec_count);
6144					err_data->ce_count++;
6145				}
6146
6147				if (ded_count) {
6148					DRM_INFO(
6149						"Instance[%d][%d]: SubBlock %s, DED %d\n",
6150						se_id, instance_id,
6151						gfx_ras_edc_regs[i].name,
6152						ded_count);
6153					err_data->ue_count++;
6154				}
6155			}
6156		}
6157	}
6158	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6159	mutex_unlock(&adev->grbm_idx_mutex);
6160
6161	return 0;
6162}
6163
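/* Forward CP ECC error interrupts to the common RAS interrupt dispatcher. */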
6164static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6165				  struct amdgpu_irq_src *source,
6166				  struct amdgpu_iv_entry *entry)
6167{
6168	struct ras_common_if *ras_if = adev->gfx.ras_if;
6169	struct ras_dispatch_if ih_data = {
6170		.entry = entry,
6171	};
6172
6173	if (!ras_if)
6174		return 0;
6175
6176	ih_data.head = *ras_if;
6177
6178	DRM_ERROR("CP ECC ERROR IRQ\n");
6179	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6180	return 0;
6181}
6182
6183static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6184	.name = "gfx_v9_0",
6185	.early_init = gfx_v9_0_early_init,
6186	.late_init = gfx_v9_0_late_init,
6187	.sw_init = gfx_v9_0_sw_init,
6188	.sw_fini = gfx_v9_0_sw_fini,
6189	.hw_init = gfx_v9_0_hw_init,
6190	.hw_fini = gfx_v9_0_hw_fini,
6191	.suspend = gfx_v9_0_suspend,
6192	.resume = gfx_v9_0_resume,
6193	.is_idle = gfx_v9_0_is_idle,
6194	.wait_for_idle = gfx_v9_0_wait_for_idle,
6195	.soft_reset = gfx_v9_0_soft_reset,
6196	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6197	.set_powergating_state = gfx_v9_0_set_powergating_state,
6198	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6199};
6200
6201static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6202	.type = AMDGPU_RING_TYPE_GFX,
6203	.align_mask = 0xff,
6204	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6205	.support_64bit_ptrs = true,
6206	.vmhub = AMDGPU_GFXHUB_0,
6207	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6208	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6209	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6210	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6211		5 +  /* COND_EXEC */
6212		7 +  /* PIPELINE_SYNC */
6213		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6214		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6215		2 + /* VM_FLUSH */
6216		8 +  /* FENCE for VM_FLUSH */
6217		20 + /* GDS switch */
6218		4 + /* double SWITCH_BUFFER,
6219		       the first COND_EXEC jumps to the place just
6220		       prior to this double SWITCH_BUFFER */
6221		5 + /* COND_EXEC */
6222		7 +	 /*	HDP_flush */
6223		4 +	 /*	VGT_flush */
6224		14 + /*	CE_META */
6225		31 + /*	DE_META */
6226		3 + /* CNTX_CTRL */
6227		5 + /* HDP_INVL */
6228		8 + 8 + /* FENCE x2 */
6229		2, /* SWITCH_BUFFER */
6230	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6231	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6232	.emit_fence = gfx_v9_0_ring_emit_fence,
6233	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6234	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6235	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6236	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6237	.test_ring = gfx_v9_0_ring_test_ring,
6238	.test_ib = gfx_v9_0_ring_test_ib,
6239	.insert_nop = amdgpu_ring_insert_nop,
6240	.pad_ib = amdgpu_ring_generic_pad_ib,
6241	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6242	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6243	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6244	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6245	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6246	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6247	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6248	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6249	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6250};
6251
6252static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6253	.type = AMDGPU_RING_TYPE_COMPUTE,
6254	.align_mask = 0xff,
6255	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6256	.support_64bit_ptrs = true,
6257	.vmhub = AMDGPU_GFXHUB_0,
6258	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6259	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6260	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6261	.emit_frame_size =
6262		20 + /* gfx_v9_0_ring_emit_gds_switch */
6263		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6264		5 + /* hdp invalidate */
6265		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6266		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6267		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6268		2 + /* gfx_v9_0_ring_emit_vm_flush */
6269		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6270	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6271	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6272	.emit_fence = gfx_v9_0_ring_emit_fence,
6273	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6274	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6275	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6276	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6277	.test_ring = gfx_v9_0_ring_test_ring,
6278	.test_ib = gfx_v9_0_ring_test_ib,
6279	.insert_nop = amdgpu_ring_insert_nop,
6280	.pad_ib = amdgpu_ring_generic_pad_ib,
6281	.set_priority = gfx_v9_0_ring_set_priority_compute,
6282	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6283	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6284	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6285};
6286
6287static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6288	.type = AMDGPU_RING_TYPE_KIQ,
6289	.align_mask = 0xff,
6290	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6291	.support_64bit_ptrs = true,
6292	.vmhub = AMDGPU_GFXHUB_0,
6293	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6294	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6295	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6296	.emit_frame_size =
6297		20 + /* gfx_v9_0_ring_emit_gds_switch */
6298		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6299		5 + /* hdp invalidate */
6300		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6301		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6302		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6303		2 + /* gfx_v9_0_ring_emit_vm_flush */
6304		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6305	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6306	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6307	.test_ring = gfx_v9_0_ring_test_ring,
6308	.insert_nop = amdgpu_ring_insert_nop,
6309	.pad_ib = amdgpu_ring_generic_pad_ib,
6310	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6311	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6312	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6313	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6314};
6315
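/* Attach the gfx v9 ring function tables to the KIQ, GFX and compute rings. */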
6316static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6317{
6318	int i;
6319
6320	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6321
6322	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6323		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6324
6325	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6326		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6327}
6328
6329static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6330	.set = gfx_v9_0_set_eop_interrupt_state,
6331	.process = gfx_v9_0_eop_irq,
6332};
6333
6334static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6335	.set = gfx_v9_0_set_priv_reg_fault_state,
6336	.process = gfx_v9_0_priv_reg_irq,
6337};
6338
6339static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6340	.set = gfx_v9_0_set_priv_inst_fault_state,
6341	.process = gfx_v9_0_priv_inst_irq,
6342};
6343
6344static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6345	.set = gfx_v9_0_set_cp_ecc_error_state,
6346	.process = gfx_v9_0_cp_ecc_error_irq,
6347};
6348
6349
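/*
 * Hook up the interrupt sources owned by GFX: EOP, privileged register and
 * privileged instruction faults, and CP ECC errors.
 */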
6350static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6351{
6352	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6353	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6354
6355	adev->gfx.priv_reg_irq.num_types = 1;
6356	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6357
6358	adev->gfx.priv_inst_irq.num_types = 1;
6359	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6360
6361	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6362	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6363}
6364
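/* Select the RLC function table for the gfx v9 ASICs handled by this block. */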
6365static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6366{
6367	switch (adev->asic_type) {
6368	case CHIP_VEGA10:
6369	case CHIP_VEGA12:
6370	case CHIP_VEGA20:
6371	case CHIP_RAVEN:
6372	case CHIP_ARCTURUS:
6373	case CHIP_RENOIR:
6374		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6375		break;
6376	default:
6377		break;
6378	}
6379}
6380
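/*
 * Initialize the per-ASIC GDS, GWS and OA sizes and the maximum GDS compute
 * wave id.
 */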
6381static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6382{
6383	/* init asic gds info */
6384	switch (adev->asic_type) {
6385	case CHIP_VEGA10:
6386	case CHIP_VEGA12:
6387	case CHIP_VEGA20:
6388		adev->gds.gds_size = 0x10000;
6389		break;
6390	case CHIP_RAVEN:
6391	case CHIP_ARCTURUS:
6392		adev->gds.gds_size = 0x1000;
6393		break;
6394	default:
6395		adev->gds.gds_size = 0x10000;
6396		break;
6397	}
6398
6399	switch (adev->asic_type) {
6400	case CHIP_VEGA10:
6401	case CHIP_VEGA20:
6402		adev->gds.gds_compute_max_wave_id = 0x7ff;
6403		break;
6404	case CHIP_VEGA12:
6405		adev->gds.gds_compute_max_wave_id = 0x27f;
6406		break;
6407	case CHIP_RAVEN:
6408		if (adev->rev_id >= 0x8)
6409			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6410		else
6411			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6412		break;
6413	case CHIP_ARCTURUS:
6414		adev->gds.gds_compute_max_wave_id = 0xfff;
6415		break;
6416	default:
6417		/* this really depends on the chip */
6418		adev->gds.gds_compute_max_wave_id = 0x7ff;
6419		break;
6420	}
6421
6422	adev->gds.gws_size = 64;
6423	adev->gds.oa_size = 16;
6424}
6425
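/*
 * Program GC_USER_SHADER_ARRAY_CONFIG with the user-requested inactive-CU
 * bitmap for the SE/SH selected by the caller.
 */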
6426static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6427						 u32 bitmap)
6428{
6429	u32 data;
6430
6431	if (!bitmap)
6432		return;
6433
6434	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6435	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6436
6437	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6438}
6439
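/*
 * Return the bitmap of active CUs for the currently selected SE/SH, derived
 * from the hardware and user inactive-CU configuration.
 */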
6440static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6441{
6442	u32 data, mask;
6443
6444	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6445	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6446
6447	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6448	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6449
6450	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6451
6452	return (~data) & mask;
6453}
6454
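/*
 * Fill cu_info with the per-SE/SH active-CU bitmaps, the always-on CU mask
 * and the total number of active CUs, taking user-disabled CUs into account.
 */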
6455static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6456				 struct amdgpu_cu_info *cu_info)
6457{
6458	int i, j, k, counter, active_cu_number = 0;
6459	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6460	unsigned disable_masks[4 * 4];
6461
6462	if (!adev || !cu_info)
6463		return -EINVAL;
6464
6465	/*
6466	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6467	 */
6468	if (adev->gfx.config.max_shader_engines *
6469		adev->gfx.config.max_sh_per_se > 16)
6470		return -EINVAL;
6471
6472	amdgpu_gfx_parse_disable_cu(disable_masks,
6473				    adev->gfx.config.max_shader_engines,
6474				    adev->gfx.config.max_sh_per_se);
6475
6476	mutex_lock(&adev->grbm_idx_mutex);
6477	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6478		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6479			mask = 1;
6480			ao_bitmap = 0;
6481			counter = 0;
6482			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6483			gfx_v9_0_set_user_cu_inactive_bitmap(
6484				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6485			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6486
6487			/*
6488			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
6489			 * is a 4x4 array, which is suitable for Vega ASICs with
6490			 * their 4*2 SE/SH layout.
6491			 * For Arcturus, however, the SE/SH layout changes to 8*1.
6492			 * To minimize the impact, we keep it compatible with the
6493			 * current bitmap array as below:
6494			 *    SE4,SH0 --> bitmap[0][1]
6495			 *    SE5,SH0 --> bitmap[1][1]
6496			 *    SE6,SH0 --> bitmap[2][1]
6497			 *    SE7,SH0 --> bitmap[3][1]
6498			 */
6499			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6500
6501			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6502				if (bitmap & mask) {
6503					if (counter < adev->gfx.config.max_cu_per_sh)
6504						ao_bitmap |= mask;
6505					counter++;
6506				}
6507				mask <<= 1;
6508			}
6509			active_cu_number += counter;
6510			if (i < 2 && j < 2)
6511				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6512			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6513		}
6514	}
6515	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6516	mutex_unlock(&adev->grbm_idx_mutex);
6517
6518	cu_info->number = active_cu_number;
6519	cu_info->ao_cu_mask = ao_cu_mask;
6520	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6521
6522	return 0;
6523}
6524
6525const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6526{
6527	.type = AMD_IP_BLOCK_TYPE_GFX,
6528	.major = 9,
6529	.minor = 0,
6530	.rev = 0,
6531	.funcs = &gfx_v9_0_ip_funcs,
6532};