Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.15.
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "amdgpu_ring.h"
  33#include "vi.h"
  34#include "vi_structs.h"
  35#include "vid.h"
  36#include "amdgpu_ucode.h"
  37#include "amdgpu_atombios.h"
  38#include "atombios_i2c.h"
  39#include "clearstate_vi.h"
  40
  41#include "gmc/gmc_8_2_d.h"
  42#include "gmc/gmc_8_2_sh_mask.h"
  43
  44#include "oss/oss_3_0_d.h"
  45#include "oss/oss_3_0_sh_mask.h"
  46
  47#include "bif/bif_5_0_d.h"
  48#include "bif/bif_5_0_sh_mask.h"
  49#include "gca/gfx_8_0_d.h"
  50#include "gca/gfx_8_0_enum.h"
  51#include "gca/gfx_8_0_sh_mask.h"
  52
  53#include "dce/dce_10_0_d.h"
  54#include "dce/dce_10_0_sh_mask.h"
  55
  56#include "smu/smu_7_1_3_d.h"
  57
  58#include "ivsrcid/ivsrcid_vislands30.h"
  59
  60#define GFX8_NUM_GFX_RINGS     1
  61#define GFX8_MEC_HPD_SIZE 4096
  62
  63#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  65#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  66#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  67
  68#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  69#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  70#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  71#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  72#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  73#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  74#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  75#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  76#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  77
  78#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  79#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  80#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  81#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  82#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  83#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  84
  85/* BPM SERDES CMD */
  86#define SET_BPM_SERDES_CMD    1
  87#define CLE_BPM_SERDES_CMD    0
  88
  89/* BPM Register Address*/
  90enum {
  91	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  92	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  93	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  94	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  95	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  96	BPM_REG_FGCG_MAX
  97};
  98
  99#define RLC_FormatDirectRegListLength        14
 100
 101MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 102MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 103MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 104MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 105MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 106MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 109MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 111MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 112MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 113
 114MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 115MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 116MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 117MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 118MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 119MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 122MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 124MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 125MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 126
 127MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 128MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 129MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 130MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 131MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 132MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 133
 134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 135MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 144MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 145
 146MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 147MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 148MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 149MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 150MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 151MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 152MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 153MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 155MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 156MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 157
 158MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 163MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 164MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 165MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 166MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 167MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 168MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 169
 170MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 171MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 172MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 173MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 174MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 175MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 176
 177static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 178{
 179	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 180	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 181	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 182	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 183	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 184	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 185	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 186	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 187	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 188	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 189	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 190	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 191	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 192	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 193	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 194	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 195};
 196
 197static const u32 golden_settings_tonga_a11[] =
 198{
 199	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 200	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 201	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 202	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 203	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 204	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 205	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 206	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 207	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 208	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 209	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 210	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 211	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 212	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 213	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 214	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 215};
 216
 217static const u32 tonga_golden_common_all[] =
 218{
 219	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 220	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 221	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 222	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 223	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 224	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 225	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 226	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 227};
 228
 229static const u32 tonga_mgcg_cgcg_init[] =
 230{
 231	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 232	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 233	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 234	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 235	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 236	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 237	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 238	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 239	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 240	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 241	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 242	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 243	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 244	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 245	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 246	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 247	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 248	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 249	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 250	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 251	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 252	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 253	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 254	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 255	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 256	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 257	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 258	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 259	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 260	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 261	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 262	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 263	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 264	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 265	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 266	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 267	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 268	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 269	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 270	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 271	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 272	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 273	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 274	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 275	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 276	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 277	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 278	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 279	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 280	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 281	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 282	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 283	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 284	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 285	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 286	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 287	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 288	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 289	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 290	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 291	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 292	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 293	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 294	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 295	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 296	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 297	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 298	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 299	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 300	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 301	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 302	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 303	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 304	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 305	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 306};
 307
 308static const u32 golden_settings_vegam_a11[] =
 309{
 310	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 311	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
 312	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 313	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 314	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 315	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 316	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
 317	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
 318	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 319	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 320	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 321	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 322	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 323	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 324	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 325	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
 326	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 327};
 328
 329static const u32 vegam_golden_common_all[] =
 330{
 331	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 332	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 333	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 334	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 335	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 336	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 337};
 338
 339static const u32 golden_settings_polaris11_a11[] =
 340{
 341	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 342	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 343	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 344	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 345	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 346	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 347	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 348	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 349	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 350	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 351	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 352	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 353	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 354	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 355	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 356	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 357	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 358};
 359
 360static const u32 polaris11_golden_common_all[] =
 361{
 362	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 363	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 364	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 365	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 366	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 367	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 368};
 369
 370static const u32 golden_settings_polaris10_a11[] =
 371{
 372	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 373	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 374	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 375	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 376	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 377	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 378	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 379	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 380	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 381	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 382	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 383	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 384	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 385	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 386	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 387	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 388	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 389};
 390
 391static const u32 polaris10_golden_common_all[] =
 392{
 393	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 394	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 395	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 396	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 397	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 398	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 399	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 400	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 401};
 402
 403static const u32 fiji_golden_common_all[] =
 404{
 405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 406	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 407	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 408	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 409	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 410	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 411	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 412	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 413	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 414	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 415};
 416
 417static const u32 golden_settings_fiji_a10[] =
 418{
 419	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 421	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 422	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 423	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 425	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 426	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 427	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 428	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 429	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 430};
 431
 432static const u32 fiji_mgcg_cgcg_init[] =
 433{
 434	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 435	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 436	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 437	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 438	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 439	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 440	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 441	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 442	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 443	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 444	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 445	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 446	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 447	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 451	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 452	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 453	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 454	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 455	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 456	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 459	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 460	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 461	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 462	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 463	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 464	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 465	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 466	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 467	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 468	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 469};
 470
 471static const u32 golden_settings_iceland_a11[] =
 472{
 473	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 474	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 475	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 476	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 477	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 478	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 479	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 480	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 481	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 482	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 483	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 484	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 485	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 486	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 487	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 488	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 489};
 490
 491static const u32 iceland_golden_common_all[] =
 492{
 493	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 494	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 495	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 496	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 497	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 498	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 499	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 500	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 501};
 502
 503static const u32 iceland_mgcg_cgcg_init[] =
 504{
 505	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 506	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 507	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 508	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 509	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 510	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 511	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 512	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 513	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 514	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 515	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 516	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 517	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 518	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 519	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 520	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 521	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 522	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 523	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 524	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 525	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 526	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 527	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 528	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 529	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 530	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 531	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 532	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 533	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 534	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 535	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 536	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 537	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 538	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 539	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 540	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 541	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 542	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 543	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 544	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 545	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 546	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 547	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 548	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 549	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 550	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 551	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 552	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 553	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 554	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 555	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 556	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 557	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 558	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 559	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 560	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 561	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 562	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 563	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 564	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 565	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 566	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 567	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 568	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 569};
 570
 571static const u32 cz_golden_settings_a11[] =
 572{
 573	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 574	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 575	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 576	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 577	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 578	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 579	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 580	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 581	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 582	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 583	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 584	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 585};
 586
 587static const u32 cz_golden_common_all[] =
 588{
 589	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 590	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 591	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 592	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 593	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 594	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 595	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 596	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 597};
 598
 599static const u32 cz_mgcg_cgcg_init[] =
 600{
 601	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 602	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 603	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 604	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 605	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 606	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 607	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 608	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 609	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 610	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 611	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 612	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 613	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 614	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 615	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 616	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 617	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 618	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 619	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 620	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 621	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 622	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 623	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 624	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 625	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 626	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 627	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 628	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 630	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 631	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 632	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 633	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 634	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 635	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 636	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 637	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 638	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 639	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 640	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 641	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 642	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 643	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 644	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 645	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 646	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 647	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 648	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 649	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 650	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 651	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 652	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 653	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 654	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 655	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 656	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 657	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 658	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 659	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 660	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 661	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 662	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 663	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 664	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 665	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 666	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 667	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 668	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 669	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 670	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 671	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 672	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 673	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 674	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 675	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 676};
 677
 678static const u32 stoney_golden_settings_a11[] =
 679{
 680	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 681	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 682	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 683	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 684	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 685	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 686	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 687	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 688	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 689	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 690};
 691
 692static const u32 stoney_golden_common_all[] =
 693{
 694	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 695	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 696	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 697	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 698	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 699	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 700	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 701	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 702};
 703
 704static const u32 stoney_mgcg_cgcg_init[] =
 705{
 706	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 707	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 708	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 709	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 710	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 711};
 712
 713
 714static const char * const sq_edc_source_names[] = {
 715	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
 716	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
 717	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
 718	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
 719	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
 720	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
 721	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
 722};
 723
 724static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 725static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 726static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 727static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 728static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 729static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 730static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 731static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 732
 733#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
 734#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
 735
 736static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 737{
 738	uint32_t data;
 739
 740	switch (adev->asic_type) {
 741	case CHIP_TOPAZ:
 742		amdgpu_device_program_register_sequence(adev,
 743							iceland_mgcg_cgcg_init,
 744							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 745		amdgpu_device_program_register_sequence(adev,
 746							golden_settings_iceland_a11,
 747							ARRAY_SIZE(golden_settings_iceland_a11));
 748		amdgpu_device_program_register_sequence(adev,
 749							iceland_golden_common_all,
 750							ARRAY_SIZE(iceland_golden_common_all));
 751		break;
 752	case CHIP_FIJI:
 753		amdgpu_device_program_register_sequence(adev,
 754							fiji_mgcg_cgcg_init,
 755							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 756		amdgpu_device_program_register_sequence(adev,
 757							golden_settings_fiji_a10,
 758							ARRAY_SIZE(golden_settings_fiji_a10));
 759		amdgpu_device_program_register_sequence(adev,
 760							fiji_golden_common_all,
 761							ARRAY_SIZE(fiji_golden_common_all));
 762		break;
 763
 764	case CHIP_TONGA:
 765		amdgpu_device_program_register_sequence(adev,
 766							tonga_mgcg_cgcg_init,
 767							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 768		amdgpu_device_program_register_sequence(adev,
 769							golden_settings_tonga_a11,
 770							ARRAY_SIZE(golden_settings_tonga_a11));
 771		amdgpu_device_program_register_sequence(adev,
 772							tonga_golden_common_all,
 773							ARRAY_SIZE(tonga_golden_common_all));
 774		break;
 775	case CHIP_VEGAM:
 776		amdgpu_device_program_register_sequence(adev,
 777							golden_settings_vegam_a11,
 778							ARRAY_SIZE(golden_settings_vegam_a11));
 779		amdgpu_device_program_register_sequence(adev,
 780							vegam_golden_common_all,
 781							ARRAY_SIZE(vegam_golden_common_all));
 782		break;
 783	case CHIP_POLARIS11:
 784	case CHIP_POLARIS12:
 785		amdgpu_device_program_register_sequence(adev,
 786							golden_settings_polaris11_a11,
 787							ARRAY_SIZE(golden_settings_polaris11_a11));
 788		amdgpu_device_program_register_sequence(adev,
 789							polaris11_golden_common_all,
 790							ARRAY_SIZE(polaris11_golden_common_all));
 791		break;
 792	case CHIP_POLARIS10:
 793		amdgpu_device_program_register_sequence(adev,
 794							golden_settings_polaris10_a11,
 795							ARRAY_SIZE(golden_settings_polaris10_a11));
 796		amdgpu_device_program_register_sequence(adev,
 797							polaris10_golden_common_all,
 798							ARRAY_SIZE(polaris10_golden_common_all));
 799		data = RREG32_SMC(ixCG_ACLK_CNTL);
 800		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
 801		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
 802		WREG32_SMC(ixCG_ACLK_CNTL, data);
 803		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
 804		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 805		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 806		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
 807			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 808			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 809		}
 810		break;
 811	case CHIP_CARRIZO:
 812		amdgpu_device_program_register_sequence(adev,
 813							cz_mgcg_cgcg_init,
 814							ARRAY_SIZE(cz_mgcg_cgcg_init));
 815		amdgpu_device_program_register_sequence(adev,
 816							cz_golden_settings_a11,
 817							ARRAY_SIZE(cz_golden_settings_a11));
 818		amdgpu_device_program_register_sequence(adev,
 819							cz_golden_common_all,
 820							ARRAY_SIZE(cz_golden_common_all));
 821		break;
 822	case CHIP_STONEY:
 823		amdgpu_device_program_register_sequence(adev,
 824							stoney_mgcg_cgcg_init,
 825							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 826		amdgpu_device_program_register_sequence(adev,
 827							stoney_golden_settings_a11,
 828							ARRAY_SIZE(stoney_golden_settings_a11));
 829		amdgpu_device_program_register_sequence(adev,
 830							stoney_golden_common_all,
 831							ARRAY_SIZE(stoney_golden_common_all));
 832		break;
 833	default:
 834		break;
 835	}
 836}
 837
 838static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 839{
 840	struct amdgpu_device *adev = ring->adev;
 841	uint32_t tmp = 0;
 842	unsigned i;
 843	int r;
 844
 845	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
 846	r = amdgpu_ring_alloc(ring, 3);
 847	if (r)
 848		return r;
 849
 850	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 851	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
 852	amdgpu_ring_write(ring, 0xDEADBEEF);
 853	amdgpu_ring_commit(ring);
 854
 855	for (i = 0; i < adev->usec_timeout; i++) {
 856		tmp = RREG32(mmSCRATCH_REG0);
 857		if (tmp == 0xDEADBEEF)
 858			break;
 859		udelay(1);
 860	}
 861
 862	if (i >= adev->usec_timeout)
 863		r = -ETIMEDOUT;
 864
 865	return r;
 866}
 867
 868static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 869{
 870	struct amdgpu_device *adev = ring->adev;
 871	struct amdgpu_ib ib;
 872	struct dma_fence *f = NULL;
 873
 874	unsigned int index;
 875	uint64_t gpu_addr;
 876	uint32_t tmp;
 877	long r;
 878
 879	r = amdgpu_device_wb_get(adev, &index);
 880	if (r)
 881		return r;
 882
 883	gpu_addr = adev->wb.gpu_addr + (index * 4);
 884	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 885	memset(&ib, 0, sizeof(ib));
 886
 887	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 888	if (r)
 889		goto err1;
 890
 891	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 892	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 893	ib.ptr[2] = lower_32_bits(gpu_addr);
 894	ib.ptr[3] = upper_32_bits(gpu_addr);
 895	ib.ptr[4] = 0xDEADBEEF;
 896	ib.length_dw = 5;
 897
 898	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 899	if (r)
 900		goto err2;
 901
 902	r = dma_fence_wait_timeout(f, false, timeout);
 903	if (r == 0) {
 904		r = -ETIMEDOUT;
 905		goto err2;
 906	} else if (r < 0) {
 907		goto err2;
 908	}
 909
 910	tmp = adev->wb.wb[index];
 911	if (tmp == 0xDEADBEEF)
 912		r = 0;
 913	else
 914		r = -EINVAL;
 915
 916err2:
 917	amdgpu_ib_free(adev, &ib, NULL);
 918	dma_fence_put(f);
 919err1:
 920	amdgpu_device_wb_free(adev, index);
 921	return r;
 922}
 923
 924
 925static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 926{
 927	amdgpu_ucode_release(&adev->gfx.pfp_fw);
 928	amdgpu_ucode_release(&adev->gfx.me_fw);
 929	amdgpu_ucode_release(&adev->gfx.ce_fw);
 930	amdgpu_ucode_release(&adev->gfx.rlc_fw);
 931	amdgpu_ucode_release(&adev->gfx.mec_fw);
 932	if ((adev->asic_type != CHIP_STONEY) &&
 933	    (adev->asic_type != CHIP_TOPAZ))
 934		amdgpu_ucode_release(&adev->gfx.mec2_fw);
 935
 936	kfree(adev->gfx.rlc.register_list_format);
 937}
 938
 939static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 940{
 941	const char *chip_name;
 942	char fw_name[30];
 943	int err;
 944	struct amdgpu_firmware_info *info = NULL;
 945	const struct common_firmware_header *header = NULL;
 946	const struct gfx_firmware_header_v1_0 *cp_hdr;
 947	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 948	unsigned int *tmp = NULL, i;
 949
 950	DRM_DEBUG("\n");
 951
 952	switch (adev->asic_type) {
 953	case CHIP_TOPAZ:
 954		chip_name = "topaz";
 955		break;
 956	case CHIP_TONGA:
 957		chip_name = "tonga";
 958		break;
 959	case CHIP_CARRIZO:
 960		chip_name = "carrizo";
 961		break;
 962	case CHIP_FIJI:
 963		chip_name = "fiji";
 964		break;
 965	case CHIP_STONEY:
 966		chip_name = "stoney";
 967		break;
 968	case CHIP_POLARIS10:
 969		chip_name = "polaris10";
 970		break;
 971	case CHIP_POLARIS11:
 972		chip_name = "polaris11";
 973		break;
 974	case CHIP_POLARIS12:
 975		chip_name = "polaris12";
 976		break;
 977	case CHIP_VEGAM:
 978		chip_name = "vegam";
 979		break;
 980	default:
 981		BUG();
 982	}
 983
 984	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 985		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 986		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
 987		if (err == -ENODEV) {
 988			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 989			err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
 990		}
 991	} else {
 992		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 993		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
 994	}
 995	if (err)
 996		goto out;
 997	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 998	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 999	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1000
1001	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1002		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1003		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1004		if (err == -ENODEV) {
1005			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1006			err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1007		}
1008	} else {
1009		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1010		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1011	}
1012	if (err)
1013		goto out;
1014	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1015	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1016
1017	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1018
1019	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1020		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1021		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1022		if (err == -ENODEV) {
1023			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1024			err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1025		}
1026	} else {
1027		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1028		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1029	}
1030	if (err)
1031		goto out;
1032	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1033	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1034	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1035
1036	/*
1037	 * Support for MCBP/Virtualization in combination with chained IBs is
1038	 * formal released on feature version #46
1039	 */
1040	if (adev->gfx.ce_feature_version >= 46 &&
1041	    adev->gfx.pfp_feature_version >= 46) {
1042		adev->virt.chained_ib_support = true;
1043		DRM_INFO("Chained IB support enabled!\n");
1044	} else
1045		adev->virt.chained_ib_support = false;
1046
1047	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1048	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1049	if (err)
1050		goto out;
1051	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1052	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1053	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1054
1055	adev->gfx.rlc.save_and_restore_offset =
1056			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1057	adev->gfx.rlc.clear_state_descriptor_offset =
1058			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1059	adev->gfx.rlc.avail_scratch_ram_locations =
1060			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1061	adev->gfx.rlc.reg_restore_list_size =
1062			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1063	adev->gfx.rlc.reg_list_format_start =
1064			le32_to_cpu(rlc_hdr->reg_list_format_start);
1065	adev->gfx.rlc.reg_list_format_separate_start =
1066			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1067	adev->gfx.rlc.starting_offsets_start =
1068			le32_to_cpu(rlc_hdr->starting_offsets_start);
1069	adev->gfx.rlc.reg_list_format_size_bytes =
1070			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1071	adev->gfx.rlc.reg_list_size_bytes =
1072			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1073
1074	adev->gfx.rlc.register_list_format =
1075			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1076					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1077
1078	if (!adev->gfx.rlc.register_list_format) {
1079		err = -ENOMEM;
1080		goto out;
1081	}
1082
1083	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1084			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1085	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1086		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1087
1088	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1089
1090	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1091			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1092	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1093		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1094
1095	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1096		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1097		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1098		if (err == -ENODEV) {
1099			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1100			err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1101		}
1102	} else {
1103		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1104		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1105	}
1106	if (err)
1107		goto out;
1108	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1109	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1110	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1111
1112	if ((adev->asic_type != CHIP_STONEY) &&
1113	    (adev->asic_type != CHIP_TOPAZ)) {
1114		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1115			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1116			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1117			if (err == -ENODEV) {
1118				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1119				err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1120			}
1121		} else {
1122			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1123			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1124		}
1125		if (!err) {
1126			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1127				adev->gfx.mec2_fw->data;
1128			adev->gfx.mec2_fw_version =
1129				le32_to_cpu(cp_hdr->header.ucode_version);
1130			adev->gfx.mec2_feature_version =
1131				le32_to_cpu(cp_hdr->ucode_feature_version);
1132		} else {
1133			err = 0;
1134			adev->gfx.mec2_fw = NULL;
1135		}
1136	}
1137
1138	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1139	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1140	info->fw = adev->gfx.pfp_fw;
1141	header = (const struct common_firmware_header *)info->fw->data;
1142	adev->firmware.fw_size +=
1143		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1144
1145	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1146	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1147	info->fw = adev->gfx.me_fw;
1148	header = (const struct common_firmware_header *)info->fw->data;
1149	adev->firmware.fw_size +=
1150		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1151
1152	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1153	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1154	info->fw = adev->gfx.ce_fw;
1155	header = (const struct common_firmware_header *)info->fw->data;
1156	adev->firmware.fw_size +=
1157		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1158
1159	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1160	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1161	info->fw = adev->gfx.rlc_fw;
1162	header = (const struct common_firmware_header *)info->fw->data;
1163	adev->firmware.fw_size +=
1164		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1165
1166	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1167	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1168	info->fw = adev->gfx.mec_fw;
1169	header = (const struct common_firmware_header *)info->fw->data;
1170	adev->firmware.fw_size +=
1171		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1172
1173	/* we need account JT in */
1174	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1175	adev->firmware.fw_size +=
1176		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1177
1178	if (amdgpu_sriov_vf(adev)) {
1179		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1180		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1181		info->fw = adev->gfx.mec_fw;
1182		adev->firmware.fw_size +=
1183			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1184	}
1185
1186	if (adev->gfx.mec2_fw) {
1187		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1188		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1189		info->fw = adev->gfx.mec2_fw;
1190		header = (const struct common_firmware_header *)info->fw->data;
1191		adev->firmware.fw_size +=
1192			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1193	}
1194
1195out:
1196	if (err) {
1197		dev_err(adev->dev,
1198			"gfx8: Failed to load firmware \"%s\"\n",
1199			fw_name);
1200		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1201		amdgpu_ucode_release(&adev->gfx.me_fw);
1202		amdgpu_ucode_release(&adev->gfx.ce_fw);
1203		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1204		amdgpu_ucode_release(&adev->gfx.mec_fw);
1205		amdgpu_ucode_release(&adev->gfx.mec2_fw);
1206	}
1207	return err;
1208}
1209
1210static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1211				    volatile u32 *buffer)
1212{
1213	u32 count = 0, i;
1214	const struct cs_section_def *sect = NULL;
1215	const struct cs_extent_def *ext = NULL;
1216
1217	if (adev->gfx.rlc.cs_data == NULL)
1218		return;
1219	if (buffer == NULL)
1220		return;
1221
1222	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1223	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1224
1225	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1226	buffer[count++] = cpu_to_le32(0x80000000);
1227	buffer[count++] = cpu_to_le32(0x80000000);
1228
1229	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1230		for (ext = sect->section; ext->extent != NULL; ++ext) {
1231			if (sect->id == SECT_CONTEXT) {
1232				buffer[count++] =
1233					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1234				buffer[count++] = cpu_to_le32(ext->reg_index -
1235						PACKET3_SET_CONTEXT_REG_START);
1236				for (i = 0; i < ext->reg_count; i++)
1237					buffer[count++] = cpu_to_le32(ext->extent[i]);
1238			} else {
1239				return;
1240			}
1241		}
1242	}
1243
1244	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1245	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1246			PACKET3_SET_CONTEXT_REG_START);
1247	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1248	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1249
1250	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1252
1253	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1254	buffer[count++] = cpu_to_le32(0);
1255}
1256
1257static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1258{
1259	if (adev->asic_type == CHIP_CARRIZO)
1260		return 5;
1261	else
1262		return 4;
1263}
1264
1265static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1266{
1267	const struct cs_section_def *cs_data;
1268	int r;
1269
1270	adev->gfx.rlc.cs_data = vi_cs_data;
1271
1272	cs_data = adev->gfx.rlc.cs_data;
1273
1274	if (cs_data) {
1275		/* init clear state block */
1276		r = amdgpu_gfx_rlc_init_csb(adev);
1277		if (r)
1278			return r;
1279	}
1280
1281	if ((adev->asic_type == CHIP_CARRIZO) ||
1282	    (adev->asic_type == CHIP_STONEY)) {
1283		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1284		r = amdgpu_gfx_rlc_init_cpt(adev);
1285		if (r)
1286			return r;
1287	}
1288
1289	/* init spm vmid with 0xf */
1290	if (adev->gfx.rlc.funcs->update_spm_vmid)
1291		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1292
1293	return 0;
1294}
1295
1296static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1297{
1298	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1299}
1300
1301static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1302{
1303	int r;
1304	u32 *hpd;
1305	size_t mec_hpd_size;
1306
1307	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1308
1309	/* take ownership of the relevant compute queues */
1310	amdgpu_gfx_compute_queue_acquire(adev);
1311
1312	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1313	if (mec_hpd_size) {
1314		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1315					      AMDGPU_GEM_DOMAIN_VRAM |
1316					      AMDGPU_GEM_DOMAIN_GTT,
1317					      &adev->gfx.mec.hpd_eop_obj,
1318					      &adev->gfx.mec.hpd_eop_gpu_addr,
1319					      (void **)&hpd);
1320		if (r) {
1321			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1322			return r;
1323		}
1324
1325		memset(hpd, 0, mec_hpd_size);
1326
1327		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1328		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1329	}
1330
1331	return 0;
1332}
1333
1334static const u32 vgpr_init_compute_shader[] =
1335{
1336	0x7e000209, 0x7e020208,
1337	0x7e040207, 0x7e060206,
1338	0x7e080205, 0x7e0a0204,
1339	0x7e0c0203, 0x7e0e0202,
1340	0x7e100201, 0x7e120200,
1341	0x7e140209, 0x7e160208,
1342	0x7e180207, 0x7e1a0206,
1343	0x7e1c0205, 0x7e1e0204,
1344	0x7e200203, 0x7e220202,
1345	0x7e240201, 0x7e260200,
1346	0x7e280209, 0x7e2a0208,
1347	0x7e2c0207, 0x7e2e0206,
1348	0x7e300205, 0x7e320204,
1349	0x7e340203, 0x7e360202,
1350	0x7e380201, 0x7e3a0200,
1351	0x7e3c0209, 0x7e3e0208,
1352	0x7e400207, 0x7e420206,
1353	0x7e440205, 0x7e460204,
1354	0x7e480203, 0x7e4a0202,
1355	0x7e4c0201, 0x7e4e0200,
1356	0x7e500209, 0x7e520208,
1357	0x7e540207, 0x7e560206,
1358	0x7e580205, 0x7e5a0204,
1359	0x7e5c0203, 0x7e5e0202,
1360	0x7e600201, 0x7e620200,
1361	0x7e640209, 0x7e660208,
1362	0x7e680207, 0x7e6a0206,
1363	0x7e6c0205, 0x7e6e0204,
1364	0x7e700203, 0x7e720202,
1365	0x7e740201, 0x7e760200,
1366	0x7e780209, 0x7e7a0208,
1367	0x7e7c0207, 0x7e7e0206,
1368	0xbf8a0000, 0xbf810000,
1369};
1370
1371static const u32 sgpr_init_compute_shader[] =
1372{
1373	0xbe8a0100, 0xbe8c0102,
1374	0xbe8e0104, 0xbe900106,
1375	0xbe920108, 0xbe940100,
1376	0xbe960102, 0xbe980104,
1377	0xbe9a0106, 0xbe9c0108,
1378	0xbe9e0100, 0xbea00102,
1379	0xbea20104, 0xbea40106,
1380	0xbea60108, 0xbea80100,
1381	0xbeaa0102, 0xbeac0104,
1382	0xbeae0106, 0xbeb00108,
1383	0xbeb20100, 0xbeb40102,
1384	0xbeb60104, 0xbeb80106,
1385	0xbeba0108, 0xbebc0100,
1386	0xbebe0102, 0xbec00104,
1387	0xbec20106, 0xbec40108,
1388	0xbec60100, 0xbec80102,
1389	0xbee60004, 0xbee70005,
1390	0xbeea0006, 0xbeeb0007,
1391	0xbee80008, 0xbee90009,
1392	0xbefc0000, 0xbf8a0000,
1393	0xbf810000, 0x00000000,
1394};
1395
1396static const u32 vgpr_init_regs[] =
1397{
1398	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1399	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1400	mmCOMPUTE_NUM_THREAD_X, 256*4,
1401	mmCOMPUTE_NUM_THREAD_Y, 1,
1402	mmCOMPUTE_NUM_THREAD_Z, 1,
1403	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1404	mmCOMPUTE_PGM_RSRC2, 20,
1405	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1406	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1407	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1408	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1409	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1410	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1411	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1412	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1413	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1414	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1415};
1416
1417static const u32 sgpr1_init_regs[] =
1418{
1419	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1420	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1421	mmCOMPUTE_NUM_THREAD_X, 256*5,
1422	mmCOMPUTE_NUM_THREAD_Y, 1,
1423	mmCOMPUTE_NUM_THREAD_Z, 1,
1424	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1425	mmCOMPUTE_PGM_RSRC2, 20,
1426	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1427	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1428	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1429	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1430	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1431	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1432	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1433	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1434	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1435	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1436};
1437
1438static const u32 sgpr2_init_regs[] =
1439{
1440	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1441	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1442	mmCOMPUTE_NUM_THREAD_X, 256*5,
1443	mmCOMPUTE_NUM_THREAD_Y, 1,
1444	mmCOMPUTE_NUM_THREAD_Z, 1,
1445	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1446	mmCOMPUTE_PGM_RSRC2, 20,
1447	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1448	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1449	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1450	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1451	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1452	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1453	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1454	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1455	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1456	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1457};
1458
1459static const u32 sec_ded_counter_registers[] =
1460{
1461	mmCPC_EDC_ATC_CNT,
1462	mmCPC_EDC_SCRATCH_CNT,
1463	mmCPC_EDC_UCODE_CNT,
1464	mmCPF_EDC_ATC_CNT,
1465	mmCPF_EDC_ROQ_CNT,
1466	mmCPF_EDC_TAG_CNT,
1467	mmCPG_EDC_ATC_CNT,
1468	mmCPG_EDC_DMA_CNT,
1469	mmCPG_EDC_TAG_CNT,
1470	mmDC_EDC_CSINVOC_CNT,
1471	mmDC_EDC_RESTORE_CNT,
1472	mmDC_EDC_STATE_CNT,
1473	mmGDS_EDC_CNT,
1474	mmGDS_EDC_GRBM_CNT,
1475	mmGDS_EDC_OA_DED,
1476	mmSPI_EDC_CNT,
1477	mmSQC_ATC_EDC_GATCL1_CNT,
1478	mmSQC_EDC_CNT,
1479	mmSQ_EDC_DED_CNT,
1480	mmSQ_EDC_INFO,
1481	mmSQ_EDC_SEC_CNT,
1482	mmTCC_EDC_CNT,
1483	mmTCP_ATC_EDC_GATCL1_CNT,
1484	mmTCP_EDC_CNT,
1485	mmTD_EDC_CNT
1486};
1487
1488static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1489{
1490	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1491	struct amdgpu_ib ib;
1492	struct dma_fence *f = NULL;
1493	int r, i;
1494	u32 tmp;
1495	unsigned total_size, vgpr_offset, sgpr_offset;
1496	u64 gpu_addr;
1497
1498	/* only supported on CZ */
1499	if (adev->asic_type != CHIP_CARRIZO)
1500		return 0;
1501
1502	/* bail if the compute ring is not ready */
1503	if (!ring->sched.ready)
1504		return 0;
1505
1506	tmp = RREG32(mmGB_EDC_MODE);
1507	WREG32(mmGB_EDC_MODE, 0);
1508
1509	total_size =
1510		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1511	total_size +=
1512		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1513	total_size +=
1514		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1515	total_size = ALIGN(total_size, 256);
1516	vgpr_offset = total_size;
1517	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1518	sgpr_offset = total_size;
1519	total_size += sizeof(sgpr_init_compute_shader);
1520
1521	/* allocate an indirect buffer to put the commands in */
1522	memset(&ib, 0, sizeof(ib));
1523	r = amdgpu_ib_get(adev, NULL, total_size,
1524					AMDGPU_IB_POOL_DIRECT, &ib);
1525	if (r) {
1526		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1527		return r;
1528	}
1529
1530	/* load the compute shaders */
1531	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1532		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1533
1534	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1535		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1536
1537	/* init the ib length to 0 */
1538	ib.length_dw = 0;
1539
1540	/* VGPR */
1541	/* write the register state for the compute dispatch */
1542	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1543		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1544		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1545		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1546	}
1547	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1548	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1549	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1550	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1551	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1552	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1553
1554	/* write dispatch packet */
1555	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1556	ib.ptr[ib.length_dw++] = 8; /* x */
1557	ib.ptr[ib.length_dw++] = 1; /* y */
1558	ib.ptr[ib.length_dw++] = 1; /* z */
1559	ib.ptr[ib.length_dw++] =
1560		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1561
1562	/* write CS partial flush packet */
1563	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1564	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1565
1566	/* SGPR1 */
1567	/* write the register state for the compute dispatch */
1568	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1569		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1570		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1571		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1572	}
1573	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1574	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1575	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1576	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1577	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1578	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1579
1580	/* write dispatch packet */
1581	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1582	ib.ptr[ib.length_dw++] = 8; /* x */
1583	ib.ptr[ib.length_dw++] = 1; /* y */
1584	ib.ptr[ib.length_dw++] = 1; /* z */
1585	ib.ptr[ib.length_dw++] =
1586		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1587
1588	/* write CS partial flush packet */
1589	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1590	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1591
1592	/* SGPR2 */
1593	/* write the register state for the compute dispatch */
1594	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1595		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1596		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1597		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1598	}
1599	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1600	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1601	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1602	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1603	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1604	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1605
1606	/* write dispatch packet */
1607	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1608	ib.ptr[ib.length_dw++] = 8; /* x */
1609	ib.ptr[ib.length_dw++] = 1; /* y */
1610	ib.ptr[ib.length_dw++] = 1; /* z */
1611	ib.ptr[ib.length_dw++] =
1612		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1613
1614	/* write CS partial flush packet */
1615	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1616	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1617
1618	/* shedule the ib on the ring */
1619	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1620	if (r) {
1621		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1622		goto fail;
1623	}
1624
1625	/* wait for the GPU to finish processing the IB */
1626	r = dma_fence_wait(f, false);
1627	if (r) {
1628		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1629		goto fail;
1630	}
1631
1632	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1633	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1634	WREG32(mmGB_EDC_MODE, tmp);
1635
1636	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1637	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1638	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1639
1640
1641	/* read back registers to clear the counters */
1642	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1643		RREG32(sec_ded_counter_registers[i]);
1644
1645fail:
1646	amdgpu_ib_free(adev, &ib, NULL);
1647	dma_fence_put(f);
1648
1649	return r;
1650}
1651
1652static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1653{
1654	u32 gb_addr_config;
1655	u32 mc_arb_ramcfg;
1656	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1657	u32 tmp;
1658	int ret;
1659
1660	switch (adev->asic_type) {
1661	case CHIP_TOPAZ:
1662		adev->gfx.config.max_shader_engines = 1;
1663		adev->gfx.config.max_tile_pipes = 2;
1664		adev->gfx.config.max_cu_per_sh = 6;
1665		adev->gfx.config.max_sh_per_se = 1;
1666		adev->gfx.config.max_backends_per_se = 2;
1667		adev->gfx.config.max_texture_channel_caches = 2;
1668		adev->gfx.config.max_gprs = 256;
1669		adev->gfx.config.max_gs_threads = 32;
1670		adev->gfx.config.max_hw_contexts = 8;
1671
1672		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1673		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1674		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1675		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1676		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1677		break;
1678	case CHIP_FIJI:
1679		adev->gfx.config.max_shader_engines = 4;
1680		adev->gfx.config.max_tile_pipes = 16;
1681		adev->gfx.config.max_cu_per_sh = 16;
1682		adev->gfx.config.max_sh_per_se = 1;
1683		adev->gfx.config.max_backends_per_se = 4;
1684		adev->gfx.config.max_texture_channel_caches = 16;
1685		adev->gfx.config.max_gprs = 256;
1686		adev->gfx.config.max_gs_threads = 32;
1687		adev->gfx.config.max_hw_contexts = 8;
1688
1689		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1690		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1691		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1692		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1693		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1694		break;
1695	case CHIP_POLARIS11:
1696	case CHIP_POLARIS12:
1697		ret = amdgpu_atombios_get_gfx_info(adev);
1698		if (ret)
1699			return ret;
1700		adev->gfx.config.max_gprs = 256;
1701		adev->gfx.config.max_gs_threads = 32;
1702		adev->gfx.config.max_hw_contexts = 8;
1703
1704		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1705		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1706		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1707		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1708		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1709		break;
1710	case CHIP_POLARIS10:
1711	case CHIP_VEGAM:
1712		ret = amdgpu_atombios_get_gfx_info(adev);
1713		if (ret)
1714			return ret;
1715		adev->gfx.config.max_gprs = 256;
1716		adev->gfx.config.max_gs_threads = 32;
1717		adev->gfx.config.max_hw_contexts = 8;
1718
1719		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1720		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1721		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1722		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1723		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1724		break;
1725	case CHIP_TONGA:
1726		adev->gfx.config.max_shader_engines = 4;
1727		adev->gfx.config.max_tile_pipes = 8;
1728		adev->gfx.config.max_cu_per_sh = 8;
1729		adev->gfx.config.max_sh_per_se = 1;
1730		adev->gfx.config.max_backends_per_se = 2;
1731		adev->gfx.config.max_texture_channel_caches = 8;
1732		adev->gfx.config.max_gprs = 256;
1733		adev->gfx.config.max_gs_threads = 32;
1734		adev->gfx.config.max_hw_contexts = 8;
1735
1736		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1737		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1738		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1739		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1740		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1741		break;
1742	case CHIP_CARRIZO:
1743		adev->gfx.config.max_shader_engines = 1;
1744		adev->gfx.config.max_tile_pipes = 2;
1745		adev->gfx.config.max_sh_per_se = 1;
1746		adev->gfx.config.max_backends_per_se = 2;
1747		adev->gfx.config.max_cu_per_sh = 8;
1748		adev->gfx.config.max_texture_channel_caches = 2;
1749		adev->gfx.config.max_gprs = 256;
1750		adev->gfx.config.max_gs_threads = 32;
1751		adev->gfx.config.max_hw_contexts = 8;
1752
1753		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1754		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1755		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1756		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1757		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1758		break;
1759	case CHIP_STONEY:
1760		adev->gfx.config.max_shader_engines = 1;
1761		adev->gfx.config.max_tile_pipes = 2;
1762		adev->gfx.config.max_sh_per_se = 1;
1763		adev->gfx.config.max_backends_per_se = 1;
1764		adev->gfx.config.max_cu_per_sh = 3;
1765		adev->gfx.config.max_texture_channel_caches = 2;
1766		adev->gfx.config.max_gprs = 256;
1767		adev->gfx.config.max_gs_threads = 16;
1768		adev->gfx.config.max_hw_contexts = 8;
1769
1770		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1771		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1772		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1773		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1774		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1775		break;
1776	default:
1777		adev->gfx.config.max_shader_engines = 2;
1778		adev->gfx.config.max_tile_pipes = 4;
1779		adev->gfx.config.max_cu_per_sh = 2;
1780		adev->gfx.config.max_sh_per_se = 1;
1781		adev->gfx.config.max_backends_per_se = 2;
1782		adev->gfx.config.max_texture_channel_caches = 4;
1783		adev->gfx.config.max_gprs = 256;
1784		adev->gfx.config.max_gs_threads = 32;
1785		adev->gfx.config.max_hw_contexts = 8;
1786
1787		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1788		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1789		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1790		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1791		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1792		break;
1793	}
1794
1795	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1796	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1797
1798	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1799				MC_ARB_RAMCFG, NOOFBANK);
1800	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1801				MC_ARB_RAMCFG, NOOFRANKS);
1802
1803	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1804	adev->gfx.config.mem_max_burst_length_bytes = 256;
1805	if (adev->flags & AMD_IS_APU) {
1806		/* Get memory bank mapping mode. */
1807		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1808		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1809		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1810
1811		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1812		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1813		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1814
1815		/* Validate settings in case only one DIMM installed. */
1816		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1817			dimm00_addr_map = 0;
1818		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1819			dimm01_addr_map = 0;
1820		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1821			dimm10_addr_map = 0;
1822		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1823			dimm11_addr_map = 0;
1824
1825		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1826		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1827		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1828			adev->gfx.config.mem_row_size_in_kb = 2;
1829		else
1830			adev->gfx.config.mem_row_size_in_kb = 1;
1831	} else {
1832		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1833		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1834		if (adev->gfx.config.mem_row_size_in_kb > 4)
1835			adev->gfx.config.mem_row_size_in_kb = 4;
1836	}
1837
1838	adev->gfx.config.shader_engine_tile_size = 32;
1839	adev->gfx.config.num_gpus = 1;
1840	adev->gfx.config.multi_gpu_tile_size = 64;
1841
1842	/* fix up row size */
1843	switch (adev->gfx.config.mem_row_size_in_kb) {
1844	case 1:
1845	default:
1846		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1847		break;
1848	case 2:
1849		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1850		break;
1851	case 4:
1852		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1853		break;
1854	}
1855	adev->gfx.config.gb_addr_config = gb_addr_config;
1856
1857	return 0;
1858}
1859
1860static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1861					int mec, int pipe, int queue)
1862{
1863	int r;
1864	unsigned irq_type;
1865	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1866	unsigned int hw_prio;
1867
1868	ring = &adev->gfx.compute_ring[ring_id];
1869
1870	/* mec0 is me1 */
1871	ring->me = mec + 1;
1872	ring->pipe = pipe;
1873	ring->queue = queue;
1874
1875	ring->ring_obj = NULL;
1876	ring->use_doorbell = true;
1877	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1878	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1879				+ (ring_id * GFX8_MEC_HPD_SIZE);
1880	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1881
1882	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1883		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1884		+ ring->pipe;
1885
1886	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1887			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1888	/* type-2 packets are deprecated on MEC, use type-3 instead */
1889	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1890			     hw_prio, NULL);
1891	if (r)
1892		return r;
1893
1894
1895	return 0;
1896}
1897
1898static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1899
1900static int gfx_v8_0_sw_init(void *handle)
1901{
1902	int i, j, k, r, ring_id;
1903	struct amdgpu_ring *ring;
1904	struct amdgpu_kiq *kiq;
1905	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1906
1907	switch (adev->asic_type) {
1908	case CHIP_TONGA:
1909	case CHIP_CARRIZO:
1910	case CHIP_FIJI:
1911	case CHIP_POLARIS10:
1912	case CHIP_POLARIS11:
1913	case CHIP_POLARIS12:
1914	case CHIP_VEGAM:
1915		adev->gfx.mec.num_mec = 2;
1916		break;
1917	case CHIP_TOPAZ:
1918	case CHIP_STONEY:
1919	default:
1920		adev->gfx.mec.num_mec = 1;
1921		break;
1922	}
1923
1924	adev->gfx.mec.num_pipe_per_mec = 4;
1925	adev->gfx.mec.num_queue_per_pipe = 8;
1926
1927	/* EOP Event */
1928	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1929	if (r)
1930		return r;
1931
1932	/* Privileged reg */
1933	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1934			      &adev->gfx.priv_reg_irq);
1935	if (r)
1936		return r;
1937
1938	/* Privileged inst */
1939	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1940			      &adev->gfx.priv_inst_irq);
1941	if (r)
1942		return r;
1943
1944	/* Add CP EDC/ECC irq  */
1945	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1946			      &adev->gfx.cp_ecc_error_irq);
1947	if (r)
1948		return r;
1949
1950	/* SQ interrupts. */
1951	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1952			      &adev->gfx.sq_irq);
1953	if (r) {
1954		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1955		return r;
1956	}
1957
1958	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1959
1960	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1961
1962	r = gfx_v8_0_init_microcode(adev);
1963	if (r) {
1964		DRM_ERROR("Failed to load gfx firmware!\n");
1965		return r;
1966	}
1967
1968	r = adev->gfx.rlc.funcs->init(adev);
1969	if (r) {
1970		DRM_ERROR("Failed to init rlc BOs!\n");
1971		return r;
1972	}
1973
1974	r = gfx_v8_0_mec_init(adev);
1975	if (r) {
1976		DRM_ERROR("Failed to init MEC BOs!\n");
1977		return r;
1978	}
1979
1980	/* set up the gfx ring */
1981	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1982		ring = &adev->gfx.gfx_ring[i];
1983		ring->ring_obj = NULL;
1984		sprintf(ring->name, "gfx");
1985		/* no gfx doorbells on iceland */
1986		if (adev->asic_type != CHIP_TOPAZ) {
1987			ring->use_doorbell = true;
1988			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
1989		}
1990
1991		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1992				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
1993				     AMDGPU_RING_PRIO_DEFAULT, NULL);
1994		if (r)
1995			return r;
1996	}
1997
1998
1999	/* set up the compute queues - allocate horizontally across pipes */
2000	ring_id = 0;
2001	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2002		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2003			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2004				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2005								     k, j))
2006					continue;
2007
2008				r = gfx_v8_0_compute_ring_init(adev,
2009								ring_id,
2010								i, k, j);
2011				if (r)
2012					return r;
2013
2014				ring_id++;
2015			}
2016		}
2017	}
2018
2019	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
2020	if (r) {
2021		DRM_ERROR("Failed to init KIQ BOs!\n");
2022		return r;
2023	}
2024
2025	kiq = &adev->gfx.kiq[0];
2026	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
2027	if (r)
2028		return r;
2029
2030	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2031	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
2032	if (r)
2033		return r;
2034
2035	adev->gfx.ce_ram_size = 0x8000;
2036
2037	r = gfx_v8_0_gpu_early_init(adev);
2038	if (r)
2039		return r;
2040
2041	return 0;
2042}
2043
2044static int gfx_v8_0_sw_fini(void *handle)
2045{
2046	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2047	int i;
2048
2049	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2050		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2051	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2052		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2053
2054	amdgpu_gfx_mqd_sw_fini(adev, 0);
2055	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2056	amdgpu_gfx_kiq_fini(adev, 0);
2057
2058	gfx_v8_0_mec_fini(adev);
2059	amdgpu_gfx_rlc_fini(adev);
2060	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2061				&adev->gfx.rlc.clear_state_gpu_addr,
2062				(void **)&adev->gfx.rlc.cs_ptr);
2063	if ((adev->asic_type == CHIP_CARRIZO) ||
2064	    (adev->asic_type == CHIP_STONEY)) {
2065		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2066				&adev->gfx.rlc.cp_table_gpu_addr,
2067				(void **)&adev->gfx.rlc.cp_table_ptr);
2068	}
2069	gfx_v8_0_free_microcode(adev);
2070
2071	return 0;
2072}
2073
2074static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2075{
2076	uint32_t *modearray, *mod2array;
2077	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2078	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2079	u32 reg_offset;
2080
2081	modearray = adev->gfx.config.tile_mode_array;
2082	mod2array = adev->gfx.config.macrotile_mode_array;
2083
2084	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2085		modearray[reg_offset] = 0;
2086
2087	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2088		mod2array[reg_offset] = 0;
2089
2090	switch (adev->asic_type) {
2091	case CHIP_TOPAZ:
2092		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2093				PIPE_CONFIG(ADDR_SURF_P2) |
2094				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2095				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2096		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097				PIPE_CONFIG(ADDR_SURF_P2) |
2098				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2099				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2100		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2101				PIPE_CONFIG(ADDR_SURF_P2) |
2102				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2103				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2104		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105				PIPE_CONFIG(ADDR_SURF_P2) |
2106				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2107				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2108		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2109				PIPE_CONFIG(ADDR_SURF_P2) |
2110				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2111				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2112		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2113				PIPE_CONFIG(ADDR_SURF_P2) |
2114				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2115				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2116		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2117				PIPE_CONFIG(ADDR_SURF_P2) |
2118				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2119				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2120		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2121				PIPE_CONFIG(ADDR_SURF_P2));
2122		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2123				PIPE_CONFIG(ADDR_SURF_P2) |
2124				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2125				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2126		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127				 PIPE_CONFIG(ADDR_SURF_P2) |
2128				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2129				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2130		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2131				 PIPE_CONFIG(ADDR_SURF_P2) |
2132				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2133				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2134		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2135				 PIPE_CONFIG(ADDR_SURF_P2) |
2136				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2137				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139				 PIPE_CONFIG(ADDR_SURF_P2) |
2140				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2141				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2143				 PIPE_CONFIG(ADDR_SURF_P2) |
2144				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2145				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2146		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2147				 PIPE_CONFIG(ADDR_SURF_P2) |
2148				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2149				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2150		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2151				 PIPE_CONFIG(ADDR_SURF_P2) |
2152				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2153				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2154		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2155				 PIPE_CONFIG(ADDR_SURF_P2) |
2156				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2157				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2158		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2159				 PIPE_CONFIG(ADDR_SURF_P2) |
2160				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2161				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2162		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2163				 PIPE_CONFIG(ADDR_SURF_P2) |
2164				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2165				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2166		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2167				 PIPE_CONFIG(ADDR_SURF_P2) |
2168				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2169				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2170		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2171				 PIPE_CONFIG(ADDR_SURF_P2) |
2172				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2173				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2174		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2175				 PIPE_CONFIG(ADDR_SURF_P2) |
2176				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2177				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2178		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2179				 PIPE_CONFIG(ADDR_SURF_P2) |
2180				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2181				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2182		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2183				 PIPE_CONFIG(ADDR_SURF_P2) |
2184				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2185				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2187				 PIPE_CONFIG(ADDR_SURF_P2) |
2188				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2189				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191				 PIPE_CONFIG(ADDR_SURF_P2) |
2192				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2193				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194
2195		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2196				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2197				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198				NUM_BANKS(ADDR_SURF_8_BANK));
2199		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2200				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2201				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2202				NUM_BANKS(ADDR_SURF_8_BANK));
2203		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2204				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2205				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2206				NUM_BANKS(ADDR_SURF_8_BANK));
2207		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2209				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2210				NUM_BANKS(ADDR_SURF_8_BANK));
2211		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2212				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2213				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2214				NUM_BANKS(ADDR_SURF_8_BANK));
2215		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2216				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2217				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2218				NUM_BANKS(ADDR_SURF_8_BANK));
2219		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2220				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2221				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2222				NUM_BANKS(ADDR_SURF_8_BANK));
2223		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2224				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2225				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2226				NUM_BANKS(ADDR_SURF_16_BANK));
2227		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2228				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2229				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2230				NUM_BANKS(ADDR_SURF_16_BANK));
2231		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2232				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234				 NUM_BANKS(ADDR_SURF_16_BANK));
2235		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2236				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2237				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2238				 NUM_BANKS(ADDR_SURF_16_BANK));
2239		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2241				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2242				 NUM_BANKS(ADDR_SURF_16_BANK));
2243		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2246				 NUM_BANKS(ADDR_SURF_16_BANK));
2247		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2249				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250				 NUM_BANKS(ADDR_SURF_8_BANK));
2251
2252		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2253			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2254			    reg_offset != 23)
2255				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2256
2257		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2258			if (reg_offset != 7)
2259				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2260
2261		break;
2262	case CHIP_FIJI:
2263	case CHIP_VEGAM:
2264		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2266				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2267				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2270				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2271				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2274				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2275				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2278				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2279				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2281				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2282				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2283				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2284		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2285				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2286				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2287				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2288		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2289				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2290				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2291				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2292		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2293				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2294				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2295				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2296		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2297				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2298		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2301				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2302		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2305				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2309				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2310		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2311				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2312				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2313				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2314		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2315				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2317				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2321				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2323				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2325				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2329				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2332				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2333				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2334		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2335				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2337				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2339				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2343				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2345				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2346		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2347				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2349				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2350		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2351				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2353				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2354		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2355				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2356				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2357				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2358		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2359				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2361				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2362		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2363				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2365				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2366		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2367				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2369				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2370		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2371				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2373				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2377				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2381				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2382		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2383				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2384				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2385				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2386
2387		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390				NUM_BANKS(ADDR_SURF_8_BANK));
2391		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2394				NUM_BANKS(ADDR_SURF_8_BANK));
2395		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2397				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2398				NUM_BANKS(ADDR_SURF_8_BANK));
2399		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2401				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2402				NUM_BANKS(ADDR_SURF_8_BANK));
2403		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2405				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2406				NUM_BANKS(ADDR_SURF_8_BANK));
2407		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2410				NUM_BANKS(ADDR_SURF_8_BANK));
2411		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2413				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2414				NUM_BANKS(ADDR_SURF_8_BANK));
2415		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2417				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2418				NUM_BANKS(ADDR_SURF_8_BANK));
2419		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2421				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2422				NUM_BANKS(ADDR_SURF_8_BANK));
2423		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2425				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2426				 NUM_BANKS(ADDR_SURF_8_BANK));
2427		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2429				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2430				 NUM_BANKS(ADDR_SURF_8_BANK));
2431		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2433				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434				 NUM_BANKS(ADDR_SURF_8_BANK));
2435		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2438				 NUM_BANKS(ADDR_SURF_8_BANK));
2439		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2440				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2441				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2442				 NUM_BANKS(ADDR_SURF_4_BANK));
2443
2444		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2445			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2446
2447		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2448			if (reg_offset != 7)
2449				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2450
2451		break;
2452	case CHIP_TONGA:
2453		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2455				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2456				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2457		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2459				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2460				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2461		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2463				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2464				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2465		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2467				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2468				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2469		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2471				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2472				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2473		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2475				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2476				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2477		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2478				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2479				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2480				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2481		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2482				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2483				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2484				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2485		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2486				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2487		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2488				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2490				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2498				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2499		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2500				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2501				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2502				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2503		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2504				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2506				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2510				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2511		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2512				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2514				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2515		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2516				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2518				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2519		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2521				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2523		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2524				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2526				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2527		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2528				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2530				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2531		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2532				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2534				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2535		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2536				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2538				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2539		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2540				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2542				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2543		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2544				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2545				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2546				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2547		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2548				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2550				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2551		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2552				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2554				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2555		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2556				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2558				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2559		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2560				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2562				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2566				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2568				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2570				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2571		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2572				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2573				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2574				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2575
2576		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2579				NUM_BANKS(ADDR_SURF_16_BANK));
2580		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2582				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2583				NUM_BANKS(ADDR_SURF_16_BANK));
2584		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2586				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2587				NUM_BANKS(ADDR_SURF_16_BANK));
2588		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2591				NUM_BANKS(ADDR_SURF_16_BANK));
2592		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2595				NUM_BANKS(ADDR_SURF_16_BANK));
2596		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2599				NUM_BANKS(ADDR_SURF_16_BANK));
2600		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2603				NUM_BANKS(ADDR_SURF_16_BANK));
2604		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2606				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2607				NUM_BANKS(ADDR_SURF_16_BANK));
2608		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2610				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2611				NUM_BANKS(ADDR_SURF_16_BANK));
2612		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2614				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2615				 NUM_BANKS(ADDR_SURF_16_BANK));
2616		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2619				 NUM_BANKS(ADDR_SURF_16_BANK));
2620		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2622				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2623				 NUM_BANKS(ADDR_SURF_8_BANK));
2624		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2626				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2627				 NUM_BANKS(ADDR_SURF_4_BANK));
2628		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2631				 NUM_BANKS(ADDR_SURF_4_BANK));
2632
2633		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2634			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2635
2636		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2637			if (reg_offset != 7)
2638				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2639
2640		break;
2641	case CHIP_POLARIS11:
2642	case CHIP_POLARIS12:
2643		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2646				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2650				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2654				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2655		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2658				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2659		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2661				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2662				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2663		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2664				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2665				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2666				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2669				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2670				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2671		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2672				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2674				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2675		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2680				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2684				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2685		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2688				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2690				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2692				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2693		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2694				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2696				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2702				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2704				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2708				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2709		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2712				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2713		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2714				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2716				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2718				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2722				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2724				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2726				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2728				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2729		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2730				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2732				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2733		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2734				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2736				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2737		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2738				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2740				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2741		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2742				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2744				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2745		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2746				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2748				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2749		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2750				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2752				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2753		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2756				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2757		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2758				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2760				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2761		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2762				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2764				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2765
2766		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2768				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2769				NUM_BANKS(ADDR_SURF_16_BANK));
2770
2771		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2773				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2774				NUM_BANKS(ADDR_SURF_16_BANK));
2775
2776		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2778				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2779				NUM_BANKS(ADDR_SURF_16_BANK));
2780
2781		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2783				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2784				NUM_BANKS(ADDR_SURF_16_BANK));
2785
2786		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789				NUM_BANKS(ADDR_SURF_16_BANK));
2790
2791		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2793				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2794				NUM_BANKS(ADDR_SURF_16_BANK));
2795
2796		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2798				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2799				NUM_BANKS(ADDR_SURF_16_BANK));
2800
2801		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2802				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2803				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2804				NUM_BANKS(ADDR_SURF_16_BANK));
2805
2806		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2807				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2808				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2809				NUM_BANKS(ADDR_SURF_16_BANK));
2810
2811		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2812				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2813				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2814				NUM_BANKS(ADDR_SURF_16_BANK));
2815
2816		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2818				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819				NUM_BANKS(ADDR_SURF_16_BANK));
2820
2821		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2824				NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2828				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2829				NUM_BANKS(ADDR_SURF_8_BANK));
2830
2831		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2833				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2834				NUM_BANKS(ADDR_SURF_4_BANK));
2835
2836		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2837			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2838
2839		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2840			if (reg_offset != 7)
2841				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2842
2843		break;
2844	case CHIP_POLARIS10:
2845		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2847				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2848				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2849		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2851				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2852				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2853		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2854				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2855				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2856				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2857		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2859				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2860				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2861		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2863				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2864				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2865		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2866				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2867				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2868				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2869		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2870				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2871				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2872				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2873		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2874				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2875				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2876				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2877		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2878				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2879		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2880				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2882				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2883		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2886				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2890				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2891		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2892				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2894				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2895		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2898				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2899		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2903		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2904				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2906				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2907		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2910				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2911		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2913				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2915		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2916				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2918				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2919		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2920				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2922				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2923		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2924				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2926				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2927		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2928				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2930				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2931		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2932				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2934				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2935		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2936				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2938				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2939		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2940				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2943		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2944				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2946				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2947		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2948				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2950				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2951		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2954				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2958				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2960				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2962				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2963		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2965				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2966				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2967
2968		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2970				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971				NUM_BANKS(ADDR_SURF_16_BANK));
2972
2973		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2975				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2976				NUM_BANKS(ADDR_SURF_16_BANK));
2977
2978		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2980				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2981				NUM_BANKS(ADDR_SURF_16_BANK));
2982
2983		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986				NUM_BANKS(ADDR_SURF_16_BANK));
2987
2988		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2989				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2990				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2991				NUM_BANKS(ADDR_SURF_16_BANK));
2992
2993		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2995				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2996				NUM_BANKS(ADDR_SURF_16_BANK));
2997
2998		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2999				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3000				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3001				NUM_BANKS(ADDR_SURF_16_BANK));
3002
3003		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006				NUM_BANKS(ADDR_SURF_16_BANK));
3007
3008		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3010				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011				NUM_BANKS(ADDR_SURF_16_BANK));
3012
3013		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3014				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3015				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3016				NUM_BANKS(ADDR_SURF_16_BANK));
3017
3018		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3020				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3021				NUM_BANKS(ADDR_SURF_16_BANK));
3022
3023		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3026				NUM_BANKS(ADDR_SURF_8_BANK));
3027
3028		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3030				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3031				NUM_BANKS(ADDR_SURF_4_BANK));
3032
3033		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3036				NUM_BANKS(ADDR_SURF_4_BANK));
3037
3038		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3039			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3040
3041		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3042			if (reg_offset != 7)
3043				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3044
3045		break;
3046	case CHIP_STONEY:
3047		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3048				PIPE_CONFIG(ADDR_SURF_P2) |
3049				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3050				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3051		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052				PIPE_CONFIG(ADDR_SURF_P2) |
3053				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3054				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3055		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3056				PIPE_CONFIG(ADDR_SURF_P2) |
3057				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3058				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3059		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3060				PIPE_CONFIG(ADDR_SURF_P2) |
3061				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3062				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3063		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3064				PIPE_CONFIG(ADDR_SURF_P2) |
3065				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3066				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3067		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3068				PIPE_CONFIG(ADDR_SURF_P2) |
3069				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3070				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3071		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3072				PIPE_CONFIG(ADDR_SURF_P2) |
3073				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3074				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3075		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3076				PIPE_CONFIG(ADDR_SURF_P2));
3077		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3078				PIPE_CONFIG(ADDR_SURF_P2) |
3079				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3080				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082				 PIPE_CONFIG(ADDR_SURF_P2) |
3083				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3084				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3085		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3086				 PIPE_CONFIG(ADDR_SURF_P2) |
3087				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3088				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3089		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3090				 PIPE_CONFIG(ADDR_SURF_P2) |
3091				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3093		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094				 PIPE_CONFIG(ADDR_SURF_P2) |
3095				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3096				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3097		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3098				 PIPE_CONFIG(ADDR_SURF_P2) |
3099				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3100				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3101		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3102				 PIPE_CONFIG(ADDR_SURF_P2) |
3103				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3104				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3105		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3106				 PIPE_CONFIG(ADDR_SURF_P2) |
3107				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3108				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3110				 PIPE_CONFIG(ADDR_SURF_P2) |
3111				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3114				 PIPE_CONFIG(ADDR_SURF_P2) |
3115				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3118				 PIPE_CONFIG(ADDR_SURF_P2) |
3119				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3120				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3121		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3122				 PIPE_CONFIG(ADDR_SURF_P2) |
3123				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3124				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3125		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3126				 PIPE_CONFIG(ADDR_SURF_P2) |
3127				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3128				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3129		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3130				 PIPE_CONFIG(ADDR_SURF_P2) |
3131				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3132				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3133		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3134				 PIPE_CONFIG(ADDR_SURF_P2) |
3135				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3136				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3137		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3138				 PIPE_CONFIG(ADDR_SURF_P2) |
3139				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3140				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142				 PIPE_CONFIG(ADDR_SURF_P2) |
3143				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3144				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3145		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3146				 PIPE_CONFIG(ADDR_SURF_P2) |
3147				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3148				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3149
3150		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3152				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3153				NUM_BANKS(ADDR_SURF_8_BANK));
3154		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3156				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3157				NUM_BANKS(ADDR_SURF_8_BANK));
3158		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3160				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3161				NUM_BANKS(ADDR_SURF_8_BANK));
3162		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3164				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3165				NUM_BANKS(ADDR_SURF_8_BANK));
3166		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3167				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3168				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3169				NUM_BANKS(ADDR_SURF_8_BANK));
3170		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173				NUM_BANKS(ADDR_SURF_8_BANK));
3174		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3175				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3176				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3177				NUM_BANKS(ADDR_SURF_8_BANK));
3178		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3179				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3180				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3181				NUM_BANKS(ADDR_SURF_16_BANK));
3182		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3183				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3184				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185				NUM_BANKS(ADDR_SURF_16_BANK));
3186		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3187				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3188				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3189				 NUM_BANKS(ADDR_SURF_16_BANK));
3190		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3191				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3192				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3193				 NUM_BANKS(ADDR_SURF_16_BANK));
3194		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3196				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3197				 NUM_BANKS(ADDR_SURF_16_BANK));
3198		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3199				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3200				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3201				 NUM_BANKS(ADDR_SURF_16_BANK));
3202		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3203				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3204				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3205				 NUM_BANKS(ADDR_SURF_8_BANK));
3206
3207		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3208			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3209			    reg_offset != 23)
3210				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3211
3212		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3213			if (reg_offset != 7)
3214				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3215
3216		break;
3217	default:
3218		dev_warn(adev->dev,
3219			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3220			 adev->asic_type);
3221		fallthrough;
3222
3223	case CHIP_CARRIZO:
3224		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225				PIPE_CONFIG(ADDR_SURF_P2) |
3226				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3227				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229				PIPE_CONFIG(ADDR_SURF_P2) |
3230				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3231				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3233				PIPE_CONFIG(ADDR_SURF_P2) |
3234				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3235				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3237				PIPE_CONFIG(ADDR_SURF_P2) |
3238				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3239				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3241				PIPE_CONFIG(ADDR_SURF_P2) |
3242				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3243				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3244		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3245				PIPE_CONFIG(ADDR_SURF_P2) |
3246				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3247				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3248		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3249				PIPE_CONFIG(ADDR_SURF_P2) |
3250				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3251				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3252		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3253				PIPE_CONFIG(ADDR_SURF_P2));
3254		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3255				PIPE_CONFIG(ADDR_SURF_P2) |
3256				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3257				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259				 PIPE_CONFIG(ADDR_SURF_P2) |
3260				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3261				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3262		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3263				 PIPE_CONFIG(ADDR_SURF_P2) |
3264				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3265				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3266		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3267				 PIPE_CONFIG(ADDR_SURF_P2) |
3268				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271				 PIPE_CONFIG(ADDR_SURF_P2) |
3272				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3275				 PIPE_CONFIG(ADDR_SURF_P2) |
3276				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3277				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3278		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3279				 PIPE_CONFIG(ADDR_SURF_P2) |
3280				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3281				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3282		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3283				 PIPE_CONFIG(ADDR_SURF_P2) |
3284				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3285				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3287				 PIPE_CONFIG(ADDR_SURF_P2) |
3288				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3291				 PIPE_CONFIG(ADDR_SURF_P2) |
3292				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3293				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3295				 PIPE_CONFIG(ADDR_SURF_P2) |
3296				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3299				 PIPE_CONFIG(ADDR_SURF_P2) |
3300				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3301				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3303				 PIPE_CONFIG(ADDR_SURF_P2) |
3304				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3305				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3306		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3307				 PIPE_CONFIG(ADDR_SURF_P2) |
3308				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3309				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3310		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3311				 PIPE_CONFIG(ADDR_SURF_P2) |
3312				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3313				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3314		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3315				 PIPE_CONFIG(ADDR_SURF_P2) |
3316				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3317				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3318		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3319				 PIPE_CONFIG(ADDR_SURF_P2) |
3320				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3321				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3322		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3323				 PIPE_CONFIG(ADDR_SURF_P2) |
3324				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3325				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3326
3327		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3329				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330				NUM_BANKS(ADDR_SURF_8_BANK));
3331		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3333				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3334				NUM_BANKS(ADDR_SURF_8_BANK));
3335		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338				NUM_BANKS(ADDR_SURF_8_BANK));
3339		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342				NUM_BANKS(ADDR_SURF_8_BANK));
3343		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3344				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3345				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3346				NUM_BANKS(ADDR_SURF_8_BANK));
3347		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3348				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3349				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3350				NUM_BANKS(ADDR_SURF_8_BANK));
3351		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3352				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3353				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3354				NUM_BANKS(ADDR_SURF_8_BANK));
3355		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3356				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3357				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358				NUM_BANKS(ADDR_SURF_16_BANK));
3359		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3360				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3361				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362				NUM_BANKS(ADDR_SURF_16_BANK));
3363		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3364				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3365				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366				 NUM_BANKS(ADDR_SURF_16_BANK));
3367		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3368				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3369				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370				 NUM_BANKS(ADDR_SURF_16_BANK));
3371		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3373				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374				 NUM_BANKS(ADDR_SURF_16_BANK));
3375		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3378				 NUM_BANKS(ADDR_SURF_16_BANK));
3379		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3381				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3382				 NUM_BANKS(ADDR_SURF_8_BANK));
3383
3384		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3385			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3386			    reg_offset != 23)
3387				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3388
3389		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3390			if (reg_offset != 7)
3391				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3392
3393		break;
3394	}
3395}
3396
3397static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3398				  u32 se_num, u32 sh_num, u32 instance,
3399				  int xcc_id)
3400{
3401	u32 data;
3402
3403	if (instance == 0xffffffff)
3404		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3405	else
3406		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3407
3408	if (se_num == 0xffffffff)
3409		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3410	else
3411		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3412
3413	if (sh_num == 0xffffffff)
3414		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3415	else
3416		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3417
3418	WREG32(mmGRBM_GFX_INDEX, data);
3419}
3420
3421static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3422				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
3423{
3424	vi_srbm_select(adev, me, pipe, q, vm);
3425}
3426
3427static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3428{
3429	u32 data, mask;
3430
3431	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3432		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3433
3434	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3435
3436	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3437					 adev->gfx.config.max_sh_per_se);
3438
3439	return (~data) & mask;
3440}
3441
3442static void
3443gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3444{
3445	switch (adev->asic_type) {
3446	case CHIP_FIJI:
3447	case CHIP_VEGAM:
3448		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3449			  RB_XSEL2(1) | PKR_MAP(2) |
3450			  PKR_XSEL(1) | PKR_YSEL(1) |
3451			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3452		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3453			   SE_PAIR_YSEL(2);
3454		break;
3455	case CHIP_TONGA:
3456	case CHIP_POLARIS10:
3457		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3458			  SE_XSEL(1) | SE_YSEL(1);
3459		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3460			   SE_PAIR_YSEL(2);
3461		break;
3462	case CHIP_TOPAZ:
3463	case CHIP_CARRIZO:
3464		*rconf |= RB_MAP_PKR0(2);
3465		*rconf1 |= 0x0;
3466		break;
3467	case CHIP_POLARIS11:
3468	case CHIP_POLARIS12:
3469		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3470			  SE_XSEL(1) | SE_YSEL(1);
3471		*rconf1 |= 0x0;
3472		break;
3473	case CHIP_STONEY:
3474		*rconf |= 0x0;
3475		*rconf1 |= 0x0;
3476		break;
3477	default:
3478		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3479		break;
3480	}
3481}
3482
3483static void
3484gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3485					u32 raster_config, u32 raster_config_1,
3486					unsigned rb_mask, unsigned num_rb)
3487{
3488	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3489	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3490	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3491	unsigned rb_per_se = num_rb / num_se;
3492	unsigned se_mask[4];
3493	unsigned se;
3494
3495	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3496	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3497	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3498	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3499
3500	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3501	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3502	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3503
3504	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3505			     (!se_mask[2] && !se_mask[3]))) {
3506		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3507
3508		if (!se_mask[0] && !se_mask[1]) {
3509			raster_config_1 |=
3510				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3511		} else {
3512			raster_config_1 |=
3513				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3514		}
3515	}
3516
3517	for (se = 0; se < num_se; se++) {
3518		unsigned raster_config_se = raster_config;
3519		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3520		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3521		int idx = (se / 2) * 2;
3522
3523		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3524			raster_config_se &= ~SE_MAP_MASK;
3525
3526			if (!se_mask[idx]) {
3527				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3528			} else {
3529				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3530			}
3531		}
3532
3533		pkr0_mask &= rb_mask;
3534		pkr1_mask &= rb_mask;
3535		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3536			raster_config_se &= ~PKR_MAP_MASK;
3537
3538			if (!pkr0_mask) {
3539				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3540			} else {
3541				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3542			}
3543		}
3544
3545		if (rb_per_se >= 2) {
3546			unsigned rb0_mask = 1 << (se * rb_per_se);
3547			unsigned rb1_mask = rb0_mask << 1;
3548
3549			rb0_mask &= rb_mask;
3550			rb1_mask &= rb_mask;
3551			if (!rb0_mask || !rb1_mask) {
3552				raster_config_se &= ~RB_MAP_PKR0_MASK;
3553
3554				if (!rb0_mask) {
3555					raster_config_se |=
3556						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3557				} else {
3558					raster_config_se |=
3559						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3560				}
3561			}
3562
3563			if (rb_per_se > 2) {
3564				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3565				rb1_mask = rb0_mask << 1;
3566				rb0_mask &= rb_mask;
3567				rb1_mask &= rb_mask;
3568				if (!rb0_mask || !rb1_mask) {
3569					raster_config_se &= ~RB_MAP_PKR1_MASK;
3570
3571					if (!rb0_mask) {
3572						raster_config_se |=
3573							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3574					} else {
3575						raster_config_se |=
3576							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3577					}
3578				}
3579			}
3580		}
3581
3582		/* GRBM_GFX_INDEX has a different offset on VI */
3583		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
3584		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3585		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3586	}
3587
3588	/* GRBM_GFX_INDEX has a different offset on VI */
3589	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3590}
3591
3592static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3593{
3594	int i, j;
3595	u32 data;
3596	u32 raster_config = 0, raster_config_1 = 0;
3597	u32 active_rbs = 0;
3598	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3599					adev->gfx.config.max_sh_per_se;
3600	unsigned num_rb_pipes;
3601
3602	mutex_lock(&adev->grbm_idx_mutex);
3603	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3604		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3605			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3606			data = gfx_v8_0_get_rb_active_bitmap(adev);
3607			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3608					       rb_bitmap_width_per_sh);
3609		}
3610	}
3611	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3612
3613	adev->gfx.config.backend_enable_mask = active_rbs;
3614	adev->gfx.config.num_rbs = hweight32(active_rbs);
3615
3616	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3617			     adev->gfx.config.max_shader_engines, 16);
3618
3619	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3620
3621	if (!adev->gfx.config.backend_enable_mask ||
3622			adev->gfx.config.num_rbs >= num_rb_pipes) {
3623		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3624		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3625	} else {
3626		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3627							adev->gfx.config.backend_enable_mask,
3628							num_rb_pipes);
3629	}
3630
3631	/* cache the values for userspace */
3632	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3633		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3634			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3635			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3636				RREG32(mmCC_RB_BACKEND_DISABLE);
3637			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3638				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3639			adev->gfx.config.rb_config[i][j].raster_config =
3640				RREG32(mmPA_SC_RASTER_CONFIG);
3641			adev->gfx.config.rb_config[i][j].raster_config_1 =
3642				RREG32(mmPA_SC_RASTER_CONFIG_1);
3643		}
3644	}
3645	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3646	mutex_unlock(&adev->grbm_idx_mutex);
3647}
3648
3649#define DEFAULT_SH_MEM_BASES	(0x6000)
3650/**
3651 * gfx_v8_0_init_compute_vmid - gart enable
3652 *
3653 * @adev: amdgpu_device pointer
3654 *
3655 * Initialize compute vmid sh_mem registers
3656 *
3657 */
3658static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3659{
3660	int i;
3661	uint32_t sh_mem_config;
3662	uint32_t sh_mem_bases;
3663
3664	/*
3665	 * Configure apertures:
3666	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3667	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3668	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3669	 */
3670	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3671
3672	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3673			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3674			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3675			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3676			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3677			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3678
3679	mutex_lock(&adev->srbm_mutex);
3680	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3681		vi_srbm_select(adev, 0, 0, 0, i);
3682		/* CP and shaders */
3683		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3684		WREG32(mmSH_MEM_APE1_BASE, 1);
3685		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3686		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3687	}
3688	vi_srbm_select(adev, 0, 0, 0, 0);
3689	mutex_unlock(&adev->srbm_mutex);
3690
3691	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3692	   access. These should be enabled by FW for target VMIDs. */
3693	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3694		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3695		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3696		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3697		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3698	}
3699}
3700
3701static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3702{
3703	int vmid;
3704
3705	/*
3706	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3707	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3708	 * the driver can enable them for graphics. VMID0 should maintain
3709	 * access so that HWS firmware can save/restore entries.
3710	 */
3711	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3712		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3713		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3714		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3715		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3716	}
3717}
3718
3719static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3720{
3721	switch (adev->asic_type) {
3722	default:
3723		adev->gfx.config.double_offchip_lds_buf = 1;
3724		break;
3725	case CHIP_CARRIZO:
3726	case CHIP_STONEY:
3727		adev->gfx.config.double_offchip_lds_buf = 0;
3728		break;
3729	}
3730}
3731
3732static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3733{
3734	u32 tmp, sh_static_mem_cfg;
3735	int i;
3736
3737	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3738	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3739	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3740	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3741
3742	gfx_v8_0_tiling_mode_table_init(adev);
3743	gfx_v8_0_setup_rb(adev);
3744	gfx_v8_0_get_cu_info(adev);
3745	gfx_v8_0_config_init(adev);
3746
3747	/* XXX SH_MEM regs */
3748	/* where to put LDS, scratch, GPUVM in FSA64 space */
3749	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3750				   SWIZZLE_ENABLE, 1);
3751	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3752				   ELEMENT_SIZE, 1);
3753	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3754				   INDEX_STRIDE, 3);
3755	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3756
3757	mutex_lock(&adev->srbm_mutex);
3758	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3759		vi_srbm_select(adev, 0, 0, 0, i);
3760		/* CP and shaders */
3761		if (i == 0) {
3762			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3763			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3764			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3765					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3766			WREG32(mmSH_MEM_CONFIG, tmp);
3767			WREG32(mmSH_MEM_BASES, 0);
3768		} else {
3769			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3770			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3771			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3772					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3773			WREG32(mmSH_MEM_CONFIG, tmp);
3774			tmp = adev->gmc.shared_aperture_start >> 48;
3775			WREG32(mmSH_MEM_BASES, tmp);
3776		}
3777
3778		WREG32(mmSH_MEM_APE1_BASE, 1);
3779		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3780	}
3781	vi_srbm_select(adev, 0, 0, 0, 0);
3782	mutex_unlock(&adev->srbm_mutex);
3783
3784	gfx_v8_0_init_compute_vmid(adev);
3785	gfx_v8_0_init_gds_vmid(adev);
3786
3787	mutex_lock(&adev->grbm_idx_mutex);
3788	/*
3789	 * making sure that the following register writes will be broadcasted
3790	 * to all the shaders
3791	 */
3792	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3793
3794	WREG32(mmPA_SC_FIFO_SIZE,
3795		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3796			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3797		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3798			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3799		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3800			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3801		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3802			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3803
3804	tmp = RREG32(mmSPI_ARB_PRIORITY);
3805	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3806	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3807	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3808	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3809	WREG32(mmSPI_ARB_PRIORITY, tmp);
3810
3811	mutex_unlock(&adev->grbm_idx_mutex);
3812
3813}
3814
3815static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3816{
3817	u32 i, j, k;
3818	u32 mask;
3819
3820	mutex_lock(&adev->grbm_idx_mutex);
3821	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3822		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3823			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3824			for (k = 0; k < adev->usec_timeout; k++) {
3825				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3826					break;
3827				udelay(1);
3828			}
3829			if (k == adev->usec_timeout) {
3830				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3831						      0xffffffff, 0xffffffff, 0);
3832				mutex_unlock(&adev->grbm_idx_mutex);
3833				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3834					 i, j);
3835				return;
3836			}
3837		}
3838	}
3839	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3840	mutex_unlock(&adev->grbm_idx_mutex);
3841
3842	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3843		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3844		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3845		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3846	for (k = 0; k < adev->usec_timeout; k++) {
3847		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3848			break;
3849		udelay(1);
3850	}
3851}
3852
3853static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3854					       bool enable)
3855{
3856	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3857
3858	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3859	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3860	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3861	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3862
3863	WREG32(mmCP_INT_CNTL_RING0, tmp);
3864}
3865
3866static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3867{
3868	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3869	/* csib */
3870	WREG32(mmRLC_CSIB_ADDR_HI,
3871			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3872	WREG32(mmRLC_CSIB_ADDR_LO,
3873			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3874	WREG32(mmRLC_CSIB_LENGTH,
3875			adev->gfx.rlc.clear_state_size);
3876}
3877
3878static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3879				int ind_offset,
3880				int list_size,
3881				int *unique_indices,
3882				int *indices_count,
3883				int max_indices,
3884				int *ind_start_offsets,
3885				int *offset_count,
3886				int max_offset)
3887{
3888	int indices;
3889	bool new_entry = true;
3890
3891	for (; ind_offset < list_size; ind_offset++) {
3892
3893		if (new_entry) {
3894			new_entry = false;
3895			ind_start_offsets[*offset_count] = ind_offset;
3896			*offset_count = *offset_count + 1;
3897			BUG_ON(*offset_count >= max_offset);
3898		}
3899
3900		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3901			new_entry = true;
3902			continue;
3903		}
3904
3905		ind_offset += 2;
3906
3907		/* look for the matching indice */
3908		for (indices = 0;
3909			indices < *indices_count;
3910			indices++) {
3911			if (unique_indices[indices] ==
3912				register_list_format[ind_offset])
3913				break;
3914		}
3915
3916		if (indices >= *indices_count) {
3917			unique_indices[*indices_count] =
3918				register_list_format[ind_offset];
3919			indices = *indices_count;
3920			*indices_count = *indices_count + 1;
3921			BUG_ON(*indices_count >= max_indices);
3922		}
3923
3924		register_list_format[ind_offset] = indices;
3925	}
3926}
3927
3928static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3929{
3930	int i, temp, data;
3931	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3932	int indices_count = 0;
3933	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3934	int offset_count = 0;
3935
3936	int list_size;
3937	unsigned int *register_list_format =
3938		kmemdup(adev->gfx.rlc.register_list_format,
3939			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3940	if (!register_list_format)
3941		return -ENOMEM;
3942
3943	gfx_v8_0_parse_ind_reg_list(register_list_format,
3944				RLC_FormatDirectRegListLength,
3945				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3946				unique_indices,
3947				&indices_count,
3948				ARRAY_SIZE(unique_indices),
3949				indirect_start_offsets,
3950				&offset_count,
3951				ARRAY_SIZE(indirect_start_offsets));
3952
3953	/* save and restore list */
3954	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3955
3956	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3957	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3958		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3959
3960	/* indirect list */
3961	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3962	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3963		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3964
3965	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3966	list_size = list_size >> 1;
3967	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3968	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3969
3970	/* starting offsets starts */
3971	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3972		adev->gfx.rlc.starting_offsets_start);
3973	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3974		WREG32(mmRLC_GPM_SCRATCH_DATA,
3975				indirect_start_offsets[i]);
3976
3977	/* unique indices */
3978	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3979	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3980	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3981		if (unique_indices[i] != 0) {
3982			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3983			WREG32(data + i, unique_indices[i] >> 20);
3984		}
3985	}
3986	kfree(register_list_format);
3987
3988	return 0;
3989}
3990
3991static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3992{
3993	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3994}
3995
3996static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3997{
3998	uint32_t data;
3999
4000	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4001
4002	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4003	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4004	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4005	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4006	WREG32(mmRLC_PG_DELAY, data);
4007
4008	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4009	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4010
4011}
4012
4013static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4014						bool enable)
4015{
4016	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4017}
4018
4019static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4020						  bool enable)
4021{
4022	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4023}
4024
4025static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4026{
4027	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4028}
4029
4030static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4031{
4032	if ((adev->asic_type == CHIP_CARRIZO) ||
4033	    (adev->asic_type == CHIP_STONEY)) {
4034		gfx_v8_0_init_csb(adev);
4035		gfx_v8_0_init_save_restore_list(adev);
4036		gfx_v8_0_enable_save_restore_machine(adev);
4037		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4038		gfx_v8_0_init_power_gating(adev);
4039		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4040	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4041		   (adev->asic_type == CHIP_POLARIS12) ||
4042		   (adev->asic_type == CHIP_VEGAM)) {
4043		gfx_v8_0_init_csb(adev);
4044		gfx_v8_0_init_save_restore_list(adev);
4045		gfx_v8_0_enable_save_restore_machine(adev);
4046		gfx_v8_0_init_power_gating(adev);
4047	}
4048
4049}
4050
4051static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4052{
4053	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4054
4055	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4056	gfx_v8_0_wait_for_rlc_serdes(adev);
4057}
4058
4059static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4060{
4061	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4062	udelay(50);
4063
4064	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4065	udelay(50);
4066}
4067
4068static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4069{
4070	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4071
4072	/* carrizo do enable cp interrupt after cp inited */
4073	if (!(adev->flags & AMD_IS_APU))
4074		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4075
4076	udelay(50);
4077}
4078
4079static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4080{
4081	if (amdgpu_sriov_vf(adev)) {
4082		gfx_v8_0_init_csb(adev);
4083		return 0;
4084	}
4085
4086	adev->gfx.rlc.funcs->stop(adev);
4087	adev->gfx.rlc.funcs->reset(adev);
4088	gfx_v8_0_init_pg(adev);
4089	adev->gfx.rlc.funcs->start(adev);
4090
4091	return 0;
4092}
4093
4094static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4095{
4096	u32 tmp = RREG32(mmCP_ME_CNTL);
4097
4098	if (enable) {
4099		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4100		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4101		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4102	} else {
4103		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4104		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4105		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4106	}
4107	WREG32(mmCP_ME_CNTL, tmp);
4108	udelay(50);
4109}
4110
4111static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4112{
4113	u32 count = 0;
4114	const struct cs_section_def *sect = NULL;
4115	const struct cs_extent_def *ext = NULL;
4116
4117	/* begin clear state */
4118	count += 2;
4119	/* context control state */
4120	count += 3;
4121
4122	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4123		for (ext = sect->section; ext->extent != NULL; ++ext) {
4124			if (sect->id == SECT_CONTEXT)
4125				count += 2 + ext->reg_count;
4126			else
4127				return 0;
4128		}
4129	}
4130	/* pa_sc_raster_config/pa_sc_raster_config1 */
4131	count += 4;
4132	/* end clear state */
4133	count += 2;
4134	/* clear state */
4135	count += 2;
4136
4137	return count;
4138}
4139
4140static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4141{
4142	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4143	const struct cs_section_def *sect = NULL;
4144	const struct cs_extent_def *ext = NULL;
4145	int r, i;
4146
4147	/* init the CP */
4148	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4149	WREG32(mmCP_ENDIAN_SWAP, 0);
4150	WREG32(mmCP_DEVICE_ID, 1);
4151
4152	gfx_v8_0_cp_gfx_enable(adev, true);
4153
4154	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4155	if (r) {
4156		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4157		return r;
4158	}
4159
4160	/* clear state buffer */
4161	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4162	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4163
4164	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4165	amdgpu_ring_write(ring, 0x80000000);
4166	amdgpu_ring_write(ring, 0x80000000);
4167
4168	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4169		for (ext = sect->section; ext->extent != NULL; ++ext) {
4170			if (sect->id == SECT_CONTEXT) {
4171				amdgpu_ring_write(ring,
4172				       PACKET3(PACKET3_SET_CONTEXT_REG,
4173					       ext->reg_count));
4174				amdgpu_ring_write(ring,
4175				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4176				for (i = 0; i < ext->reg_count; i++)
4177					amdgpu_ring_write(ring, ext->extent[i]);
4178			}
4179		}
4180	}
4181
4182	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4183	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4184	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4185	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4186
4187	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4188	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4189
4190	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4191	amdgpu_ring_write(ring, 0);
4192
4193	/* init the CE partitions */
4194	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4195	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4196	amdgpu_ring_write(ring, 0x8000);
4197	amdgpu_ring_write(ring, 0x8000);
4198
4199	amdgpu_ring_commit(ring);
4200
4201	return 0;
4202}
4203static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4204{
4205	u32 tmp;
4206	/* no gfx doorbells on iceland */
4207	if (adev->asic_type == CHIP_TOPAZ)
4208		return;
4209
4210	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4211
4212	if (ring->use_doorbell) {
4213		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4214				DOORBELL_OFFSET, ring->doorbell_index);
4215		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4216						DOORBELL_HIT, 0);
4217		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4218					    DOORBELL_EN, 1);
4219	} else {
4220		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4221	}
4222
4223	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4224
4225	if (adev->flags & AMD_IS_APU)
4226		return;
4227
4228	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4229					DOORBELL_RANGE_LOWER,
4230					adev->doorbell_index.gfx_ring0);
4231	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4232
4233	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4234		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4235}
4236
4237static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4238{
4239	struct amdgpu_ring *ring;
4240	u32 tmp;
4241	u32 rb_bufsz;
4242	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4243
4244	/* Set the write pointer delay */
4245	WREG32(mmCP_RB_WPTR_DELAY, 0);
4246
4247	/* set the RB to use vmid 0 */
4248	WREG32(mmCP_RB_VMID, 0);
4249
4250	/* Set ring buffer size */
4251	ring = &adev->gfx.gfx_ring[0];
4252	rb_bufsz = order_base_2(ring->ring_size / 8);
4253	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4254	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4255	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4256	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4257#ifdef __BIG_ENDIAN
4258	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4259#endif
4260	WREG32(mmCP_RB0_CNTL, tmp);
4261
4262	/* Initialize the ring buffer's read and write pointers */
4263	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4264	ring->wptr = 0;
4265	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4266
4267	/* set the wb address wether it's enabled or not */
4268	rptr_addr = ring->rptr_gpu_addr;
4269	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4270	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4271
4272	wptr_gpu_addr = ring->wptr_gpu_addr;
4273	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4274	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4275	mdelay(1);
4276	WREG32(mmCP_RB0_CNTL, tmp);
4277
4278	rb_addr = ring->gpu_addr >> 8;
4279	WREG32(mmCP_RB0_BASE, rb_addr);
4280	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4281
4282	gfx_v8_0_set_cpg_door_bell(adev, ring);
4283	/* start the ring */
4284	amdgpu_ring_clear_ring(ring);
4285	gfx_v8_0_cp_gfx_start(adev);
4286
4287	return 0;
4288}
4289
4290static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4291{
4292	if (enable) {
4293		WREG32(mmCP_MEC_CNTL, 0);
4294	} else {
4295		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4296		adev->gfx.kiq[0].ring.sched.ready = false;
4297	}
4298	udelay(50);
4299}
4300
4301/* KIQ functions */
4302static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4303{
4304	uint32_t tmp;
4305	struct amdgpu_device *adev = ring->adev;
4306
4307	/* tell RLC which is KIQ queue */
4308	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4309	tmp &= 0xffffff00;
4310	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4311	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4312	tmp |= 0x80;
4313	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4314}
4315
4316static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4317{
4318	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4319	uint64_t queue_mask = 0;
4320	int r, i;
4321
4322	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4323		if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
4324			continue;
4325
4326		/* This situation may be hit in the future if a new HW
4327		 * generation exposes more than 64 queues. If so, the
4328		 * definition of queue_mask needs updating */
4329		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4330			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4331			break;
4332		}
4333
4334		queue_mask |= (1ull << i);
4335	}
4336
4337	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4338	if (r) {
4339		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4340		return r;
4341	}
4342	/* set resources */
4343	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4344	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4345	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4346	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4347	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4348	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4349	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4350	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4351	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4352		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4353		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4354		uint64_t wptr_addr = ring->wptr_gpu_addr;
4355
4356		/* map queues */
4357		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4358		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4359		amdgpu_ring_write(kiq_ring,
4360				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4361		amdgpu_ring_write(kiq_ring,
4362				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4363				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4364				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4365				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4366		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4367		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4368		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4369		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4370	}
4371
4372	amdgpu_ring_commit(kiq_ring);
4373
4374	return 0;
4375}
4376
4377static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4378{
4379	int i, r = 0;
4380
4381	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4382		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4383		for (i = 0; i < adev->usec_timeout; i++) {
4384			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4385				break;
4386			udelay(1);
4387		}
4388		if (i == adev->usec_timeout)
4389			r = -ETIMEDOUT;
4390	}
4391	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4392	WREG32(mmCP_HQD_PQ_RPTR, 0);
4393	WREG32(mmCP_HQD_PQ_WPTR, 0);
4394
4395	return r;
4396}
4397
4398static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4399{
4400	struct amdgpu_device *adev = ring->adev;
4401
4402	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4403		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4404			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4405			mqd->cp_hqd_queue_priority =
4406				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4407		}
4408	}
4409}
4410
4411static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4412{
4413	struct amdgpu_device *adev = ring->adev;
4414	struct vi_mqd *mqd = ring->mqd_ptr;
4415	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4416	uint32_t tmp;
4417
4418	mqd->header = 0xC0310800;
4419	mqd->compute_pipelinestat_enable = 0x00000001;
4420	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4421	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4422	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4423	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4424	mqd->compute_misc_reserved = 0x00000003;
4425	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4426						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4427	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4428						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4429	eop_base_addr = ring->eop_gpu_addr >> 8;
4430	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4431	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4432
4433	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4434	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4435	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4436			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4437
4438	mqd->cp_hqd_eop_control = tmp;
4439
4440	/* enable doorbell? */
4441	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4442			    CP_HQD_PQ_DOORBELL_CONTROL,
4443			    DOORBELL_EN,
4444			    ring->use_doorbell ? 1 : 0);
4445
4446	mqd->cp_hqd_pq_doorbell_control = tmp;
4447
4448	/* set the pointer to the MQD */
4449	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4450	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4451
4452	/* set MQD vmid to 0 */
4453	tmp = RREG32(mmCP_MQD_CONTROL);
4454	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4455	mqd->cp_mqd_control = tmp;
4456
4457	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4458	hqd_gpu_addr = ring->gpu_addr >> 8;
4459	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4460	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4461
4462	/* set up the HQD, this is similar to CP_RB0_CNTL */
4463	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4464	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4465			    (order_base_2(ring->ring_size / 4) - 1));
4466	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4467			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4468#ifdef __BIG_ENDIAN
4469	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4470#endif
4471	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4472	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4473	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4474	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4475	mqd->cp_hqd_pq_control = tmp;
4476
4477	/* set the wb address whether it's enabled or not */
4478	wb_gpu_addr = ring->rptr_gpu_addr;
4479	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4480	mqd->cp_hqd_pq_rptr_report_addr_hi =
4481		upper_32_bits(wb_gpu_addr) & 0xffff;
4482
4483	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4484	wb_gpu_addr = ring->wptr_gpu_addr;
4485	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4486	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4487
4488	tmp = 0;
4489	/* enable the doorbell if requested */
4490	if (ring->use_doorbell) {
4491		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4492		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4493				DOORBELL_OFFSET, ring->doorbell_index);
4494
4495		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4496					 DOORBELL_EN, 1);
4497		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4498					 DOORBELL_SOURCE, 0);
4499		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4500					 DOORBELL_HIT, 0);
4501	}
4502
4503	mqd->cp_hqd_pq_doorbell_control = tmp;
4504
4505	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4506	ring->wptr = 0;
4507	mqd->cp_hqd_pq_wptr = ring->wptr;
4508	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4509
4510	/* set the vmid for the queue */
4511	mqd->cp_hqd_vmid = 0;
4512
4513	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4514	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4515	mqd->cp_hqd_persistent_state = tmp;
4516
4517	/* set MTYPE */
4518	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4519	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4520	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4521	mqd->cp_hqd_ib_control = tmp;
4522
4523	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4524	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4525	mqd->cp_hqd_iq_timer = tmp;
4526
4527	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4528	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4529	mqd->cp_hqd_ctx_save_control = tmp;
4530
4531	/* defaults */
4532	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4533	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4534	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4535	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4536	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4537	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4538	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4539	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4540	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4541	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4542	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4543	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4544
4545	/* set static priority for a queue/ring */
4546	gfx_v8_0_mqd_set_priority(ring, mqd);
4547	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4548
4549	/* map_queues packet doesn't need activate the queue,
4550	 * so only kiq need set this field.
4551	 */
4552	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4553		mqd->cp_hqd_active = 1;
4554
4555	return 0;
4556}
4557
4558static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4559			struct vi_mqd *mqd)
4560{
4561	uint32_t mqd_reg;
4562	uint32_t *mqd_data;
4563
4564	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4565	mqd_data = &mqd->cp_mqd_base_addr_lo;
4566
4567	/* disable wptr polling */
4568	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4569
4570	/* program all HQD registers */
4571	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4572		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4573
4574	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4575	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4576	 * on ASICs that do not support context-save.
4577	 * EOP writes/reads can start anywhere in the ring.
4578	 */
4579	if (adev->asic_type != CHIP_TONGA) {
4580		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4581		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4582		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4583	}
4584
4585	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4586		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4587
4588	/* activate the HQD */
4589	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4590		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4591
4592	return 0;
4593}
4594
4595static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4596{
4597	struct amdgpu_device *adev = ring->adev;
4598	struct vi_mqd *mqd = ring->mqd_ptr;
4599
4600	gfx_v8_0_kiq_setting(ring);
4601
4602	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4603		/* reset MQD to a clean status */
4604		if (adev->gfx.kiq[0].mqd_backup)
4605			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
4606
4607		/* reset ring buffer */
4608		ring->wptr = 0;
4609		amdgpu_ring_clear_ring(ring);
4610		mutex_lock(&adev->srbm_mutex);
4611		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4612		gfx_v8_0_mqd_commit(adev, mqd);
4613		vi_srbm_select(adev, 0, 0, 0, 0);
4614		mutex_unlock(&adev->srbm_mutex);
4615	} else {
4616		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4617		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4618		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4619		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4620			amdgpu_ring_clear_ring(ring);
4621		mutex_lock(&adev->srbm_mutex);
4622		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4623		gfx_v8_0_mqd_init(ring);
4624		gfx_v8_0_mqd_commit(adev, mqd);
4625		vi_srbm_select(adev, 0, 0, 0, 0);
4626		mutex_unlock(&adev->srbm_mutex);
4627
4628		if (adev->gfx.kiq[0].mqd_backup)
4629			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
4630	}
4631
4632	return 0;
4633}
4634
4635static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4636{
4637	struct amdgpu_device *adev = ring->adev;
4638	struct vi_mqd *mqd = ring->mqd_ptr;
4639	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4640
4641	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4642		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4643		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4644		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4645		mutex_lock(&adev->srbm_mutex);
4646		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4647		gfx_v8_0_mqd_init(ring);
4648		vi_srbm_select(adev, 0, 0, 0, 0);
4649		mutex_unlock(&adev->srbm_mutex);
4650
4651		if (adev->gfx.mec.mqd_backup[mqd_idx])
4652			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4653	} else {
4654		/* restore MQD to a clean status */
4655		if (adev->gfx.mec.mqd_backup[mqd_idx])
4656			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4657		/* reset ring buffer */
4658		ring->wptr = 0;
4659		amdgpu_ring_clear_ring(ring);
4660	}
4661	return 0;
4662}
4663
4664static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4665{
4666	if (adev->asic_type > CHIP_TONGA) {
4667		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4668		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4669	}
4670	/* enable doorbells */
4671	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4672}
4673
4674static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4675{
4676	struct amdgpu_ring *ring;
4677	int r;
4678
4679	ring = &adev->gfx.kiq[0].ring;
4680
4681	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4682	if (unlikely(r != 0))
4683		return r;
4684
4685	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4686	if (unlikely(r != 0)) {
4687		amdgpu_bo_unreserve(ring->mqd_obj);
4688		return r;
4689	}
4690
4691	gfx_v8_0_kiq_init_queue(ring);
4692	amdgpu_bo_kunmap(ring->mqd_obj);
4693	ring->mqd_ptr = NULL;
4694	amdgpu_bo_unreserve(ring->mqd_obj);
4695	return 0;
4696}
4697
4698static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4699{
4700	struct amdgpu_ring *ring = NULL;
4701	int r = 0, i;
4702
4703	gfx_v8_0_cp_compute_enable(adev, true);
4704
4705	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4706		ring = &adev->gfx.compute_ring[i];
4707
4708		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4709		if (unlikely(r != 0))
4710			goto done;
4711		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4712		if (!r) {
4713			r = gfx_v8_0_kcq_init_queue(ring);
4714			amdgpu_bo_kunmap(ring->mqd_obj);
4715			ring->mqd_ptr = NULL;
4716		}
4717		amdgpu_bo_unreserve(ring->mqd_obj);
4718		if (r)
4719			goto done;
4720	}
4721
4722	gfx_v8_0_set_mec_doorbell_range(adev);
4723
4724	r = gfx_v8_0_kiq_kcq_enable(adev);
4725	if (r)
4726		goto done;
4727
4728done:
4729	return r;
4730}
4731
4732static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4733{
4734	int r, i;
4735	struct amdgpu_ring *ring;
4736
4737	/* collect all the ring_tests here, gfx, kiq, compute */
4738	ring = &adev->gfx.gfx_ring[0];
4739	r = amdgpu_ring_test_helper(ring);
4740	if (r)
4741		return r;
4742
4743	ring = &adev->gfx.kiq[0].ring;
4744	r = amdgpu_ring_test_helper(ring);
4745	if (r)
4746		return r;
4747
4748	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4749		ring = &adev->gfx.compute_ring[i];
4750		amdgpu_ring_test_helper(ring);
4751	}
4752
4753	return 0;
4754}
4755
4756static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4757{
4758	int r;
4759
4760	if (!(adev->flags & AMD_IS_APU))
4761		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4762
4763	r = gfx_v8_0_kiq_resume(adev);
4764	if (r)
4765		return r;
4766
4767	r = gfx_v8_0_cp_gfx_resume(adev);
4768	if (r)
4769		return r;
4770
4771	r = gfx_v8_0_kcq_resume(adev);
4772	if (r)
4773		return r;
4774
4775	r = gfx_v8_0_cp_test_all_rings(adev);
4776	if (r)
4777		return r;
4778
4779	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4780
4781	return 0;
4782}
4783
4784static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4785{
4786	gfx_v8_0_cp_gfx_enable(adev, enable);
4787	gfx_v8_0_cp_compute_enable(adev, enable);
4788}
4789
4790static int gfx_v8_0_hw_init(void *handle)
4791{
4792	int r;
4793	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4794
4795	gfx_v8_0_init_golden_registers(adev);
4796	gfx_v8_0_constants_init(adev);
4797
4798	r = adev->gfx.rlc.funcs->resume(adev);
4799	if (r)
4800		return r;
4801
4802	r = gfx_v8_0_cp_resume(adev);
4803
4804	return r;
4805}
4806
4807static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4808{
4809	int r, i;
4810	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4811
4812	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4813	if (r)
4814		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4815
4816	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4817		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4818
4819		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4820		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4821						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4822						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4823						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4824						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4825		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4826		amdgpu_ring_write(kiq_ring, 0);
4827		amdgpu_ring_write(kiq_ring, 0);
4828		amdgpu_ring_write(kiq_ring, 0);
4829	}
4830	r = amdgpu_ring_test_helper(kiq_ring);
4831	if (r)
4832		DRM_ERROR("KCQ disable failed\n");
4833
4834	return r;
4835}
4836
4837static bool gfx_v8_0_is_idle(void *handle)
4838{
4839	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4840
4841	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4842		|| RREG32(mmGRBM_STATUS2) != 0x8)
4843		return false;
4844	else
4845		return true;
4846}
4847
4848static bool gfx_v8_0_rlc_is_idle(void *handle)
4849{
4850	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4851
4852	if (RREG32(mmGRBM_STATUS2) != 0x8)
4853		return false;
4854	else
4855		return true;
4856}
4857
4858static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4859{
4860	unsigned int i;
4861	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4862
4863	for (i = 0; i < adev->usec_timeout; i++) {
4864		if (gfx_v8_0_rlc_is_idle(handle))
4865			return 0;
4866
4867		udelay(1);
4868	}
4869	return -ETIMEDOUT;
4870}
4871
4872static int gfx_v8_0_wait_for_idle(void *handle)
4873{
4874	unsigned int i;
4875	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4876
4877	for (i = 0; i < adev->usec_timeout; i++) {
4878		if (gfx_v8_0_is_idle(handle))
4879			return 0;
4880
4881		udelay(1);
4882	}
4883	return -ETIMEDOUT;
4884}
4885
4886static int gfx_v8_0_hw_fini(void *handle)
4887{
4888	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4889
4890	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4891	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4892
4893	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4894
4895	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4896
4897	/* disable KCQ to avoid CPC touch memory not valid anymore */
4898	gfx_v8_0_kcq_disable(adev);
4899
4900	if (amdgpu_sriov_vf(adev)) {
4901		pr_debug("For SRIOV client, shouldn't do anything.\n");
4902		return 0;
4903	}
4904	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4905	if (!gfx_v8_0_wait_for_idle(adev))
4906		gfx_v8_0_cp_enable(adev, false);
4907	else
4908		pr_err("cp is busy, skip halt cp\n");
4909	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4910		adev->gfx.rlc.funcs->stop(adev);
4911	else
4912		pr_err("rlc is busy, skip halt rlc\n");
4913	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4914
4915	return 0;
4916}
4917
4918static int gfx_v8_0_suspend(void *handle)
4919{
4920	return gfx_v8_0_hw_fini(handle);
4921}
4922
4923static int gfx_v8_0_resume(void *handle)
4924{
4925	return gfx_v8_0_hw_init(handle);
4926}
4927
4928static bool gfx_v8_0_check_soft_reset(void *handle)
4929{
4930	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4931	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4932	u32 tmp;
4933
4934	/* GRBM_STATUS */
4935	tmp = RREG32(mmGRBM_STATUS);
4936	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4937		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4938		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4939		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4940		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4941		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4942		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4943		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4944						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4945		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4946						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4947		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4948						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4949	}
4950
4951	/* GRBM_STATUS2 */
4952	tmp = RREG32(mmGRBM_STATUS2);
4953	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4954		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4955						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4956
4957	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4958	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4959	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4960		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4961						SOFT_RESET_CPF, 1);
4962		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4963						SOFT_RESET_CPC, 1);
4964		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4965						SOFT_RESET_CPG, 1);
4966		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4967						SOFT_RESET_GRBM, 1);
4968	}
4969
4970	/* SRBM_STATUS */
4971	tmp = RREG32(mmSRBM_STATUS);
4972	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4973		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4974						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4975	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4976		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4977						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4978
4979	if (grbm_soft_reset || srbm_soft_reset) {
4980		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4981		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4982		return true;
4983	} else {
4984		adev->gfx.grbm_soft_reset = 0;
4985		adev->gfx.srbm_soft_reset = 0;
4986		return false;
4987	}
4988}
4989
4990static int gfx_v8_0_pre_soft_reset(void *handle)
4991{
4992	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4993	u32 grbm_soft_reset = 0;
4994
4995	if ((!adev->gfx.grbm_soft_reset) &&
4996	    (!adev->gfx.srbm_soft_reset))
4997		return 0;
4998
4999	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5000
5001	/* stop the rlc */
5002	adev->gfx.rlc.funcs->stop(adev);
5003
5004	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5005	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5006		/* Disable GFX parsing/prefetching */
5007		gfx_v8_0_cp_gfx_enable(adev, false);
5008
5009	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5010	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5011	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5012	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5013		int i;
5014
5015		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5016			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5017
5018			mutex_lock(&adev->srbm_mutex);
5019			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5020			gfx_v8_0_deactivate_hqd(adev, 2);
5021			vi_srbm_select(adev, 0, 0, 0, 0);
5022			mutex_unlock(&adev->srbm_mutex);
5023		}
5024		/* Disable MEC parsing/prefetching */
5025		gfx_v8_0_cp_compute_enable(adev, false);
5026	}
5027
5028	return 0;
5029}
5030
5031static int gfx_v8_0_soft_reset(void *handle)
5032{
5033	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5034	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5035	u32 tmp;
5036
5037	if ((!adev->gfx.grbm_soft_reset) &&
5038	    (!adev->gfx.srbm_soft_reset))
5039		return 0;
5040
5041	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5042	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5043
5044	if (grbm_soft_reset || srbm_soft_reset) {
5045		tmp = RREG32(mmGMCON_DEBUG);
5046		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5047		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5048		WREG32(mmGMCON_DEBUG, tmp);
5049		udelay(50);
5050	}
5051
5052	if (grbm_soft_reset) {
5053		tmp = RREG32(mmGRBM_SOFT_RESET);
5054		tmp |= grbm_soft_reset;
5055		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5056		WREG32(mmGRBM_SOFT_RESET, tmp);
5057		tmp = RREG32(mmGRBM_SOFT_RESET);
5058
5059		udelay(50);
5060
5061		tmp &= ~grbm_soft_reset;
5062		WREG32(mmGRBM_SOFT_RESET, tmp);
5063		tmp = RREG32(mmGRBM_SOFT_RESET);
5064	}
5065
5066	if (srbm_soft_reset) {
5067		tmp = RREG32(mmSRBM_SOFT_RESET);
5068		tmp |= srbm_soft_reset;
5069		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5070		WREG32(mmSRBM_SOFT_RESET, tmp);
5071		tmp = RREG32(mmSRBM_SOFT_RESET);
5072
5073		udelay(50);
5074
5075		tmp &= ~srbm_soft_reset;
5076		WREG32(mmSRBM_SOFT_RESET, tmp);
5077		tmp = RREG32(mmSRBM_SOFT_RESET);
5078	}
5079
5080	if (grbm_soft_reset || srbm_soft_reset) {
5081		tmp = RREG32(mmGMCON_DEBUG);
5082		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5083		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5084		WREG32(mmGMCON_DEBUG, tmp);
5085	}
5086
5087	/* Wait a little for things to settle down */
5088	udelay(50);
5089
5090	return 0;
5091}
5092
5093static int gfx_v8_0_post_soft_reset(void *handle)
5094{
5095	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5096	u32 grbm_soft_reset = 0;
5097
5098	if ((!adev->gfx.grbm_soft_reset) &&
5099	    (!adev->gfx.srbm_soft_reset))
5100		return 0;
5101
5102	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5103
5104	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5105	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5106	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5107	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5108		int i;
5109
5110		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5111			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5112
5113			mutex_lock(&adev->srbm_mutex);
5114			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5115			gfx_v8_0_deactivate_hqd(adev, 2);
5116			vi_srbm_select(adev, 0, 0, 0, 0);
5117			mutex_unlock(&adev->srbm_mutex);
5118		}
5119		gfx_v8_0_kiq_resume(adev);
5120		gfx_v8_0_kcq_resume(adev);
5121	}
5122
5123	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5124	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5125		gfx_v8_0_cp_gfx_resume(adev);
5126
5127	gfx_v8_0_cp_test_all_rings(adev);
5128
5129	adev->gfx.rlc.funcs->start(adev);
5130
5131	return 0;
5132}
5133
5134/**
5135 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5136 *
5137 * @adev: amdgpu_device pointer
5138 *
5139 * Fetches a GPU clock counter snapshot.
5140 * Returns the 64 bit clock counter snapshot.
5141 */
5142static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5143{
5144	uint64_t clock;
5145
5146	mutex_lock(&adev->gfx.gpu_clock_mutex);
5147	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5148	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5149		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5150	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5151	return clock;
5152}
5153
5154static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5155					  uint32_t vmid,
5156					  uint32_t gds_base, uint32_t gds_size,
5157					  uint32_t gws_base, uint32_t gws_size,
5158					  uint32_t oa_base, uint32_t oa_size)
5159{
5160	/* GDS Base */
5161	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5162	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5163				WRITE_DATA_DST_SEL(0)));
5164	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5165	amdgpu_ring_write(ring, 0);
5166	amdgpu_ring_write(ring, gds_base);
5167
5168	/* GDS Size */
5169	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5170	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5171				WRITE_DATA_DST_SEL(0)));
5172	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5173	amdgpu_ring_write(ring, 0);
5174	amdgpu_ring_write(ring, gds_size);
5175
5176	/* GWS */
5177	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5178	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5179				WRITE_DATA_DST_SEL(0)));
5180	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5181	amdgpu_ring_write(ring, 0);
5182	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5183
5184	/* OA */
5185	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5186	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5187				WRITE_DATA_DST_SEL(0)));
5188	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5189	amdgpu_ring_write(ring, 0);
5190	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5191}
5192
5193static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5194{
5195	WREG32(mmSQ_IND_INDEX,
5196		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5197		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5198		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5199		(SQ_IND_INDEX__FORCE_READ_MASK));
5200	return RREG32(mmSQ_IND_DATA);
5201}
5202
5203static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5204			   uint32_t wave, uint32_t thread,
5205			   uint32_t regno, uint32_t num, uint32_t *out)
5206{
5207	WREG32(mmSQ_IND_INDEX,
5208		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5209		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5210		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5211		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5212		(SQ_IND_INDEX__FORCE_READ_MASK) |
5213		(SQ_IND_INDEX__AUTO_INCR_MASK));
5214	while (num--)
5215		*(out++) = RREG32(mmSQ_IND_DATA);
5216}
5217
5218static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5219{
5220	/* type 0 wave data */
5221	dst[(*no_fields)++] = 0;
5222	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5223	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5224	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5225	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5226	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5227	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5228	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5229	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5230	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5231	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5232	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5233	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5234	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5235	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5236	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5237	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5238	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5239	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5240	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5241}
5242
5243static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
5244				     uint32_t wave, uint32_t start,
5245				     uint32_t size, uint32_t *dst)
5246{
5247	wave_read_regs(
5248		adev, simd, wave, 0,
5249		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5250}
5251
5252
5253static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5254	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5255	.select_se_sh = &gfx_v8_0_select_se_sh,
5256	.read_wave_data = &gfx_v8_0_read_wave_data,
5257	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5258	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5259};
5260
5261static int gfx_v8_0_early_init(void *handle)
5262{
5263	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5264
5265	adev->gfx.xcc_mask = 1;
5266	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5267	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5268					  AMDGPU_MAX_COMPUTE_RINGS);
5269	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5270	gfx_v8_0_set_ring_funcs(adev);
5271	gfx_v8_0_set_irq_funcs(adev);
5272	gfx_v8_0_set_gds_init(adev);
5273	gfx_v8_0_set_rlc_funcs(adev);
5274
5275	return 0;
5276}
5277
5278static int gfx_v8_0_late_init(void *handle)
5279{
5280	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5281	int r;
5282
5283	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5284	if (r)
5285		return r;
5286
5287	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5288	if (r)
5289		return r;
5290
5291	/* requires IBs so do in late init after IB pool is initialized */
5292	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5293	if (r)
5294		return r;
5295
5296	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5297	if (r) {
5298		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5299		return r;
5300	}
5301
5302	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5303	if (r) {
5304		DRM_ERROR(
5305			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5306			r);
5307		return r;
5308	}
5309
5310	return 0;
5311}
5312
5313static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5314						       bool enable)
5315{
5316	if ((adev->asic_type == CHIP_POLARIS11) ||
5317	    (adev->asic_type == CHIP_POLARIS12) ||
5318	    (adev->asic_type == CHIP_VEGAM))
5319		/* Send msg to SMU via Powerplay */
5320		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5321
5322	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5323}
5324
5325static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5326							bool enable)
5327{
5328	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5329}
5330
5331static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5332		bool enable)
5333{
5334	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5335}
5336
5337static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5338					  bool enable)
5339{
5340	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5341}
5342
5343static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5344						bool enable)
5345{
5346	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5347
5348	/* Read any GFX register to wake up GFX. */
5349	if (!enable)
5350		RREG32(mmDB_RENDER_CONTROL);
5351}
5352
5353static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5354					  bool enable)
5355{
5356	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5357		cz_enable_gfx_cg_power_gating(adev, true);
5358		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5359			cz_enable_gfx_pipeline_power_gating(adev, true);
5360	} else {
5361		cz_enable_gfx_cg_power_gating(adev, false);
5362		cz_enable_gfx_pipeline_power_gating(adev, false);
5363	}
5364}
5365
5366static int gfx_v8_0_set_powergating_state(void *handle,
5367					  enum amd_powergating_state state)
5368{
5369	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5370	bool enable = (state == AMD_PG_STATE_GATE);
5371
5372	if (amdgpu_sriov_vf(adev))
5373		return 0;
5374
5375	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5376				AMD_PG_SUPPORT_RLC_SMU_HS |
5377				AMD_PG_SUPPORT_CP |
5378				AMD_PG_SUPPORT_GFX_DMG))
5379		amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5380	switch (adev->asic_type) {
5381	case CHIP_CARRIZO:
5382	case CHIP_STONEY:
5383
5384		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5385			cz_enable_sck_slow_down_on_power_up(adev, true);
5386			cz_enable_sck_slow_down_on_power_down(adev, true);
5387		} else {
5388			cz_enable_sck_slow_down_on_power_up(adev, false);
5389			cz_enable_sck_slow_down_on_power_down(adev, false);
5390		}
5391		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5392			cz_enable_cp_power_gating(adev, true);
5393		else
5394			cz_enable_cp_power_gating(adev, false);
5395
5396		cz_update_gfx_cg_power_gating(adev, enable);
5397
5398		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5399			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5400		else
5401			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5402
5403		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5404			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5405		else
5406			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5407		break;
5408	case CHIP_POLARIS11:
5409	case CHIP_POLARIS12:
5410	case CHIP_VEGAM:
5411		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5412			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5413		else
5414			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5415
5416		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5417			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5418		else
5419			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5420
5421		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5422			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5423		else
5424			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5425		break;
5426	default:
5427		break;
5428	}
5429	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5430				AMD_PG_SUPPORT_RLC_SMU_HS |
5431				AMD_PG_SUPPORT_CP |
5432				AMD_PG_SUPPORT_GFX_DMG))
5433		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5434	return 0;
5435}
5436
5437static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5438{
5439	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5440	int data;
5441
5442	if (amdgpu_sriov_vf(adev))
5443		*flags = 0;
5444
5445	/* AMD_CG_SUPPORT_GFX_MGCG */
5446	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5447	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5448		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5449
5450	/* AMD_CG_SUPPORT_GFX_CGLG */
5451	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5452	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5453		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5454
5455	/* AMD_CG_SUPPORT_GFX_CGLS */
5456	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5457		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5458
5459	/* AMD_CG_SUPPORT_GFX_CGTS */
5460	data = RREG32(mmCGTS_SM_CTRL_REG);
5461	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5462		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5463
5464	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5465	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5466		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5467
5468	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5469	data = RREG32(mmRLC_MEM_SLP_CNTL);
5470	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5471		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5472
5473	/* AMD_CG_SUPPORT_GFX_CP_LS */
5474	data = RREG32(mmCP_MEM_SLP_CNTL);
5475	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5476		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5477}
5478
5479static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5480				     uint32_t reg_addr, uint32_t cmd)
5481{
5482	uint32_t data;
5483
5484	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5485
5486	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5487	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5488
5489	data = RREG32(mmRLC_SERDES_WR_CTRL);
5490	if (adev->asic_type == CHIP_STONEY)
5491		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5492			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5493			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5494			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5495			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5496			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5497			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5498			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5499			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5500	else
5501		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5502			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5503			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5504			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5505			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5506			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5507			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5508			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5509			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5510			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5511			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5512	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5513		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5514		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5515		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5516
5517	WREG32(mmRLC_SERDES_WR_CTRL, data);
5518}
5519
5520#define MSG_ENTER_RLC_SAFE_MODE     1
5521#define MSG_EXIT_RLC_SAFE_MODE      0
5522#define RLC_GPR_REG2__REQ_MASK 0x00000001
5523#define RLC_GPR_REG2__REQ__SHIFT 0
5524#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5525#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5526
5527static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5528{
5529	uint32_t rlc_setting;
5530
5531	rlc_setting = RREG32(mmRLC_CNTL);
5532	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5533		return false;
5534
5535	return true;
5536}
5537
5538static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5539{
5540	uint32_t data;
5541	unsigned i;
5542	data = RREG32(mmRLC_CNTL);
5543	data |= RLC_SAFE_MODE__CMD_MASK;
5544	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5545	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5546	WREG32(mmRLC_SAFE_MODE, data);
5547
5548	/* wait for RLC_SAFE_MODE */
5549	for (i = 0; i < adev->usec_timeout; i++) {
5550		if ((RREG32(mmRLC_GPM_STAT) &
5551		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5552		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5553		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5554		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5555			break;
5556		udelay(1);
5557	}
5558	for (i = 0; i < adev->usec_timeout; i++) {
5559		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5560			break;
5561		udelay(1);
5562	}
5563}
5564
5565static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5566{
5567	uint32_t data;
5568	unsigned i;
5569
5570	data = RREG32(mmRLC_CNTL);
5571	data |= RLC_SAFE_MODE__CMD_MASK;
5572	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5573	WREG32(mmRLC_SAFE_MODE, data);
5574
5575	for (i = 0; i < adev->usec_timeout; i++) {
5576		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5577			break;
5578		udelay(1);
5579	}
5580}
5581
5582static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5583{
5584	u32 data;
5585
5586	amdgpu_gfx_off_ctrl(adev, false);
5587
5588	if (amdgpu_sriov_is_pp_one_vf(adev))
5589		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5590	else
5591		data = RREG32(mmRLC_SPM_VMID);
5592
5593	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5594	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5595
5596	if (amdgpu_sriov_is_pp_one_vf(adev))
5597		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5598	else
5599		WREG32(mmRLC_SPM_VMID, data);
5600
5601	amdgpu_gfx_off_ctrl(adev, true);
5602}
5603
5604static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5605	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5606	.set_safe_mode = gfx_v8_0_set_safe_mode,
5607	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5608	.init = gfx_v8_0_rlc_init,
5609	.get_csb_size = gfx_v8_0_get_csb_size,
5610	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5611	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5612	.resume = gfx_v8_0_rlc_resume,
5613	.stop = gfx_v8_0_rlc_stop,
5614	.reset = gfx_v8_0_rlc_reset,
5615	.start = gfx_v8_0_rlc_start,
5616	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5617};
5618
5619static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5620						      bool enable)
5621{
5622	uint32_t temp, data;
5623
5624	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5625
5626	/* It is disabled by HW by default */
5627	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5628		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5629			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5630				/* 1 - RLC memory Light sleep */
5631				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5632
5633			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5634				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5635		}
5636
5637		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5638		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5639		if (adev->flags & AMD_IS_APU)
5640			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5641				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5642				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5643		else
5644			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5645				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5646				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5647				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5648
5649		if (temp != data)
5650			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5651
5652		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5653		gfx_v8_0_wait_for_rlc_serdes(adev);
5654
5655		/* 5 - clear mgcg override */
5656		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5657
5658		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5659			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5660			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5661			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5662			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5663			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5664			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5665			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5666			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5667				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5668			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5669			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5670			if (temp != data)
5671				WREG32(mmCGTS_SM_CTRL_REG, data);
5672		}
5673		udelay(50);
5674
5675		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5676		gfx_v8_0_wait_for_rlc_serdes(adev);
5677	} else {
5678		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5679		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5680		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5681				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5682				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5683				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5684		if (temp != data)
5685			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5686
5687		/* 2 - disable MGLS in RLC */
5688		data = RREG32(mmRLC_MEM_SLP_CNTL);
5689		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5690			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5691			WREG32(mmRLC_MEM_SLP_CNTL, data);
5692		}
5693
5694		/* 3 - disable MGLS in CP */
5695		data = RREG32(mmCP_MEM_SLP_CNTL);
5696		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5697			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5698			WREG32(mmCP_MEM_SLP_CNTL, data);
5699		}
5700
5701		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5702		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5703		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5704				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5705		if (temp != data)
5706			WREG32(mmCGTS_SM_CTRL_REG, data);
5707
5708		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5709		gfx_v8_0_wait_for_rlc_serdes(adev);
5710
5711		/* 6 - set mgcg override */
5712		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5713
5714		udelay(50);
5715
5716		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5717		gfx_v8_0_wait_for_rlc_serdes(adev);
5718	}
5719
5720	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5721}
5722
5723static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5724						      bool enable)
5725{
5726	uint32_t temp, temp1, data, data1;
5727
5728	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5729
5730	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5731
5732	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5733		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5734		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5735		if (temp1 != data1)
5736			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5737
5738		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5739		gfx_v8_0_wait_for_rlc_serdes(adev);
5740
5741		/* 2 - clear cgcg override */
5742		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5743
5744		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5745		gfx_v8_0_wait_for_rlc_serdes(adev);
5746
5747		/* 3 - write cmd to set CGLS */
5748		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5749
5750		/* 4 - enable cgcg */
5751		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5752
5753		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5754			/* enable cgls*/
5755			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5756
5757			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5758			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5759
5760			if (temp1 != data1)
5761				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5762		} else {
5763			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5764		}
5765
5766		if (temp != data)
5767			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5768
5769		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5770		 * Cmp_busy/GFX_Idle interrupts
5771		 */
5772		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5773	} else {
5774		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5775		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5776
5777		/* TEST CGCG */
5778		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5779		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5780				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5781		if (temp1 != data1)
5782			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5783
5784		/* read gfx register to wake up cgcg */
5785		RREG32(mmCB_CGTT_SCLK_CTRL);
5786		RREG32(mmCB_CGTT_SCLK_CTRL);
5787		RREG32(mmCB_CGTT_SCLK_CTRL);
5788		RREG32(mmCB_CGTT_SCLK_CTRL);
5789
5790		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5791		gfx_v8_0_wait_for_rlc_serdes(adev);
5792
5793		/* write cmd to Set CGCG Override */
5794		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5795
5796		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5797		gfx_v8_0_wait_for_rlc_serdes(adev);
5798
5799		/* write cmd to Clear CGLS */
5800		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5801
5802		/* disable cgcg, cgls should be disabled too. */
5803		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5804			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5805		if (temp != data)
5806			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5807		/* enable interrupts again for PG */
5808		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5809	}
5810
5811	gfx_v8_0_wait_for_rlc_serdes(adev);
5812
5813	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5814}
5815static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5816					    bool enable)
5817{
5818	if (enable) {
5819		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5820		 * ===  MGCG + MGLS + TS(CG/LS) ===
5821		 */
5822		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5823		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5824	} else {
5825		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5826		 * ===  CGCG + CGLS ===
5827		 */
5828		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5829		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5830	}
5831	return 0;
5832}
5833
5834static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5835					  enum amd_clockgating_state state)
5836{
5837	uint32_t msg_id, pp_state = 0;
5838	uint32_t pp_support_state = 0;
5839
5840	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5841		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5842			pp_support_state = PP_STATE_SUPPORT_LS;
5843			pp_state = PP_STATE_LS;
5844		}
5845		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5846			pp_support_state |= PP_STATE_SUPPORT_CG;
5847			pp_state |= PP_STATE_CG;
5848		}
5849		if (state == AMD_CG_STATE_UNGATE)
5850			pp_state = 0;
5851
5852		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5853				PP_BLOCK_GFX_CG,
5854				pp_support_state,
5855				pp_state);
5856		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5857	}
5858
5859	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5860		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5861			pp_support_state = PP_STATE_SUPPORT_LS;
5862			pp_state = PP_STATE_LS;
5863		}
5864
5865		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5866			pp_support_state |= PP_STATE_SUPPORT_CG;
5867			pp_state |= PP_STATE_CG;
5868		}
5869
5870		if (state == AMD_CG_STATE_UNGATE)
5871			pp_state = 0;
5872
5873		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5874				PP_BLOCK_GFX_MG,
5875				pp_support_state,
5876				pp_state);
5877		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5878	}
5879
5880	return 0;
5881}
5882
5883static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5884					  enum amd_clockgating_state state)
5885{
5886
5887	uint32_t msg_id, pp_state = 0;
5888	uint32_t pp_support_state = 0;
5889
5890	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5891		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5892			pp_support_state = PP_STATE_SUPPORT_LS;
5893			pp_state = PP_STATE_LS;
5894		}
5895		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5896			pp_support_state |= PP_STATE_SUPPORT_CG;
5897			pp_state |= PP_STATE_CG;
5898		}
5899		if (state == AMD_CG_STATE_UNGATE)
5900			pp_state = 0;
5901
5902		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5903				PP_BLOCK_GFX_CG,
5904				pp_support_state,
5905				pp_state);
5906		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5907	}
5908
5909	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5910		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5911			pp_support_state = PP_STATE_SUPPORT_LS;
5912			pp_state = PP_STATE_LS;
5913		}
5914		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5915			pp_support_state |= PP_STATE_SUPPORT_CG;
5916			pp_state |= PP_STATE_CG;
5917		}
5918		if (state == AMD_CG_STATE_UNGATE)
5919			pp_state = 0;
5920
5921		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5922				PP_BLOCK_GFX_3D,
5923				pp_support_state,
5924				pp_state);
5925		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5926	}
5927
5928	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5929		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5930			pp_support_state = PP_STATE_SUPPORT_LS;
5931			pp_state = PP_STATE_LS;
5932		}
5933
5934		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5935			pp_support_state |= PP_STATE_SUPPORT_CG;
5936			pp_state |= PP_STATE_CG;
5937		}
5938
5939		if (state == AMD_CG_STATE_UNGATE)
5940			pp_state = 0;
5941
5942		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5943				PP_BLOCK_GFX_MG,
5944				pp_support_state,
5945				pp_state);
5946		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5947	}
5948
5949	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5950		pp_support_state = PP_STATE_SUPPORT_LS;
5951
5952		if (state == AMD_CG_STATE_UNGATE)
5953			pp_state = 0;
5954		else
5955			pp_state = PP_STATE_LS;
5956
5957		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5958				PP_BLOCK_GFX_RLC,
5959				pp_support_state,
5960				pp_state);
5961		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5962	}
5963
5964	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5965		pp_support_state = PP_STATE_SUPPORT_LS;
5966
5967		if (state == AMD_CG_STATE_UNGATE)
5968			pp_state = 0;
5969		else
5970			pp_state = PP_STATE_LS;
5971		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5972			PP_BLOCK_GFX_CP,
5973			pp_support_state,
5974			pp_state);
5975		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5976	}
5977
5978	return 0;
5979}
5980
5981static int gfx_v8_0_set_clockgating_state(void *handle,
5982					  enum amd_clockgating_state state)
5983{
5984	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5985
5986	if (amdgpu_sriov_vf(adev))
5987		return 0;
5988
5989	switch (adev->asic_type) {
5990	case CHIP_FIJI:
5991	case CHIP_CARRIZO:
5992	case CHIP_STONEY:
5993		gfx_v8_0_update_gfx_clock_gating(adev,
5994						 state == AMD_CG_STATE_GATE);
5995		break;
5996	case CHIP_TONGA:
5997		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5998		break;
5999	case CHIP_POLARIS10:
6000	case CHIP_POLARIS11:
6001	case CHIP_POLARIS12:
6002	case CHIP_VEGAM:
6003		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6004		break;
6005	default:
6006		break;
6007	}
6008	return 0;
6009}
6010
6011static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6012{
6013	return *ring->rptr_cpu_addr;
6014}
6015
6016static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6017{
6018	struct amdgpu_device *adev = ring->adev;
6019
6020	if (ring->use_doorbell)
6021		/* XXX check if swapping is necessary on BE */
6022		return *ring->wptr_cpu_addr;
6023	else
6024		return RREG32(mmCP_RB0_WPTR);
6025}
6026
6027static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6028{
6029	struct amdgpu_device *adev = ring->adev;
6030
6031	if (ring->use_doorbell) {
6032		/* XXX check if swapping is necessary on BE */
6033		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6034		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6035	} else {
6036		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6037		(void)RREG32(mmCP_RB0_WPTR);
6038	}
6039}
6040
6041static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6042{
6043	u32 ref_and_mask, reg_mem_engine;
6044
6045	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6046	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6047		switch (ring->me) {
6048		case 1:
6049			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6050			break;
6051		case 2:
6052			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6053			break;
6054		default:
6055			return;
6056		}
6057		reg_mem_engine = 0;
6058	} else {
6059		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6060		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6061	}
6062
6063	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6064	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6065				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6066				 reg_mem_engine));
6067	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6068	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6069	amdgpu_ring_write(ring, ref_and_mask);
6070	amdgpu_ring_write(ring, ref_and_mask);
6071	amdgpu_ring_write(ring, 0x20); /* poll interval */
6072}
6073
6074static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6075{
6076	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6077	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6078		EVENT_INDEX(4));
6079
6080	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6081	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6082		EVENT_INDEX(0));
6083}
6084
6085static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6086					struct amdgpu_job *job,
6087					struct amdgpu_ib *ib,
6088					uint32_t flags)
6089{
6090	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6091	u32 header, control = 0;
6092
6093	if (ib->flags & AMDGPU_IB_FLAG_CE)
6094		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6095	else
6096		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6097
6098	control |= ib->length_dw | (vmid << 24);
6099
6100	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6101		control |= INDIRECT_BUFFER_PRE_ENB(1);
6102
6103		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6104			gfx_v8_0_ring_emit_de_meta(ring);
6105	}
6106
6107	amdgpu_ring_write(ring, header);
6108	amdgpu_ring_write(ring,
6109#ifdef __BIG_ENDIAN
6110			  (2 << 0) |
6111#endif
6112			  (ib->gpu_addr & 0xFFFFFFFC));
6113	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6114	amdgpu_ring_write(ring, control);
6115}
6116
6117static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6118					  struct amdgpu_job *job,
6119					  struct amdgpu_ib *ib,
6120					  uint32_t flags)
6121{
6122	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6123	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6124
6125	/* Currently, there is a high possibility to get wave ID mismatch
6126	 * between ME and GDS, leading to a hw deadlock, because ME generates
6127	 * different wave IDs than the GDS expects. This situation happens
6128	 * randomly when at least 5 compute pipes use GDS ordered append.
6129	 * The wave IDs generated by ME are also wrong after suspend/resume.
6130	 * Those are probably bugs somewhere else in the kernel driver.
6131	 *
6132	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6133	 * GDS to 0 for this ring (me/pipe).
6134	 */
6135	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6136		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6137		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6138		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6139	}
6140
6141	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6142	amdgpu_ring_write(ring,
6143#ifdef __BIG_ENDIAN
6144				(2 << 0) |
6145#endif
6146				(ib->gpu_addr & 0xFFFFFFFC));
6147	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6148	amdgpu_ring_write(ring, control);
6149}
6150
6151static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6152					 u64 seq, unsigned flags)
6153{
6154	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6155	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6156
6157	/* Workaround for cache flush problems. First send a dummy EOP
6158	 * event down the pipe with seq one below.
6159	 */
6160	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6161	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6162				 EOP_TC_ACTION_EN |
6163				 EOP_TC_WB_ACTION_EN |
6164				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6165				 EVENT_INDEX(5)));
6166	amdgpu_ring_write(ring, addr & 0xfffffffc);
6167	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6168				DATA_SEL(1) | INT_SEL(0));
6169	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6170	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6171
6172	/* Then send the real EOP event down the pipe:
6173	 * EVENT_WRITE_EOP - flush caches, send int */
6174	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6175	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6176				 EOP_TC_ACTION_EN |
6177				 EOP_TC_WB_ACTION_EN |
6178				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6179				 EVENT_INDEX(5)));
6180	amdgpu_ring_write(ring, addr & 0xfffffffc);
6181	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6182			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6183	amdgpu_ring_write(ring, lower_32_bits(seq));
6184	amdgpu_ring_write(ring, upper_32_bits(seq));
6185
6186}
6187
6188static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6189{
6190	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6191	uint32_t seq = ring->fence_drv.sync_seq;
6192	uint64_t addr = ring->fence_drv.gpu_addr;
6193
6194	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6195	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6196				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6197				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6198	amdgpu_ring_write(ring, addr & 0xfffffffc);
6199	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6200	amdgpu_ring_write(ring, seq);
6201	amdgpu_ring_write(ring, 0xffffffff);
6202	amdgpu_ring_write(ring, 4); /* poll interval */
6203}
6204
6205static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6206					unsigned vmid, uint64_t pd_addr)
6207{
6208	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6209
6210	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6211
6212	/* wait for the invalidate to complete */
6213	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6214	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6215				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6216				 WAIT_REG_MEM_ENGINE(0))); /* me */
6217	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6218	amdgpu_ring_write(ring, 0);
6219	amdgpu_ring_write(ring, 0); /* ref */
6220	amdgpu_ring_write(ring, 0); /* mask */
6221	amdgpu_ring_write(ring, 0x20); /* poll interval */
6222
6223	/* compute doesn't have PFP */
6224	if (usepfp) {
6225		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6226		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6227		amdgpu_ring_write(ring, 0x0);
6228	}
6229}
6230
6231static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6232{
6233	return *ring->wptr_cpu_addr;
6234}
6235
6236static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6237{
6238	struct amdgpu_device *adev = ring->adev;
6239
6240	/* XXX check if swapping is necessary on BE */
6241	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6242	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6243}
6244
6245static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6246					     u64 addr, u64 seq,
6247					     unsigned flags)
6248{
6249	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6250	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6251
6252	/* RELEASE_MEM - flush caches, send int */
6253	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6254	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6255				 EOP_TC_ACTION_EN |
6256				 EOP_TC_WB_ACTION_EN |
6257				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6258				 EVENT_INDEX(5)));
6259	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6260	amdgpu_ring_write(ring, addr & 0xfffffffc);
6261	amdgpu_ring_write(ring, upper_32_bits(addr));
6262	amdgpu_ring_write(ring, lower_32_bits(seq));
6263	amdgpu_ring_write(ring, upper_32_bits(seq));
6264}
6265
6266static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6267					 u64 seq, unsigned int flags)
6268{
6269	/* we only allocate 32bit for each seq wb address */
6270	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6271
6272	/* write fence seq to the "addr" */
6273	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6274	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6275				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6276	amdgpu_ring_write(ring, lower_32_bits(addr));
6277	amdgpu_ring_write(ring, upper_32_bits(addr));
6278	amdgpu_ring_write(ring, lower_32_bits(seq));
6279
6280	if (flags & AMDGPU_FENCE_FLAG_INT) {
6281		/* set register to trigger INT */
6282		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6283		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6284					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6285		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6286		amdgpu_ring_write(ring, 0);
6287		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6288	}
6289}
6290
6291static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6292{
6293	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6294	amdgpu_ring_write(ring, 0);
6295}
6296
6297static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6298{
6299	uint32_t dw2 = 0;
6300
6301	if (amdgpu_sriov_vf(ring->adev))
6302		gfx_v8_0_ring_emit_ce_meta(ring);
6303
6304	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6305	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6306		gfx_v8_0_ring_emit_vgt_flush(ring);
6307		/* set load_global_config & load_global_uconfig */
6308		dw2 |= 0x8001;
6309		/* set load_cs_sh_regs */
6310		dw2 |= 0x01000000;
6311		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6312		dw2 |= 0x10002;
6313
6314		/* set load_ce_ram if preamble presented */
6315		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6316			dw2 |= 0x10000000;
6317	} else {
6318		/* still load_ce_ram if this is the first time preamble presented
6319		 * although there is no context switch happens.
6320		 */
6321		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6322			dw2 |= 0x10000000;
6323	}
6324
6325	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6326	amdgpu_ring_write(ring, dw2);
6327	amdgpu_ring_write(ring, 0);
6328}
6329
6330static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6331{
6332	unsigned ret;
6333
6334	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6335	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6336	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6337	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6338	ret = ring->wptr & ring->buf_mask;
6339	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6340	return ret;
6341}
6342
6343static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6344{
6345	unsigned cur;
6346
6347	BUG_ON(offset > ring->buf_mask);
6348	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6349
6350	cur = (ring->wptr & ring->buf_mask) - 1;
6351	if (likely(cur > offset))
6352		ring->ring[offset] = cur - offset;
6353	else
6354		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6355}
6356
6357static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6358				    uint32_t reg_val_offs)
6359{
6360	struct amdgpu_device *adev = ring->adev;
6361
6362	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6363	amdgpu_ring_write(ring, 0 |	/* src: register*/
6364				(5 << 8) |	/* dst: memory */
6365				(1 << 20));	/* write confirm */
6366	amdgpu_ring_write(ring, reg);
6367	amdgpu_ring_write(ring, 0);
6368	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6369				reg_val_offs * 4));
6370	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6371				reg_val_offs * 4));
6372}
6373
6374static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6375				  uint32_t val)
6376{
6377	uint32_t cmd;
6378
6379	switch (ring->funcs->type) {
6380	case AMDGPU_RING_TYPE_GFX:
6381		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6382		break;
6383	case AMDGPU_RING_TYPE_KIQ:
6384		cmd = 1 << 16; /* no inc addr */
6385		break;
6386	default:
6387		cmd = WR_CONFIRM;
6388		break;
6389	}
6390
6391	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6392	amdgpu_ring_write(ring, cmd);
6393	amdgpu_ring_write(ring, reg);
6394	amdgpu_ring_write(ring, 0);
6395	amdgpu_ring_write(ring, val);
6396}
6397
6398static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6399{
6400	struct amdgpu_device *adev = ring->adev;
6401	uint32_t value = 0;
6402
6403	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6404	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6405	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6406	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6407	WREG32(mmSQ_CMD, value);
6408}
6409
6410static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6411						 enum amdgpu_interrupt_state state)
6412{
6413	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6414		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6415}
6416
6417static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6418						     int me, int pipe,
6419						     enum amdgpu_interrupt_state state)
6420{
6421	u32 mec_int_cntl, mec_int_cntl_reg;
6422
6423	/*
6424	 * amdgpu controls only the first MEC. That's why this function only
6425	 * handles the setting of interrupts for this specific MEC. All other
6426	 * pipes' interrupts are set by amdkfd.
6427	 */
6428
6429	if (me == 1) {
6430		switch (pipe) {
6431		case 0:
6432			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6433			break;
6434		case 1:
6435			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6436			break;
6437		case 2:
6438			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6439			break;
6440		case 3:
6441			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6442			break;
6443		default:
6444			DRM_DEBUG("invalid pipe %d\n", pipe);
6445			return;
6446		}
6447	} else {
6448		DRM_DEBUG("invalid me %d\n", me);
6449		return;
6450	}
6451
6452	switch (state) {
6453	case AMDGPU_IRQ_STATE_DISABLE:
6454		mec_int_cntl = RREG32(mec_int_cntl_reg);
6455		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6456		WREG32(mec_int_cntl_reg, mec_int_cntl);
6457		break;
6458	case AMDGPU_IRQ_STATE_ENABLE:
6459		mec_int_cntl = RREG32(mec_int_cntl_reg);
6460		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6461		WREG32(mec_int_cntl_reg, mec_int_cntl);
6462		break;
6463	default:
6464		break;
6465	}
6466}
6467
6468static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6469					     struct amdgpu_irq_src *source,
6470					     unsigned type,
6471					     enum amdgpu_interrupt_state state)
6472{
6473	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6474		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6475
6476	return 0;
6477}
6478
6479static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6480					      struct amdgpu_irq_src *source,
6481					      unsigned type,
6482					      enum amdgpu_interrupt_state state)
6483{
6484	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6485		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6486
6487	return 0;
6488}
6489
6490static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6491					    struct amdgpu_irq_src *src,
6492					    unsigned type,
6493					    enum amdgpu_interrupt_state state)
6494{
6495	switch (type) {
6496	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6497		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6498		break;
6499	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6500		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6501		break;
6502	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6503		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6504		break;
6505	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6506		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6507		break;
6508	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6509		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6510		break;
6511	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6512		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6513		break;
6514	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6515		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6516		break;
6517	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6518		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6519		break;
6520	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6521		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6522		break;
6523	default:
6524		break;
6525	}
6526	return 0;
6527}
6528
6529static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6530					 struct amdgpu_irq_src *source,
6531					 unsigned int type,
6532					 enum amdgpu_interrupt_state state)
6533{
6534	int enable_flag;
6535
6536	switch (state) {
6537	case AMDGPU_IRQ_STATE_DISABLE:
6538		enable_flag = 0;
6539		break;
6540
6541	case AMDGPU_IRQ_STATE_ENABLE:
6542		enable_flag = 1;
6543		break;
6544
6545	default:
6546		return -EINVAL;
6547	}
6548
6549	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6550	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6551	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6552	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6553	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6554	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6555		     enable_flag);
6556	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6557		     enable_flag);
6558	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6559		     enable_flag);
6560	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6561		     enable_flag);
6562	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6563		     enable_flag);
6564	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6565		     enable_flag);
6566	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6567		     enable_flag);
6568	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6569		     enable_flag);
6570
6571	return 0;
6572}
6573
6574static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6575				     struct amdgpu_irq_src *source,
6576				     unsigned int type,
6577				     enum amdgpu_interrupt_state state)
6578{
6579	int enable_flag;
6580
6581	switch (state) {
6582	case AMDGPU_IRQ_STATE_DISABLE:
6583		enable_flag = 1;
6584		break;
6585
6586	case AMDGPU_IRQ_STATE_ENABLE:
6587		enable_flag = 0;
6588		break;
6589
6590	default:
6591		return -EINVAL;
6592	}
6593
6594	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6595		     enable_flag);
6596
6597	return 0;
6598}
6599
6600static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6601			    struct amdgpu_irq_src *source,
6602			    struct amdgpu_iv_entry *entry)
6603{
6604	int i;
6605	u8 me_id, pipe_id, queue_id;
6606	struct amdgpu_ring *ring;
6607
6608	DRM_DEBUG("IH: CP EOP\n");
6609	me_id = (entry->ring_id & 0x0c) >> 2;
6610	pipe_id = (entry->ring_id & 0x03) >> 0;
6611	queue_id = (entry->ring_id & 0x70) >> 4;
6612
6613	switch (me_id) {
6614	case 0:
6615		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6616		break;
6617	case 1:
6618	case 2:
6619		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6620			ring = &adev->gfx.compute_ring[i];
6621			/* Per-queue interrupt is supported for MEC starting from VI.
6622			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6623			  */
6624			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6625				amdgpu_fence_process(ring);
6626		}
6627		break;
6628	}
6629	return 0;
6630}
6631
6632static void gfx_v8_0_fault(struct amdgpu_device *adev,
6633			   struct amdgpu_iv_entry *entry)
6634{
6635	u8 me_id, pipe_id, queue_id;
6636	struct amdgpu_ring *ring;
6637	int i;
6638
6639	me_id = (entry->ring_id & 0x0c) >> 2;
6640	pipe_id = (entry->ring_id & 0x03) >> 0;
6641	queue_id = (entry->ring_id & 0x70) >> 4;
6642
6643	switch (me_id) {
6644	case 0:
6645		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6646		break;
6647	case 1:
6648	case 2:
6649		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6650			ring = &adev->gfx.compute_ring[i];
6651			if (ring->me == me_id && ring->pipe == pipe_id &&
6652			    ring->queue == queue_id)
6653				drm_sched_fault(&ring->sched);
6654		}
6655		break;
6656	}
6657}
6658
6659static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6660				 struct amdgpu_irq_src *source,
6661				 struct amdgpu_iv_entry *entry)
6662{
6663	DRM_ERROR("Illegal register access in command stream\n");
6664	gfx_v8_0_fault(adev, entry);
6665	return 0;
6666}
6667
6668static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6669				  struct amdgpu_irq_src *source,
6670				  struct amdgpu_iv_entry *entry)
6671{
6672	DRM_ERROR("Illegal instruction in command stream\n");
6673	gfx_v8_0_fault(adev, entry);
6674	return 0;
6675}
6676
6677static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6678				     struct amdgpu_irq_src *source,
6679				     struct amdgpu_iv_entry *entry)
6680{
6681	DRM_ERROR("CP EDC/ECC error detected.");
6682	return 0;
6683}
6684
6685static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6686				  bool from_wq)
6687{
6688	u32 enc, se_id, sh_id, cu_id;
6689	char type[20];
6690	int sq_edc_source = -1;
6691
6692	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6693	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6694
6695	switch (enc) {
6696		case 0:
6697			DRM_INFO("SQ general purpose intr detected:"
6698					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6699					"host_cmd_overflow %d, cmd_timestamp %d,"
6700					"reg_timestamp %d, thread_trace_buff_full %d,"
6701					"wlt %d, thread_trace %d.\n",
6702					se_id,
6703					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6704					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6705					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6706					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6707					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6708					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6709					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6710					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6711					);
6712			break;
6713		case 1:
6714		case 2:
6715
6716			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6717			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6718
6719			/*
6720			 * This function can be called either directly from ISR
6721			 * or from BH in which case we can access SQ_EDC_INFO
6722			 * instance
6723			 */
6724			if (from_wq) {
6725				mutex_lock(&adev->grbm_idx_mutex);
6726				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
6727
6728				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6729
6730				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6731				mutex_unlock(&adev->grbm_idx_mutex);
6732			}
6733
6734			if (enc == 1)
6735				sprintf(type, "instruction intr");
6736			else
6737				sprintf(type, "EDC/ECC error");
6738
6739			DRM_INFO(
6740				"SQ %s detected: "
6741					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6742					"trap %s, sq_ed_info.source %s.\n",
6743					type, se_id, sh_id, cu_id,
6744					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6745					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6746					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6747					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6748					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6749				);
6750			break;
6751		default:
6752			DRM_ERROR("SQ invalid encoding type\n.");
6753	}
6754}
6755
6756static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6757{
6758
6759	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6760	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6761
6762	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6763}
6764
6765static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6766			   struct amdgpu_irq_src *source,
6767			   struct amdgpu_iv_entry *entry)
6768{
6769	unsigned ih_data = entry->src_data[0];
6770
6771	/*
6772	 * Try to submit work so SQ_EDC_INFO can be accessed from
6773	 * BH. If previous work submission hasn't finished yet
6774	 * just print whatever info is possible directly from the ISR.
6775	 */
6776	if (work_pending(&adev->gfx.sq_work.work)) {
6777		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6778	} else {
6779		adev->gfx.sq_work.ih_data = ih_data;
6780		schedule_work(&adev->gfx.sq_work.work);
6781	}
6782
6783	return 0;
6784}
6785
6786static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6787{
6788	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6789	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6790			  PACKET3_TC_ACTION_ENA |
6791			  PACKET3_SH_KCACHE_ACTION_ENA |
6792			  PACKET3_SH_ICACHE_ACTION_ENA |
6793			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6794	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6795	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6796	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6797}
6798
6799static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6800{
6801	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6802	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6803			  PACKET3_TC_ACTION_ENA |
6804			  PACKET3_SH_KCACHE_ACTION_ENA |
6805			  PACKET3_SH_ICACHE_ACTION_ENA |
6806			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6807	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6808	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6809	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6810	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6811	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6812}
6813
6814
6815/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6816#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
6817static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6818					uint32_t pipe, bool enable)
6819{
6820	uint32_t val;
6821	uint32_t wcl_cs_reg;
6822
6823	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6824
6825	switch (pipe) {
6826	case 0:
6827		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6828		break;
6829	case 1:
6830		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6831		break;
6832	case 2:
6833		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6834		break;
6835	case 3:
6836		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6837		break;
6838	default:
6839		DRM_DEBUG("invalid pipe %d\n", pipe);
6840		return;
6841	}
6842
6843	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6844
6845}
6846
6847#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
6848static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6849{
6850	struct amdgpu_device *adev = ring->adev;
6851	uint32_t val;
6852	int i;
6853
6854	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6855	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6856	 * around 25% of gpu resources.
6857	 */
6858	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6859	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6860
6861	/* Restrict waves for normal/low priority compute queues as well
6862	 * to get best QoS for high priority compute jobs.
6863	 *
6864	 * amdgpu controls only 1st ME(0-3 CS pipes).
6865	 */
6866	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6867		if (i != ring->pipe)
6868			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6869
6870	}
6871
6872}
6873
6874static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6875	.name = "gfx_v8_0",
6876	.early_init = gfx_v8_0_early_init,
6877	.late_init = gfx_v8_0_late_init,
6878	.sw_init = gfx_v8_0_sw_init,
6879	.sw_fini = gfx_v8_0_sw_fini,
6880	.hw_init = gfx_v8_0_hw_init,
6881	.hw_fini = gfx_v8_0_hw_fini,
6882	.suspend = gfx_v8_0_suspend,
6883	.resume = gfx_v8_0_resume,
6884	.is_idle = gfx_v8_0_is_idle,
6885	.wait_for_idle = gfx_v8_0_wait_for_idle,
6886	.check_soft_reset = gfx_v8_0_check_soft_reset,
6887	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6888	.soft_reset = gfx_v8_0_soft_reset,
6889	.post_soft_reset = gfx_v8_0_post_soft_reset,
6890	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6891	.set_powergating_state = gfx_v8_0_set_powergating_state,
6892	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6893};
6894
6895static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6896	.type = AMDGPU_RING_TYPE_GFX,
6897	.align_mask = 0xff,
6898	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6899	.support_64bit_ptrs = false,
6900	.get_rptr = gfx_v8_0_ring_get_rptr,
6901	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6902	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6903	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6904		5 +  /* COND_EXEC */
6905		7 +  /* PIPELINE_SYNC */
6906		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6907		12 +  /* FENCE for VM_FLUSH */
6908		20 + /* GDS switch */
6909		4 + /* double SWITCH_BUFFER,
6910		       the first COND_EXEC jump to the place just
6911			   prior to this double SWITCH_BUFFER  */
6912		5 + /* COND_EXEC */
6913		7 +	 /*	HDP_flush */
6914		4 +	 /*	VGT_flush */
6915		14 + /*	CE_META */
6916		31 + /*	DE_META */
6917		3 + /* CNTX_CTRL */
6918		5 + /* HDP_INVL */
6919		12 + 12 + /* FENCE x2 */
6920		2 + /* SWITCH_BUFFER */
6921		5, /* SURFACE_SYNC */
6922	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6923	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6924	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6925	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6926	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6927	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6928	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6929	.test_ring = gfx_v8_0_ring_test_ring,
6930	.test_ib = gfx_v8_0_ring_test_ib,
6931	.insert_nop = amdgpu_ring_insert_nop,
6932	.pad_ib = amdgpu_ring_generic_pad_ib,
6933	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6934	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6935	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6936	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6937	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6938	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6939	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
6940};
6941
6942static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6943	.type = AMDGPU_RING_TYPE_COMPUTE,
6944	.align_mask = 0xff,
6945	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6946	.support_64bit_ptrs = false,
6947	.get_rptr = gfx_v8_0_ring_get_rptr,
6948	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6949	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6950	.emit_frame_size =
6951		20 + /* gfx_v8_0_ring_emit_gds_switch */
6952		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6953		5 + /* hdp_invalidate */
6954		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6955		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6956		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6957		7 + /* gfx_v8_0_emit_mem_sync_compute */
6958		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6959		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6960	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6961	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6962	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6963	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6964	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6965	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6966	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6967	.test_ring = gfx_v8_0_ring_test_ring,
6968	.test_ib = gfx_v8_0_ring_test_ib,
6969	.insert_nop = amdgpu_ring_insert_nop,
6970	.pad_ib = amdgpu_ring_generic_pad_ib,
6971	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6972	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
6973	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
6974};
6975
6976static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6977	.type = AMDGPU_RING_TYPE_KIQ,
6978	.align_mask = 0xff,
6979	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6980	.support_64bit_ptrs = false,
6981	.get_rptr = gfx_v8_0_ring_get_rptr,
6982	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6983	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6984	.emit_frame_size =
6985		20 + /* gfx_v8_0_ring_emit_gds_switch */
6986		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6987		5 + /* hdp_invalidate */
6988		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6989		17 + /* gfx_v8_0_ring_emit_vm_flush */
6990		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6991	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6992	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6993	.test_ring = gfx_v8_0_ring_test_ring,
6994	.insert_nop = amdgpu_ring_insert_nop,
6995	.pad_ib = amdgpu_ring_generic_pad_ib,
6996	.emit_rreg = gfx_v8_0_ring_emit_rreg,
6997	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6998};
6999
7000static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7001{
7002	int i;
7003
7004	adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7005
7006	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7007		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7008
7009	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7010		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7011}
7012
7013static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7014	.set = gfx_v8_0_set_eop_interrupt_state,
7015	.process = gfx_v8_0_eop_irq,
7016};
7017
7018static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7019	.set = gfx_v8_0_set_priv_reg_fault_state,
7020	.process = gfx_v8_0_priv_reg_irq,
7021};
7022
7023static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7024	.set = gfx_v8_0_set_priv_inst_fault_state,
7025	.process = gfx_v8_0_priv_inst_irq,
7026};
7027
7028static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7029	.set = gfx_v8_0_set_cp_ecc_int_state,
7030	.process = gfx_v8_0_cp_ecc_error_irq,
7031};
7032
7033static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7034	.set = gfx_v8_0_set_sq_int_state,
7035	.process = gfx_v8_0_sq_irq,
7036};
7037
7038static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7039{
7040	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7041	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7042
7043	adev->gfx.priv_reg_irq.num_types = 1;
7044	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7045
7046	adev->gfx.priv_inst_irq.num_types = 1;
7047	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7048
7049	adev->gfx.cp_ecc_error_irq.num_types = 1;
7050	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7051
7052	adev->gfx.sq_irq.num_types = 1;
7053	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7054}
7055
7056static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7057{
7058	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7059}
7060
7061static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7062{
7063	/* init asci gds info */
7064	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7065	adev->gds.gws_size = 64;
7066	adev->gds.oa_size = 16;
7067	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7068}
7069
7070static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7071						 u32 bitmap)
7072{
7073	u32 data;
7074
7075	if (!bitmap)
7076		return;
7077
7078	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7079	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7080
7081	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7082}
7083
7084static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7085{
7086	u32 data, mask;
7087
7088	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7089		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7090
7091	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7092
7093	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7094}
7095
7096static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7097{
7098	int i, j, k, counter, active_cu_number = 0;
7099	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7100	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7101	unsigned disable_masks[4 * 2];
7102	u32 ao_cu_num;
7103
7104	memset(cu_info, 0, sizeof(*cu_info));
7105
7106	if (adev->flags & AMD_IS_APU)
7107		ao_cu_num = 2;
7108	else
7109		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7110
7111	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7112
7113	mutex_lock(&adev->grbm_idx_mutex);
7114	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7115		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7116			mask = 1;
7117			ao_bitmap = 0;
7118			counter = 0;
7119			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7120			if (i < 4 && j < 2)
7121				gfx_v8_0_set_user_cu_inactive_bitmap(
7122					adev, disable_masks[i * 2 + j]);
7123			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7124			cu_info->bitmap[0][i][j] = bitmap;
7125
7126			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7127				if (bitmap & mask) {
7128					if (counter < ao_cu_num)
7129						ao_bitmap |= mask;
7130					counter ++;
7131				}
7132				mask <<= 1;
7133			}
7134			active_cu_number += counter;
7135			if (i < 2 && j < 2)
7136				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7137			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7138		}
7139	}
7140	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7141	mutex_unlock(&adev->grbm_idx_mutex);
7142
7143	cu_info->number = active_cu_number;
7144	cu_info->ao_cu_mask = ao_cu_mask;
7145	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7146	cu_info->max_waves_per_simd = 10;
7147	cu_info->max_scratch_slots_per_cu = 32;
7148	cu_info->wave_front_size = 64;
7149	cu_info->lds_size = 64;
7150}
7151
7152const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7153{
7154	.type = AMD_IP_BLOCK_TYPE_GFX,
7155	.major = 8,
7156	.minor = 0,
7157	.rev = 0,
7158	.funcs = &gfx_v8_0_ip_funcs,
7159};
7160
7161const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7162{
7163	.type = AMD_IP_BLOCK_TYPE_GFX,
7164	.major = 8,
7165	.minor = 1,
7166	.rev = 0,
7167	.funcs = &gfx_v8_0_ip_funcs,
7168};
7169
7170static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7171{
7172	uint64_t ce_payload_addr;
7173	int cnt_ce;
7174	union {
7175		struct vi_ce_ib_state regular;
7176		struct vi_ce_ib_state_chained_ib chained;
7177	} ce_payload = {};
7178
7179	if (ring->adev->virt.chained_ib_support) {
7180		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7181			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7182		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7183	} else {
7184		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7185			offsetof(struct vi_gfx_meta_data, ce_payload);
7186		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7187	}
7188
7189	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7190	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7191				WRITE_DATA_DST_SEL(8) |
7192				WR_CONFIRM) |
7193				WRITE_DATA_CACHE_POLICY(0));
7194	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7195	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7196	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7197}
7198
7199static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7200{
7201	uint64_t de_payload_addr, gds_addr, csa_addr;
7202	int cnt_de;
7203	union {
7204		struct vi_de_ib_state regular;
7205		struct vi_de_ib_state_chained_ib chained;
7206	} de_payload = {};
7207
7208	csa_addr = amdgpu_csa_vaddr(ring->adev);
7209	gds_addr = csa_addr + 4096;
7210	if (ring->adev->virt.chained_ib_support) {
7211		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7212		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7213		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7214		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7215	} else {
7216		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7217		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7218		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7219		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7220	}
7221
7222	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7223	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7224				WRITE_DATA_DST_SEL(8) |
7225				WR_CONFIRM) |
7226				WRITE_DATA_CACHE_POLICY(0));
7227	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7228	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7229	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7230}