Linux Audio

Check our new training course

Loading...
v6.9.4
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "amdgpu_ring.h"
  33#include "vi.h"
  34#include "vi_structs.h"
  35#include "vid.h"
  36#include "amdgpu_ucode.h"
  37#include "amdgpu_atombios.h"
  38#include "atombios_i2c.h"
  39#include "clearstate_vi.h"
  40
  41#include "gmc/gmc_8_2_d.h"
  42#include "gmc/gmc_8_2_sh_mask.h"
  43
  44#include "oss/oss_3_0_d.h"
  45#include "oss/oss_3_0_sh_mask.h"
  46
  47#include "bif/bif_5_0_d.h"
  48#include "bif/bif_5_0_sh_mask.h"
  49#include "gca/gfx_8_0_d.h"
  50#include "gca/gfx_8_0_enum.h"
  51#include "gca/gfx_8_0_sh_mask.h"
  52
  53#include "dce/dce_10_0_d.h"
  54#include "dce/dce_10_0_sh_mask.h"
  55
  56#include "smu/smu_7_1_3_d.h"
  57
  58#include "ivsrcid/ivsrcid_vislands30.h"
  59
  60#define GFX8_NUM_GFX_RINGS     1
  61#define GFX8_MEC_HPD_SIZE 4096
  62
  63#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  65#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  66#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  67
  68#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  69#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  70#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  71#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  72#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  73#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  74#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  75#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  76#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  77
  78#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  79#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  80#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  81#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  82#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  83#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  84
  85/* BPM SERDES CMD */
  86#define SET_BPM_SERDES_CMD    1
  87#define CLE_BPM_SERDES_CMD    0
  88
  89/* BPM Register Address*/
  90enum {
  91	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  92	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  93	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  94	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  95	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  96	BPM_REG_FGCG_MAX
  97};
  98
  99#define RLC_FormatDirectRegListLength        14
 100
 101MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 102MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 103MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 104MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 105MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 106MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 109MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 111MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 112MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 113
 114MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 115MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 116MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 117MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 118MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 119MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 122MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 124MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 125MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 126
 127MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 128MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 129MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 130MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 131MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 132MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 133
 134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 135MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 144MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 145
 146MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 147MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 148MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 149MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 150MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 151MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 152MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 153MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 155MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 156MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 157
 158MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 163MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 164MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 165MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 166MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 167MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 168MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 169
 170MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 171MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 172MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 173MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 174MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 175MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 176
 177static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 178{
 179	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 180	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 181	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 182	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 183	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 184	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 185	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 186	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 187	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 188	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 189	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 190	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 191	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 192	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 193	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 194	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 195};
 196
 197static const u32 golden_settings_tonga_a11[] =
 198{
 199	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 200	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 201	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 202	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 203	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 204	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 205	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 206	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 207	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 208	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 209	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 210	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 211	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 212	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 213	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 214	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 215};
 216
 217static const u32 tonga_golden_common_all[] =
 218{
 219	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 220	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 221	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 222	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 223	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 224	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 225	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 226	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 227};
 228
 229static const u32 tonga_mgcg_cgcg_init[] =
 230{
 231	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 232	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 233	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 234	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 235	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 236	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 237	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 238	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 239	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 240	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 241	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 242	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 243	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 244	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 245	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 246	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 247	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 248	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 249	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 250	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 251	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 252	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 253	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 254	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 255	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 256	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 257	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 258	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 259	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 260	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 261	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 262	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 263	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 264	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 265	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 266	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 267	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 268	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 269	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 270	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 271	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 272	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 273	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 274	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 275	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 276	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 277	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 278	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 279	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 280	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 281	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 282	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 283	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 284	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 285	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 286	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 287	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 288	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 289	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 290	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 291	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 292	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 293	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 294	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 295	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 296	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 297	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 298	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 299	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 300	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 301	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 302	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 303	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 304	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 305	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 306};
 307
 308static const u32 golden_settings_vegam_a11[] =
 309{
 310	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 311	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
 312	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 313	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 314	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 315	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 316	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
 317	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
 318	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 319	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 320	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 321	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 322	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 323	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 324	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 325	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
 326	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 327};
 328
 329static const u32 vegam_golden_common_all[] =
 330{
 331	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 332	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 333	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 334	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 335	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 336	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 337};
 338
 339static const u32 golden_settings_polaris11_a11[] =
 340{
 341	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 342	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 343	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 344	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 345	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 346	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 347	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 348	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 349	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 350	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 351	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 352	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 353	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 354	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 355	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 356	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 357	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 358};
 359
 360static const u32 polaris11_golden_common_all[] =
 361{
 362	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 363	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 364	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 365	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 366	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 367	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 368};
 369
 370static const u32 golden_settings_polaris10_a11[] =
 371{
 372	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 373	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 374	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 375	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 376	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 377	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 378	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 379	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 380	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 381	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 382	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 383	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 384	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 385	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 386	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 387	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 388	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 389};
 390
 391static const u32 polaris10_golden_common_all[] =
 392{
 393	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 394	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 395	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 396	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 397	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 398	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 399	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 400	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 401};
 402
 403static const u32 fiji_golden_common_all[] =
 404{
 405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 406	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 407	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 408	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 409	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 410	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 411	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 412	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 413	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 414	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 415};
 416
 417static const u32 golden_settings_fiji_a10[] =
 418{
 419	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 421	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 422	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 423	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 425	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 426	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 427	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 428	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 429	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 430};
 431
 432static const u32 fiji_mgcg_cgcg_init[] =
 433{
 434	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 435	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 436	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 437	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 438	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 439	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 440	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 441	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 442	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 443	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 444	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 445	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 446	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 447	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 451	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 452	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 453	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 454	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 455	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 456	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 459	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 460	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 461	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 462	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 463	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 464	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 465	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 466	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 467	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 468	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 469};
 470
 471static const u32 golden_settings_iceland_a11[] =
 472{
 473	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 474	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 475	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 476	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 477	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 478	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 479	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 480	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 481	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 482	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 483	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 484	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 485	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 486	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 487	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 488	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 489};
 490
 491static const u32 iceland_golden_common_all[] =
 492{
 493	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 494	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 495	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 496	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 497	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 498	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 499	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 500	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 501};
 502
 503static const u32 iceland_mgcg_cgcg_init[] =
 504{
 505	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 506	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 507	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 508	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 509	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 510	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 511	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 512	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 513	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 514	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 515	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 516	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 517	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 518	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 519	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 520	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 521	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 522	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 523	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 524	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 525	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 526	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 527	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 528	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 529	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 530	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 531	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 532	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 533	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 534	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 535	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 536	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 537	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 538	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 539	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 540	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 541	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 542	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 543	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 544	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 545	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 546	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 547	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 548	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 549	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 550	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 551	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 552	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 553	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 554	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 555	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 556	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 557	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 558	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 559	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 560	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 561	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 562	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 563	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 564	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 565	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 566	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 567	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 568	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 569};
 570
 571static const u32 cz_golden_settings_a11[] =
 572{
 573	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 574	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 575	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 576	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 577	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 578	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 579	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 580	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 581	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 582	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 583	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 584	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 585};
 586
 587static const u32 cz_golden_common_all[] =
 588{
 589	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 590	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 591	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 592	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 593	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 594	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 595	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 596	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 597};
 598
 599static const u32 cz_mgcg_cgcg_init[] =
 600{
 601	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 602	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 603	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 604	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 605	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 606	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 607	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 608	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 609	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 610	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 611	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 612	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 613	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 614	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 615	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 616	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 617	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 618	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 619	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 620	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 621	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 622	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 623	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 624	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 625	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 626	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 627	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 628	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 630	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 631	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 632	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 633	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 634	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 635	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 636	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 637	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 638	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 639	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 640	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 641	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 642	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 643	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 644	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 645	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 646	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 647	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 648	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 649	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 650	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 651	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 652	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 653	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 654	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 655	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 656	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 657	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 658	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 659	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 660	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 661	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 662	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 663	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 664	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 665	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 666	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 667	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 668	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 669	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 670	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 671	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 672	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 673	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 674	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 675	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 676};
 677
 678static const u32 stoney_golden_settings_a11[] =
 679{
 680	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 681	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 682	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 683	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 684	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 685	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 686	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 687	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 688	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 689	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 690};
 691
 692static const u32 stoney_golden_common_all[] =
 693{
 694	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 695	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 696	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 697	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 698	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 699	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 700	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 701	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 702};
 703
 /*
  * Stoney clock-gating related register rows (register offset plus two
  * 32-bit words each).  This is the first table that
  * gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
  * NOTE(review): "mgcg"/"cgcg" presumably stand for medium-/coarse-grain
  * clock gating - confirm against the hardware documentation.
  */
 704static const u32 stoney_mgcg_cgcg_init[] =
 705{
 706	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 707	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 708	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 709	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 710	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 711};
 712
 713
 /*
  * Human-readable descriptions of SQ EDC error sources.  Each string starts
  * with the SQ_EDC_INFO_SOURCE_* name it describes.
  * NOTE(review): presumably indexed by the source field decoded from
  * mmSQ_EDC_INFO - confirm at the use site, which is outside this excerpt.
  */
 714static const char * const sq_edc_source_names[] = {
 715	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
 716	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
 717	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
 718	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
 719	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
 720	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
 721	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
 722};
 723
 /*
  * Prototypes for file-local (static) helpers, declared here so they can be
  * referenced before their definitions in this translation unit.
  */
 724static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 725static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 726static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 727static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 728static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 729static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 730static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 731static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 732
 /*
  * Mask and shift of the ACLK_DIVIDER field in the CG_ACLK_CNTL SMC register.
  * gfx_v8_0_init_golden_registers() uses them to rewrite the divider on
  * CHIP_POLARIS10.
  */
 733#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
 734#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
 735
 736static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 737{
 738	uint32_t data;
 739
 740	switch (adev->asic_type) {
 741	case CHIP_TOPAZ:
 742		amdgpu_device_program_register_sequence(adev,
 743							iceland_mgcg_cgcg_init,
 744							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 745		amdgpu_device_program_register_sequence(adev,
 746							golden_settings_iceland_a11,
 747							ARRAY_SIZE(golden_settings_iceland_a11));
 748		amdgpu_device_program_register_sequence(adev,
 749							iceland_golden_common_all,
 750							ARRAY_SIZE(iceland_golden_common_all));
 751		break;
 752	case CHIP_FIJI:
 753		amdgpu_device_program_register_sequence(adev,
 754							fiji_mgcg_cgcg_init,
 755							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 756		amdgpu_device_program_register_sequence(adev,
 757							golden_settings_fiji_a10,
 758							ARRAY_SIZE(golden_settings_fiji_a10));
 759		amdgpu_device_program_register_sequence(adev,
 760							fiji_golden_common_all,
 761							ARRAY_SIZE(fiji_golden_common_all));
 762		break;
 763
 764	case CHIP_TONGA:
 765		amdgpu_device_program_register_sequence(adev,
 766							tonga_mgcg_cgcg_init,
 767							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 768		amdgpu_device_program_register_sequence(adev,
 769							golden_settings_tonga_a11,
 770							ARRAY_SIZE(golden_settings_tonga_a11));
 771		amdgpu_device_program_register_sequence(adev,
 772							tonga_golden_common_all,
 773							ARRAY_SIZE(tonga_golden_common_all));
 774		break;
 775	case CHIP_VEGAM:
 776		amdgpu_device_program_register_sequence(adev,
 777							golden_settings_vegam_a11,
 778							ARRAY_SIZE(golden_settings_vegam_a11));
 779		amdgpu_device_program_register_sequence(adev,
 780							vegam_golden_common_all,
 781							ARRAY_SIZE(vegam_golden_common_all));
 782		break;
 783	case CHIP_POLARIS11:
 784	case CHIP_POLARIS12:
 785		amdgpu_device_program_register_sequence(adev,
 786							golden_settings_polaris11_a11,
 787							ARRAY_SIZE(golden_settings_polaris11_a11));
 788		amdgpu_device_program_register_sequence(adev,
 789							polaris11_golden_common_all,
 790							ARRAY_SIZE(polaris11_golden_common_all));
 791		break;
 792	case CHIP_POLARIS10:
 793		amdgpu_device_program_register_sequence(adev,
 794							golden_settings_polaris10_a11,
 795							ARRAY_SIZE(golden_settings_polaris10_a11));
 796		amdgpu_device_program_register_sequence(adev,
 797							polaris10_golden_common_all,
 798							ARRAY_SIZE(polaris10_golden_common_all));
 799		data = RREG32_SMC(ixCG_ACLK_CNTL);
 800		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
 801		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
 802		WREG32_SMC(ixCG_ACLK_CNTL, data);
 803		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
 804		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 805		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 806		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
 807			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 808			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 809		}
 810		break;
 811	case CHIP_CARRIZO:
 812		amdgpu_device_program_register_sequence(adev,
 813							cz_mgcg_cgcg_init,
 814							ARRAY_SIZE(cz_mgcg_cgcg_init));
 815		amdgpu_device_program_register_sequence(adev,
 816							cz_golden_settings_a11,
 817							ARRAY_SIZE(cz_golden_settings_a11));
 818		amdgpu_device_program_register_sequence(adev,
 819							cz_golden_common_all,
 820							ARRAY_SIZE(cz_golden_common_all));
 821		break;
 822	case CHIP_STONEY:
 823		amdgpu_device_program_register_sequence(adev,
 824							stoney_mgcg_cgcg_init,
 825							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 826		amdgpu_device_program_register_sequence(adev,
 827							stoney_golden_settings_a11,
 828							ARRAY_SIZE(stoney_golden_settings_a11));
 829		amdgpu_device_program_register_sequence(adev,
 830							stoney_golden_common_all,
 831							ARRAY_SIZE(stoney_golden_common_all));
 832		break;
 833	default:
 834		break;
 835	}
 836}
 837
 
 
 
 
 
 
 
 838static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 839{
 840	struct amdgpu_device *adev = ring->adev;
 
 841	uint32_t tmp = 0;
 842	unsigned i;
 843	int r;
 844
 845	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
 846	r = amdgpu_ring_alloc(ring, 3);
 847	if (r)
 848		return r;
 849
 
 
 
 
 
 850	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 851	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
 852	amdgpu_ring_write(ring, 0xDEADBEEF);
 853	amdgpu_ring_commit(ring);
 854
 855	for (i = 0; i < adev->usec_timeout; i++) {
 856		tmp = RREG32(mmSCRATCH_REG0);
 857		if (tmp == 0xDEADBEEF)
 858			break;
 859		udelay(1);
 860	}
 861
 862	if (i >= adev->usec_timeout)
 863		r = -ETIMEDOUT;
 864
 
 
 865	return r;
 866}
 867
 868static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 869{
 870	struct amdgpu_device *adev = ring->adev;
 871	struct amdgpu_ib ib;
 872	struct dma_fence *f = NULL;
 873
 874	unsigned int index;
 875	uint64_t gpu_addr;
 876	uint32_t tmp;
 877	long r;
 878
 879	r = amdgpu_device_wb_get(adev, &index);
 880	if (r)
 881		return r;
 882
 883	gpu_addr = adev->wb.gpu_addr + (index * 4);
 884	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 885	memset(&ib, 0, sizeof(ib));
 886
 887	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 888	if (r)
 889		goto err1;
 890
 891	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 892	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 893	ib.ptr[2] = lower_32_bits(gpu_addr);
 894	ib.ptr[3] = upper_32_bits(gpu_addr);
 895	ib.ptr[4] = 0xDEADBEEF;
 896	ib.length_dw = 5;
 897
 898	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 899	if (r)
 900		goto err2;
 901
 902	r = dma_fence_wait_timeout(f, false, timeout);
 903	if (r == 0) {
 904		r = -ETIMEDOUT;
 905		goto err2;
 906	} else if (r < 0) {
 907		goto err2;
 908	}
 909
 910	tmp = adev->wb.wb[index];
 911	if (tmp == 0xDEADBEEF)
 912		r = 0;
 913	else
 914		r = -EINVAL;
 915
 916err2:
 917	amdgpu_ib_free(adev, &ib, NULL);
 918	dma_fence_put(f);
 919err1:
 920	amdgpu_device_wb_free(adev, index);
 921	return r;
 922}
 923
 924
 925static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 926{
 927	amdgpu_ucode_release(&adev->gfx.pfp_fw);
 928	amdgpu_ucode_release(&adev->gfx.me_fw);
 929	amdgpu_ucode_release(&adev->gfx.ce_fw);
 930	amdgpu_ucode_release(&adev->gfx.rlc_fw);
 931	amdgpu_ucode_release(&adev->gfx.mec_fw);
 
 
 
 
 
 932	if ((adev->asic_type != CHIP_STONEY) &&
 933	    (adev->asic_type != CHIP_TOPAZ))
 934		amdgpu_ucode_release(&adev->gfx.mec2_fw);
 
 935
 936	kfree(adev->gfx.rlc.register_list_format);
 937}
 938
 939static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 940{
 941	const char *chip_name;
 942	char fw_name[30];
 943	int err;
 944	struct amdgpu_firmware_info *info = NULL;
 945	const struct common_firmware_header *header = NULL;
 946	const struct gfx_firmware_header_v1_0 *cp_hdr;
 947	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 948	unsigned int *tmp = NULL, i;
 949
 950	DRM_DEBUG("\n");
 951
 952	switch (adev->asic_type) {
 953	case CHIP_TOPAZ:
 954		chip_name = "topaz";
 955		break;
 956	case CHIP_TONGA:
 957		chip_name = "tonga";
 958		break;
 959	case CHIP_CARRIZO:
 960		chip_name = "carrizo";
 961		break;
 962	case CHIP_FIJI:
 963		chip_name = "fiji";
 964		break;
 965	case CHIP_STONEY:
 966		chip_name = "stoney";
 967		break;
 968	case CHIP_POLARIS10:
 969		chip_name = "polaris10";
 970		break;
 971	case CHIP_POLARIS11:
 972		chip_name = "polaris11";
 973		break;
 974	case CHIP_POLARIS12:
 975		chip_name = "polaris12";
 976		break;
 977	case CHIP_VEGAM:
 978		chip_name = "vegam";
 979		break;
 980	default:
 981		BUG();
 982	}
 983
 984	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 985		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 986		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
 987		if (err == -ENODEV) {
 988			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 989			err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
 990		}
 991	} else {
 992		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 993		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
 994	}
 995	if (err)
 996		goto out;
 
 
 
 997	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 998	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 999	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1000
1001	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1002		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1003		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1004		if (err == -ENODEV) {
1005			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1006			err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1007		}
1008	} else {
1009		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1010		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1011	}
1012	if (err)
1013		goto out;
 
 
 
1014	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1015	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1016
1017	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1018
1019	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1020		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1021		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1022		if (err == -ENODEV) {
1023			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1024			err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1025		}
1026	} else {
1027		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1028		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1029	}
1030	if (err)
1031		goto out;
 
 
 
1032	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1033	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1034	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1035
1036	/*
1037	 * Support for MCBP/Virtualization in combination with chained IBs is
1038	 * formal released on feature version #46
1039	 */
1040	if (adev->gfx.ce_feature_version >= 46 &&
1041	    adev->gfx.pfp_feature_version >= 46) {
1042		adev->virt.chained_ib_support = true;
1043		DRM_INFO("Chained IB support enabled!\n");
1044	} else
1045		adev->virt.chained_ib_support = false;
1046
1047	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1048	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1049	if (err)
1050		goto out;
 
1051	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1052	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1053	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1054
1055	adev->gfx.rlc.save_and_restore_offset =
1056			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1057	adev->gfx.rlc.clear_state_descriptor_offset =
1058			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1059	adev->gfx.rlc.avail_scratch_ram_locations =
1060			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1061	adev->gfx.rlc.reg_restore_list_size =
1062			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1063	adev->gfx.rlc.reg_list_format_start =
1064			le32_to_cpu(rlc_hdr->reg_list_format_start);
1065	adev->gfx.rlc.reg_list_format_separate_start =
1066			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1067	adev->gfx.rlc.starting_offsets_start =
1068			le32_to_cpu(rlc_hdr->starting_offsets_start);
1069	adev->gfx.rlc.reg_list_format_size_bytes =
1070			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1071	adev->gfx.rlc.reg_list_size_bytes =
1072			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1073
1074	adev->gfx.rlc.register_list_format =
1075			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1076					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1077
1078	if (!adev->gfx.rlc.register_list_format) {
1079		err = -ENOMEM;
1080		goto out;
1081	}
1082
1083	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1084			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1085	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1086		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1087
1088	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1089
1090	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1091			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1092	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1093		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1094
1095	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1096		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1097		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1098		if (err == -ENODEV) {
1099			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1100			err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1101		}
1102	} else {
1103		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1104		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1105	}
1106	if (err)
1107		goto out;
 
 
 
1108	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1109	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1110	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1111
1112	if ((adev->asic_type != CHIP_STONEY) &&
1113	    (adev->asic_type != CHIP_TOPAZ)) {
1114		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1115			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1116			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1117			if (err == -ENODEV) {
1118				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1119				err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1120			}
1121		} else {
1122			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1123			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1124		}
1125		if (!err) {
 
 
 
1126			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1127				adev->gfx.mec2_fw->data;
1128			adev->gfx.mec2_fw_version =
1129				le32_to_cpu(cp_hdr->header.ucode_version);
1130			adev->gfx.mec2_feature_version =
1131				le32_to_cpu(cp_hdr->ucode_feature_version);
1132		} else {
1133			err = 0;
1134			adev->gfx.mec2_fw = NULL;
1135		}
1136	}
1137
1138	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1139	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1140	info->fw = adev->gfx.pfp_fw;
1141	header = (const struct common_firmware_header *)info->fw->data;
1142	adev->firmware.fw_size +=
1143		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1144
1145	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1146	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1147	info->fw = adev->gfx.me_fw;
1148	header = (const struct common_firmware_header *)info->fw->data;
1149	adev->firmware.fw_size +=
1150		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1151
1152	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1153	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1154	info->fw = adev->gfx.ce_fw;
1155	header = (const struct common_firmware_header *)info->fw->data;
1156	adev->firmware.fw_size +=
1157		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1158
1159	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1160	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1161	info->fw = adev->gfx.rlc_fw;
1162	header = (const struct common_firmware_header *)info->fw->data;
1163	adev->firmware.fw_size +=
1164		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1165
1166	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1167	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1168	info->fw = adev->gfx.mec_fw;
1169	header = (const struct common_firmware_header *)info->fw->data;
1170	adev->firmware.fw_size +=
1171		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1172
1173	/* we need account JT in */
1174	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1175	adev->firmware.fw_size +=
1176		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1177
1178	if (amdgpu_sriov_vf(adev)) {
1179		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1180		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1181		info->fw = adev->gfx.mec_fw;
1182		adev->firmware.fw_size +=
1183			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1184	}
1185
1186	if (adev->gfx.mec2_fw) {
1187		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1188		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1189		info->fw = adev->gfx.mec2_fw;
1190		header = (const struct common_firmware_header *)info->fw->data;
1191		adev->firmware.fw_size +=
1192			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1193	}
1194
1195out:
1196	if (err) {
1197		dev_err(adev->dev,
1198			"gfx8: Failed to load firmware \"%s\"\n",
1199			fw_name);
1200		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1201		amdgpu_ucode_release(&adev->gfx.me_fw);
1202		amdgpu_ucode_release(&adev->gfx.ce_fw);
1203		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1204		amdgpu_ucode_release(&adev->gfx.mec_fw);
1205		amdgpu_ucode_release(&adev->gfx.mec2_fw);
 
 
 
 
 
 
1206	}
1207	return err;
1208}
1209
1210static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1211				    volatile u32 *buffer)
1212{
1213	u32 count = 0, i;
1214	const struct cs_section_def *sect = NULL;
1215	const struct cs_extent_def *ext = NULL;
1216
1217	if (adev->gfx.rlc.cs_data == NULL)
1218		return;
1219	if (buffer == NULL)
1220		return;
1221
1222	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1223	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1224
1225	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1226	buffer[count++] = cpu_to_le32(0x80000000);
1227	buffer[count++] = cpu_to_le32(0x80000000);
1228
1229	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1230		for (ext = sect->section; ext->extent != NULL; ++ext) {
1231			if (sect->id == SECT_CONTEXT) {
1232				buffer[count++] =
1233					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1234				buffer[count++] = cpu_to_le32(ext->reg_index -
1235						PACKET3_SET_CONTEXT_REG_START);
1236				for (i = 0; i < ext->reg_count; i++)
1237					buffer[count++] = cpu_to_le32(ext->extent[i]);
1238			} else {
1239				return;
1240			}
1241		}
1242	}
1243
1244	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1245	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1246			PACKET3_SET_CONTEXT_REG_START);
1247	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1248	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1249
1250	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1252
1253	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1254	buffer[count++] = cpu_to_le32(0);
1255}
1256
1257static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1258{
1259	if (adev->asic_type == CHIP_CARRIZO)
1260		return 5;
1261	else
1262		return 4;
1263}
1264
1265static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1266{
1267	const struct cs_section_def *cs_data;
1268	int r;
1269
1270	adev->gfx.rlc.cs_data = vi_cs_data;
1271
1272	cs_data = adev->gfx.rlc.cs_data;
1273
1274	if (cs_data) {
1275		/* init clear state block */
1276		r = amdgpu_gfx_rlc_init_csb(adev);
1277		if (r)
1278			return r;
1279	}
1280
1281	if ((adev->asic_type == CHIP_CARRIZO) ||
1282	    (adev->asic_type == CHIP_STONEY)) {
1283		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1284		r = amdgpu_gfx_rlc_init_cpt(adev);
1285		if (r)
1286			return r;
1287	}
1288
1289	/* init spm vmid with 0xf */
1290	if (adev->gfx.rlc.funcs->update_spm_vmid)
1291		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
1292
1293	return 0;
1294}
1295
1296static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1297{
1298	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1299}
1300
1301static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1302{
1303	int r;
1304	u32 *hpd;
1305	size_t mec_hpd_size;
1306
1307	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1308
1309	/* take ownership of the relevant compute queues */
1310	amdgpu_gfx_compute_queue_acquire(adev);
1311
1312	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1313	if (mec_hpd_size) {
1314		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1315					      AMDGPU_GEM_DOMAIN_VRAM |
1316					      AMDGPU_GEM_DOMAIN_GTT,
1317					      &adev->gfx.mec.hpd_eop_obj,
1318					      &adev->gfx.mec.hpd_eop_gpu_addr,
1319					      (void **)&hpd);
1320		if (r) {
1321			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1322			return r;
1323		}
1324
1325		memset(hpd, 0, mec_hpd_size);
1326
1327		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1328		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 
 
 
1329	}
1330
 
 
 
 
 
1331	return 0;
1332}
1333
/*
 * Compute shader words used by gfx_v8_0_do_edc_gpr_workarounds() for its
 * "VGPR" dispatch.  The array is copied verbatim into the indirect buffer
 * at vgpr_offset and its address is programmed through mmCOMPUTE_PGM_LO.
 * NOTE(review): the words are presumably pre-assembled GCN instructions
 * that write each VGPR - confirm against the ISA documentation.
 */
1334static const u32 vgpr_init_compute_shader[] =
1335{
1336	0x7e000209, 0x7e020208,
1337	0x7e040207, 0x7e060206,
1338	0x7e080205, 0x7e0a0204,
1339	0x7e0c0203, 0x7e0e0202,
1340	0x7e100201, 0x7e120200,
1341	0x7e140209, 0x7e160208,
1342	0x7e180207, 0x7e1a0206,
1343	0x7e1c0205, 0x7e1e0204,
1344	0x7e200203, 0x7e220202,
1345	0x7e240201, 0x7e260200,
1346	0x7e280209, 0x7e2a0208,
1347	0x7e2c0207, 0x7e2e0206,
1348	0x7e300205, 0x7e320204,
1349	0x7e340203, 0x7e360202,
1350	0x7e380201, 0x7e3a0200,
1351	0x7e3c0209, 0x7e3e0208,
1352	0x7e400207, 0x7e420206,
1353	0x7e440205, 0x7e460204,
1354	0x7e480203, 0x7e4a0202,
1355	0x7e4c0201, 0x7e4e0200,
1356	0x7e500209, 0x7e520208,
1357	0x7e540207, 0x7e560206,
1358	0x7e580205, 0x7e5a0204,
1359	0x7e5c0203, 0x7e5e0202,
1360	0x7e600201, 0x7e620200,
1361	0x7e640209, 0x7e660208,
1362	0x7e680207, 0x7e6a0206,
1363	0x7e6c0205, 0x7e6e0204,
1364	0x7e700203, 0x7e720202,
1365	0x7e740201, 0x7e760200,
1366	0x7e780209, 0x7e7a0208,
1367	0x7e7c0207, 0x7e7e0206,
1368	0xbf8a0000, 0xbf810000,
1369};
1370
/*
 * Compute shader words shared by the "SGPR1" and "SGPR2" dispatches in
 * gfx_v8_0_do_edc_gpr_workarounds().  The array is copied verbatim into the
 * indirect buffer at sgpr_offset; both dispatches point mmCOMPUTE_PGM_LO
 * at that copy.
 * NOTE(review): the words are presumably pre-assembled GCN instructions
 * that write each SGPR - confirm against the ISA documentation.
 */
1371static const u32 sgpr_init_compute_shader[] =
1372{
1373	0xbe8a0100, 0xbe8c0102,
1374	0xbe8e0104, 0xbe900106,
1375	0xbe920108, 0xbe940100,
1376	0xbe960102, 0xbe980104,
1377	0xbe9a0106, 0xbe9c0108,
1378	0xbe9e0100, 0xbea00102,
1379	0xbea20104, 0xbea40106,
1380	0xbea60108, 0xbea80100,
1381	0xbeaa0102, 0xbeac0104,
1382	0xbeae0106, 0xbeb00108,
1383	0xbeb20100, 0xbeb40102,
1384	0xbeb60104, 0xbeb80106,
1385	0xbeba0108, 0xbebc0100,
1386	0xbebe0102, 0xbec00104,
1387	0xbec20106, 0xbec40108,
1388	0xbec60100, 0xbec80102,
1389	0xbee60004, 0xbee70005,
1390	0xbeea0006, 0xbeeb0007,
1391	0xbee80008, 0xbee90009,
1392	0xbefc0000, 0xbf8a0000,
1393	0xbf810000, 0x00000000,
1394};
1395
/*
 * (register, value) pairs for the "VGPR" dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(), which emits one PACKET3_SET_SH_REG
 * packet per pair before programming the shader address and dispatching.
 */
1396static const u32 vgpr_init_regs[] =
1397{
1398	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1399	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1400	mmCOMPUTE_NUM_THREAD_X, 256*4,
1401	mmCOMPUTE_NUM_THREAD_Y, 1,
1402	mmCOMPUTE_NUM_THREAD_Z, 1,
1403	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1404	mmCOMPUTE_PGM_RSRC2, 20,
1405	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1406	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1407	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1408	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1409	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1410	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1411	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1412	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1413	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1414	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1415};
1416
/*
 * (register, value) pairs for the "SGPR1" dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(), emitted as one PACKET3_SET_SH_REG
 * packet per pair.  Differs from sgpr2_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here, 0xf0 there).
 * NOTE(review): that value presumably selects which CUs run the dispatch -
 * confirm against the register documentation.
 */
1417static const u32 sgpr1_init_regs[] =
1418{
1419	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1420	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1421	mmCOMPUTE_NUM_THREAD_X, 256*5,
1422	mmCOMPUTE_NUM_THREAD_Y, 1,
1423	mmCOMPUTE_NUM_THREAD_Z, 1,
1424	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1425	mmCOMPUTE_PGM_RSRC2, 20,
1426	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1427	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1428	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1429	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1430	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1431	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1432	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1433	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1434	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1435	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1436};
1437
/*
 * (register, value) pairs for the "SGPR2" dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(), emitted as one PACKET3_SET_SH_REG
 * packet per pair.  Differs from sgpr1_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here, 0x0f there).
 * NOTE(review): that value presumably selects which CUs run the dispatch -
 * confirm against the register documentation.
 */
1438static const u32 sgpr2_init_regs[] =
1439{
1440	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1441	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1442	mmCOMPUTE_NUM_THREAD_X, 256*5,
1443	mmCOMPUTE_NUM_THREAD_Y, 1,
1444	mmCOMPUTE_NUM_THREAD_Z, 1,
1445	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1446	mmCOMPUTE_PGM_RSRC2, 20,
1447	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1448	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1449	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1450	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1451	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1452	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1453	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1454	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1455	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1456	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1457};
1458
/*
 * EDC counter registers.  gfx_v8_0_do_edc_gpr_workarounds() reads each of
 * them once, discarding the value, in order to clear the counters.
 */
1459static const u32 sec_ded_counter_registers[] =
1460{
1461	mmCPC_EDC_ATC_CNT,
1462	mmCPC_EDC_SCRATCH_CNT,
1463	mmCPC_EDC_UCODE_CNT,
1464	mmCPF_EDC_ATC_CNT,
1465	mmCPF_EDC_ROQ_CNT,
1466	mmCPF_EDC_TAG_CNT,
1467	mmCPG_EDC_ATC_CNT,
1468	mmCPG_EDC_DMA_CNT,
1469	mmCPG_EDC_TAG_CNT,
1470	mmDC_EDC_CSINVOC_CNT,
1471	mmDC_EDC_RESTORE_CNT,
1472	mmDC_EDC_STATE_CNT,
1473	mmGDS_EDC_CNT,
1474	mmGDS_EDC_GRBM_CNT,
1475	mmGDS_EDC_OA_DED,
1476	mmSPI_EDC_CNT,
1477	mmSQC_ATC_EDC_GATCL1_CNT,
1478	mmSQC_EDC_CNT,
1479	mmSQ_EDC_DED_CNT,
1480	mmSQ_EDC_INFO,
1481	mmSQ_EDC_SEC_CNT,
1482	mmTCC_EDC_CNT,
1483	mmTCP_ATC_EDC_GATCL1_CNT,
1484	mmTCP_EDC_CNT,
1485	mmTD_EDC_CNT
1486};
1487
1488static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1489{
1490	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1491	struct amdgpu_ib ib;
1492	struct dma_fence *f = NULL;
1493	int r, i;
1494	u32 tmp;
1495	unsigned total_size, vgpr_offset, sgpr_offset;
1496	u64 gpu_addr;
1497
1498	/* only supported on CZ */
1499	if (adev->asic_type != CHIP_CARRIZO)
1500		return 0;
1501
1502	/* bail if the compute ring is not ready */
1503	if (!ring->sched.ready)
1504		return 0;
1505
1506	tmp = RREG32(mmGB_EDC_MODE);
1507	WREG32(mmGB_EDC_MODE, 0);
1508
1509	total_size =
1510		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1511	total_size +=
1512		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1513	total_size +=
1514		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1515	total_size = ALIGN(total_size, 256);
1516	vgpr_offset = total_size;
1517	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1518	sgpr_offset = total_size;
1519	total_size += sizeof(sgpr_init_compute_shader);
1520
1521	/* allocate an indirect buffer to put the commands in */
1522	memset(&ib, 0, sizeof(ib));
1523	r = amdgpu_ib_get(adev, NULL, total_size,
1524					AMDGPU_IB_POOL_DIRECT, &ib);
1525	if (r) {
1526		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1527		return r;
1528	}
1529
1530	/* load the compute shaders */
1531	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1532		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1533
1534	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1535		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1536
1537	/* init the ib length to 0 */
1538	ib.length_dw = 0;
1539
1540	/* VGPR */
1541	/* write the register state for the compute dispatch */
1542	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1543		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1544		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1545		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1546	}
1547	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1548	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1549	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1550	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1551	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1552	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1553
1554	/* write dispatch packet */
1555	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1556	ib.ptr[ib.length_dw++] = 8; /* x */
1557	ib.ptr[ib.length_dw++] = 1; /* y */
1558	ib.ptr[ib.length_dw++] = 1; /* z */
1559	ib.ptr[ib.length_dw++] =
1560		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1561
1562	/* write CS partial flush packet */
1563	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1564	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1565
1566	/* SGPR1 */
1567	/* write the register state for the compute dispatch */
1568	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1569		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1570		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1571		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1572	}
1573	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1574	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1575	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1576	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1577	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1578	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1579
1580	/* write dispatch packet */
1581	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1582	ib.ptr[ib.length_dw++] = 8; /* x */
1583	ib.ptr[ib.length_dw++] = 1; /* y */
1584	ib.ptr[ib.length_dw++] = 1; /* z */
1585	ib.ptr[ib.length_dw++] =
1586		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1587
1588	/* write CS partial flush packet */
1589	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1590	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1591
1592	/* SGPR2 */
1593	/* write the register state for the compute dispatch */
1594	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1595		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1596		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1597		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1598	}
1599	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1600	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1601	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1602	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1603	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1604	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1605
1606	/* write dispatch packet */
1607	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1608	ib.ptr[ib.length_dw++] = 8; /* x */
1609	ib.ptr[ib.length_dw++] = 1; /* y */
1610	ib.ptr[ib.length_dw++] = 1; /* z */
1611	ib.ptr[ib.length_dw++] =
1612		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1613
1614	/* write CS partial flush packet */
1615	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1616	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1617
1618	/* shedule the ib on the ring */
1619	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1620	if (r) {
1621		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1622		goto fail;
1623	}
1624
1625	/* wait for the GPU to finish processing the IB */
1626	r = dma_fence_wait(f, false);
1627	if (r) {
1628		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1629		goto fail;
1630	}
1631
1632	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1633	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1634	WREG32(mmGB_EDC_MODE, tmp);
1635
1636	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1637	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1638	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1639
1640
1641	/* read back registers to clear the counters */
1642	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1643		RREG32(sec_ded_counter_registers[i]);
1644
1645fail:
1646	amdgpu_ib_free(adev, &ib, NULL);
1647	dma_fence_put(f);
1648
1649	return r;
1650}
1651
1652static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1653{
1654	u32 gb_addr_config;
1655	u32 mc_arb_ramcfg;
1656	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1657	u32 tmp;
1658	int ret;
1659
1660	switch (adev->asic_type) {
1661	case CHIP_TOPAZ:
1662		adev->gfx.config.max_shader_engines = 1;
1663		adev->gfx.config.max_tile_pipes = 2;
1664		adev->gfx.config.max_cu_per_sh = 6;
1665		adev->gfx.config.max_sh_per_se = 1;
1666		adev->gfx.config.max_backends_per_se = 2;
1667		adev->gfx.config.max_texture_channel_caches = 2;
1668		adev->gfx.config.max_gprs = 256;
1669		adev->gfx.config.max_gs_threads = 32;
1670		adev->gfx.config.max_hw_contexts = 8;
1671
1672		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1673		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1674		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1675		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1676		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1677		break;
1678	case CHIP_FIJI:
1679		adev->gfx.config.max_shader_engines = 4;
1680		adev->gfx.config.max_tile_pipes = 16;
1681		adev->gfx.config.max_cu_per_sh = 16;
1682		adev->gfx.config.max_sh_per_se = 1;
1683		adev->gfx.config.max_backends_per_se = 4;
1684		adev->gfx.config.max_texture_channel_caches = 16;
1685		adev->gfx.config.max_gprs = 256;
1686		adev->gfx.config.max_gs_threads = 32;
1687		adev->gfx.config.max_hw_contexts = 8;
1688
1689		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1690		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1691		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1692		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1693		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1694		break;
1695	case CHIP_POLARIS11:
1696	case CHIP_POLARIS12:
1697		ret = amdgpu_atombios_get_gfx_info(adev);
1698		if (ret)
1699			return ret;
1700		adev->gfx.config.max_gprs = 256;
1701		adev->gfx.config.max_gs_threads = 32;
1702		adev->gfx.config.max_hw_contexts = 8;
1703
1704		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1705		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1706		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1707		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1708		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1709		break;
1710	case CHIP_POLARIS10:
1711	case CHIP_VEGAM:
1712		ret = amdgpu_atombios_get_gfx_info(adev);
1713		if (ret)
1714			return ret;
1715		adev->gfx.config.max_gprs = 256;
1716		adev->gfx.config.max_gs_threads = 32;
1717		adev->gfx.config.max_hw_contexts = 8;
1718
1719		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1720		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1721		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1722		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1723		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1724		break;
1725	case CHIP_TONGA:
1726		adev->gfx.config.max_shader_engines = 4;
1727		adev->gfx.config.max_tile_pipes = 8;
1728		adev->gfx.config.max_cu_per_sh = 8;
1729		adev->gfx.config.max_sh_per_se = 1;
1730		adev->gfx.config.max_backends_per_se = 2;
1731		adev->gfx.config.max_texture_channel_caches = 8;
1732		adev->gfx.config.max_gprs = 256;
1733		adev->gfx.config.max_gs_threads = 32;
1734		adev->gfx.config.max_hw_contexts = 8;
1735
1736		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1737		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1738		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1739		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1740		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1741		break;
1742	case CHIP_CARRIZO:
1743		adev->gfx.config.max_shader_engines = 1;
1744		adev->gfx.config.max_tile_pipes = 2;
1745		adev->gfx.config.max_sh_per_se = 1;
1746		adev->gfx.config.max_backends_per_se = 2;
1747		adev->gfx.config.max_cu_per_sh = 8;
1748		adev->gfx.config.max_texture_channel_caches = 2;
1749		adev->gfx.config.max_gprs = 256;
1750		adev->gfx.config.max_gs_threads = 32;
1751		adev->gfx.config.max_hw_contexts = 8;
1752
1753		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1754		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1755		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1756		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1757		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1758		break;
1759	case CHIP_STONEY:
1760		adev->gfx.config.max_shader_engines = 1;
1761		adev->gfx.config.max_tile_pipes = 2;
1762		adev->gfx.config.max_sh_per_se = 1;
1763		adev->gfx.config.max_backends_per_se = 1;
1764		adev->gfx.config.max_cu_per_sh = 3;
1765		adev->gfx.config.max_texture_channel_caches = 2;
1766		adev->gfx.config.max_gprs = 256;
1767		adev->gfx.config.max_gs_threads = 16;
1768		adev->gfx.config.max_hw_contexts = 8;
1769
1770		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1771		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1772		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1773		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1774		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1775		break;
1776	default:
1777		adev->gfx.config.max_shader_engines = 2;
1778		adev->gfx.config.max_tile_pipes = 4;
1779		adev->gfx.config.max_cu_per_sh = 2;
1780		adev->gfx.config.max_sh_per_se = 1;
1781		adev->gfx.config.max_backends_per_se = 2;
1782		adev->gfx.config.max_texture_channel_caches = 4;
1783		adev->gfx.config.max_gprs = 256;
1784		adev->gfx.config.max_gs_threads = 32;
1785		adev->gfx.config.max_hw_contexts = 8;
1786
1787		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1788		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1789		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1790		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1791		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1792		break;
1793	}
1794
1795	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1796	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1797
1798	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1799				MC_ARB_RAMCFG, NOOFBANK);
1800	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1801				MC_ARB_RAMCFG, NOOFRANKS);
1802
1803	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1804	adev->gfx.config.mem_max_burst_length_bytes = 256;
1805	if (adev->flags & AMD_IS_APU) {
1806		/* Get memory bank mapping mode. */
1807		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1808		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1809		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1810
1811		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1812		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1813		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1814
1815		/* Validate settings in case only one DIMM installed. */
1816		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1817			dimm00_addr_map = 0;
1818		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1819			dimm01_addr_map = 0;
1820		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1821			dimm10_addr_map = 0;
1822		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1823			dimm11_addr_map = 0;
1824
1825		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1826		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1827		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1828			adev->gfx.config.mem_row_size_in_kb = 2;
1829		else
1830			adev->gfx.config.mem_row_size_in_kb = 1;
1831	} else {
1832		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1833		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1834		if (adev->gfx.config.mem_row_size_in_kb > 4)
1835			adev->gfx.config.mem_row_size_in_kb = 4;
1836	}
1837
1838	adev->gfx.config.shader_engine_tile_size = 32;
1839	adev->gfx.config.num_gpus = 1;
1840	adev->gfx.config.multi_gpu_tile_size = 64;
1841
1842	/* fix up row size */
1843	switch (adev->gfx.config.mem_row_size_in_kb) {
1844	case 1:
1845	default:
1846		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1847		break;
1848	case 2:
1849		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1850		break;
1851	case 4:
1852		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1853		break;
1854	}
1855	adev->gfx.config.gb_addr_config = gb_addr_config;
1856
1857	return 0;
1858}
1859
1860static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1861					int mec, int pipe, int queue)
1862{
1863	int r;
1864	unsigned irq_type;
1865	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1866	unsigned int hw_prio;
1867
1868	ring = &adev->gfx.compute_ring[ring_id];
1869
1870	/* mec0 is me1 */
1871	ring->me = mec + 1;
1872	ring->pipe = pipe;
1873	ring->queue = queue;
1874
1875	ring->ring_obj = NULL;
1876	ring->use_doorbell = true;
1877	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1878	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1879				+ (ring_id * GFX8_MEC_HPD_SIZE);
1880	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1881
1882	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1883		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1884		+ ring->pipe;
1885
1886	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1887			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1888	/* type-2 packets are deprecated on MEC, use type-3 instead */
1889	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1890			     hw_prio, NULL);
1891	if (r)
1892		return r;
1893
1894
1895	return 0;
1896}
1897
1898static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1899
1900static int gfx_v8_0_sw_init(void *handle)
1901{
1902	int i, j, k, r, ring_id;
1903	int xcc_id = 0;
1904	struct amdgpu_ring *ring;
 
1905	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1906
1907	switch (adev->asic_type) {
1908	case CHIP_TONGA:
1909	case CHIP_CARRIZO:
1910	case CHIP_FIJI:
1911	case CHIP_POLARIS10:
1912	case CHIP_POLARIS11:
1913	case CHIP_POLARIS12:
1914	case CHIP_VEGAM:
1915		adev->gfx.mec.num_mec = 2;
1916		break;
1917	case CHIP_TOPAZ:
1918	case CHIP_STONEY:
1919	default:
1920		adev->gfx.mec.num_mec = 1;
1921		break;
1922	}
1923
1924	adev->gfx.mec.num_pipe_per_mec = 4;
1925	adev->gfx.mec.num_queue_per_pipe = 8;
1926
1927	/* EOP Event */
1928	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1929	if (r)
1930		return r;
1931
1932	/* Privileged reg */
1933	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1934			      &adev->gfx.priv_reg_irq);
1935	if (r)
1936		return r;
1937
1938	/* Privileged inst */
1939	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1940			      &adev->gfx.priv_inst_irq);
1941	if (r)
1942		return r;
1943
1944	/* Add CP EDC/ECC irq  */
1945	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1946			      &adev->gfx.cp_ecc_error_irq);
1947	if (r)
1948		return r;
1949
1950	/* SQ interrupts. */
1951	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1952			      &adev->gfx.sq_irq);
1953	if (r) {
1954		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1955		return r;
1956	}
1957
1958	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1959
1960	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1961
 
 
1962	r = gfx_v8_0_init_microcode(adev);
1963	if (r) {
1964		DRM_ERROR("Failed to load gfx firmware!\n");
1965		return r;
1966	}
1967
1968	r = adev->gfx.rlc.funcs->init(adev);
1969	if (r) {
1970		DRM_ERROR("Failed to init rlc BOs!\n");
1971		return r;
1972	}
1973
1974	r = gfx_v8_0_mec_init(adev);
1975	if (r) {
1976		DRM_ERROR("Failed to init MEC BOs!\n");
1977		return r;
1978	}
1979
1980	/* set up the gfx ring */
1981	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1982		ring = &adev->gfx.gfx_ring[i];
1983		ring->ring_obj = NULL;
1984		sprintf(ring->name, "gfx");
1985		/* no gfx doorbells on iceland */
1986		if (adev->asic_type != CHIP_TOPAZ) {
1987			ring->use_doorbell = true;
1988			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
1989		}
1990
1991		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1992				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
1993				     AMDGPU_RING_PRIO_DEFAULT, NULL);
1994		if (r)
1995			return r;
1996	}
1997
1998
1999	/* set up the compute queues - allocate horizontally across pipes */
2000	ring_id = 0;
2001	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2002		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2003			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2004				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2005								     k, j))
2006					continue;
2007
2008				r = gfx_v8_0_compute_ring_init(adev,
2009								ring_id,
2010								i, k, j);
2011				if (r)
2012					return r;
2013
2014				ring_id++;
2015			}
2016		}
2017	}
2018
2019	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
2020	if (r) {
2021		DRM_ERROR("Failed to init KIQ BOs!\n");
2022		return r;
2023	}
2024
2025	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
 
2026	if (r)
2027		return r;
2028
2029	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2030	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
2031	if (r)
2032		return r;
2033
2034	adev->gfx.ce_ram_size = 0x8000;
2035
2036	r = gfx_v8_0_gpu_early_init(adev);
2037	if (r)
2038		return r;
2039
2040	return 0;
2041}
2042
2043static int gfx_v8_0_sw_fini(void *handle)
2044{
2045	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2046	int i;
2047
2048	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2049		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2050	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2051		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2052
2053	amdgpu_gfx_mqd_sw_fini(adev, 0);
2054	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2055	amdgpu_gfx_kiq_fini(adev, 0);
2056
2057	gfx_v8_0_mec_fini(adev);
2058	amdgpu_gfx_rlc_fini(adev);
2059	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2060				&adev->gfx.rlc.clear_state_gpu_addr,
2061				(void **)&adev->gfx.rlc.cs_ptr);
2062	if ((adev->asic_type == CHIP_CARRIZO) ||
2063	    (adev->asic_type == CHIP_STONEY)) {
2064		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2065				&adev->gfx.rlc.cp_table_gpu_addr,
2066				(void **)&adev->gfx.rlc.cp_table_ptr);
2067	}
2068	gfx_v8_0_free_microcode(adev);
2069
2070	return 0;
2071}
2072
2073static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2074{
2075	uint32_t *modearray, *mod2array;
2076	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2077	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2078	u32 reg_offset;
2079
2080	modearray = adev->gfx.config.tile_mode_array;
2081	mod2array = adev->gfx.config.macrotile_mode_array;
2082
2083	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2084		modearray[reg_offset] = 0;
2085
2086	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2087		mod2array[reg_offset] = 0;
2088
2089	switch (adev->asic_type) {
2090	case CHIP_TOPAZ:
2091		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2092				PIPE_CONFIG(ADDR_SURF_P2) |
2093				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2094				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2095		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096				PIPE_CONFIG(ADDR_SURF_P2) |
2097				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2098				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2099		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2100				PIPE_CONFIG(ADDR_SURF_P2) |
2101				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2102				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2103		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104				PIPE_CONFIG(ADDR_SURF_P2) |
2105				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2106				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2107		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2108				PIPE_CONFIG(ADDR_SURF_P2) |
2109				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2110				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2111		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2112				PIPE_CONFIG(ADDR_SURF_P2) |
2113				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2114				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2115		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2116				PIPE_CONFIG(ADDR_SURF_P2) |
2117				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2118				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2119		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2120				PIPE_CONFIG(ADDR_SURF_P2));
2121		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2122				PIPE_CONFIG(ADDR_SURF_P2) |
2123				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2124				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2125		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2126				 PIPE_CONFIG(ADDR_SURF_P2) |
2127				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2128				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2129		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2130				 PIPE_CONFIG(ADDR_SURF_P2) |
2131				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2132				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2133		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2134				 PIPE_CONFIG(ADDR_SURF_P2) |
2135				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2136				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2137		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2138				 PIPE_CONFIG(ADDR_SURF_P2) |
2139				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2140				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2141		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2142				 PIPE_CONFIG(ADDR_SURF_P2) |
2143				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2144				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2145		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2146				 PIPE_CONFIG(ADDR_SURF_P2) |
2147				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2148				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2149		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2150				 PIPE_CONFIG(ADDR_SURF_P2) |
2151				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2152				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2153		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2154				 PIPE_CONFIG(ADDR_SURF_P2) |
2155				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2156				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2157		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2158				 PIPE_CONFIG(ADDR_SURF_P2) |
2159				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2160				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2161		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2162				 PIPE_CONFIG(ADDR_SURF_P2) |
2163				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2164				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2165		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2166				 PIPE_CONFIG(ADDR_SURF_P2) |
2167				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2168				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2169		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2170				 PIPE_CONFIG(ADDR_SURF_P2) |
2171				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2172				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2173		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2174				 PIPE_CONFIG(ADDR_SURF_P2) |
2175				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2176				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2177		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2178				 PIPE_CONFIG(ADDR_SURF_P2) |
2179				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2180				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2181		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2182				 PIPE_CONFIG(ADDR_SURF_P2) |
2183				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2184				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2186				 PIPE_CONFIG(ADDR_SURF_P2) |
2187				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2188				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2190				 PIPE_CONFIG(ADDR_SURF_P2) |
2191				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2192				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2193
2194		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2195				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2196				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2197				NUM_BANKS(ADDR_SURF_8_BANK));
2198		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2199				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2200				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201				NUM_BANKS(ADDR_SURF_8_BANK));
2202		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2203				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2204				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2205				NUM_BANKS(ADDR_SURF_8_BANK));
2206		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2208				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2209				NUM_BANKS(ADDR_SURF_8_BANK));
2210		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2212				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2213				NUM_BANKS(ADDR_SURF_8_BANK));
2214		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2216				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2217				NUM_BANKS(ADDR_SURF_8_BANK));
2218		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2219				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2220				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2221				NUM_BANKS(ADDR_SURF_8_BANK));
2222		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2223				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2224				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2225				NUM_BANKS(ADDR_SURF_16_BANK));
2226		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2227				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2228				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2229				NUM_BANKS(ADDR_SURF_16_BANK));
2230		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2231				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2232				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2233				 NUM_BANKS(ADDR_SURF_16_BANK));
2234		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2235				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2236				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2237				 NUM_BANKS(ADDR_SURF_16_BANK));
2238		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2239				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2240				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2241				 NUM_BANKS(ADDR_SURF_16_BANK));
2242		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2245				 NUM_BANKS(ADDR_SURF_16_BANK));
2246		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2248				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249				 NUM_BANKS(ADDR_SURF_8_BANK));
2250
2251		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2252			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2253			    reg_offset != 23)
2254				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2255
2256		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2257			if (reg_offset != 7)
2258				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2259
2260		break;
2261	case CHIP_FIJI:
2262	case CHIP_VEGAM:
2263		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2265				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2266				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2267		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2268				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2269				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2270				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2271		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2272				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2273				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2274				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2275		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2276				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2277				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2278				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2279		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2281				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2282				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2283		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2284				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2285				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2286				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2287		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2288				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2289				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2290				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2291		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2292				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2293				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2294				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2296				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2297		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2300				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2301		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2303				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2304				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2306				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2308				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2309		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2310				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2311				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2312				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2313		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2316				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2320				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2322				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2324				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2328				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2331				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2332				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2333		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2334				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2336				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2337		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2338				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2340				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2341		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2342				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2344				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2345		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2346				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2348				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2349		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2350				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2352				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2353		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2354				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2355				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2356				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2357		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2358				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2360				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2361		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2362				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2364				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2365		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2366				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2368				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2369		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2372				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2376				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2380				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2381		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2384				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385
2386		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2388				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2389				NUM_BANKS(ADDR_SURF_8_BANK));
2390		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2391				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2392				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2393				NUM_BANKS(ADDR_SURF_8_BANK));
2394		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2396				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2397				NUM_BANKS(ADDR_SURF_8_BANK));
2398		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2399				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2400				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2401				NUM_BANKS(ADDR_SURF_8_BANK));
2402		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2403				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2404				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2405				NUM_BANKS(ADDR_SURF_8_BANK));
2406		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2407				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2408				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2409				NUM_BANKS(ADDR_SURF_8_BANK));
2410		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2411				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2412				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2413				NUM_BANKS(ADDR_SURF_8_BANK));
2414		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2416				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417				NUM_BANKS(ADDR_SURF_8_BANK));
2418		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2420				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2421				NUM_BANKS(ADDR_SURF_8_BANK));
2422		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2424				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2425				 NUM_BANKS(ADDR_SURF_8_BANK));
2426		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2428				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2429				 NUM_BANKS(ADDR_SURF_8_BANK));
2430		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2432				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433				 NUM_BANKS(ADDR_SURF_8_BANK));
2434		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437				 NUM_BANKS(ADDR_SURF_8_BANK));
2438		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2441				 NUM_BANKS(ADDR_SURF_4_BANK));
2442
2443		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2444			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2445
2446		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2447			if (reg_offset != 7)
2448				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2449
2450		break;
2451	case CHIP_TONGA:
2452		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2453				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2454				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2455				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2456		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2458				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2459				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2460		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2461				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2462				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2463				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2464		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2465				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2466				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2467				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2468		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2470				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2471				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2472		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2473				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2474				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2475				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2476		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2477				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2478				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2479				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2480		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2481				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2482				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2483				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2484		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2486		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2487				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2488				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2489				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2490		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2491				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2492				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2493				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2494		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2495				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2497				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2498		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2500				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2501				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2502		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2503				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2505				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2506		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2509				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2511				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2513				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2514		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2515				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2517				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2518		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2520				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2521				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2522		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2523				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2525				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2526		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2527				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2529				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2530		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2531				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2533				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2534		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2535				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2537				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2538		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2539				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2541				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2542		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2543				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2545				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2546		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2547				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2549				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2550		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2551				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2553				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2554		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2555				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2557				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2558		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2559				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2561				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2565				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2567				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2569				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2570		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2573				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574
2575		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2577				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2578				NUM_BANKS(ADDR_SURF_16_BANK));
2579		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582				NUM_BANKS(ADDR_SURF_16_BANK));
2583		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2585				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2586				NUM_BANKS(ADDR_SURF_16_BANK));
2587		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2589				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2590				NUM_BANKS(ADDR_SURF_16_BANK));
2591		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2593				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594				NUM_BANKS(ADDR_SURF_16_BANK));
2595		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598				NUM_BANKS(ADDR_SURF_16_BANK));
2599		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602				NUM_BANKS(ADDR_SURF_16_BANK));
2603		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2605				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606				NUM_BANKS(ADDR_SURF_16_BANK));
2607		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610				NUM_BANKS(ADDR_SURF_16_BANK));
2611		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614				 NUM_BANKS(ADDR_SURF_16_BANK));
2615		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618				 NUM_BANKS(ADDR_SURF_16_BANK));
2619		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2622				 NUM_BANKS(ADDR_SURF_8_BANK));
2623		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626				 NUM_BANKS(ADDR_SURF_4_BANK));
2627		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630				 NUM_BANKS(ADDR_SURF_4_BANK));
2631
2632		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2633			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2634
2635		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2636			if (reg_offset != 7)
2637				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2638
2639		break;
2640	case CHIP_POLARIS11:
2641	case CHIP_POLARIS12:
2642		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2644				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2645				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2646		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2648				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2649				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2650		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2652				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2653				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2654		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2657				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2658		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2661				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2662		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2663				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2665				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2666		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2667				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2669				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2670		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2671				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2673				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2674		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2675				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2676		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2677				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2679				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2682				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2683				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2686				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2687				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2688		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2689				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2691				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2692		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2695				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2696		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2699				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2700		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2701				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2703				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2704		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2705				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2707				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2708		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2711				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2712		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2713				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2715				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2716		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2717				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2719				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2720		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2721				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2723				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2724		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2725				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2727				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2728		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2729				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2731				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2732		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2733				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2735				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2736		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2737				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2739				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2740		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2741				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2743				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2744		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2745				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2747				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2748		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2749				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2751				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2755				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2757				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2759				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2760		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2763				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764
2765		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2768				NUM_BANKS(ADDR_SURF_16_BANK));
2769
2770		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2772				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2773				NUM_BANKS(ADDR_SURF_16_BANK));
2774
2775		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2777				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2778				NUM_BANKS(ADDR_SURF_16_BANK));
2779
2780		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2783				NUM_BANKS(ADDR_SURF_16_BANK));
2784
2785		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2786				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2787				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2788				NUM_BANKS(ADDR_SURF_16_BANK));
2789
2790		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2792				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2793				NUM_BANKS(ADDR_SURF_16_BANK));
2794
2795		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798				NUM_BANKS(ADDR_SURF_16_BANK));
2799
2800		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2801				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2802				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2803				NUM_BANKS(ADDR_SURF_16_BANK));
2804
2805		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2806				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2807				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2808				NUM_BANKS(ADDR_SURF_16_BANK));
2809
2810		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2812				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2813				NUM_BANKS(ADDR_SURF_16_BANK));
2814
2815		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2817				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2818				NUM_BANKS(ADDR_SURF_16_BANK));
2819
2820		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823				NUM_BANKS(ADDR_SURF_16_BANK));
2824
2825		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828				NUM_BANKS(ADDR_SURF_8_BANK));
2829
2830		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2832				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2833				NUM_BANKS(ADDR_SURF_4_BANK));
2834
2835		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2836			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2837
2838		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2839			if (reg_offset != 7)
2840				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2841
2842		break;
2843	case CHIP_POLARIS10:
2844		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2846				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2847				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2848		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2849				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2850				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2851				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2852		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2853				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2854				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2855				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2856		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2858				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2859				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2860		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2862				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2863				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2864		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2865				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2866				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2867				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2868		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2869				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2870				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2871				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2872		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2873				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2875				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2876		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2877				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2878		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2879				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2880				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2881				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2882		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2883				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2884				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2886		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2888				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2889				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2890		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2891				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2893				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2894		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2895				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2896				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2897				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2898		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2901				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2903				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2905				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2906		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2909				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2910		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2911				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2912				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2913				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2914		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2915				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2917				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2918		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2919				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2920				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2921				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2922		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2923				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2925				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2926		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2927				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2928				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2929				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2930		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2931				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2933				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2934		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2935				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2936				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2937				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2938		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2939				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2941				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2942		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2943				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2945				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2946		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2947				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2949				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2950		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2951				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2959				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2962		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2965				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966
2967		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2969				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970				NUM_BANKS(ADDR_SURF_16_BANK));
2971
2972		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2974				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975				NUM_BANKS(ADDR_SURF_16_BANK));
2976
2977		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2978				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2979				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2980				NUM_BANKS(ADDR_SURF_16_BANK));
2981
2982		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2983				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2984				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2985				NUM_BANKS(ADDR_SURF_16_BANK));
2986
2987		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2990				NUM_BANKS(ADDR_SURF_16_BANK));
2991
2992		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2994				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2995				NUM_BANKS(ADDR_SURF_16_BANK));
2996
2997		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3000				NUM_BANKS(ADDR_SURF_16_BANK));
3001
3002		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3003				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3004				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3005				NUM_BANKS(ADDR_SURF_16_BANK));
3006
3007		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3008				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010				NUM_BANKS(ADDR_SURF_16_BANK));
3011
3012		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3014				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015				NUM_BANKS(ADDR_SURF_16_BANK));
3016
3017		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3019				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3020				NUM_BANKS(ADDR_SURF_16_BANK));
3021
3022		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3024				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3025				NUM_BANKS(ADDR_SURF_8_BANK));
3026
3027		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3030				NUM_BANKS(ADDR_SURF_4_BANK));
3031
3032		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3035				NUM_BANKS(ADDR_SURF_4_BANK));
3036
3037		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3038			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3039
3040		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3041			if (reg_offset != 7)
3042				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3043
3044		break;
3045	case CHIP_STONEY:
3046		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3047				PIPE_CONFIG(ADDR_SURF_P2) |
3048				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3049				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3050		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3051				PIPE_CONFIG(ADDR_SURF_P2) |
3052				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3053				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3054		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3055				PIPE_CONFIG(ADDR_SURF_P2) |
3056				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3057				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3058		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3059				PIPE_CONFIG(ADDR_SURF_P2) |
3060				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3061				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3062		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3063				PIPE_CONFIG(ADDR_SURF_P2) |
3064				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3065				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3066		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3067				PIPE_CONFIG(ADDR_SURF_P2) |
3068				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3069				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3070		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3071				PIPE_CONFIG(ADDR_SURF_P2) |
3072				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3073				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3074		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3075				PIPE_CONFIG(ADDR_SURF_P2));
3076		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3077				PIPE_CONFIG(ADDR_SURF_P2) |
3078				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3079				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081				 PIPE_CONFIG(ADDR_SURF_P2) |
3082				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3083				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3084		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3085				 PIPE_CONFIG(ADDR_SURF_P2) |
3086				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3087				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3088		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3089				 PIPE_CONFIG(ADDR_SURF_P2) |
3090				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3091				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3092		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3093				 PIPE_CONFIG(ADDR_SURF_P2) |
3094				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3095				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3096		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3097				 PIPE_CONFIG(ADDR_SURF_P2) |
3098				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3099				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3100		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3101				 PIPE_CONFIG(ADDR_SURF_P2) |
3102				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3103				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3104		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3105				 PIPE_CONFIG(ADDR_SURF_P2) |
3106				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3107				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3108		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3109				 PIPE_CONFIG(ADDR_SURF_P2) |
3110				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3111				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3112		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3113				 PIPE_CONFIG(ADDR_SURF_P2) |
3114				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3115				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3116		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3117				 PIPE_CONFIG(ADDR_SURF_P2) |
3118				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3119				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3120		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3121				 PIPE_CONFIG(ADDR_SURF_P2) |
3122				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3123				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3124		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3125				 PIPE_CONFIG(ADDR_SURF_P2) |
3126				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3127				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3128		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3129				 PIPE_CONFIG(ADDR_SURF_P2) |
3130				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3131				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3132		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3133				 PIPE_CONFIG(ADDR_SURF_P2) |
3134				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3135				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3136		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3137				 PIPE_CONFIG(ADDR_SURF_P2) |
3138				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3139				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141				 PIPE_CONFIG(ADDR_SURF_P2) |
3142				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3143				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3144		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3145				 PIPE_CONFIG(ADDR_SURF_P2) |
3146				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3147				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3148
3149		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3150				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3151				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3152				NUM_BANKS(ADDR_SURF_8_BANK));
3153		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3155				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3156				NUM_BANKS(ADDR_SURF_8_BANK));
3157		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3158				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3159				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3160				NUM_BANKS(ADDR_SURF_8_BANK));
3161		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3162				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3163				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3164				NUM_BANKS(ADDR_SURF_8_BANK));
3165		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3167				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3168				NUM_BANKS(ADDR_SURF_8_BANK));
3169		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3170				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3171				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3172				NUM_BANKS(ADDR_SURF_8_BANK));
3173		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3174				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3175				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3176				NUM_BANKS(ADDR_SURF_8_BANK));
3177		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3178				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3179				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3180				NUM_BANKS(ADDR_SURF_16_BANK));
3181		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3182				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3183				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3184				NUM_BANKS(ADDR_SURF_16_BANK));
3185		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3186				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3187				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3188				 NUM_BANKS(ADDR_SURF_16_BANK));
3189		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3190				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3191				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3192				 NUM_BANKS(ADDR_SURF_16_BANK));
3193		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3194				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3195				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3196				 NUM_BANKS(ADDR_SURF_16_BANK));
3197		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3198				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3199				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3200				 NUM_BANKS(ADDR_SURF_16_BANK));
3201		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3203				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3204				 NUM_BANKS(ADDR_SURF_8_BANK));
3205
3206		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3207			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3208			    reg_offset != 23)
3209				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3210
3211		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3212			if (reg_offset != 7)
3213				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3214
3215		break;
3216	default:
3217		dev_warn(adev->dev,
3218			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3219			 adev->asic_type);
3220		fallthrough;
3221
3222	case CHIP_CARRIZO:
3223		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3224				PIPE_CONFIG(ADDR_SURF_P2) |
3225				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3226				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3228				PIPE_CONFIG(ADDR_SURF_P2) |
3229				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3230				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3231		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232				PIPE_CONFIG(ADDR_SURF_P2) |
3233				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3234				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3235		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3236				PIPE_CONFIG(ADDR_SURF_P2) |
3237				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3238				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3239		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3240				PIPE_CONFIG(ADDR_SURF_P2) |
3241				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3242				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3243		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3244				PIPE_CONFIG(ADDR_SURF_P2) |
3245				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3246				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3247		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3248				PIPE_CONFIG(ADDR_SURF_P2) |
3249				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3250				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3251		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3252				PIPE_CONFIG(ADDR_SURF_P2));
3253		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3254				PIPE_CONFIG(ADDR_SURF_P2) |
3255				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3256				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3257		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3258				 PIPE_CONFIG(ADDR_SURF_P2) |
3259				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3260				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3261		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3262				 PIPE_CONFIG(ADDR_SURF_P2) |
3263				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3264				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3265		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3266				 PIPE_CONFIG(ADDR_SURF_P2) |
3267				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3269		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270				 PIPE_CONFIG(ADDR_SURF_P2) |
3271				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3272				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3273		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3274				 PIPE_CONFIG(ADDR_SURF_P2) |
3275				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3276				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3277		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3278				 PIPE_CONFIG(ADDR_SURF_P2) |
3279				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3280				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3281		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3282				 PIPE_CONFIG(ADDR_SURF_P2) |
3283				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3284				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3286				 PIPE_CONFIG(ADDR_SURF_P2) |
3287				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3290				 PIPE_CONFIG(ADDR_SURF_P2) |
3291				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3292				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3293		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3294				 PIPE_CONFIG(ADDR_SURF_P2) |
3295				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3296				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3297		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3298				 PIPE_CONFIG(ADDR_SURF_P2) |
3299				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3300				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3301		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3302				 PIPE_CONFIG(ADDR_SURF_P2) |
3303				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3304				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3305		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3306				 PIPE_CONFIG(ADDR_SURF_P2) |
3307				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3308				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3309		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3310				 PIPE_CONFIG(ADDR_SURF_P2) |
3311				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3312				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3313		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3314				 PIPE_CONFIG(ADDR_SURF_P2) |
3315				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3316				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3318				 PIPE_CONFIG(ADDR_SURF_P2) |
3319				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3320				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3321		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3322				 PIPE_CONFIG(ADDR_SURF_P2) |
3323				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3324				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3325
3326		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3328				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3329				NUM_BANKS(ADDR_SURF_8_BANK));
3330		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3331				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3332				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3333				NUM_BANKS(ADDR_SURF_8_BANK));
3334		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3337				NUM_BANKS(ADDR_SURF_8_BANK));
3338		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3339				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3340				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3341				NUM_BANKS(ADDR_SURF_8_BANK));
3342		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3343				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3344				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3345				NUM_BANKS(ADDR_SURF_8_BANK));
3346		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3347				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3348				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3349				NUM_BANKS(ADDR_SURF_8_BANK));
3350		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3351				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3352				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3353				NUM_BANKS(ADDR_SURF_8_BANK));
3354		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3355				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3356				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3357				NUM_BANKS(ADDR_SURF_16_BANK));
3358		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3359				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361				NUM_BANKS(ADDR_SURF_16_BANK));
3362		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3363				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3364				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3365				 NUM_BANKS(ADDR_SURF_16_BANK));
3366		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3367				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3368				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3369				 NUM_BANKS(ADDR_SURF_16_BANK));
3370		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373				 NUM_BANKS(ADDR_SURF_16_BANK));
3374		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3375				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3376				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3377				 NUM_BANKS(ADDR_SURF_16_BANK));
3378		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3379				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3380				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3381				 NUM_BANKS(ADDR_SURF_8_BANK));
3382
3383		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3384			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3385			    reg_offset != 23)
3386				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3387
3388		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3389			if (reg_offset != 7)
3390				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3391
3392		break;
3393	}
3394}
3395
3396static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3397				  u32 se_num, u32 sh_num, u32 instance,
3398				  int xcc_id)
3399{
3400	u32 data;
3401
3402	if (instance == 0xffffffff)
3403		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3404	else
3405		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3406
3407	if (se_num == 0xffffffff)
3408		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3409	else
3410		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3411
3412	if (sh_num == 0xffffffff)
3413		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3414	else
3415		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3416
3417	WREG32(mmGRBM_GFX_INDEX, data);
3418}
3419
3420static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3421				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
3422{
3423	vi_srbm_select(adev, me, pipe, q, vm);
3424}
3425
3426static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3427{
3428	u32 data, mask;
3429
3430	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3431		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3432
3433	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3434
3435	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3436					 adev->gfx.config.max_sh_per_se);
3437
3438	return (~data) & mask;
3439}
3440
3441static void
3442gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3443{
3444	switch (adev->asic_type) {
3445	case CHIP_FIJI:
3446	case CHIP_VEGAM:
3447		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3448			  RB_XSEL2(1) | PKR_MAP(2) |
3449			  PKR_XSEL(1) | PKR_YSEL(1) |
3450			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3451		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3452			   SE_PAIR_YSEL(2);
3453		break;
3454	case CHIP_TONGA:
3455	case CHIP_POLARIS10:
3456		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3457			  SE_XSEL(1) | SE_YSEL(1);
3458		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3459			   SE_PAIR_YSEL(2);
3460		break;
3461	case CHIP_TOPAZ:
3462	case CHIP_CARRIZO:
3463		*rconf |= RB_MAP_PKR0(2);
3464		*rconf1 |= 0x0;
3465		break;
3466	case CHIP_POLARIS11:
3467	case CHIP_POLARIS12:
3468		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3469			  SE_XSEL(1) | SE_YSEL(1);
3470		*rconf1 |= 0x0;
3471		break;
3472	case CHIP_STONEY:
3473		*rconf |= 0x0;
3474		*rconf1 |= 0x0;
3475		break;
3476	default:
3477		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3478		break;
3479	}
3480}
3481
3482static void
3483gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3484					u32 raster_config, u32 raster_config_1,
3485					unsigned rb_mask, unsigned num_rb)
3486{
3487	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3488	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3489	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3490	unsigned rb_per_se = num_rb / num_se;
3491	unsigned se_mask[4];
3492	unsigned se;
3493
3494	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3495	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3496	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3497	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3498
3499	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3500	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3501	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3502
3503	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3504			     (!se_mask[2] && !se_mask[3]))) {
3505		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3506
3507		if (!se_mask[0] && !se_mask[1]) {
3508			raster_config_1 |=
3509				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3510		} else {
3511			raster_config_1 |=
3512				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3513		}
3514	}
3515
3516	for (se = 0; se < num_se; se++) {
3517		unsigned raster_config_se = raster_config;
3518		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3519		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3520		int idx = (se / 2) * 2;
3521
3522		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3523			raster_config_se &= ~SE_MAP_MASK;
3524
3525			if (!se_mask[idx]) {
3526				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3527			} else {
3528				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3529			}
3530		}
3531
3532		pkr0_mask &= rb_mask;
3533		pkr1_mask &= rb_mask;
3534		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3535			raster_config_se &= ~PKR_MAP_MASK;
3536
3537			if (!pkr0_mask) {
3538				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3539			} else {
3540				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3541			}
3542		}
3543
3544		if (rb_per_se >= 2) {
3545			unsigned rb0_mask = 1 << (se * rb_per_se);
3546			unsigned rb1_mask = rb0_mask << 1;
3547
3548			rb0_mask &= rb_mask;
3549			rb1_mask &= rb_mask;
3550			if (!rb0_mask || !rb1_mask) {
3551				raster_config_se &= ~RB_MAP_PKR0_MASK;
3552
3553				if (!rb0_mask) {
3554					raster_config_se |=
3555						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3556				} else {
3557					raster_config_se |=
3558						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3559				}
3560			}
3561
3562			if (rb_per_se > 2) {
3563				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3564				rb1_mask = rb0_mask << 1;
3565				rb0_mask &= rb_mask;
3566				rb1_mask &= rb_mask;
3567				if (!rb0_mask || !rb1_mask) {
3568					raster_config_se &= ~RB_MAP_PKR1_MASK;
3569
3570					if (!rb0_mask) {
3571						raster_config_se |=
3572							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3573					} else {
3574						raster_config_se |=
3575							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3576					}
3577				}
3578			}
3579		}
3580
3581		/* GRBM_GFX_INDEX has a different offset on VI */
3582		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
3583		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3584		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3585	}
3586
3587	/* GRBM_GFX_INDEX has a different offset on VI */
3588	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3589}
3590
3591static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3592{
3593	int i, j;
3594	u32 data;
3595	u32 raster_config = 0, raster_config_1 = 0;
3596	u32 active_rbs = 0;
3597	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3598					adev->gfx.config.max_sh_per_se;
3599	unsigned num_rb_pipes;
3600
3601	mutex_lock(&adev->grbm_idx_mutex);
3602	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3603		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3604			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3605			data = gfx_v8_0_get_rb_active_bitmap(adev);
3606			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3607					       rb_bitmap_width_per_sh);
3608		}
3609	}
3610	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3611
3612	adev->gfx.config.backend_enable_mask = active_rbs;
3613	adev->gfx.config.num_rbs = hweight32(active_rbs);
3614
3615	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3616			     adev->gfx.config.max_shader_engines, 16);
3617
3618	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3619
3620	if (!adev->gfx.config.backend_enable_mask ||
3621			adev->gfx.config.num_rbs >= num_rb_pipes) {
3622		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3623		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3624	} else {
3625		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3626							adev->gfx.config.backend_enable_mask,
3627							num_rb_pipes);
3628	}
3629
3630	/* cache the values for userspace */
3631	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3632		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3633			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3634			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3635				RREG32(mmCC_RB_BACKEND_DISABLE);
3636			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3637				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3638			adev->gfx.config.rb_config[i][j].raster_config =
3639				RREG32(mmPA_SC_RASTER_CONFIG);
3640			adev->gfx.config.rb_config[i][j].raster_config_1 =
3641				RREG32(mmPA_SC_RASTER_CONFIG_1);
3642		}
3643	}
3644	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3645	mutex_unlock(&adev->grbm_idx_mutex);
3646}
3647
3648#define DEFAULT_SH_MEM_BASES	(0x6000)
3649/**
3650 * gfx_v8_0_init_compute_vmid - gart enable
3651 *
3652 * @adev: amdgpu_device pointer
3653 *
3654 * Initialize compute vmid sh_mem registers
3655 *
3656 */
 
3657static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3658{
3659	int i;
3660	uint32_t sh_mem_config;
3661	uint32_t sh_mem_bases;
3662
3663	/*
3664	 * Configure apertures:
3665	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3666	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3667	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3668	 */
3669	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3670
3671	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3672			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3673			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3674			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3675			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3676			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3677
3678	mutex_lock(&adev->srbm_mutex);
3679	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3680		vi_srbm_select(adev, 0, 0, 0, i);
3681		/* CP and shaders */
3682		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3683		WREG32(mmSH_MEM_APE1_BASE, 1);
3684		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3685		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3686	}
3687	vi_srbm_select(adev, 0, 0, 0, 0);
3688	mutex_unlock(&adev->srbm_mutex);
3689
3690	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3691	   access. These should be enabled by FW for target VMIDs. */
3692	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3693		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3694		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3695		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3696		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3697	}
3698}
3699
3700static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3701{
3702	int vmid;
3703
3704	/*
3705	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3706	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3707	 * the driver can enable them for graphics. VMID0 should maintain
3708	 * access so that HWS firmware can save/restore entries.
3709	 */
3710	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3711		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3712		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3713		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3714		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3715	}
3716}
3717
3718static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3719{
3720	switch (adev->asic_type) {
3721	default:
3722		adev->gfx.config.double_offchip_lds_buf = 1;
3723		break;
3724	case CHIP_CARRIZO:
3725	case CHIP_STONEY:
3726		adev->gfx.config.double_offchip_lds_buf = 0;
3727		break;
3728	}
3729}
3730
3731static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3732{
3733	u32 tmp, sh_static_mem_cfg;
3734	int i;
3735
3736	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3737	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3738	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3739	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3740
3741	gfx_v8_0_tiling_mode_table_init(adev);
3742	gfx_v8_0_setup_rb(adev);
3743	gfx_v8_0_get_cu_info(adev);
3744	gfx_v8_0_config_init(adev);
3745
3746	/* XXX SH_MEM regs */
3747	/* where to put LDS, scratch, GPUVM in FSA64 space */
3748	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3749				   SWIZZLE_ENABLE, 1);
3750	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3751				   ELEMENT_SIZE, 1);
3752	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3753				   INDEX_STRIDE, 3);
3754	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3755
3756	mutex_lock(&adev->srbm_mutex);
3757	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3758		vi_srbm_select(adev, 0, 0, 0, i);
3759		/* CP and shaders */
3760		if (i == 0) {
3761			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3762			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3763			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3764					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3765			WREG32(mmSH_MEM_CONFIG, tmp);
3766			WREG32(mmSH_MEM_BASES, 0);
3767		} else {
3768			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3769			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3770			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3771					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3772			WREG32(mmSH_MEM_CONFIG, tmp);
3773			tmp = adev->gmc.shared_aperture_start >> 48;
3774			WREG32(mmSH_MEM_BASES, tmp);
3775		}
3776
3777		WREG32(mmSH_MEM_APE1_BASE, 1);
3778		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3779	}
3780	vi_srbm_select(adev, 0, 0, 0, 0);
3781	mutex_unlock(&adev->srbm_mutex);
3782
3783	gfx_v8_0_init_compute_vmid(adev);
3784	gfx_v8_0_init_gds_vmid(adev);
3785
3786	mutex_lock(&adev->grbm_idx_mutex);
3787	/*
3788	 * making sure that the following register writes will be broadcasted
3789	 * to all the shaders
3790	 */
3791	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3792
3793	WREG32(mmPA_SC_FIFO_SIZE,
3794		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3795			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3796		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3797			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3798		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3799			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3800		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3801			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3802
3803	tmp = RREG32(mmSPI_ARB_PRIORITY);
3804	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3805	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3806	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3807	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3808	WREG32(mmSPI_ARB_PRIORITY, tmp);
3809
3810	mutex_unlock(&adev->grbm_idx_mutex);
3811
3812}
3813
3814static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3815{
3816	u32 i, j, k;
3817	u32 mask;
3818
3819	mutex_lock(&adev->grbm_idx_mutex);
3820	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3821		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3822			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3823			for (k = 0; k < adev->usec_timeout; k++) {
3824				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3825					break;
3826				udelay(1);
3827			}
3828			if (k == adev->usec_timeout) {
3829				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3830						      0xffffffff, 0xffffffff, 0);
3831				mutex_unlock(&adev->grbm_idx_mutex);
3832				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3833					 i, j);
3834				return;
3835			}
3836		}
3837	}
3838	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3839	mutex_unlock(&adev->grbm_idx_mutex);
3840
3841	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3842		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3843		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3844		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3845	for (k = 0; k < adev->usec_timeout; k++) {
3846		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3847			break;
3848		udelay(1);
3849	}
3850}
3851
3852static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3853					       bool enable)
3854{
3855	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3856
3857	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3858	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3859	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3860	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3861
3862	WREG32(mmCP_INT_CNTL_RING0, tmp);
3863}
3864
3865static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3866{
3867	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3868	/* csib */
3869	WREG32(mmRLC_CSIB_ADDR_HI,
3870			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3871	WREG32(mmRLC_CSIB_ADDR_LO,
3872			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3873	WREG32(mmRLC_CSIB_LENGTH,
3874			adev->gfx.rlc.clear_state_size);
3875}
3876
/*
 * Walk the indirect part of a register list and rewrite it in place.
 *
 * Starting at @ind_offset, the list is consumed as groups of three-dword
 * entries; a dword equal to 0xFFFFFFFF ends a group.  For every group the
 * offset of its first dword is appended to @ind_start_offsets.  For every
 * entry the third dword is looked up in @unique_indices (and appended there
 * if it has not been seen yet), and that dword is then replaced by its
 * position in @unique_indices.
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset:           dword offset at which parsing starts
 * @list_size:            number of dwords in @register_list_format
 * @unique_indices:       out: distinct third-dword values, in order seen
 * @indices_count:        in/out: number of valid slots in @unique_indices
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    out: start offset of every group
 * @offset_count:         in/out: number of valid slots in @ind_start_offsets
 * @max_offset:           capacity of @ind_start_offsets
 *
 * BUG()s as soon as either output array becomes full.  A trailing entry
 * that is cut short by @list_size is ignored instead of being read (and
 * written) past the end of the list.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip to the third dword of the entry */
		ind_offset += 2;

		/* truncated entry: its third dword lies outside the list */
		if (ind_offset >= list_size)
			break;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* first occurrence: remember it */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
3926
3927static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3928{
3929	int i, temp, data;
3930	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3931	int indices_count = 0;
3932	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3933	int offset_count = 0;
3934
3935	int list_size;
3936	unsigned int *register_list_format =
3937		kmemdup(adev->gfx.rlc.register_list_format,
3938			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3939	if (!register_list_format)
3940		return -ENOMEM;
3941
3942	gfx_v8_0_parse_ind_reg_list(register_list_format,
3943				RLC_FormatDirectRegListLength,
3944				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3945				unique_indices,
3946				&indices_count,
3947				ARRAY_SIZE(unique_indices),
3948				indirect_start_offsets,
3949				&offset_count,
3950				ARRAY_SIZE(indirect_start_offsets));
3951
3952	/* save and restore list */
3953	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3954
3955	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3956	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3957		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3958
3959	/* indirect list */
3960	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3961	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3962		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3963
3964	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3965	list_size = list_size >> 1;
3966	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3967	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3968
3969	/* starting offsets starts */
3970	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3971		adev->gfx.rlc.starting_offsets_start);
3972	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3973		WREG32(mmRLC_GPM_SCRATCH_DATA,
3974				indirect_start_offsets[i]);
3975
3976	/* unique indices */
3977	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3978	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3979	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3980		if (unique_indices[i] != 0) {
3981			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3982			WREG32(data + i, unique_indices[i] >> 20);
3983		}
3984	}
3985	kfree(register_list_format);
3986
3987	return 0;
3988}
3989
3990static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3991{
3992	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3993}
3994
3995static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3996{
3997	uint32_t data;
3998
3999	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4000
4001	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4002	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4003	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4004	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4005	WREG32(mmRLC_PG_DELAY, data);
4006
4007	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4008	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4009
4010}
4011
4012static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4013						bool enable)
4014{
4015	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4016}
4017
4018static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4019						  bool enable)
4020{
4021	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4022}
4023
4024static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4025{
4026	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4027}
4028
4029static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4030{
4031	if ((adev->asic_type == CHIP_CARRIZO) ||
4032	    (adev->asic_type == CHIP_STONEY)) {
4033		gfx_v8_0_init_csb(adev);
4034		gfx_v8_0_init_save_restore_list(adev);
4035		gfx_v8_0_enable_save_restore_machine(adev);
4036		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4037		gfx_v8_0_init_power_gating(adev);
4038		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4039	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4040		   (adev->asic_type == CHIP_POLARIS12) ||
4041		   (adev->asic_type == CHIP_VEGAM)) {
4042		gfx_v8_0_init_csb(adev);
4043		gfx_v8_0_init_save_restore_list(adev);
4044		gfx_v8_0_enable_save_restore_machine(adev);
4045		gfx_v8_0_init_power_gating(adev);
4046	}
4047
4048}
4049
4050static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4051{
4052	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4053
4054	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4055	gfx_v8_0_wait_for_rlc_serdes(adev);
4056}
4057
4058static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4059{
4060	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4061	udelay(50);
4062
4063	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4064	udelay(50);
4065}
4066
4067static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4068{
4069	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4070
4071	/* carrizo do enable cp interrupt after cp inited */
4072	if (!(adev->flags & AMD_IS_APU))
4073		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4074
4075	udelay(50);
4076}
4077
4078static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4079{
4080	if (amdgpu_sriov_vf(adev)) {
4081		gfx_v8_0_init_csb(adev);
4082		return 0;
4083	}
4084
4085	adev->gfx.rlc.funcs->stop(adev);
4086	adev->gfx.rlc.funcs->reset(adev);
4087	gfx_v8_0_init_pg(adev);
4088	adev->gfx.rlc.funcs->start(adev);
4089
4090	return 0;
4091}
4092
4093static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4094{
4095	u32 tmp = RREG32(mmCP_ME_CNTL);
4096
4097	if (enable) {
4098		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4099		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4100		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4101	} else {
4102		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4103		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4104		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4105	}
4106	WREG32(mmCP_ME_CNTL, tmp);
4107	udelay(50);
4108}
4109
4110static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4111{
4112	u32 count = 0;
4113	const struct cs_section_def *sect = NULL;
4114	const struct cs_extent_def *ext = NULL;
4115
4116	/* begin clear state */
4117	count += 2;
4118	/* context control state */
4119	count += 3;
4120
4121	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4122		for (ext = sect->section; ext->extent != NULL; ++ext) {
4123			if (sect->id == SECT_CONTEXT)
4124				count += 2 + ext->reg_count;
4125			else
4126				return 0;
4127		}
4128	}
4129	/* pa_sc_raster_config/pa_sc_raster_config1 */
4130	count += 4;
4131	/* end clear state */
4132	count += 2;
4133	/* clear state */
4134	count += 2;
4135
4136	return count;
4137}
4138
4139static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4140{
4141	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4142	const struct cs_section_def *sect = NULL;
4143	const struct cs_extent_def *ext = NULL;
4144	int r, i;
4145
4146	/* init the CP */
4147	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4148	WREG32(mmCP_ENDIAN_SWAP, 0);
4149	WREG32(mmCP_DEVICE_ID, 1);
4150
4151	gfx_v8_0_cp_gfx_enable(adev, true);
4152
4153	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4154	if (r) {
4155		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4156		return r;
4157	}
4158
4159	/* clear state buffer */
4160	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4161	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4162
4163	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4164	amdgpu_ring_write(ring, 0x80000000);
4165	amdgpu_ring_write(ring, 0x80000000);
4166
4167	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4168		for (ext = sect->section; ext->extent != NULL; ++ext) {
4169			if (sect->id == SECT_CONTEXT) {
4170				amdgpu_ring_write(ring,
4171				       PACKET3(PACKET3_SET_CONTEXT_REG,
4172					       ext->reg_count));
4173				amdgpu_ring_write(ring,
4174				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4175				for (i = 0; i < ext->reg_count; i++)
4176					amdgpu_ring_write(ring, ext->extent[i]);
4177			}
4178		}
4179	}
4180
4181	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4182	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4183	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4184	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4185
4186	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4187	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4188
4189	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4190	amdgpu_ring_write(ring, 0);
4191
4192	/* init the CE partitions */
4193	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4194	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4195	amdgpu_ring_write(ring, 0x8000);
4196	amdgpu_ring_write(ring, 0x8000);
4197
4198	amdgpu_ring_commit(ring);
4199
4200	return 0;
4201}
4202static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4203{
4204	u32 tmp;
4205	/* no gfx doorbells on iceland */
4206	if (adev->asic_type == CHIP_TOPAZ)
4207		return;
4208
4209	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4210
4211	if (ring->use_doorbell) {
4212		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4213				DOORBELL_OFFSET, ring->doorbell_index);
4214		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4215						DOORBELL_HIT, 0);
4216		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4217					    DOORBELL_EN, 1);
4218	} else {
4219		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4220	}
4221
4222	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4223
4224	if (adev->flags & AMD_IS_APU)
4225		return;
4226
4227	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4228					DOORBELL_RANGE_LOWER,
4229					adev->doorbell_index.gfx_ring0);
4230	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4231
4232	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4233		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4234}
4235
4236static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4237{
4238	struct amdgpu_ring *ring;
4239	u32 tmp;
4240	u32 rb_bufsz;
4241	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4242
4243	/* Set the write pointer delay */
4244	WREG32(mmCP_RB_WPTR_DELAY, 0);
4245
4246	/* set the RB to use vmid 0 */
4247	WREG32(mmCP_RB_VMID, 0);
4248
4249	/* Set ring buffer size */
4250	ring = &adev->gfx.gfx_ring[0];
4251	rb_bufsz = order_base_2(ring->ring_size / 8);
4252	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4253	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4254	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4255	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4256#ifdef __BIG_ENDIAN
4257	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4258#endif
4259	WREG32(mmCP_RB0_CNTL, tmp);
4260
4261	/* Initialize the ring buffer's read and write pointers */
4262	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4263	ring->wptr = 0;
4264	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4265
4266	/* set the wb address wether it's enabled or not */
4267	rptr_addr = ring->rptr_gpu_addr;
4268	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4269	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4270
4271	wptr_gpu_addr = ring->wptr_gpu_addr;
4272	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4273	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4274	mdelay(1);
4275	WREG32(mmCP_RB0_CNTL, tmp);
4276
4277	rb_addr = ring->gpu_addr >> 8;
4278	WREG32(mmCP_RB0_BASE, rb_addr);
4279	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4280
4281	gfx_v8_0_set_cpg_door_bell(adev, ring);
4282	/* start the ring */
4283	amdgpu_ring_clear_ring(ring);
4284	gfx_v8_0_cp_gfx_start(adev);
 
4285
4286	return 0;
4287}
4288
4289static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4290{
4291	if (enable) {
4292		WREG32(mmCP_MEC_CNTL, 0);
4293	} else {
4294		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4295		adev->gfx.kiq[0].ring.sched.ready = false;
4296	}
4297	udelay(50);
4298}
4299
4300/* KIQ functions */
4301static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4302{
4303	uint32_t tmp;
4304	struct amdgpu_device *adev = ring->adev;
4305
4306	/* tell RLC which is KIQ queue */
4307	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4308	tmp &= 0xffffff00;
4309	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4310	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4311	tmp |= 0x80;
4312	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4313}
4314
4315static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4316{
4317	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4318	uint64_t queue_mask = 0;
4319	int r, i;
4320
4321	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4322		if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
4323			continue;
4324
4325		/* This situation may be hit in the future if a new HW
4326		 * generation exposes more than 64 queues. If so, the
4327		 * definition of queue_mask needs updating */
4328		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4329			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4330			break;
4331		}
4332
4333		queue_mask |= (1ull << i);
4334	}
4335
4336	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4337	if (r) {
4338		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4339		return r;
4340	}
4341	/* set resources */
4342	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4343	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4344	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4345	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4346	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4347	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4348	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4349	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4350	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4351		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4352		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4353		uint64_t wptr_addr = ring->wptr_gpu_addr;
4354
4355		/* map queues */
4356		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4357		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4358		amdgpu_ring_write(kiq_ring,
4359				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4360		amdgpu_ring_write(kiq_ring,
4361				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4362				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4363				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4364				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4365		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4366		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4367		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4368		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4369	}
4370
4371	amdgpu_ring_commit(kiq_ring);
4372
4373	return 0;
4374}
4375
4376static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4377{
4378	int i, r = 0;
4379
4380	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4381		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4382		for (i = 0; i < adev->usec_timeout; i++) {
4383			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4384				break;
4385			udelay(1);
4386		}
4387		if (i == adev->usec_timeout)
4388			r = -ETIMEDOUT;
4389	}
4390	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4391	WREG32(mmCP_HQD_PQ_RPTR, 0);
4392	WREG32(mmCP_HQD_PQ_WPTR, 0);
4393
4394	return r;
4395}
4396
4397static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4398{
4399	struct amdgpu_device *adev = ring->adev;
4400
4401	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4402		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4403			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4404			mqd->cp_hqd_queue_priority =
4405				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4406		}
4407	}
4408}
4409
/*
 * Fill in the MQD (struct vi_mqd) that ring->mqd_ptr points to.
 *
 * Many fields are built by reading the current value of the matching
 * CP_HQD_* / CP_MQD_* register and overriding selected bit-fields; the
 * remaining ones are constants or derived from the ring's GPU addresses.
 * No register is written here - the result only lands in the MQD (see
 * gfx_v8_0_mqd_commit() for the register programming).  ring->wptr is
 * reset to 0 as a side effect.
 *
 * The callers in this file hold srbm_mutex and select the ring's
 * me/pipe/queue with vi_srbm_select() around this call.
 *
 * Always returns 0.
 */
4410static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4411{
4412	struct amdgpu_device *adev = ring->adev;
4413	struct vi_mqd *mqd = ring->mqd_ptr;
4414	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4415	uint32_t tmp;
4416
4417	mqd->header = 0xC0310800;
4418	mqd->compute_pipelinestat_enable = 0x00000001;
4419	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4420	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4421	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4422	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4423	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of the dynamic_cu_mask member of the enclosing vi_mqd_allocation */
4424	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4425						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4426	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4427						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* EOP base is stored shifted right by 8 bits */
4428	eop_base_addr = ring->eop_gpu_addr >> 8;
4429	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4430	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4431
4432	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4433	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4434	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4435			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4436
4437	mqd->cp_hqd_eop_control = tmp;
4438
4439	/* enable doorbell? */
	/*
	 * NOTE(review): this value of cp_hqd_pq_doorbell_control is
	 * overwritten further down, after the "enable the doorbell if
	 * requested" block.
	 */
4440	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4441			    CP_HQD_PQ_DOORBELL_CONTROL,
4442			    DOORBELL_EN,
4443			    ring->use_doorbell ? 1 : 0);
4444
4445	mqd->cp_hqd_pq_doorbell_control = tmp;
4446
4447	/* set the pointer to the MQD */
4448	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4449	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4450
4451	/* set MQD vmid to 0 */
4452	tmp = RREG32(mmCP_MQD_CONTROL);
4453	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4454	mqd->cp_mqd_control = tmp;
4455
4456	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4457	hqd_gpu_addr = ring->gpu_addr >> 8;
4458	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4459	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4460
4461	/* set up the HQD, this is similar to CP_RB0_CNTL */
4462	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4463	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4464			    (order_base_2(ring->ring_size / 4) - 1));
4465	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4466			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4467#ifdef __BIG_ENDIAN
4468	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4469#endif
4470	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4471	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4472	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4473	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4474	mqd->cp_hqd_pq_control = tmp;
4475
4476	/* set the wb address whether it's enabled or not */
4477	wb_gpu_addr = ring->rptr_gpu_addr;
4478	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4479	mqd->cp_hqd_pq_rptr_report_addr_hi =
4480		upper_32_bits(wb_gpu_addr) & 0xffff;
4481
4482	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4483	wb_gpu_addr = ring->wptr_gpu_addr;
4484	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4485	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4486
	/* without a doorbell the whole doorbell control word is stored as 0 */
4487	tmp = 0;
4488	/* enable the doorbell if requested */
4489	if (ring->use_doorbell) {
4490		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4491		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4492				DOORBELL_OFFSET, ring->doorbell_index);
4493
4494		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4495					 DOORBELL_EN, 1);
4496		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4497					 DOORBELL_SOURCE, 0);
4498		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4499					 DOORBELL_HIT, 0);
4500	}
4501
4502	mqd->cp_hqd_pq_doorbell_control = tmp;
4503
4504	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4505	ring->wptr = 0;
4506	mqd->cp_hqd_pq_wptr = ring->wptr;
4507	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4508
4509	/* set the vmid for the queue */
4510	mqd->cp_hqd_vmid = 0;
4511
4512	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4513	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4514	mqd->cp_hqd_persistent_state = tmp;
4515
4516	/* set MTYPE */
4517	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4518	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4519	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4520	mqd->cp_hqd_ib_control = tmp;
4521
4522	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4523	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4524	mqd->cp_hqd_iq_timer = tmp;
4525
4526	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4527	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4528	mqd->cp_hqd_ctx_save_control = tmp;
4529
4530	/* defaults */
	/* the fields below are copied unmodified from the current register values */
4531	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4532	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4533	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4534	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4535	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4536	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4537	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4538	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4539	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4540	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4541	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4542	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4543
4544	/* set static priority for a queue/ring */
4545	gfx_v8_0_mqd_set_priority(ring, mqd);
4546	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4547
4548	/* map_queues packet doesn't need activate the queue,
4549	 * so only kiq need set this field.
4550	 */
4551	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4552		mqd->cp_hqd_active = 1;
4553
4554	return 0;
4555}
4556
4557static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4558			struct vi_mqd *mqd)
4559{
4560	uint32_t mqd_reg;
4561	uint32_t *mqd_data;
4562
4563	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4564	mqd_data = &mqd->cp_mqd_base_addr_lo;
4565
4566	/* disable wptr polling */
4567	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4568
4569	/* program all HQD registers */
4570	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4571		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4572
4573	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4574	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4575	 * on ASICs that do not support context-save.
4576	 * EOP writes/reads can start anywhere in the ring.
4577	 */
4578	if (adev->asic_type != CHIP_TONGA) {
4579		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4580		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4581		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4582	}
4583
4584	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4585		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4586
4587	/* activate the HQD */
4588	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4589		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4590
4591	return 0;
4592}
4593
4594static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4595{
4596	struct amdgpu_device *adev = ring->adev;
4597	struct vi_mqd *mqd = ring->mqd_ptr;
 
4598
4599	gfx_v8_0_kiq_setting(ring);
4600
4601	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4602		/* reset MQD to a clean status */
4603		if (adev->gfx.kiq[0].mqd_backup)
4604			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
4605
4606		/* reset ring buffer */
4607		ring->wptr = 0;
4608		amdgpu_ring_clear_ring(ring);
4609		mutex_lock(&adev->srbm_mutex);
4610		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4611		gfx_v8_0_mqd_commit(adev, mqd);
4612		vi_srbm_select(adev, 0, 0, 0, 0);
4613		mutex_unlock(&adev->srbm_mutex);
4614	} else {
4615		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4616		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4617		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4618		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4619			amdgpu_ring_clear_ring(ring);
4620		mutex_lock(&adev->srbm_mutex);
4621		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4622		gfx_v8_0_mqd_init(ring);
4623		gfx_v8_0_mqd_commit(adev, mqd);
4624		vi_srbm_select(adev, 0, 0, 0, 0);
4625		mutex_unlock(&adev->srbm_mutex);
4626
4627		if (adev->gfx.kiq[0].mqd_backup)
4628			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
4629	}
4630
4631	return 0;
4632}
4633
4634static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4635{
4636	struct amdgpu_device *adev = ring->adev;
4637	struct vi_mqd *mqd = ring->mqd_ptr;
4638	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4639
4640	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4641		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4642		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4643		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4644		mutex_lock(&adev->srbm_mutex);
4645		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4646		gfx_v8_0_mqd_init(ring);
4647		vi_srbm_select(adev, 0, 0, 0, 0);
4648		mutex_unlock(&adev->srbm_mutex);
4649
4650		if (adev->gfx.mec.mqd_backup[mqd_idx])
4651			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4652	} else {
4653		/* restore MQD to a clean status */
4654		if (adev->gfx.mec.mqd_backup[mqd_idx])
4655			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4656		/* reset ring buffer */
4657		ring->wptr = 0;
4658		amdgpu_ring_clear_ring(ring);
 
 
4659	}
4660	return 0;
4661}
4662
4663static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4664{
4665	if (adev->asic_type > CHIP_TONGA) {
4666		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4667		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4668	}
4669	/* enable doorbells */
4670	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4671}
4672
4673static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4674{
4675	struct amdgpu_ring *ring;
4676	int r;
4677
4678	ring = &adev->gfx.kiq[0].ring;
4679
4680	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4681	if (unlikely(r != 0))
4682		return r;
4683
4684	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4685	if (unlikely(r != 0)) {
4686		amdgpu_bo_unreserve(ring->mqd_obj);
4687		return r;
4688	}
4689
4690	gfx_v8_0_kiq_init_queue(ring);
4691	amdgpu_bo_kunmap(ring->mqd_obj);
4692	ring->mqd_ptr = NULL;
4693	amdgpu_bo_unreserve(ring->mqd_obj);
 
4694	return 0;
4695}
4696
4697static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4698{
4699	struct amdgpu_ring *ring = NULL;
4700	int r = 0, i;
4701
4702	gfx_v8_0_cp_compute_enable(adev, true);
4703
4704	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4705		ring = &adev->gfx.compute_ring[i];
4706
4707		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4708		if (unlikely(r != 0))
4709			goto done;
4710		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4711		if (!r) {
4712			r = gfx_v8_0_kcq_init_queue(ring);
4713			amdgpu_bo_kunmap(ring->mqd_obj);
4714			ring->mqd_ptr = NULL;
4715		}
4716		amdgpu_bo_unreserve(ring->mqd_obj);
4717		if (r)
4718			goto done;
4719	}
4720
4721	gfx_v8_0_set_mec_doorbell_range(adev);
4722
4723	r = gfx_v8_0_kiq_kcq_enable(adev);
4724	if (r)
4725		goto done;
4726
4727done:
4728	return r;
4729}
4730
4731static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4732{
4733	int r, i;
4734	struct amdgpu_ring *ring;
4735
4736	/* collect all the ring_tests here, gfx, kiq, compute */
4737	ring = &adev->gfx.gfx_ring[0];
4738	r = amdgpu_ring_test_helper(ring);
4739	if (r)
4740		return r;
4741
4742	ring = &adev->gfx.kiq[0].ring;
4743	r = amdgpu_ring_test_helper(ring);
4744	if (r)
4745		return r;
4746
4747	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748		ring = &adev->gfx.compute_ring[i];
4749		amdgpu_ring_test_helper(ring);
4750	}
4751
4752	return 0;
4753}
4754
4755static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4756{
4757	int r;
4758
4759	if (!(adev->flags & AMD_IS_APU))
4760		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4761
4762	r = gfx_v8_0_kiq_resume(adev);
4763	if (r)
4764		return r;
4765
4766	r = gfx_v8_0_cp_gfx_resume(adev);
4767	if (r)
4768		return r;
4769
4770	r = gfx_v8_0_kcq_resume(adev);
4771	if (r)
4772		return r;
4773
4774	r = gfx_v8_0_cp_test_all_rings(adev);
4775	if (r)
4776		return r;
4777
4778	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4779
4780	return 0;
4781}
4782
4783static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4784{
4785	gfx_v8_0_cp_gfx_enable(adev, enable);
4786	gfx_v8_0_cp_compute_enable(adev, enable);
4787}
4788
4789static int gfx_v8_0_hw_init(void *handle)
4790{
4791	int r;
4792	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4793
4794	gfx_v8_0_init_golden_registers(adev);
4795	gfx_v8_0_constants_init(adev);
4796
4797	r = adev->gfx.rlc.funcs->resume(adev);
4798	if (r)
4799		return r;
4800
4801	r = gfx_v8_0_cp_resume(adev);
4802
4803	return r;
4804}
4805
4806static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4807{
4808	int r, i;
4809	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4810
4811	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4812	if (r)
4813		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4814
4815	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4816		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4817
4818		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4819		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4820						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4821						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4822						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4823						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4824		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4825		amdgpu_ring_write(kiq_ring, 0);
4826		amdgpu_ring_write(kiq_ring, 0);
4827		amdgpu_ring_write(kiq_ring, 0);
4828	}
4829	r = amdgpu_ring_test_helper(kiq_ring);
4830	if (r)
4831		DRM_ERROR("KCQ disable failed\n");
4832
4833	return r;
4834}
4835
4836static bool gfx_v8_0_is_idle(void *handle)
4837{
4838	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4839
4840	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4841		|| RREG32(mmGRBM_STATUS2) != 0x8)
4842		return false;
4843	else
4844		return true;
4845}
4846
4847static bool gfx_v8_0_rlc_is_idle(void *handle)
4848{
4849	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4850
4851	if (RREG32(mmGRBM_STATUS2) != 0x8)
4852		return false;
4853	else
4854		return true;
4855}
4856
4857static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4858{
4859	unsigned int i;
4860	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4861
4862	for (i = 0; i < adev->usec_timeout; i++) {
4863		if (gfx_v8_0_rlc_is_idle(handle))
4864			return 0;
4865
4866		udelay(1);
4867	}
4868	return -ETIMEDOUT;
4869}
4870
4871static int gfx_v8_0_wait_for_idle(void *handle)
4872{
4873	unsigned int i;
4874	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4875
4876	for (i = 0; i < adev->usec_timeout; i++) {
4877		if (gfx_v8_0_is_idle(handle))
4878			return 0;
4879
4880		udelay(1);
4881	}
4882	return -ETIMEDOUT;
4883}
4884
4885static int gfx_v8_0_hw_fini(void *handle)
4886{
4887	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4888
4889	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4890	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4891
4892	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4893
4894	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4895
4896	/* disable KCQ to avoid CPC touch memory not valid anymore */
4897	gfx_v8_0_kcq_disable(adev);
4898
4899	if (amdgpu_sriov_vf(adev)) {
4900		pr_debug("For SRIOV client, shouldn't do anything.\n");
4901		return 0;
4902	}
4903	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4904	if (!gfx_v8_0_wait_for_idle(adev))
4905		gfx_v8_0_cp_enable(adev, false);
4906	else
4907		pr_err("cp is busy, skip halt cp\n");
4908	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4909		adev->gfx.rlc.funcs->stop(adev);
4910	else
4911		pr_err("rlc is busy, skip halt rlc\n");
4912	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4913
4914	return 0;
4915}
4916
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
4921
/* Resume is implemented as a plain hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
4926
4927static bool gfx_v8_0_check_soft_reset(void *handle)
4928{
4929	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4930	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4931	u32 tmp;
4932
4933	/* GRBM_STATUS */
4934	tmp = RREG32(mmGRBM_STATUS);
4935	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4936		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4937		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4938		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4939		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4940		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4941		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4942		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4943						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4944		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4945						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4946		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4947						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4948	}
4949
4950	/* GRBM_STATUS2 */
4951	tmp = RREG32(mmGRBM_STATUS2);
4952	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4953		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4954						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4955
4956	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4957	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4958	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4959		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4960						SOFT_RESET_CPF, 1);
4961		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4962						SOFT_RESET_CPC, 1);
4963		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4964						SOFT_RESET_CPG, 1);
4965		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4966						SOFT_RESET_GRBM, 1);
4967	}
4968
4969	/* SRBM_STATUS */
4970	tmp = RREG32(mmSRBM_STATUS);
4971	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4972		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4973						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4974	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4975		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4976						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4977
4978	if (grbm_soft_reset || srbm_soft_reset) {
4979		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4980		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4981		return true;
4982	} else {
4983		adev->gfx.grbm_soft_reset = 0;
4984		adev->gfx.srbm_soft_reset = 0;
4985		return false;
4986	}
4987}
4988
4989static int gfx_v8_0_pre_soft_reset(void *handle)
4990{
4991	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4992	u32 grbm_soft_reset = 0;
4993
4994	if ((!adev->gfx.grbm_soft_reset) &&
4995	    (!adev->gfx.srbm_soft_reset))
4996		return 0;
4997
4998	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4999
5000	/* stop the rlc */
5001	adev->gfx.rlc.funcs->stop(adev);
5002
5003	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5004	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5005		/* Disable GFX parsing/prefetching */
5006		gfx_v8_0_cp_gfx_enable(adev, false);
5007
5008	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5009	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5010	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5011	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5012		int i;
5013
5014		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5015			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5016
5017			mutex_lock(&adev->srbm_mutex);
5018			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5019			gfx_v8_0_deactivate_hqd(adev, 2);
5020			vi_srbm_select(adev, 0, 0, 0, 0);
5021			mutex_unlock(&adev->srbm_mutex);
5022		}
5023		/* Disable MEC parsing/prefetching */
5024		gfx_v8_0_cp_compute_enable(adev, false);
5025	}
5026
5027	return 0;
5028}
5029
5030static int gfx_v8_0_soft_reset(void *handle)
5031{
5032	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5033	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5034	u32 tmp;
5035
5036	if ((!adev->gfx.grbm_soft_reset) &&
5037	    (!adev->gfx.srbm_soft_reset))
5038		return 0;
5039
5040	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5041	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5042
5043	if (grbm_soft_reset || srbm_soft_reset) {
5044		tmp = RREG32(mmGMCON_DEBUG);
5045		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5046		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5047		WREG32(mmGMCON_DEBUG, tmp);
5048		udelay(50);
5049	}
5050
5051	if (grbm_soft_reset) {
5052		tmp = RREG32(mmGRBM_SOFT_RESET);
5053		tmp |= grbm_soft_reset;
5054		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5055		WREG32(mmGRBM_SOFT_RESET, tmp);
5056		tmp = RREG32(mmGRBM_SOFT_RESET);
5057
5058		udelay(50);
5059
5060		tmp &= ~grbm_soft_reset;
5061		WREG32(mmGRBM_SOFT_RESET, tmp);
5062		tmp = RREG32(mmGRBM_SOFT_RESET);
5063	}
5064
5065	if (srbm_soft_reset) {
5066		tmp = RREG32(mmSRBM_SOFT_RESET);
5067		tmp |= srbm_soft_reset;
5068		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5069		WREG32(mmSRBM_SOFT_RESET, tmp);
5070		tmp = RREG32(mmSRBM_SOFT_RESET);
5071
5072		udelay(50);
5073
5074		tmp &= ~srbm_soft_reset;
5075		WREG32(mmSRBM_SOFT_RESET, tmp);
5076		tmp = RREG32(mmSRBM_SOFT_RESET);
5077	}
5078
5079	if (grbm_soft_reset || srbm_soft_reset) {
5080		tmp = RREG32(mmGMCON_DEBUG);
5081		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5082		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5083		WREG32(mmGMCON_DEBUG, tmp);
5084	}
5085
5086	/* Wait a little for things to settle down */
5087	udelay(50);
5088
5089	return 0;
5090}
5091
5092static int gfx_v8_0_post_soft_reset(void *handle)
5093{
5094	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5095	u32 grbm_soft_reset = 0;
5096
5097	if ((!adev->gfx.grbm_soft_reset) &&
5098	    (!adev->gfx.srbm_soft_reset))
5099		return 0;
5100
5101	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5102
5103	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5104	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5105	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5106	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5107		int i;
5108
5109		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5110			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5111
5112			mutex_lock(&adev->srbm_mutex);
5113			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5114			gfx_v8_0_deactivate_hqd(adev, 2);
5115			vi_srbm_select(adev, 0, 0, 0, 0);
5116			mutex_unlock(&adev->srbm_mutex);
5117		}
5118		gfx_v8_0_kiq_resume(adev);
5119		gfx_v8_0_kcq_resume(adev);
5120	}
5121
5122	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5123	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5124		gfx_v8_0_cp_gfx_resume(adev);
5125
5126	gfx_v8_0_cp_test_all_rings(adev);
5127
5128	adev->gfx.rlc.funcs->start(adev);
5129
5130	return 0;
5131}
5132
5133/**
5134 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5135 *
5136 * @adev: amdgpu_device pointer
5137 *
5138 * Fetches a GPU clock counter snapshot.
5139 * Returns the 64 bit clock counter snapshot.
5140 */
5141static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5142{
5143	uint64_t clock;
5144
5145	mutex_lock(&adev->gfx.gpu_clock_mutex);
5146	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5147	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5148		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5149	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5150	return clock;
5151}
5152
5153static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5154					  uint32_t vmid,
5155					  uint32_t gds_base, uint32_t gds_size,
5156					  uint32_t gws_base, uint32_t gws_size,
5157					  uint32_t oa_base, uint32_t oa_size)
5158{
5159	/* GDS Base */
5160	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5161	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5162				WRITE_DATA_DST_SEL(0)));
5163	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5164	amdgpu_ring_write(ring, 0);
5165	amdgpu_ring_write(ring, gds_base);
5166
5167	/* GDS Size */
5168	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5169	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5170				WRITE_DATA_DST_SEL(0)));
5171	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5172	amdgpu_ring_write(ring, 0);
5173	amdgpu_ring_write(ring, gds_size);
5174
5175	/* GWS */
5176	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5177	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5178				WRITE_DATA_DST_SEL(0)));
5179	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5180	amdgpu_ring_write(ring, 0);
5181	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5182
5183	/* OA */
5184	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5185	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5186				WRITE_DATA_DST_SEL(0)));
5187	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5188	amdgpu_ring_write(ring, 0);
5189	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5190}
5191
5192static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5193{
5194	WREG32(mmSQ_IND_INDEX,
5195		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5196		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5197		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5198		(SQ_IND_INDEX__FORCE_READ_MASK));
5199	return RREG32(mmSQ_IND_DATA);
5200}
5201
5202static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5203			   uint32_t wave, uint32_t thread,
5204			   uint32_t regno, uint32_t num, uint32_t *out)
5205{
5206	WREG32(mmSQ_IND_INDEX,
5207		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5208		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5209		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5210		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5211		(SQ_IND_INDEX__FORCE_READ_MASK) |
5212		(SQ_IND_INDEX__AUTO_INCR_MASK));
5213	while (num--)
5214		*(out++) = RREG32(mmSQ_IND_DATA);
5215}
5216
5217static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5218{
5219	/* type 0 wave data */
5220	dst[(*no_fields)++] = 0;
5221	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5222	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5223	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5224	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5225	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5226	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5227	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5228	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5229	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5230	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5231	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5232	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5233	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5234	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5235	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5236	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5237	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5238	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5239	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5240}
5241
5242static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
5243				     uint32_t wave, uint32_t start,
5244				     uint32_t size, uint32_t *dst)
5245{
5246	wave_read_regs(
5247		adev, simd, wave, 0,
5248		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5249}
5250
5251
5252static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5253	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5254	.select_se_sh = &gfx_v8_0_select_se_sh,
5255	.read_wave_data = &gfx_v8_0_read_wave_data,
5256	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5257	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5258};
5259
5260static int gfx_v8_0_early_init(void *handle)
5261{
5262	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5263
5264	adev->gfx.xcc_mask = 1;
5265	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5266	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5267					  AMDGPU_MAX_COMPUTE_RINGS);
5268	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5269	gfx_v8_0_set_ring_funcs(adev);
5270	gfx_v8_0_set_irq_funcs(adev);
5271	gfx_v8_0_set_gds_init(adev);
5272	gfx_v8_0_set_rlc_funcs(adev);
5273
5274	return 0;
5275}
5276
5277static int gfx_v8_0_late_init(void *handle)
5278{
5279	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5280	int r;
5281
5282	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5283	if (r)
5284		return r;
5285
5286	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5287	if (r)
5288		return r;
5289
5290	/* requires IBs so do in late init after IB pool is initialized */
5291	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5292	if (r)
5293		return r;
5294
5295	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5296	if (r) {
5297		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5298		return r;
5299	}
5300
5301	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5302	if (r) {
5303		DRM_ERROR(
5304			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5305			r);
5306		return r;
5307	}
5308
5309	return 0;
5310}
5311
5312static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5313						       bool enable)
5314{
5315	if ((adev->asic_type == CHIP_POLARIS11) ||
5316	    (adev->asic_type == CHIP_POLARIS12) ||
5317	    (adev->asic_type == CHIP_VEGAM))
 
5318		/* Send msg to SMU via Powerplay */
5319		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5320
5321	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5322}
5323
5324static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5325							bool enable)
5326{
5327	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5328}
5329
5330static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5331		bool enable)
5332{
5333	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5334}
5335
5336static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5337					  bool enable)
5338{
5339	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5340}
5341
5342static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5343						bool enable)
5344{
5345	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5346
5347	/* Read any GFX register to wake up GFX. */
5348	if (!enable)
5349		RREG32(mmDB_RENDER_CONTROL);
5350}
5351
5352static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5353					  bool enable)
5354{
5355	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5356		cz_enable_gfx_cg_power_gating(adev, true);
5357		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5358			cz_enable_gfx_pipeline_power_gating(adev, true);
5359	} else {
5360		cz_enable_gfx_cg_power_gating(adev, false);
5361		cz_enable_gfx_pipeline_power_gating(adev, false);
5362	}
5363}
5364
5365static int gfx_v8_0_set_powergating_state(void *handle,
5366					  enum amd_powergating_state state)
5367{
5368	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5369	bool enable = (state == AMD_PG_STATE_GATE);
5370
5371	if (amdgpu_sriov_vf(adev))
5372		return 0;
5373
5374	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5375				AMD_PG_SUPPORT_RLC_SMU_HS |
5376				AMD_PG_SUPPORT_CP |
5377				AMD_PG_SUPPORT_GFX_DMG))
5378		amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5379	switch (adev->asic_type) {
5380	case CHIP_CARRIZO:
5381	case CHIP_STONEY:
5382
5383		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5384			cz_enable_sck_slow_down_on_power_up(adev, true);
5385			cz_enable_sck_slow_down_on_power_down(adev, true);
5386		} else {
5387			cz_enable_sck_slow_down_on_power_up(adev, false);
5388			cz_enable_sck_slow_down_on_power_down(adev, false);
5389		}
5390		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5391			cz_enable_cp_power_gating(adev, true);
5392		else
5393			cz_enable_cp_power_gating(adev, false);
5394
5395		cz_update_gfx_cg_power_gating(adev, enable);
5396
5397		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5398			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5399		else
5400			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5401
5402		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5403			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5404		else
5405			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5406		break;
5407	case CHIP_POLARIS11:
5408	case CHIP_POLARIS12:
5409	case CHIP_VEGAM:
5410		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5411			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5412		else
5413			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5414
5415		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5416			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5417		else
5418			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5419
5420		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5421			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5422		else
5423			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5424		break;
5425	default:
5426		break;
5427	}
5428	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5429				AMD_PG_SUPPORT_RLC_SMU_HS |
5430				AMD_PG_SUPPORT_CP |
5431				AMD_PG_SUPPORT_GFX_DMG))
5432		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5433	return 0;
5434}
5435
5436static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5437{
5438	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5439	int data;
5440
5441	if (amdgpu_sriov_vf(adev))
5442		*flags = 0;
5443
5444	/* AMD_CG_SUPPORT_GFX_MGCG */
5445	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5446	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5447		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5448
5449	/* AMD_CG_SUPPORT_GFX_CGLG */
5450	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5451	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5452		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5453
5454	/* AMD_CG_SUPPORT_GFX_CGLS */
5455	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5456		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5457
5458	/* AMD_CG_SUPPORT_GFX_CGTS */
5459	data = RREG32(mmCGTS_SM_CTRL_REG);
5460	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5461		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5462
5463	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5464	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5465		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5466
5467	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5468	data = RREG32(mmRLC_MEM_SLP_CNTL);
5469	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5470		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5471
5472	/* AMD_CG_SUPPORT_GFX_CP_LS */
5473	data = RREG32(mmCP_MEM_SLP_CNTL);
5474	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5475		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5476}
5477
5478static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5479				     uint32_t reg_addr, uint32_t cmd)
5480{
5481	uint32_t data;
5482
5483	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5484
5485	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5486	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5487
5488	data = RREG32(mmRLC_SERDES_WR_CTRL);
5489	if (adev->asic_type == CHIP_STONEY)
5490		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5491			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5492			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5493			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5494			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5495			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5496			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5497			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5498			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5499	else
5500		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5501			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5502			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5503			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5504			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5505			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5506			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5507			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5508			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5509			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5510			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5511	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5512		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5513		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5514		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5515
5516	WREG32(mmRLC_SERDES_WR_CTRL, data);
5517}
5518
/*
 * RLC safe-mode message codes and the RLC_GPR_REG2 request/message field
 * layout.
 * NOTE(review): the safe-mode helpers that follow use RLC_SAFE_MODE__*
 * definitions rather than these macros — confirm they are still needed.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
/* REQ is bit 0 */
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
/* MESSAGE occupies bits 1..4 (mask 0x1e, shift 1) */
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5525
5526static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5527{
5528	uint32_t rlc_setting;
5529
5530	rlc_setting = RREG32(mmRLC_CNTL);
5531	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5532		return false;
5533
5534	return true;
5535}
5536
5537static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5538{
5539	uint32_t data;
5540	unsigned i;
5541	data = RREG32(mmRLC_CNTL);
5542	data |= RLC_SAFE_MODE__CMD_MASK;
5543	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5544	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5545	WREG32(mmRLC_SAFE_MODE, data);
5546
5547	/* wait for RLC_SAFE_MODE */
5548	for (i = 0; i < adev->usec_timeout; i++) {
5549		if ((RREG32(mmRLC_GPM_STAT) &
5550		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5551		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5552		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5553		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5554			break;
5555		udelay(1);
5556	}
5557	for (i = 0; i < adev->usec_timeout; i++) {
5558		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5559			break;
5560		udelay(1);
5561	}
5562}
5563
5564static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5565{
5566	uint32_t data;
5567	unsigned i;
5568
5569	data = RREG32(mmRLC_CNTL);
5570	data |= RLC_SAFE_MODE__CMD_MASK;
5571	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5572	WREG32(mmRLC_SAFE_MODE, data);
5573
5574	for (i = 0; i < adev->usec_timeout; i++) {
5575		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5576			break;
5577		udelay(1);
5578	}
5579}
5580
5581static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5582{
5583	u32 data;
5584
5585	amdgpu_gfx_off_ctrl(adev, false);
5586
5587	if (amdgpu_sriov_is_pp_one_vf(adev))
5588		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5589	else
5590		data = RREG32(mmRLC_SPM_VMID);
5591
5592	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5593	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5594
5595	if (amdgpu_sriov_is_pp_one_vf(adev))
5596		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5597	else
5598		WREG32(mmRLC_SPM_VMID, data);
5599
5600	amdgpu_gfx_off_ctrl(adev, true);
5601}
5602
5603static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5604	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5605	.set_safe_mode = gfx_v8_0_set_safe_mode,
5606	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5607	.init = gfx_v8_0_rlc_init,
5608	.get_csb_size = gfx_v8_0_get_csb_size,
5609	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5610	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5611	.resume = gfx_v8_0_rlc_resume,
5612	.stop = gfx_v8_0_rlc_stop,
5613	.reset = gfx_v8_0_rlc_reset,
5614	.start = gfx_v8_0_rlc_start,
5615	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5616};
5617
5618static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5619						      bool enable)
5620{
5621	uint32_t temp, data;
5622
5623	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5624
5625	/* It is disabled by HW by default */
5626	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5627		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5628			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5629				/* 1 - RLC memory Light sleep */
5630				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5631
5632			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5633				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5634		}
5635
5636		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5637		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5638		if (adev->flags & AMD_IS_APU)
5639			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5640				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5641				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5642		else
5643			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5644				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5645				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5646				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5647
5648		if (temp != data)
5649			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5650
5651		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5652		gfx_v8_0_wait_for_rlc_serdes(adev);
5653
5654		/* 5 - clear mgcg override */
5655		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5656
5657		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5658			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5659			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5660			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5661			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5662			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5663			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5664			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5665			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5666				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5667			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5668			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5669			if (temp != data)
5670				WREG32(mmCGTS_SM_CTRL_REG, data);
5671		}
5672		udelay(50);
5673
5674		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5675		gfx_v8_0_wait_for_rlc_serdes(adev);
5676	} else {
5677		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5678		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5679		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5680				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5681				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5682				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5683		if (temp != data)
5684			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5685
5686		/* 2 - disable MGLS in RLC */
5687		data = RREG32(mmRLC_MEM_SLP_CNTL);
5688		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5689			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5690			WREG32(mmRLC_MEM_SLP_CNTL, data);
5691		}
5692
5693		/* 3 - disable MGLS in CP */
5694		data = RREG32(mmCP_MEM_SLP_CNTL);
5695		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5696			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5697			WREG32(mmCP_MEM_SLP_CNTL, data);
5698		}
5699
5700		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5701		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5702		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5703				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5704		if (temp != data)
5705			WREG32(mmCGTS_SM_CTRL_REG, data);
5706
5707		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5708		gfx_v8_0_wait_for_rlc_serdes(adev);
5709
5710		/* 6 - set mgcg override */
5711		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5712
5713		udelay(50);
5714
5715		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5716		gfx_v8_0_wait_for_rlc_serdes(adev);
5717	}
5718
5719	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5720}
5721
5722static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5723						      bool enable)
5724{
5725	uint32_t temp, temp1, data, data1;
5726
5727	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5728
5729	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5730
5731	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5732		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5733		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5734		if (temp1 != data1)
5735			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5736
5737		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5738		gfx_v8_0_wait_for_rlc_serdes(adev);
5739
5740		/* 2 - clear cgcg override */
5741		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5742
5743		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5744		gfx_v8_0_wait_for_rlc_serdes(adev);
5745
5746		/* 3 - write cmd to set CGLS */
5747		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5748
5749		/* 4 - enable cgcg */
5750		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5751
5752		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5753			/* enable cgls*/
5754			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5755
5756			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5757			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5758
5759			if (temp1 != data1)
5760				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5761		} else {
5762			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5763		}
5764
5765		if (temp != data)
5766			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5767
5768		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5769		 * Cmp_busy/GFX_Idle interrupts
5770		 */
5771		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5772	} else {
5773		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5774		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5775
5776		/* TEST CGCG */
5777		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5778		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5779				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5780		if (temp1 != data1)
5781			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5782
5783		/* read gfx register to wake up cgcg */
5784		RREG32(mmCB_CGTT_SCLK_CTRL);
5785		RREG32(mmCB_CGTT_SCLK_CTRL);
5786		RREG32(mmCB_CGTT_SCLK_CTRL);
5787		RREG32(mmCB_CGTT_SCLK_CTRL);
5788
5789		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5790		gfx_v8_0_wait_for_rlc_serdes(adev);
5791
5792		/* write cmd to Set CGCG Override */
5793		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5794
5795		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5796		gfx_v8_0_wait_for_rlc_serdes(adev);
5797
5798		/* write cmd to Clear CGLS */
5799		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5800
5801		/* disable cgcg, cgls should be disabled too. */
5802		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5803			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5804		if (temp != data)
5805			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5806		/* enable interrupts again for PG */
5807		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5808	}
5809
5810	gfx_v8_0_wait_for_rlc_serdes(adev);
5811
5812	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5813}
5814static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5815					    bool enable)
5816{
5817	if (enable) {
5818		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5819		 * ===  MGCG + MGLS + TS(CG/LS) ===
5820		 */
5821		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5822		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5823	} else {
5824		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5825		 * ===  CGCG + CGLS ===
5826		 */
5827		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5828		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5829	}
5830	return 0;
5831}
5832
5833static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5834					  enum amd_clockgating_state state)
5835{
5836	uint32_t msg_id, pp_state = 0;
5837	uint32_t pp_support_state = 0;
5838
5839	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5840		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5841			pp_support_state = PP_STATE_SUPPORT_LS;
5842			pp_state = PP_STATE_LS;
5843		}
5844		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5845			pp_support_state |= PP_STATE_SUPPORT_CG;
5846			pp_state |= PP_STATE_CG;
5847		}
5848		if (state == AMD_CG_STATE_UNGATE)
5849			pp_state = 0;
5850
5851		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5852				PP_BLOCK_GFX_CG,
5853				pp_support_state,
5854				pp_state);
5855		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5856	}
5857
5858	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5859		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5860			pp_support_state = PP_STATE_SUPPORT_LS;
5861			pp_state = PP_STATE_LS;
5862		}
5863
5864		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5865			pp_support_state |= PP_STATE_SUPPORT_CG;
5866			pp_state |= PP_STATE_CG;
5867		}
5868
5869		if (state == AMD_CG_STATE_UNGATE)
5870			pp_state = 0;
5871
5872		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5873				PP_BLOCK_GFX_MG,
5874				pp_support_state,
5875				pp_state);
5876		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5877	}
5878
5879	return 0;
5880}
5881
5882static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5883					  enum amd_clockgating_state state)
5884{
5885
5886	uint32_t msg_id, pp_state = 0;
5887	uint32_t pp_support_state = 0;
5888
5889	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5890		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5891			pp_support_state = PP_STATE_SUPPORT_LS;
5892			pp_state = PP_STATE_LS;
5893		}
5894		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5895			pp_support_state |= PP_STATE_SUPPORT_CG;
5896			pp_state |= PP_STATE_CG;
5897		}
5898		if (state == AMD_CG_STATE_UNGATE)
5899			pp_state = 0;
5900
5901		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5902				PP_BLOCK_GFX_CG,
5903				pp_support_state,
5904				pp_state);
5905		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5906	}
5907
5908	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5909		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5910			pp_support_state = PP_STATE_SUPPORT_LS;
5911			pp_state = PP_STATE_LS;
5912		}
5913		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5914			pp_support_state |= PP_STATE_SUPPORT_CG;
5915			pp_state |= PP_STATE_CG;
5916		}
5917		if (state == AMD_CG_STATE_UNGATE)
5918			pp_state = 0;
5919
5920		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5921				PP_BLOCK_GFX_3D,
5922				pp_support_state,
5923				pp_state);
5924		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5925	}
5926
5927	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5928		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5929			pp_support_state = PP_STATE_SUPPORT_LS;
5930			pp_state = PP_STATE_LS;
5931		}
5932
5933		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5934			pp_support_state |= PP_STATE_SUPPORT_CG;
5935			pp_state |= PP_STATE_CG;
5936		}
5937
5938		if (state == AMD_CG_STATE_UNGATE)
5939			pp_state = 0;
5940
5941		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5942				PP_BLOCK_GFX_MG,
5943				pp_support_state,
5944				pp_state);
5945		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5946	}
5947
5948	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5949		pp_support_state = PP_STATE_SUPPORT_LS;
5950
5951		if (state == AMD_CG_STATE_UNGATE)
5952			pp_state = 0;
5953		else
5954			pp_state = PP_STATE_LS;
5955
5956		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5957				PP_BLOCK_GFX_RLC,
5958				pp_support_state,
5959				pp_state);
5960		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5961	}
5962
5963	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5964		pp_support_state = PP_STATE_SUPPORT_LS;
5965
5966		if (state == AMD_CG_STATE_UNGATE)
5967			pp_state = 0;
5968		else
5969			pp_state = PP_STATE_LS;
5970		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5971			PP_BLOCK_GFX_CP,
5972			pp_support_state,
5973			pp_state);
5974		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5975	}
5976
5977	return 0;
5978}
5979
5980static int gfx_v8_0_set_clockgating_state(void *handle,
5981					  enum amd_clockgating_state state)
5982{
5983	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5984
5985	if (amdgpu_sriov_vf(adev))
5986		return 0;
5987
5988	switch (adev->asic_type) {
5989	case CHIP_FIJI:
5990	case CHIP_CARRIZO:
5991	case CHIP_STONEY:
5992		gfx_v8_0_update_gfx_clock_gating(adev,
5993						 state == AMD_CG_STATE_GATE);
5994		break;
5995	case CHIP_TONGA:
5996		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5997		break;
5998	case CHIP_POLARIS10:
5999	case CHIP_POLARIS11:
6000	case CHIP_POLARIS12:
6001	case CHIP_VEGAM:
6002		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6003		break;
6004	default:
6005		break;
6006	}
6007	return 0;
6008}
6009
6010static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6011{
6012	return *ring->rptr_cpu_addr;
6013}
6014
6015static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6016{
6017	struct amdgpu_device *adev = ring->adev;
6018
6019	if (ring->use_doorbell)
6020		/* XXX check if swapping is necessary on BE */
6021		return *ring->wptr_cpu_addr;
6022	else
6023		return RREG32(mmCP_RB0_WPTR);
6024}
6025
6026static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6027{
6028	struct amdgpu_device *adev = ring->adev;
6029
6030	if (ring->use_doorbell) {
6031		/* XXX check if swapping is necessary on BE */
6032		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6033		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6034	} else {
6035		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6036		(void)RREG32(mmCP_RB0_WPTR);
6037	}
6038}
6039
6040static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6041{
6042	u32 ref_and_mask, reg_mem_engine;
6043
6044	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6045	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6046		switch (ring->me) {
6047		case 1:
6048			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6049			break;
6050		case 2:
6051			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6052			break;
6053		default:
6054			return;
6055		}
6056		reg_mem_engine = 0;
6057	} else {
6058		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6059		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6060	}
6061
6062	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6063	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6064				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6065				 reg_mem_engine));
6066	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6067	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6068	amdgpu_ring_write(ring, ref_and_mask);
6069	amdgpu_ring_write(ring, ref_and_mask);
6070	amdgpu_ring_write(ring, 0x20); /* poll interval */
6071}
6072
6073static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6074{
6075	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6076	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6077		EVENT_INDEX(4));
6078
6079	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6080	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6081		EVENT_INDEX(0));
6082}
6083
6084static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6085					struct amdgpu_job *job,
6086					struct amdgpu_ib *ib,
6087					uint32_t flags)
6088{
6089	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6090	u32 header, control = 0;
6091
6092	if (ib->flags & AMDGPU_IB_FLAG_CE)
6093		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6094	else
6095		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6096
6097	control |= ib->length_dw | (vmid << 24);
6098
6099	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6100		control |= INDIRECT_BUFFER_PRE_ENB(1);
6101
6102		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6103			gfx_v8_0_ring_emit_de_meta(ring);
6104	}
6105
6106	amdgpu_ring_write(ring, header);
6107	amdgpu_ring_write(ring,
6108#ifdef __BIG_ENDIAN
6109			  (2 << 0) |
6110#endif
6111			  (ib->gpu_addr & 0xFFFFFFFC));
6112	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6113	amdgpu_ring_write(ring, control);
6114}
6115
6116static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6117					  struct amdgpu_job *job,
6118					  struct amdgpu_ib *ib,
6119					  uint32_t flags)
6120{
6121	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6122	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6123
6124	/* Currently, there is a high possibility to get wave ID mismatch
6125	 * between ME and GDS, leading to a hw deadlock, because ME generates
6126	 * different wave IDs than the GDS expects. This situation happens
6127	 * randomly when at least 5 compute pipes use GDS ordered append.
6128	 * The wave IDs generated by ME are also wrong after suspend/resume.
6129	 * Those are probably bugs somewhere else in the kernel driver.
6130	 *
6131	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6132	 * GDS to 0 for this ring (me/pipe).
6133	 */
6134	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6135		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6136		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6137		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6138	}
6139
6140	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6141	amdgpu_ring_write(ring,
6142#ifdef __BIG_ENDIAN
6143				(2 << 0) |
6144#endif
6145				(ib->gpu_addr & 0xFFFFFFFC));
6146	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6147	amdgpu_ring_write(ring, control);
6148}
6149
6150static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6151					 u64 seq, unsigned flags)
6152{
6153	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6154	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6155
6156	/* Workaround for cache flush problems. First send a dummy EOP
6157	 * event down the pipe with seq one below.
6158	 */
6159	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6160	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6161				 EOP_TC_ACTION_EN |
6162				 EOP_TC_WB_ACTION_EN |
6163				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6164				 EVENT_INDEX(5)));
6165	amdgpu_ring_write(ring, addr & 0xfffffffc);
6166	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6167				DATA_SEL(1) | INT_SEL(0));
6168	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6169	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6170
6171	/* Then send the real EOP event down the pipe:
6172	 * EVENT_WRITE_EOP - flush caches, send int */
6173	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6174	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6175				 EOP_TC_ACTION_EN |
6176				 EOP_TC_WB_ACTION_EN |
6177				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6178				 EVENT_INDEX(5)));
6179	amdgpu_ring_write(ring, addr & 0xfffffffc);
6180	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6181			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6182	amdgpu_ring_write(ring, lower_32_bits(seq));
6183	amdgpu_ring_write(ring, upper_32_bits(seq));
6184
6185}
6186
6187static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6188{
6189	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6190	uint32_t seq = ring->fence_drv.sync_seq;
6191	uint64_t addr = ring->fence_drv.gpu_addr;
6192
6193	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6194	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6195				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6196				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6197	amdgpu_ring_write(ring, addr & 0xfffffffc);
6198	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6199	amdgpu_ring_write(ring, seq);
6200	amdgpu_ring_write(ring, 0xffffffff);
6201	amdgpu_ring_write(ring, 4); /* poll interval */
6202}
6203
6204static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6205					unsigned vmid, uint64_t pd_addr)
6206{
6207	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6208
6209	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6210
6211	/* wait for the invalidate to complete */
6212	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6213	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6214				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6215				 WAIT_REG_MEM_ENGINE(0))); /* me */
6216	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6217	amdgpu_ring_write(ring, 0);
6218	amdgpu_ring_write(ring, 0); /* ref */
6219	amdgpu_ring_write(ring, 0); /* mask */
6220	amdgpu_ring_write(ring, 0x20); /* poll interval */
6221
6222	/* compute doesn't have PFP */
6223	if (usepfp) {
6224		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6225		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6226		amdgpu_ring_write(ring, 0x0);
6227	}
6228}
6229
6230static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6231{
6232	return *ring->wptr_cpu_addr;
6233}
6234
6235static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6236{
6237	struct amdgpu_device *adev = ring->adev;
6238
6239	/* XXX check if swapping is necessary on BE */
6240	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6241	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6242}
6243
6244static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6245					     u64 addr, u64 seq,
6246					     unsigned flags)
6247{
6248	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6249	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6250
6251	/* RELEASE_MEM - flush caches, send int */
6252	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6253	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6254				 EOP_TC_ACTION_EN |
6255				 EOP_TC_WB_ACTION_EN |
6256				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6257				 EVENT_INDEX(5)));
6258	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6259	amdgpu_ring_write(ring, addr & 0xfffffffc);
6260	amdgpu_ring_write(ring, upper_32_bits(addr));
6261	amdgpu_ring_write(ring, lower_32_bits(seq));
6262	amdgpu_ring_write(ring, upper_32_bits(seq));
6263}
6264
6265static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6266					 u64 seq, unsigned int flags)
6267{
6268	/* we only allocate 32bit for each seq wb address */
6269	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6270
6271	/* write fence seq to the "addr" */
6272	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6273	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6274				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6275	amdgpu_ring_write(ring, lower_32_bits(addr));
6276	amdgpu_ring_write(ring, upper_32_bits(addr));
6277	amdgpu_ring_write(ring, lower_32_bits(seq));
6278
6279	if (flags & AMDGPU_FENCE_FLAG_INT) {
6280		/* set register to trigger INT */
6281		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6282		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6283					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6284		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6285		amdgpu_ring_write(ring, 0);
6286		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6287	}
6288}
6289
6290static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6291{
6292	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6293	amdgpu_ring_write(ring, 0);
6294}
6295
6296static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6297{
6298	uint32_t dw2 = 0;
6299
6300	if (amdgpu_sriov_vf(ring->adev))
6301		gfx_v8_0_ring_emit_ce_meta(ring);
6302
6303	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6304	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6305		gfx_v8_0_ring_emit_vgt_flush(ring);
6306		/* set load_global_config & load_global_uconfig */
6307		dw2 |= 0x8001;
6308		/* set load_cs_sh_regs */
6309		dw2 |= 0x01000000;
6310		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6311		dw2 |= 0x10002;
6312
6313		/* set load_ce_ram if preamble presented */
6314		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6315			dw2 |= 0x10000000;
6316	} else {
6317		/* still load_ce_ram if this is the first time preamble presented
6318		 * although there is no context switch happens.
6319		 */
6320		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6321			dw2 |= 0x10000000;
6322	}
6323
6324	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6325	amdgpu_ring_write(ring, dw2);
6326	amdgpu_ring_write(ring, 0);
6327}
6328
6329static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
6330						  uint64_t addr)
6331{
6332	unsigned ret;
6333
6334	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6335	amdgpu_ring_write(ring, lower_32_bits(addr));
6336	amdgpu_ring_write(ring, upper_32_bits(addr));
6337	/* discard following DWs if *cond_exec_gpu_addr==0 */
6338	amdgpu_ring_write(ring, 0);
6339	ret = ring->wptr & ring->buf_mask;
6340	/* patch dummy value later */
6341	amdgpu_ring_write(ring, 0);
6342	return ret;
6343}
6344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6345static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6346				    uint32_t reg_val_offs)
6347{
6348	struct amdgpu_device *adev = ring->adev;
6349
6350	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6351	amdgpu_ring_write(ring, 0 |	/* src: register*/
6352				(5 << 8) |	/* dst: memory */
6353				(1 << 20));	/* write confirm */
6354	amdgpu_ring_write(ring, reg);
6355	amdgpu_ring_write(ring, 0);
6356	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6357				reg_val_offs * 4));
6358	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6359				reg_val_offs * 4));
6360}
6361
6362static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6363				  uint32_t val)
6364{
6365	uint32_t cmd;
6366
6367	switch (ring->funcs->type) {
6368	case AMDGPU_RING_TYPE_GFX:
6369		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6370		break;
6371	case AMDGPU_RING_TYPE_KIQ:
6372		cmd = 1 << 16; /* no inc addr */
6373		break;
6374	default:
6375		cmd = WR_CONFIRM;
6376		break;
6377	}
6378
6379	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6380	amdgpu_ring_write(ring, cmd);
6381	amdgpu_ring_write(ring, reg);
6382	amdgpu_ring_write(ring, 0);
6383	amdgpu_ring_write(ring, val);
6384}
6385
6386static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6387{
6388	struct amdgpu_device *adev = ring->adev;
6389	uint32_t value = 0;
6390
6391	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6392	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6393	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6394	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6395	WREG32(mmSQ_CMD, value);
6396}
6397
6398static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6399						 enum amdgpu_interrupt_state state)
6400{
6401	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6402		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6403}
6404
6405static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6406						     int me, int pipe,
6407						     enum amdgpu_interrupt_state state)
6408{
6409	u32 mec_int_cntl, mec_int_cntl_reg;
6410
6411	/*
6412	 * amdgpu controls only the first MEC. That's why this function only
6413	 * handles the setting of interrupts for this specific MEC. All other
6414	 * pipes' interrupts are set by amdkfd.
6415	 */
6416
6417	if (me == 1) {
6418		switch (pipe) {
6419		case 0:
6420			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6421			break;
6422		case 1:
6423			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6424			break;
6425		case 2:
6426			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6427			break;
6428		case 3:
6429			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6430			break;
6431		default:
6432			DRM_DEBUG("invalid pipe %d\n", pipe);
6433			return;
6434		}
6435	} else {
6436		DRM_DEBUG("invalid me %d\n", me);
6437		return;
6438	}
6439
6440	switch (state) {
6441	case AMDGPU_IRQ_STATE_DISABLE:
6442		mec_int_cntl = RREG32(mec_int_cntl_reg);
6443		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6444		WREG32(mec_int_cntl_reg, mec_int_cntl);
6445		break;
6446	case AMDGPU_IRQ_STATE_ENABLE:
6447		mec_int_cntl = RREG32(mec_int_cntl_reg);
6448		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6449		WREG32(mec_int_cntl_reg, mec_int_cntl);
6450		break;
6451	default:
6452		break;
6453	}
6454}
6455
6456static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6457					     struct amdgpu_irq_src *source,
6458					     unsigned type,
6459					     enum amdgpu_interrupt_state state)
6460{
6461	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6462		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6463
6464	return 0;
6465}
6466
6467static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6468					      struct amdgpu_irq_src *source,
6469					      unsigned type,
6470					      enum amdgpu_interrupt_state state)
6471{
6472	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6473		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6474
6475	return 0;
6476}
6477
6478static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6479					    struct amdgpu_irq_src *src,
6480					    unsigned type,
6481					    enum amdgpu_interrupt_state state)
6482{
6483	switch (type) {
6484	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6485		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6486		break;
6487	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6488		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6489		break;
6490	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6491		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6492		break;
6493	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6494		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6495		break;
6496	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6497		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6498		break;
6499	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6500		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6501		break;
6502	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6503		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6504		break;
6505	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6506		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6507		break;
6508	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6509		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6510		break;
6511	default:
6512		break;
6513	}
6514	return 0;
6515}
6516
6517static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6518					 struct amdgpu_irq_src *source,
6519					 unsigned int type,
6520					 enum amdgpu_interrupt_state state)
6521{
6522	int enable_flag;
6523
6524	switch (state) {
6525	case AMDGPU_IRQ_STATE_DISABLE:
6526		enable_flag = 0;
6527		break;
6528
6529	case AMDGPU_IRQ_STATE_ENABLE:
6530		enable_flag = 1;
6531		break;
6532
6533	default:
6534		return -EINVAL;
6535	}
6536
6537	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6538	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6539	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6540	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6541	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6542	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6543		     enable_flag);
6544	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6545		     enable_flag);
6546	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6547		     enable_flag);
6548	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6549		     enable_flag);
6550	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6551		     enable_flag);
6552	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6553		     enable_flag);
6554	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6555		     enable_flag);
6556	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6557		     enable_flag);
6558
6559	return 0;
6560}
6561
6562static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6563				     struct amdgpu_irq_src *source,
6564				     unsigned int type,
6565				     enum amdgpu_interrupt_state state)
6566{
6567	int enable_flag;
6568
6569	switch (state) {
6570	case AMDGPU_IRQ_STATE_DISABLE:
6571		enable_flag = 1;
6572		break;
6573
6574	case AMDGPU_IRQ_STATE_ENABLE:
6575		enable_flag = 0;
6576		break;
6577
6578	default:
6579		return -EINVAL;
6580	}
6581
6582	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6583		     enable_flag);
6584
6585	return 0;
6586}
6587
6588static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6589			    struct amdgpu_irq_src *source,
6590			    struct amdgpu_iv_entry *entry)
6591{
6592	int i;
6593	u8 me_id, pipe_id, queue_id;
6594	struct amdgpu_ring *ring;
6595
6596	DRM_DEBUG("IH: CP EOP\n");
6597	me_id = (entry->ring_id & 0x0c) >> 2;
6598	pipe_id = (entry->ring_id & 0x03) >> 0;
6599	queue_id = (entry->ring_id & 0x70) >> 4;
6600
6601	switch (me_id) {
6602	case 0:
6603		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6604		break;
6605	case 1:
6606	case 2:
6607		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6608			ring = &adev->gfx.compute_ring[i];
6609			/* Per-queue interrupt is supported for MEC starting from VI.
6610			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6611			  */
6612			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6613				amdgpu_fence_process(ring);
6614		}
6615		break;
6616	}
6617	return 0;
6618}
6619
6620static void gfx_v8_0_fault(struct amdgpu_device *adev,
6621			   struct amdgpu_iv_entry *entry)
6622{
6623	u8 me_id, pipe_id, queue_id;
6624	struct amdgpu_ring *ring;
6625	int i;
6626
6627	me_id = (entry->ring_id & 0x0c) >> 2;
6628	pipe_id = (entry->ring_id & 0x03) >> 0;
6629	queue_id = (entry->ring_id & 0x70) >> 4;
6630
6631	switch (me_id) {
6632	case 0:
6633		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6634		break;
6635	case 1:
6636	case 2:
6637		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6638			ring = &adev->gfx.compute_ring[i];
6639			if (ring->me == me_id && ring->pipe == pipe_id &&
6640			    ring->queue == queue_id)
6641				drm_sched_fault(&ring->sched);
6642		}
6643		break;
6644	}
6645}
6646
/*
 * gfx_v8_0_priv_reg_irq - privileged register fault interrupt
 *
 * Logs the illegal register access and forwards the IV entry to
 * gfx_v8_0_fault().  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6655
/*
 * gfx_v8_0_priv_inst_irq - privileged instruction fault interrupt
 *
 * Logs the illegal instruction and forwards the IV entry to
 * gfx_v8_0_fault().  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6664
/*
 * gfx_v8_0_cp_ecc_error_irq - CP EDC/ECC error interrupt
 *
 * Only logs the event; no recovery is attempted here.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Terminate the message so it is not merged with the next log line. */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6672
6673static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6674				  bool from_wq)
6675{
6676	u32 enc, se_id, sh_id, cu_id;
6677	char type[20];
6678	int sq_edc_source = -1;
6679
6680	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6681	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6682
6683	switch (enc) {
6684		case 0:
6685			DRM_INFO("SQ general purpose intr detected:"
6686					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6687					"host_cmd_overflow %d, cmd_timestamp %d,"
6688					"reg_timestamp %d, thread_trace_buff_full %d,"
6689					"wlt %d, thread_trace %d.\n",
6690					se_id,
6691					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6692					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6693					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6694					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6695					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6696					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6697					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6698					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6699					);
6700			break;
6701		case 1:
6702		case 2:
6703
6704			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6705			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6706
6707			/*
6708			 * This function can be called either directly from ISR
6709			 * or from BH in which case we can access SQ_EDC_INFO
6710			 * instance
6711			 */
6712			if (from_wq) {
6713				mutex_lock(&adev->grbm_idx_mutex);
6714				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
6715
6716				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6717
6718				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6719				mutex_unlock(&adev->grbm_idx_mutex);
6720			}
6721
6722			if (enc == 1)
6723				sprintf(type, "instruction intr");
6724			else
6725				sprintf(type, "EDC/ECC error");
6726
6727			DRM_INFO(
6728				"SQ %s detected: "
6729					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6730					"trap %s, sq_ed_info.source %s.\n",
6731					type, se_id, sh_id, cu_id,
6732					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6733					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6734					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6735					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6736					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6737				);
6738			break;
6739		default:
6740			DRM_ERROR("SQ invalid encoding type\n.");
6741	}
6742}
6743
6744static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6745{
6746
6747	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6748	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6749
6750	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6751}
6752
6753static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6754			   struct amdgpu_irq_src *source,
6755			   struct amdgpu_iv_entry *entry)
6756{
6757	unsigned ih_data = entry->src_data[0];
6758
6759	/*
6760	 * Try to submit work so SQ_EDC_INFO can be accessed from
6761	 * BH. If previous work submission hasn't finished yet
6762	 * just print whatever info is possible directly from the ISR.
6763	 */
6764	if (work_pending(&adev->gfx.sq_work.work)) {
6765		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6766	} else {
6767		adev->gfx.sq_work.ih_data = ih_data;
6768		schedule_work(&adev->gfx.sq_work.work);
6769	}
6770
6771	return 0;
6772}
6773
6774static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6775{
6776	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6777	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6778			  PACKET3_TC_ACTION_ENA |
6779			  PACKET3_SH_KCACHE_ACTION_ENA |
6780			  PACKET3_SH_ICACHE_ACTION_ENA |
6781			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6782	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6783	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6784	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6785}
6786
6787static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6788{
6789	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6790	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6791			  PACKET3_TC_ACTION_ENA |
6792			  PACKET3_SH_KCACHE_ACTION_ENA |
6793			  PACKET3_SH_ICACHE_ACTION_ENA |
6794			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6795	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6796	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6797	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6798	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6799	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6800}
6801
6802
6803/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6804#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
6805static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6806					uint32_t pipe, bool enable)
6807{
6808	uint32_t val;
6809	uint32_t wcl_cs_reg;
6810
6811	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6812
6813	switch (pipe) {
6814	case 0:
6815		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6816		break;
6817	case 1:
6818		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6819		break;
6820	case 2:
6821		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6822		break;
6823	case 3:
6824		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6825		break;
6826	default:
6827		DRM_DEBUG("invalid pipe %d\n", pipe);
6828		return;
6829	}
6830
6831	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6832
6833}
6834
6835#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
6836static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6837{
6838	struct amdgpu_device *adev = ring->adev;
6839	uint32_t val;
6840	int i;
6841
6842	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6843	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6844	 * around 25% of gpu resources.
6845	 */
6846	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6847	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6848
6849	/* Restrict waves for normal/low priority compute queues as well
6850	 * to get best QoS for high priority compute jobs.
6851	 *
6852	 * amdgpu controls only 1st ME(0-3 CS pipes).
6853	 */
6854	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6855		if (i != ring->pipe)
6856			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6857
6858	}
6859
6860}
6861
6862static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6863	.name = "gfx_v8_0",
6864	.early_init = gfx_v8_0_early_init,
6865	.late_init = gfx_v8_0_late_init,
6866	.sw_init = gfx_v8_0_sw_init,
6867	.sw_fini = gfx_v8_0_sw_fini,
6868	.hw_init = gfx_v8_0_hw_init,
6869	.hw_fini = gfx_v8_0_hw_fini,
6870	.suspend = gfx_v8_0_suspend,
6871	.resume = gfx_v8_0_resume,
6872	.is_idle = gfx_v8_0_is_idle,
6873	.wait_for_idle = gfx_v8_0_wait_for_idle,
6874	.check_soft_reset = gfx_v8_0_check_soft_reset,
6875	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6876	.soft_reset = gfx_v8_0_soft_reset,
6877	.post_soft_reset = gfx_v8_0_post_soft_reset,
6878	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6879	.set_powergating_state = gfx_v8_0_set_powergating_state,
6880	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6881};
6882
6883static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6884	.type = AMDGPU_RING_TYPE_GFX,
6885	.align_mask = 0xff,
6886	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6887	.support_64bit_ptrs = false,
6888	.get_rptr = gfx_v8_0_ring_get_rptr,
6889	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6890	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6891	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6892		5 +  /* COND_EXEC */
6893		7 +  /* PIPELINE_SYNC */
6894		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6895		12 +  /* FENCE for VM_FLUSH */
6896		20 + /* GDS switch */
6897		4 + /* double SWITCH_BUFFER,
6898		       the first COND_EXEC jump to the place just
6899			   prior to this double SWITCH_BUFFER  */
6900		5 + /* COND_EXEC */
6901		7 +	 /*	HDP_flush */
6902		4 +	 /*	VGT_flush */
6903		14 + /*	CE_META */
6904		31 + /*	DE_META */
6905		3 + /* CNTX_CTRL */
6906		5 + /* HDP_INVL */
6907		12 + 12 + /* FENCE x2 */
6908		2 + /* SWITCH_BUFFER */
6909		5, /* SURFACE_SYNC */
6910	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6911	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6912	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6913	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6914	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6915	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6916	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6917	.test_ring = gfx_v8_0_ring_test_ring,
6918	.test_ib = gfx_v8_0_ring_test_ib,
6919	.insert_nop = amdgpu_ring_insert_nop,
6920	.pad_ib = amdgpu_ring_generic_pad_ib,
6921	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6922	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6923	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
 
6924	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6925	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6926	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
6927};
6928
6929static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6930	.type = AMDGPU_RING_TYPE_COMPUTE,
6931	.align_mask = 0xff,
6932	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6933	.support_64bit_ptrs = false,
6934	.get_rptr = gfx_v8_0_ring_get_rptr,
6935	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6936	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6937	.emit_frame_size =
6938		20 + /* gfx_v8_0_ring_emit_gds_switch */
6939		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6940		5 + /* hdp_invalidate */
6941		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6942		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6943		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6944		7 + /* gfx_v8_0_emit_mem_sync_compute */
6945		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6946		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6947	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6948	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6949	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6950	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6951	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6952	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6953	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6954	.test_ring = gfx_v8_0_ring_test_ring,
6955	.test_ib = gfx_v8_0_ring_test_ib,
6956	.insert_nop = amdgpu_ring_insert_nop,
6957	.pad_ib = amdgpu_ring_generic_pad_ib,
6958	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6959	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
6960	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
6961};
6962
6963static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6964	.type = AMDGPU_RING_TYPE_KIQ,
6965	.align_mask = 0xff,
6966	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6967	.support_64bit_ptrs = false,
6968	.get_rptr = gfx_v8_0_ring_get_rptr,
6969	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6970	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6971	.emit_frame_size =
6972		20 + /* gfx_v8_0_ring_emit_gds_switch */
6973		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6974		5 + /* hdp_invalidate */
6975		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6976		17 + /* gfx_v8_0_ring_emit_vm_flush */
6977		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6978	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6979	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6980	.test_ring = gfx_v8_0_ring_test_ring,
6981	.insert_nop = amdgpu_ring_insert_nop,
6982	.pad_ib = amdgpu_ring_generic_pad_ib,
6983	.emit_rreg = gfx_v8_0_ring_emit_rreg,
6984	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6985};
6986
6987static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6988{
6989	int i;
6990
6991	adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6992
6993	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6994		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6995
6996	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6997		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6998}
6999
7000static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7001	.set = gfx_v8_0_set_eop_interrupt_state,
7002	.process = gfx_v8_0_eop_irq,
7003};
7004
7005static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7006	.set = gfx_v8_0_set_priv_reg_fault_state,
7007	.process = gfx_v8_0_priv_reg_irq,
7008};
7009
7010static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7011	.set = gfx_v8_0_set_priv_inst_fault_state,
7012	.process = gfx_v8_0_priv_inst_irq,
7013};
7014
7015static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7016	.set = gfx_v8_0_set_cp_ecc_int_state,
7017	.process = gfx_v8_0_cp_ecc_error_irq,
7018};
7019
7020static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7021	.set = gfx_v8_0_set_sq_int_state,
7022	.process = gfx_v8_0_sq_irq,
7023};
7024
7025static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7026{
7027	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7028	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7029
7030	adev->gfx.priv_reg_irq.num_types = 1;
7031	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7032
7033	adev->gfx.priv_inst_irq.num_types = 1;
7034	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7035
7036	adev->gfx.cp_ecc_error_irq.num_types = 1;
7037	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7038
7039	adev->gfx.sq_irq.num_types = 1;
7040	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7041}
7042
7043static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7044{
7045	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7046}
7047
7048static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7049{
7050	/* init asci gds info */
7051	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7052	adev->gds.gws_size = 64;
7053	adev->gds.oa_size = 16;
7054	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7055}
7056
7057static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7058						 u32 bitmap)
7059{
7060	u32 data;
7061
7062	if (!bitmap)
7063		return;
7064
7065	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7066	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7067
7068	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7069}
7070
7071static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7072{
7073	u32 data, mask;
7074
7075	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7076		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7077
7078	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7079
7080	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7081}
7082
7083static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7084{
7085	int i, j, k, counter, active_cu_number = 0;
7086	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7087	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7088	unsigned disable_masks[4 * 2];
7089	u32 ao_cu_num;
7090
7091	memset(cu_info, 0, sizeof(*cu_info));
7092
7093	if (adev->flags & AMD_IS_APU)
7094		ao_cu_num = 2;
7095	else
7096		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7097
7098	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7099
7100	mutex_lock(&adev->grbm_idx_mutex);
7101	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7102		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7103			mask = 1;
7104			ao_bitmap = 0;
7105			counter = 0;
7106			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7107			if (i < 4 && j < 2)
7108				gfx_v8_0_set_user_cu_inactive_bitmap(
7109					adev, disable_masks[i * 2 + j]);
7110			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7111			cu_info->bitmap[0][i][j] = bitmap;
7112
7113			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7114				if (bitmap & mask) {
7115					if (counter < ao_cu_num)
7116						ao_bitmap |= mask;
7117					counter ++;
7118				}
7119				mask <<= 1;
7120			}
7121			active_cu_number += counter;
7122			if (i < 2 && j < 2)
7123				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7124			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7125		}
7126	}
7127	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7128	mutex_unlock(&adev->grbm_idx_mutex);
7129
7130	cu_info->number = active_cu_number;
7131	cu_info->ao_cu_mask = ao_cu_mask;
7132	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7133	cu_info->max_waves_per_simd = 10;
7134	cu_info->max_scratch_slots_per_cu = 32;
7135	cu_info->wave_front_size = 64;
7136	cu_info->lds_size = 64;
7137}
7138
7139const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7140{
7141	.type = AMD_IP_BLOCK_TYPE_GFX,
7142	.major = 8,
7143	.minor = 0,
7144	.rev = 0,
7145	.funcs = &gfx_v8_0_ip_funcs,
7146};
7147
7148const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7149{
7150	.type = AMD_IP_BLOCK_TYPE_GFX,
7151	.major = 8,
7152	.minor = 1,
7153	.rev = 0,
7154	.funcs = &gfx_v8_0_ip_funcs,
7155};
7156
7157static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7158{
7159	uint64_t ce_payload_addr;
7160	int cnt_ce;
7161	union {
7162		struct vi_ce_ib_state regular;
7163		struct vi_ce_ib_state_chained_ib chained;
7164	} ce_payload = {};
7165
7166	if (ring->adev->virt.chained_ib_support) {
7167		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7168			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7169		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7170	} else {
7171		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7172			offsetof(struct vi_gfx_meta_data, ce_payload);
7173		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7174	}
7175
7176	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7177	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7178				WRITE_DATA_DST_SEL(8) |
7179				WR_CONFIRM) |
7180				WRITE_DATA_CACHE_POLICY(0));
7181	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7182	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7183	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7184}
7185
7186static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7187{
7188	uint64_t de_payload_addr, gds_addr, csa_addr;
7189	int cnt_de;
7190	union {
7191		struct vi_de_ib_state regular;
7192		struct vi_de_ib_state_chained_ib chained;
7193	} de_payload = {};
7194
7195	csa_addr = amdgpu_csa_vaddr(ring->adev);
7196	gds_addr = csa_addr + 4096;
7197	if (ring->adev->virt.chained_ib_support) {
7198		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7199		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7200		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7201		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7202	} else {
7203		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7204		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7205		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7206		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7207	}
7208
7209	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7210	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7211				WRITE_DATA_DST_SEL(8) |
7212				WR_CONFIRM) |
7213				WRITE_DATA_CACHE_POLICY(0));
7214	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7215	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7216	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7217}
v5.9
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
 
  32#include "vi.h"
  33#include "vi_structs.h"
  34#include "vid.h"
  35#include "amdgpu_ucode.h"
  36#include "amdgpu_atombios.h"
  37#include "atombios_i2c.h"
  38#include "clearstate_vi.h"
  39
  40#include "gmc/gmc_8_2_d.h"
  41#include "gmc/gmc_8_2_sh_mask.h"
  42
  43#include "oss/oss_3_0_d.h"
  44#include "oss/oss_3_0_sh_mask.h"
  45
  46#include "bif/bif_5_0_d.h"
  47#include "bif/bif_5_0_sh_mask.h"
  48#include "gca/gfx_8_0_d.h"
  49#include "gca/gfx_8_0_enum.h"
  50#include "gca/gfx_8_0_sh_mask.h"
  51
  52#include "dce/dce_10_0_d.h"
  53#include "dce/dce_10_0_sh_mask.h"
  54
  55#include "smu/smu_7_1_3_d.h"
  56
  57#include "ivsrcid/ivsrcid_vislands30.h"
  58
  59#define GFX8_NUM_GFX_RINGS     1
  60#define GFX8_MEC_HPD_SIZE 4096
  61
  62#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  63#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  65#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  66
  67#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  68#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  69#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  70#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  71#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  72#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  73#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  74#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  75#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  76
  77#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  78#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  79#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  80#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  81#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  82#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  83
  84/* BPM SERDES CMD */
  85#define SET_BPM_SERDES_CMD    1
  86#define CLE_BPM_SERDES_CMD    0
  87
  88/* BPM Register Address*/
  89enum {
  90	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  91	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  92	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  93	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  94	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  95	BPM_REG_FGCG_MAX
  96};
  97
  98#define RLC_FormatDirectRegListLength        14
  99
 100MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 101MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 102MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 103MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 104MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 105MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 106
 107MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 108MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 109MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 110MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 111MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 112
 113MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 114MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 115MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 116MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 117MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 118MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 119
 120MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 121MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 122MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 123MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 124MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 125
 126MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 127MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 128MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 129MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 130MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 131MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 132
 133MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 134MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 135MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 144
 145MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 146MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 147MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 148MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 149MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 150MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 151MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 152MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 153MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 155MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 156
 157MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 158MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 163MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 164MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 165MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 166MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 167MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 168
 169MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 170MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 171MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 172MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 173MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 174MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 175
 176static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 177{
 178	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 179	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 180	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 181	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 182	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 183	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 184	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 185	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 186	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 187	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 188	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 189	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 190	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 191	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 192	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 193	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 194};
 195
 196static const u32 golden_settings_tonga_a11[] =
 197{
 198	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 199	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 200	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 201	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 202	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 203	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 204	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 205	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 206	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 207	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 208	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 209	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 210	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 211	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 212	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 213	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 214};
 215
 216static const u32 tonga_golden_common_all[] =
 217{
 218	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 219	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 220	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 221	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 222	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 223	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 224	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 225	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 226};
 227
 228static const u32 tonga_mgcg_cgcg_init[] =
 229{
 230	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 231	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 232	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 233	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 234	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 235	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 236	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 237	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 238	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 239	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 240	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 241	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 242	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 243	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 244	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 245	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 246	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 247	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 248	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 249	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 250	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 251	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 252	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 253	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 254	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 255	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 256	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 257	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 258	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 259	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 260	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 261	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 262	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 263	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 264	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 265	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 266	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 267	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 268	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 269	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 270	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 271	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 272	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 273	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 274	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 275	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 276	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 277	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 278	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 279	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 280	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 281	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 282	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 283	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 284	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 285	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 286	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 287	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 288	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 289	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 290	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 291	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 292	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 293	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 294	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 295	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 296	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 297	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 298	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 299	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 300	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 301	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 302	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 303	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 304	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 305};
 306
 307static const u32 golden_settings_vegam_a11[] =
 308{
 309	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 310	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
 311	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 312	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 313	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 314	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 315	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
 316	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
 317	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 318	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 319	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 320	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 321	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 322	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 323	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 324	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
 325	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 326};
 327
 328static const u32 vegam_golden_common_all[] =
 329{
 330	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 331	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 332	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 333	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 334	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 335	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 336};
 337
 338static const u32 golden_settings_polaris11_a11[] =
 339{
 340	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 341	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 342	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 343	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 344	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 345	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 346	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 347	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 348	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 349	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 350	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 351	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 352	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 353	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 354	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 355	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 356	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 357};
 358
 359static const u32 polaris11_golden_common_all[] =
 360{
 361	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 362	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 363	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 364	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 365	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 366	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 367};
 368
 369static const u32 golden_settings_polaris10_a11[] =
 370{
 371	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 372	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 373	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 374	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 375	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 376	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 377	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 378	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 379	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 380	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 381	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 382	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 383	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 384	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 385	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 386	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 387	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 388};
 389
 390static const u32 polaris10_golden_common_all[] =
 391{
 392	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 393	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 394	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 395	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 396	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 397	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 398	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 399	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 400};
 401
 402static const u32 fiji_golden_common_all[] =
 403{
 404	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 405	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 406	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 407	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 408	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 409	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 410	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 411	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 412	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 413	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 414};
 415
 416static const u32 golden_settings_fiji_a10[] =
 417{
 418	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 419	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 420	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 421	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 422	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 423	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 424	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 425	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 426	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 427	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 428	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 429};
 430
 431static const u32 fiji_mgcg_cgcg_init[] =
 432{
 433	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 434	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 435	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 436	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 437	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 438	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 439	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 440	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 441	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 442	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 443	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 444	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 445	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 446	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 447	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 451	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 452	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 453	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 454	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 455	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 456	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 458	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 459	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 460	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 461	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 462	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 463	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 464	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 465	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 466	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 467	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 468};
 469
 470static const u32 golden_settings_iceland_a11[] =
 471{
 472	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 473	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 474	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 475	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 476	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 477	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 478	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 479	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 480	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 481	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 482	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 483	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 484	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 485	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 486	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 487	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 488};
 489
 490static const u32 iceland_golden_common_all[] =
 491{
 492	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 493	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 494	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 495	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 496	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 497	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 498	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 499	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 500};
 501
 502static const u32 iceland_mgcg_cgcg_init[] =
 503{
 504	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 505	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 506	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 507	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 508	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 509	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 510	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 511	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 512	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 513	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 514	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 515	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 516	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 517	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 518	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 519	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 520	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 521	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 522	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 523	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 524	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 525	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 526	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 527	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 528	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 529	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 530	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 531	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 532	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 533	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 534	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 535	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 536	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 537	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 538	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 539	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 540	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 541	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 542	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 543	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 544	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 545	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 546	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 547	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 548	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 549	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 550	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 551	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 552	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 553	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 554	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 555	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 556	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 557	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 558	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 559	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 560	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 561	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 562	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 563	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 564	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 565	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 566	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 567	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 568};
 569
 570static const u32 cz_golden_settings_a11[] =
 571{
 572	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 573	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 574	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 575	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 576	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 577	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 578	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 579	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 580	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 581	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 582	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 583	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 584};
 585
 586static const u32 cz_golden_common_all[] =
 587{
 588	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 589	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 590	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 591	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 592	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 593	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 594	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 595	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 596};
 597
 598static const u32 cz_mgcg_cgcg_init[] =
 599{
 600	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 601	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 602	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 603	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 604	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 605	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 606	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 607	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 608	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 609	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 610	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 611	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 612	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 613	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 614	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 615	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 616	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 617	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 618	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 619	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 620	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 621	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 622	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 623	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 624	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 625	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 626	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 627	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 628	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 630	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 631	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 632	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 633	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 634	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 635	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 636	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 637	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 638	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 639	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 640	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 641	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 642	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 643	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 644	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 645	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 646	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 647	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 648	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 649	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 650	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 651	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 652	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 653	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 654	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 655	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 656	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 657	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 658	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 659	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 660	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 661	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 662	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 663	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 664	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 665	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 666	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 667	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 668	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 669	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 670	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 671	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 672	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 673	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 674	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 675};
 676
 677static const u32 stoney_golden_settings_a11[] =
 678{
 679	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 680	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 681	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 682	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 683	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 684	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 685	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 686	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 687	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 688	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 689};
 690
 691static const u32 stoney_golden_common_all[] =
 692{
 693	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 694	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 695	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 696	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 697	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 698	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 699	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 700	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 701};
 702
 703static const u32 stoney_mgcg_cgcg_init[] =
 704{
 705	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 706	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 707	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 708	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 709	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 710};
 711
 712
 713static const char * const sq_edc_source_names[] = {
 714	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
 715	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
 716	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
 717	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
 718	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
 719	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
 720	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
 721};
 722
 723static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 724static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 725static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 726static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 727static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 728static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 729static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 730static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 731
 
 
 
 732static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 733{
 
 
 734	switch (adev->asic_type) {
 735	case CHIP_TOPAZ:
 736		amdgpu_device_program_register_sequence(adev,
 737							iceland_mgcg_cgcg_init,
 738							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 739		amdgpu_device_program_register_sequence(adev,
 740							golden_settings_iceland_a11,
 741							ARRAY_SIZE(golden_settings_iceland_a11));
 742		amdgpu_device_program_register_sequence(adev,
 743							iceland_golden_common_all,
 744							ARRAY_SIZE(iceland_golden_common_all));
 745		break;
 746	case CHIP_FIJI:
 747		amdgpu_device_program_register_sequence(adev,
 748							fiji_mgcg_cgcg_init,
 749							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 750		amdgpu_device_program_register_sequence(adev,
 751							golden_settings_fiji_a10,
 752							ARRAY_SIZE(golden_settings_fiji_a10));
 753		amdgpu_device_program_register_sequence(adev,
 754							fiji_golden_common_all,
 755							ARRAY_SIZE(fiji_golden_common_all));
 756		break;
 757
 758	case CHIP_TONGA:
 759		amdgpu_device_program_register_sequence(adev,
 760							tonga_mgcg_cgcg_init,
 761							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 762		amdgpu_device_program_register_sequence(adev,
 763							golden_settings_tonga_a11,
 764							ARRAY_SIZE(golden_settings_tonga_a11));
 765		amdgpu_device_program_register_sequence(adev,
 766							tonga_golden_common_all,
 767							ARRAY_SIZE(tonga_golden_common_all));
 768		break;
 769	case CHIP_VEGAM:
 770		amdgpu_device_program_register_sequence(adev,
 771							golden_settings_vegam_a11,
 772							ARRAY_SIZE(golden_settings_vegam_a11));
 773		amdgpu_device_program_register_sequence(adev,
 774							vegam_golden_common_all,
 775							ARRAY_SIZE(vegam_golden_common_all));
 776		break;
 777	case CHIP_POLARIS11:
 778	case CHIP_POLARIS12:
 779		amdgpu_device_program_register_sequence(adev,
 780							golden_settings_polaris11_a11,
 781							ARRAY_SIZE(golden_settings_polaris11_a11));
 782		amdgpu_device_program_register_sequence(adev,
 783							polaris11_golden_common_all,
 784							ARRAY_SIZE(polaris11_golden_common_all));
 785		break;
 786	case CHIP_POLARIS10:
 787		amdgpu_device_program_register_sequence(adev,
 788							golden_settings_polaris10_a11,
 789							ARRAY_SIZE(golden_settings_polaris10_a11));
 790		amdgpu_device_program_register_sequence(adev,
 791							polaris10_golden_common_all,
 792							ARRAY_SIZE(polaris10_golden_common_all));
 793		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 794		if (adev->pdev->revision == 0xc7 &&
 
 
 
 795		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 796		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 797		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
 798			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 799			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 800		}
 801		break;
 802	case CHIP_CARRIZO:
 803		amdgpu_device_program_register_sequence(adev,
 804							cz_mgcg_cgcg_init,
 805							ARRAY_SIZE(cz_mgcg_cgcg_init));
 806		amdgpu_device_program_register_sequence(adev,
 807							cz_golden_settings_a11,
 808							ARRAY_SIZE(cz_golden_settings_a11));
 809		amdgpu_device_program_register_sequence(adev,
 810							cz_golden_common_all,
 811							ARRAY_SIZE(cz_golden_common_all));
 812		break;
 813	case CHIP_STONEY:
 814		amdgpu_device_program_register_sequence(adev,
 815							stoney_mgcg_cgcg_init,
 816							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 817		amdgpu_device_program_register_sequence(adev,
 818							stoney_golden_settings_a11,
 819							ARRAY_SIZE(stoney_golden_settings_a11));
 820		amdgpu_device_program_register_sequence(adev,
 821							stoney_golden_common_all,
 822							ARRAY_SIZE(stoney_golden_common_all));
 823		break;
 824	default:
 825		break;
 826	}
 827}
 828
 829static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 830{
 831	adev->gfx.scratch.num_reg = 8;
 832	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 833	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 834}
 835
 836static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 837{
 838	struct amdgpu_device *adev = ring->adev;
 839	uint32_t scratch;
 840	uint32_t tmp = 0;
 841	unsigned i;
 842	int r;
 843
 844	r = amdgpu_gfx_scratch_get(adev, &scratch);
 
 845	if (r)
 846		return r;
 847
 848	WREG32(scratch, 0xCAFEDEAD);
 849	r = amdgpu_ring_alloc(ring, 3);
 850	if (r)
 851		goto error_free_scratch;
 852
 853	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 854	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 855	amdgpu_ring_write(ring, 0xDEADBEEF);
 856	amdgpu_ring_commit(ring);
 857
 858	for (i = 0; i < adev->usec_timeout; i++) {
 859		tmp = RREG32(scratch);
 860		if (tmp == 0xDEADBEEF)
 861			break;
 862		udelay(1);
 863	}
 864
 865	if (i >= adev->usec_timeout)
 866		r = -ETIMEDOUT;
 867
 868error_free_scratch:
 869	amdgpu_gfx_scratch_free(adev, scratch);
 870	return r;
 871}
 872
 873static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 874{
 875	struct amdgpu_device *adev = ring->adev;
 876	struct amdgpu_ib ib;
 877	struct dma_fence *f = NULL;
 878
 879	unsigned int index;
 880	uint64_t gpu_addr;
 881	uint32_t tmp;
 882	long r;
 883
 884	r = amdgpu_device_wb_get(adev, &index);
 885	if (r)
 886		return r;
 887
 888	gpu_addr = adev->wb.gpu_addr + (index * 4);
 889	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 890	memset(&ib, 0, sizeof(ib));
 891	r = amdgpu_ib_get(adev, NULL, 16,
 892					AMDGPU_IB_POOL_DIRECT, &ib);
 893	if (r)
 894		goto err1;
 895
 896	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 897	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 898	ib.ptr[2] = lower_32_bits(gpu_addr);
 899	ib.ptr[3] = upper_32_bits(gpu_addr);
 900	ib.ptr[4] = 0xDEADBEEF;
 901	ib.length_dw = 5;
 902
 903	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 904	if (r)
 905		goto err2;
 906
 907	r = dma_fence_wait_timeout(f, false, timeout);
 908	if (r == 0) {
 909		r = -ETIMEDOUT;
 910		goto err2;
 911	} else if (r < 0) {
 912		goto err2;
 913	}
 914
 915	tmp = adev->wb.wb[index];
 916	if (tmp == 0xDEADBEEF)
 917		r = 0;
 918	else
 919		r = -EINVAL;
 920
 921err2:
 922	amdgpu_ib_free(adev, &ib, NULL);
 923	dma_fence_put(f);
 924err1:
 925	amdgpu_device_wb_free(adev, index);
 926	return r;
 927}
 928
 929
 930static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 931{
 932	release_firmware(adev->gfx.pfp_fw);
 933	adev->gfx.pfp_fw = NULL;
 934	release_firmware(adev->gfx.me_fw);
 935	adev->gfx.me_fw = NULL;
 936	release_firmware(adev->gfx.ce_fw);
 937	adev->gfx.ce_fw = NULL;
 938	release_firmware(adev->gfx.rlc_fw);
 939	adev->gfx.rlc_fw = NULL;
 940	release_firmware(adev->gfx.mec_fw);
 941	adev->gfx.mec_fw = NULL;
 942	if ((adev->asic_type != CHIP_STONEY) &&
 943	    (adev->asic_type != CHIP_TOPAZ))
 944		release_firmware(adev->gfx.mec2_fw);
 945	adev->gfx.mec2_fw = NULL;
 946
 947	kfree(adev->gfx.rlc.register_list_format);
 948}
 949
 950static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 951{
 952	const char *chip_name;
 953	char fw_name[30];
 954	int err;
 955	struct amdgpu_firmware_info *info = NULL;
 956	const struct common_firmware_header *header = NULL;
 957	const struct gfx_firmware_header_v1_0 *cp_hdr;
 958	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 959	unsigned int *tmp = NULL, i;
 960
 961	DRM_DEBUG("\n");
 962
 963	switch (adev->asic_type) {
 964	case CHIP_TOPAZ:
 965		chip_name = "topaz";
 966		break;
 967	case CHIP_TONGA:
 968		chip_name = "tonga";
 969		break;
 970	case CHIP_CARRIZO:
 971		chip_name = "carrizo";
 972		break;
 973	case CHIP_FIJI:
 974		chip_name = "fiji";
 975		break;
 976	case CHIP_STONEY:
 977		chip_name = "stoney";
 978		break;
 979	case CHIP_POLARIS10:
 980		chip_name = "polaris10";
 981		break;
 982	case CHIP_POLARIS11:
 983		chip_name = "polaris11";
 984		break;
 985	case CHIP_POLARIS12:
 986		chip_name = "polaris12";
 987		break;
 988	case CHIP_VEGAM:
 989		chip_name = "vegam";
 990		break;
 991	default:
 992		BUG();
 993	}
 994
 995	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 996		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 997		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 998		if (err == -ENOENT) {
 999			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1000			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1001		}
1002	} else {
1003		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1004		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1005	}
1006	if (err)
1007		goto out;
1008	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1009	if (err)
1010		goto out;
1011	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1012	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1013	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1014
1015	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1016		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1017		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1018		if (err == -ENOENT) {
1019			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1020			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1021		}
1022	} else {
1023		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1024		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1025	}
1026	if (err)
1027		goto out;
1028	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1029	if (err)
1030		goto out;
1031	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1032	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1033
1034	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1035
1036	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1037		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1038		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1039		if (err == -ENOENT) {
1040			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1041			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1042		}
1043	} else {
1044		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1045		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1046	}
1047	if (err)
1048		goto out;
1049	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1050	if (err)
1051		goto out;
1052	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1053	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1054	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1055
1056	/*
1057	 * Support for MCBP/Virtualization in combination with chained IBs is
1058	 * formal released on feature version #46
1059	 */
1060	if (adev->gfx.ce_feature_version >= 46 &&
1061	    adev->gfx.pfp_feature_version >= 46) {
1062		adev->virt.chained_ib_support = true;
1063		DRM_INFO("Chained IB support enabled!\n");
1064	} else
1065		adev->virt.chained_ib_support = false;
1066
1067	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1068	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1069	if (err)
1070		goto out;
1071	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1072	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1073	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1074	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1075
1076	adev->gfx.rlc.save_and_restore_offset =
1077			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1078	adev->gfx.rlc.clear_state_descriptor_offset =
1079			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1080	adev->gfx.rlc.avail_scratch_ram_locations =
1081			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1082	adev->gfx.rlc.reg_restore_list_size =
1083			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1084	adev->gfx.rlc.reg_list_format_start =
1085			le32_to_cpu(rlc_hdr->reg_list_format_start);
1086	adev->gfx.rlc.reg_list_format_separate_start =
1087			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1088	adev->gfx.rlc.starting_offsets_start =
1089			le32_to_cpu(rlc_hdr->starting_offsets_start);
1090	adev->gfx.rlc.reg_list_format_size_bytes =
1091			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1092	adev->gfx.rlc.reg_list_size_bytes =
1093			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1094
1095	adev->gfx.rlc.register_list_format =
1096			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1097					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1098
1099	if (!adev->gfx.rlc.register_list_format) {
1100		err = -ENOMEM;
1101		goto out;
1102	}
1103
1104	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1105			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1106	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1107		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1108
1109	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1110
1111	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1112			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1113	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1114		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1115
1116	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1117		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1118		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1119		if (err == -ENOENT) {
1120			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1121			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1122		}
1123	} else {
1124		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1125		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1126	}
1127	if (err)
1128		goto out;
1129	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1130	if (err)
1131		goto out;
1132	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1133	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1134	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1135
1136	if ((adev->asic_type != CHIP_STONEY) &&
1137	    (adev->asic_type != CHIP_TOPAZ)) {
1138		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1139			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1140			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1141			if (err == -ENOENT) {
1142				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1143				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1144			}
1145		} else {
1146			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1147			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1148		}
1149		if (!err) {
1150			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1151			if (err)
1152				goto out;
1153			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1154				adev->gfx.mec2_fw->data;
1155			adev->gfx.mec2_fw_version =
1156				le32_to_cpu(cp_hdr->header.ucode_version);
1157			adev->gfx.mec2_feature_version =
1158				le32_to_cpu(cp_hdr->ucode_feature_version);
1159		} else {
1160			err = 0;
1161			adev->gfx.mec2_fw = NULL;
1162		}
1163	}
1164
1165	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1166	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1167	info->fw = adev->gfx.pfp_fw;
1168	header = (const struct common_firmware_header *)info->fw->data;
1169	adev->firmware.fw_size +=
1170		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1171
1172	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1173	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1174	info->fw = adev->gfx.me_fw;
1175	header = (const struct common_firmware_header *)info->fw->data;
1176	adev->firmware.fw_size +=
1177		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1178
1179	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1180	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1181	info->fw = adev->gfx.ce_fw;
1182	header = (const struct common_firmware_header *)info->fw->data;
1183	adev->firmware.fw_size +=
1184		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1185
1186	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188	info->fw = adev->gfx.rlc_fw;
1189	header = (const struct common_firmware_header *)info->fw->data;
1190	adev->firmware.fw_size +=
1191		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192
1193	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1194	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1195	info->fw = adev->gfx.mec_fw;
1196	header = (const struct common_firmware_header *)info->fw->data;
1197	adev->firmware.fw_size +=
1198		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1199
1200	/* we need account JT in */
1201	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1202	adev->firmware.fw_size +=
1203		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1204
1205	if (amdgpu_sriov_vf(adev)) {
1206		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1207		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1208		info->fw = adev->gfx.mec_fw;
1209		adev->firmware.fw_size +=
1210			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1211	}
1212
1213	if (adev->gfx.mec2_fw) {
1214		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1215		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1216		info->fw = adev->gfx.mec2_fw;
1217		header = (const struct common_firmware_header *)info->fw->data;
1218		adev->firmware.fw_size +=
1219			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1220	}
1221
1222out:
1223	if (err) {
1224		dev_err(adev->dev,
1225			"gfx8: Failed to load firmware \"%s\"\n",
1226			fw_name);
1227		release_firmware(adev->gfx.pfp_fw);
1228		adev->gfx.pfp_fw = NULL;
1229		release_firmware(adev->gfx.me_fw);
1230		adev->gfx.me_fw = NULL;
1231		release_firmware(adev->gfx.ce_fw);
1232		adev->gfx.ce_fw = NULL;
1233		release_firmware(adev->gfx.rlc_fw);
1234		adev->gfx.rlc_fw = NULL;
1235		release_firmware(adev->gfx.mec_fw);
1236		adev->gfx.mec_fw = NULL;
1237		release_firmware(adev->gfx.mec2_fw);
1238		adev->gfx.mec2_fw = NULL;
1239	}
1240	return err;
1241}
1242
1243static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1244				    volatile u32 *buffer)
1245{
1246	u32 count = 0, i;
1247	const struct cs_section_def *sect = NULL;
1248	const struct cs_extent_def *ext = NULL;
1249
1250	if (adev->gfx.rlc.cs_data == NULL)
1251		return;
1252	if (buffer == NULL)
1253		return;
1254
1255	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1256	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1257
1258	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1259	buffer[count++] = cpu_to_le32(0x80000000);
1260	buffer[count++] = cpu_to_le32(0x80000000);
1261
1262	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1263		for (ext = sect->section; ext->extent != NULL; ++ext) {
1264			if (sect->id == SECT_CONTEXT) {
1265				buffer[count++] =
1266					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1267				buffer[count++] = cpu_to_le32(ext->reg_index -
1268						PACKET3_SET_CONTEXT_REG_START);
1269				for (i = 0; i < ext->reg_count; i++)
1270					buffer[count++] = cpu_to_le32(ext->extent[i]);
1271			} else {
1272				return;
1273			}
1274		}
1275	}
1276
1277	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1278	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1279			PACKET3_SET_CONTEXT_REG_START);
1280	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1281	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1282
1283	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1284	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1285
1286	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1287	buffer[count++] = cpu_to_le32(0);
1288}
1289
1290static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1291{
1292	if (adev->asic_type == CHIP_CARRIZO)
1293		return 5;
1294	else
1295		return 4;
1296}
1297
1298static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1299{
1300	const struct cs_section_def *cs_data;
1301	int r;
1302
1303	adev->gfx.rlc.cs_data = vi_cs_data;
1304
1305	cs_data = adev->gfx.rlc.cs_data;
1306
1307	if (cs_data) {
1308		/* init clear state block */
1309		r = amdgpu_gfx_rlc_init_csb(adev);
1310		if (r)
1311			return r;
1312	}
1313
1314	if ((adev->asic_type == CHIP_CARRIZO) ||
1315	    (adev->asic_type == CHIP_STONEY)) {
1316		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1317		r = amdgpu_gfx_rlc_init_cpt(adev);
1318		if (r)
1319			return r;
1320	}
1321
1322	/* init spm vmid with 0xf */
1323	if (adev->gfx.rlc.funcs->update_spm_vmid)
1324		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1325
1326	return 0;
1327}
1328
1329static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1330{
1331	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1332}
1333
1334static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1335{
1336	int r;
1337	u32 *hpd;
1338	size_t mec_hpd_size;
1339
1340	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1341
1342	/* take ownership of the relevant compute queues */
1343	amdgpu_gfx_compute_queue_acquire(adev);
1344
1345	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
 
 
 
 
 
 
 
 
 
 
 
1346
1347	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1348				      AMDGPU_GEM_DOMAIN_VRAM,
1349				      &adev->gfx.mec.hpd_eop_obj,
1350				      &adev->gfx.mec.hpd_eop_gpu_addr,
1351				      (void **)&hpd);
1352	if (r) {
1353		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1354		return r;
1355	}
1356
1357	memset(hpd, 0, mec_hpd_size);
1358
1359	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1360	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1361
1362	return 0;
1363}
1364
1365static const u32 vgpr_init_compute_shader[] =
1366{
1367	0x7e000209, 0x7e020208,
1368	0x7e040207, 0x7e060206,
1369	0x7e080205, 0x7e0a0204,
1370	0x7e0c0203, 0x7e0e0202,
1371	0x7e100201, 0x7e120200,
1372	0x7e140209, 0x7e160208,
1373	0x7e180207, 0x7e1a0206,
1374	0x7e1c0205, 0x7e1e0204,
1375	0x7e200203, 0x7e220202,
1376	0x7e240201, 0x7e260200,
1377	0x7e280209, 0x7e2a0208,
1378	0x7e2c0207, 0x7e2e0206,
1379	0x7e300205, 0x7e320204,
1380	0x7e340203, 0x7e360202,
1381	0x7e380201, 0x7e3a0200,
1382	0x7e3c0209, 0x7e3e0208,
1383	0x7e400207, 0x7e420206,
1384	0x7e440205, 0x7e460204,
1385	0x7e480203, 0x7e4a0202,
1386	0x7e4c0201, 0x7e4e0200,
1387	0x7e500209, 0x7e520208,
1388	0x7e540207, 0x7e560206,
1389	0x7e580205, 0x7e5a0204,
1390	0x7e5c0203, 0x7e5e0202,
1391	0x7e600201, 0x7e620200,
1392	0x7e640209, 0x7e660208,
1393	0x7e680207, 0x7e6a0206,
1394	0x7e6c0205, 0x7e6e0204,
1395	0x7e700203, 0x7e720202,
1396	0x7e740201, 0x7e760200,
1397	0x7e780209, 0x7e7a0208,
1398	0x7e7c0207, 0x7e7e0206,
1399	0xbf8a0000, 0xbf810000,
1400};
1401
1402static const u32 sgpr_init_compute_shader[] =
1403{
1404	0xbe8a0100, 0xbe8c0102,
1405	0xbe8e0104, 0xbe900106,
1406	0xbe920108, 0xbe940100,
1407	0xbe960102, 0xbe980104,
1408	0xbe9a0106, 0xbe9c0108,
1409	0xbe9e0100, 0xbea00102,
1410	0xbea20104, 0xbea40106,
1411	0xbea60108, 0xbea80100,
1412	0xbeaa0102, 0xbeac0104,
1413	0xbeae0106, 0xbeb00108,
1414	0xbeb20100, 0xbeb40102,
1415	0xbeb60104, 0xbeb80106,
1416	0xbeba0108, 0xbebc0100,
1417	0xbebe0102, 0xbec00104,
1418	0xbec20106, 0xbec40108,
1419	0xbec60100, 0xbec80102,
1420	0xbee60004, 0xbee70005,
1421	0xbeea0006, 0xbeeb0007,
1422	0xbee80008, 0xbee90009,
1423	0xbefc0000, 0xbf8a0000,
1424	0xbf810000, 0x00000000,
1425};
1426
1427static const u32 vgpr_init_regs[] =
1428{
1429	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1430	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1431	mmCOMPUTE_NUM_THREAD_X, 256*4,
1432	mmCOMPUTE_NUM_THREAD_Y, 1,
1433	mmCOMPUTE_NUM_THREAD_Z, 1,
1434	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1435	mmCOMPUTE_PGM_RSRC2, 20,
1436	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1437	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1438	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1439	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1440	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1441	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1442	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1443	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1444	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1445	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1446};
1447
1448static const u32 sgpr1_init_regs[] =
1449{
1450	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1451	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1452	mmCOMPUTE_NUM_THREAD_X, 256*5,
1453	mmCOMPUTE_NUM_THREAD_Y, 1,
1454	mmCOMPUTE_NUM_THREAD_Z, 1,
1455	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1456	mmCOMPUTE_PGM_RSRC2, 20,
1457	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1458	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1459	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1460	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1461	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1462	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1463	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1464	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1465	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1466	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1467};
1468
1469static const u32 sgpr2_init_regs[] =
1470{
1471	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1472	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1473	mmCOMPUTE_NUM_THREAD_X, 256*5,
1474	mmCOMPUTE_NUM_THREAD_Y, 1,
1475	mmCOMPUTE_NUM_THREAD_Z, 1,
1476	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1477	mmCOMPUTE_PGM_RSRC2, 20,
1478	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1479	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1480	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1481	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1482	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1483	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1484	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1485	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1486	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1487	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1488};
1489
1490static const u32 sec_ded_counter_registers[] =
1491{
1492	mmCPC_EDC_ATC_CNT,
1493	mmCPC_EDC_SCRATCH_CNT,
1494	mmCPC_EDC_UCODE_CNT,
1495	mmCPF_EDC_ATC_CNT,
1496	mmCPF_EDC_ROQ_CNT,
1497	mmCPF_EDC_TAG_CNT,
1498	mmCPG_EDC_ATC_CNT,
1499	mmCPG_EDC_DMA_CNT,
1500	mmCPG_EDC_TAG_CNT,
1501	mmDC_EDC_CSINVOC_CNT,
1502	mmDC_EDC_RESTORE_CNT,
1503	mmDC_EDC_STATE_CNT,
1504	mmGDS_EDC_CNT,
1505	mmGDS_EDC_GRBM_CNT,
1506	mmGDS_EDC_OA_DED,
1507	mmSPI_EDC_CNT,
1508	mmSQC_ATC_EDC_GATCL1_CNT,
1509	mmSQC_EDC_CNT,
1510	mmSQ_EDC_DED_CNT,
1511	mmSQ_EDC_INFO,
1512	mmSQ_EDC_SEC_CNT,
1513	mmTCC_EDC_CNT,
1514	mmTCP_ATC_EDC_GATCL1_CNT,
1515	mmTCP_EDC_CNT,
1516	mmTD_EDC_CNT
1517};
1518
1519static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1520{
1521	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1522	struct amdgpu_ib ib;
1523	struct dma_fence *f = NULL;
1524	int r, i;
1525	u32 tmp;
1526	unsigned total_size, vgpr_offset, sgpr_offset;
1527	u64 gpu_addr;
1528
1529	/* only supported on CZ */
1530	if (adev->asic_type != CHIP_CARRIZO)
1531		return 0;
1532
1533	/* bail if the compute ring is not ready */
1534	if (!ring->sched.ready)
1535		return 0;
1536
1537	tmp = RREG32(mmGB_EDC_MODE);
1538	WREG32(mmGB_EDC_MODE, 0);
1539
1540	total_size =
1541		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1542	total_size +=
1543		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1544	total_size +=
1545		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1546	total_size = ALIGN(total_size, 256);
1547	vgpr_offset = total_size;
1548	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1549	sgpr_offset = total_size;
1550	total_size += sizeof(sgpr_init_compute_shader);
1551
1552	/* allocate an indirect buffer to put the commands in */
1553	memset(&ib, 0, sizeof(ib));
1554	r = amdgpu_ib_get(adev, NULL, total_size,
1555					AMDGPU_IB_POOL_DIRECT, &ib);
1556	if (r) {
1557		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1558		return r;
1559	}
1560
1561	/* load the compute shaders */
1562	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1563		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1564
1565	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1566		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1567
1568	/* init the ib length to 0 */
1569	ib.length_dw = 0;
1570
1571	/* VGPR */
1572	/* write the register state for the compute dispatch */
1573	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1574		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1575		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1576		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1577	}
1578	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1579	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1580	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1581	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1582	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1583	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1584
1585	/* write dispatch packet */
1586	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1587	ib.ptr[ib.length_dw++] = 8; /* x */
1588	ib.ptr[ib.length_dw++] = 1; /* y */
1589	ib.ptr[ib.length_dw++] = 1; /* z */
1590	ib.ptr[ib.length_dw++] =
1591		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1592
1593	/* write CS partial flush packet */
1594	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1595	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1596
1597	/* SGPR1 */
1598	/* write the register state for the compute dispatch */
1599	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1600		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1601		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1602		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1603	}
1604	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1605	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1606	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1607	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1608	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1609	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1610
1611	/* write dispatch packet */
1612	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1613	ib.ptr[ib.length_dw++] = 8; /* x */
1614	ib.ptr[ib.length_dw++] = 1; /* y */
1615	ib.ptr[ib.length_dw++] = 1; /* z */
1616	ib.ptr[ib.length_dw++] =
1617		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1618
1619	/* write CS partial flush packet */
1620	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1621	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1622
1623	/* SGPR2 */
1624	/* write the register state for the compute dispatch */
1625	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1626		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1627		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1628		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1629	}
1630	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1631	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1632	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1633	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1634	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1635	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1636
1637	/* write dispatch packet */
1638	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1639	ib.ptr[ib.length_dw++] = 8; /* x */
1640	ib.ptr[ib.length_dw++] = 1; /* y */
1641	ib.ptr[ib.length_dw++] = 1; /* z */
1642	ib.ptr[ib.length_dw++] =
1643		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1644
1645	/* write CS partial flush packet */
1646	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1647	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1648
1649	/* shedule the ib on the ring */
1650	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1651	if (r) {
1652		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1653		goto fail;
1654	}
1655
1656	/* wait for the GPU to finish processing the IB */
1657	r = dma_fence_wait(f, false);
1658	if (r) {
1659		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1660		goto fail;
1661	}
1662
1663	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1664	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1665	WREG32(mmGB_EDC_MODE, tmp);
1666
1667	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1668	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1669	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1670
1671
1672	/* read back registers to clear the counters */
1673	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1674		RREG32(sec_ded_counter_registers[i]);
1675
1676fail:
1677	amdgpu_ib_free(adev, &ib, NULL);
1678	dma_fence_put(f);
1679
1680	return r;
1681}
1682
1683static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1684{
1685	u32 gb_addr_config;
1686	u32 mc_arb_ramcfg;
1687	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1688	u32 tmp;
1689	int ret;
1690
1691	switch (adev->asic_type) {
1692	case CHIP_TOPAZ:
1693		adev->gfx.config.max_shader_engines = 1;
1694		adev->gfx.config.max_tile_pipes = 2;
1695		adev->gfx.config.max_cu_per_sh = 6;
1696		adev->gfx.config.max_sh_per_se = 1;
1697		adev->gfx.config.max_backends_per_se = 2;
1698		adev->gfx.config.max_texture_channel_caches = 2;
1699		adev->gfx.config.max_gprs = 256;
1700		adev->gfx.config.max_gs_threads = 32;
1701		adev->gfx.config.max_hw_contexts = 8;
1702
1703		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1704		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1705		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1706		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1707		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1708		break;
1709	case CHIP_FIJI:
1710		adev->gfx.config.max_shader_engines = 4;
1711		adev->gfx.config.max_tile_pipes = 16;
1712		adev->gfx.config.max_cu_per_sh = 16;
1713		adev->gfx.config.max_sh_per_se = 1;
1714		adev->gfx.config.max_backends_per_se = 4;
1715		adev->gfx.config.max_texture_channel_caches = 16;
1716		adev->gfx.config.max_gprs = 256;
1717		adev->gfx.config.max_gs_threads = 32;
1718		adev->gfx.config.max_hw_contexts = 8;
1719
1720		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1721		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1722		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1723		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1724		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1725		break;
1726	case CHIP_POLARIS11:
1727	case CHIP_POLARIS12:
1728		ret = amdgpu_atombios_get_gfx_info(adev);
1729		if (ret)
1730			return ret;
1731		adev->gfx.config.max_gprs = 256;
1732		adev->gfx.config.max_gs_threads = 32;
1733		adev->gfx.config.max_hw_contexts = 8;
1734
1735		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1736		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1737		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1738		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1739		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1740		break;
1741	case CHIP_POLARIS10:
1742	case CHIP_VEGAM:
1743		ret = amdgpu_atombios_get_gfx_info(adev);
1744		if (ret)
1745			return ret;
1746		adev->gfx.config.max_gprs = 256;
1747		adev->gfx.config.max_gs_threads = 32;
1748		adev->gfx.config.max_hw_contexts = 8;
1749
1750		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1751		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1752		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1753		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1754		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1755		break;
1756	case CHIP_TONGA:
1757		adev->gfx.config.max_shader_engines = 4;
1758		adev->gfx.config.max_tile_pipes = 8;
1759		adev->gfx.config.max_cu_per_sh = 8;
1760		adev->gfx.config.max_sh_per_se = 1;
1761		adev->gfx.config.max_backends_per_se = 2;
1762		adev->gfx.config.max_texture_channel_caches = 8;
1763		adev->gfx.config.max_gprs = 256;
1764		adev->gfx.config.max_gs_threads = 32;
1765		adev->gfx.config.max_hw_contexts = 8;
1766
1767		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1768		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1769		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1770		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1771		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1772		break;
1773	case CHIP_CARRIZO:
1774		adev->gfx.config.max_shader_engines = 1;
1775		adev->gfx.config.max_tile_pipes = 2;
1776		adev->gfx.config.max_sh_per_se = 1;
1777		adev->gfx.config.max_backends_per_se = 2;
1778		adev->gfx.config.max_cu_per_sh = 8;
1779		adev->gfx.config.max_texture_channel_caches = 2;
1780		adev->gfx.config.max_gprs = 256;
1781		adev->gfx.config.max_gs_threads = 32;
1782		adev->gfx.config.max_hw_contexts = 8;
1783
1784		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1785		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1786		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1787		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1788		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1789		break;
1790	case CHIP_STONEY:
1791		adev->gfx.config.max_shader_engines = 1;
1792		adev->gfx.config.max_tile_pipes = 2;
1793		adev->gfx.config.max_sh_per_se = 1;
1794		adev->gfx.config.max_backends_per_se = 1;
1795		adev->gfx.config.max_cu_per_sh = 3;
1796		adev->gfx.config.max_texture_channel_caches = 2;
1797		adev->gfx.config.max_gprs = 256;
1798		adev->gfx.config.max_gs_threads = 16;
1799		adev->gfx.config.max_hw_contexts = 8;
1800
1801		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1802		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1803		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1804		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1805		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1806		break;
1807	default:
1808		adev->gfx.config.max_shader_engines = 2;
1809		adev->gfx.config.max_tile_pipes = 4;
1810		adev->gfx.config.max_cu_per_sh = 2;
1811		adev->gfx.config.max_sh_per_se = 1;
1812		adev->gfx.config.max_backends_per_se = 2;
1813		adev->gfx.config.max_texture_channel_caches = 4;
1814		adev->gfx.config.max_gprs = 256;
1815		adev->gfx.config.max_gs_threads = 32;
1816		adev->gfx.config.max_hw_contexts = 8;
1817
1818		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1819		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1820		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1821		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1822		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1823		break;
1824	}
1825
1826	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1827	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1828
1829	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1830				MC_ARB_RAMCFG, NOOFBANK);
1831	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1832				MC_ARB_RAMCFG, NOOFRANKS);
1833
1834	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1835	adev->gfx.config.mem_max_burst_length_bytes = 256;
1836	if (adev->flags & AMD_IS_APU) {
1837		/* Get memory bank mapping mode. */
1838		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1839		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1840		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1841
1842		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1843		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1844		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1845
1846		/* Validate settings in case only one DIMM installed. */
1847		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1848			dimm00_addr_map = 0;
1849		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1850			dimm01_addr_map = 0;
1851		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1852			dimm10_addr_map = 0;
1853		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1854			dimm11_addr_map = 0;
1855
1856		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1857		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1858		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1859			adev->gfx.config.mem_row_size_in_kb = 2;
1860		else
1861			adev->gfx.config.mem_row_size_in_kb = 1;
1862	} else {
1863		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1864		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1865		if (adev->gfx.config.mem_row_size_in_kb > 4)
1866			adev->gfx.config.mem_row_size_in_kb = 4;
1867	}
1868
1869	adev->gfx.config.shader_engine_tile_size = 32;
1870	adev->gfx.config.num_gpus = 1;
1871	adev->gfx.config.multi_gpu_tile_size = 64;
1872
1873	/* fix up row size */
1874	switch (adev->gfx.config.mem_row_size_in_kb) {
1875	case 1:
1876	default:
1877		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1878		break;
1879	case 2:
1880		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1881		break;
1882	case 4:
1883		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1884		break;
1885	}
1886	adev->gfx.config.gb_addr_config = gb_addr_config;
1887
1888	return 0;
1889}
1890
1891static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1892					int mec, int pipe, int queue)
1893{
1894	int r;
1895	unsigned irq_type;
1896	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1897	unsigned int hw_prio;
1898
1899	ring = &adev->gfx.compute_ring[ring_id];
1900
1901	/* mec0 is me1 */
1902	ring->me = mec + 1;
1903	ring->pipe = pipe;
1904	ring->queue = queue;
1905
1906	ring->ring_obj = NULL;
1907	ring->use_doorbell = true;
1908	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1909	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1910				+ (ring_id * GFX8_MEC_HPD_SIZE);
1911	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1912
1913	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1914		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1915		+ ring->pipe;
1916
1917	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ?
1918			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1919	/* type-2 packets are deprecated on MEC, use type-3 instead */
1920	r = amdgpu_ring_init(adev, ring, 1024,
1921			     &adev->gfx.eop_irq, irq_type, hw_prio);
1922	if (r)
1923		return r;
1924
1925
1926	return 0;
1927}
1928
1929static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1930
1931static int gfx_v8_0_sw_init(void *handle)
1932{
1933	int i, j, k, r, ring_id;
 
1934	struct amdgpu_ring *ring;
1935	struct amdgpu_kiq *kiq;
1936	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1937
1938	switch (adev->asic_type) {
1939	case CHIP_TONGA:
1940	case CHIP_CARRIZO:
1941	case CHIP_FIJI:
1942	case CHIP_POLARIS10:
1943	case CHIP_POLARIS11:
1944	case CHIP_POLARIS12:
1945	case CHIP_VEGAM:
1946		adev->gfx.mec.num_mec = 2;
1947		break;
1948	case CHIP_TOPAZ:
1949	case CHIP_STONEY:
1950	default:
1951		adev->gfx.mec.num_mec = 1;
1952		break;
1953	}
1954
1955	adev->gfx.mec.num_pipe_per_mec = 4;
1956	adev->gfx.mec.num_queue_per_pipe = 8;
1957
1958	/* EOP Event */
1959	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1960	if (r)
1961		return r;
1962
1963	/* Privileged reg */
1964	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1965			      &adev->gfx.priv_reg_irq);
1966	if (r)
1967		return r;
1968
1969	/* Privileged inst */
1970	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1971			      &adev->gfx.priv_inst_irq);
1972	if (r)
1973		return r;
1974
1975	/* Add CP EDC/ECC irq  */
1976	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1977			      &adev->gfx.cp_ecc_error_irq);
1978	if (r)
1979		return r;
1980
1981	/* SQ interrupts. */
1982	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1983			      &adev->gfx.sq_irq);
1984	if (r) {
1985		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1986		return r;
1987	}
1988
1989	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1990
1991	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1992
1993	gfx_v8_0_scratch_init(adev);
1994
1995	r = gfx_v8_0_init_microcode(adev);
1996	if (r) {
1997		DRM_ERROR("Failed to load gfx firmware!\n");
1998		return r;
1999	}
2000
2001	r = adev->gfx.rlc.funcs->init(adev);
2002	if (r) {
2003		DRM_ERROR("Failed to init rlc BOs!\n");
2004		return r;
2005	}
2006
2007	r = gfx_v8_0_mec_init(adev);
2008	if (r) {
2009		DRM_ERROR("Failed to init MEC BOs!\n");
2010		return r;
2011	}
2012
2013	/* set up the gfx ring */
2014	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2015		ring = &adev->gfx.gfx_ring[i];
2016		ring->ring_obj = NULL;
2017		sprintf(ring->name, "gfx");
2018		/* no gfx doorbells on iceland */
2019		if (adev->asic_type != CHIP_TOPAZ) {
2020			ring->use_doorbell = true;
2021			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2022		}
2023
2024		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2025				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2026				     AMDGPU_RING_PRIO_DEFAULT);
2027		if (r)
2028			return r;
2029	}
2030
2031
2032	/* set up the compute queues - allocate horizontally across pipes */
2033	ring_id = 0;
2034	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2035		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2036			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2037				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
 
2038					continue;
2039
2040				r = gfx_v8_0_compute_ring_init(adev,
2041								ring_id,
2042								i, k, j);
2043				if (r)
2044					return r;
2045
2046				ring_id++;
2047			}
2048		}
2049	}
2050
2051	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2052	if (r) {
2053		DRM_ERROR("Failed to init KIQ BOs!\n");
2054		return r;
2055	}
2056
2057	kiq = &adev->gfx.kiq;
2058	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2059	if (r)
2060		return r;
2061
2062	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2063	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2064	if (r)
2065		return r;
2066
2067	adev->gfx.ce_ram_size = 0x8000;
2068
2069	r = gfx_v8_0_gpu_early_init(adev);
2070	if (r)
2071		return r;
2072
2073	return 0;
2074}
2075
2076static int gfx_v8_0_sw_fini(void *handle)
2077{
2078	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2079	int i;
2080
2081	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2082		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2083	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2084		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2085
2086	amdgpu_gfx_mqd_sw_fini(adev);
2087	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2088	amdgpu_gfx_kiq_fini(adev);
2089
2090	gfx_v8_0_mec_fini(adev);
2091	amdgpu_gfx_rlc_fini(adev);
2092	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2093				&adev->gfx.rlc.clear_state_gpu_addr,
2094				(void **)&adev->gfx.rlc.cs_ptr);
2095	if ((adev->asic_type == CHIP_CARRIZO) ||
2096	    (adev->asic_type == CHIP_STONEY)) {
2097		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2098				&adev->gfx.rlc.cp_table_gpu_addr,
2099				(void **)&adev->gfx.rlc.cp_table_ptr);
2100	}
2101	gfx_v8_0_free_microcode(adev);
2102
2103	return 0;
2104}
2105
2106static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2107{
2108	uint32_t *modearray, *mod2array;
2109	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2110	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2111	u32 reg_offset;
2112
2113	modearray = adev->gfx.config.tile_mode_array;
2114	mod2array = adev->gfx.config.macrotile_mode_array;
2115
2116	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2117		modearray[reg_offset] = 0;
2118
2119	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2120		mod2array[reg_offset] = 0;
2121
2122	switch (adev->asic_type) {
2123	case CHIP_TOPAZ:
2124		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2125				PIPE_CONFIG(ADDR_SURF_P2) |
2126				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2127				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2128		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129				PIPE_CONFIG(ADDR_SURF_P2) |
2130				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2131				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2132		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2133				PIPE_CONFIG(ADDR_SURF_P2) |
2134				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2135				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2136		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137				PIPE_CONFIG(ADDR_SURF_P2) |
2138				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2139				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2140		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141				PIPE_CONFIG(ADDR_SURF_P2) |
2142				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2143				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2145				PIPE_CONFIG(ADDR_SURF_P2) |
2146				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2147				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149				PIPE_CONFIG(ADDR_SURF_P2) |
2150				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2151				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2153				PIPE_CONFIG(ADDR_SURF_P2));
2154		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2155				PIPE_CONFIG(ADDR_SURF_P2) |
2156				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2157				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2158		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159				 PIPE_CONFIG(ADDR_SURF_P2) |
2160				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2162		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2163				 PIPE_CONFIG(ADDR_SURF_P2) |
2164				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2165				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2166		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167				 PIPE_CONFIG(ADDR_SURF_P2) |
2168				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2169				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171				 PIPE_CONFIG(ADDR_SURF_P2) |
2172				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2173				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2175				 PIPE_CONFIG(ADDR_SURF_P2) |
2176				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2177				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2178		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2179				 PIPE_CONFIG(ADDR_SURF_P2) |
2180				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2182		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2183				 PIPE_CONFIG(ADDR_SURF_P2) |
2184				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2186		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2187				 PIPE_CONFIG(ADDR_SURF_P2) |
2188				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2189				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2190		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2191				 PIPE_CONFIG(ADDR_SURF_P2) |
2192				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2193				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2194		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2195				 PIPE_CONFIG(ADDR_SURF_P2) |
2196				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2197				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2198		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2199				 PIPE_CONFIG(ADDR_SURF_P2) |
2200				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2201				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2202		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2203				 PIPE_CONFIG(ADDR_SURF_P2) |
2204				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2207				 PIPE_CONFIG(ADDR_SURF_P2) |
2208				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2211				 PIPE_CONFIG(ADDR_SURF_P2) |
2212				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2215				 PIPE_CONFIG(ADDR_SURF_P2) |
2216				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219				 PIPE_CONFIG(ADDR_SURF_P2) |
2220				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2221				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2223				 PIPE_CONFIG(ADDR_SURF_P2) |
2224				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2225				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2226
2227		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2228				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2229				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2230				NUM_BANKS(ADDR_SURF_8_BANK));
2231		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2232				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2234				NUM_BANKS(ADDR_SURF_8_BANK));
2235		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2236				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2237				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2238				NUM_BANKS(ADDR_SURF_8_BANK));
2239		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2242				NUM_BANKS(ADDR_SURF_8_BANK));
2243		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2245				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246				NUM_BANKS(ADDR_SURF_8_BANK));
2247		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2249				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250				NUM_BANKS(ADDR_SURF_8_BANK));
2251		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2253				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254				NUM_BANKS(ADDR_SURF_8_BANK));
2255		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2257				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2258				NUM_BANKS(ADDR_SURF_16_BANK));
2259		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2262				NUM_BANKS(ADDR_SURF_16_BANK));
2263		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2264				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2266				 NUM_BANKS(ADDR_SURF_16_BANK));
2267		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2268				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2269				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270				 NUM_BANKS(ADDR_SURF_16_BANK));
2271		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2273				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274				 NUM_BANKS(ADDR_SURF_16_BANK));
2275		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278				 NUM_BANKS(ADDR_SURF_16_BANK));
2279		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282				 NUM_BANKS(ADDR_SURF_8_BANK));
2283
2284		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2285			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2286			    reg_offset != 23)
2287				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2288
2289		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2290			if (reg_offset != 7)
2291				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2292
2293		break;
2294	case CHIP_FIJI:
2295	case CHIP_VEGAM:
2296		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2298				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2299				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2300		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2302				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2303				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2304		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2305				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2306				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2307				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2308		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2310				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2311				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2312		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2314				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2315				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2316		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2319				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2320		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2321				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2323				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2324		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2326				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2327				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2328		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2329				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2330		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2331				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2333				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2334		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2337				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2338		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2339				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2341				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2342		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2344				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2345				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2346		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2349				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2351				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2353				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2354		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2355				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2357				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2358		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2359				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2361				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2362		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2363				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2364				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2365				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2366		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2367				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2369				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2370		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2371				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2373				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2374		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2375				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2377				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2378		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2379				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2381				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2382		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2383				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2385				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2386		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2387				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2388				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2389				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2390		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2391				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2394		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2395				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2397				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2399				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2401				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2402		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2403				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2405				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2409				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2413				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2414		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2415				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2417				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2418
2419		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2421				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2422				NUM_BANKS(ADDR_SURF_8_BANK));
2423		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2425				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2426				NUM_BANKS(ADDR_SURF_8_BANK));
2427		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2429				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2430				NUM_BANKS(ADDR_SURF_8_BANK));
2431		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434				NUM_BANKS(ADDR_SURF_8_BANK));
2435		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2437				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2438				NUM_BANKS(ADDR_SURF_8_BANK));
2439		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2440				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2441				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2442				NUM_BANKS(ADDR_SURF_8_BANK));
2443		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2445				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2446				NUM_BANKS(ADDR_SURF_8_BANK));
2447		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2449				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450				NUM_BANKS(ADDR_SURF_8_BANK));
2451		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2453				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454				NUM_BANKS(ADDR_SURF_8_BANK));
2455		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2457				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458				 NUM_BANKS(ADDR_SURF_8_BANK));
2459		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2461				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2462				 NUM_BANKS(ADDR_SURF_8_BANK));
2463		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2466				 NUM_BANKS(ADDR_SURF_8_BANK));
2467		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2470				 NUM_BANKS(ADDR_SURF_8_BANK));
2471		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2473				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2474				 NUM_BANKS(ADDR_SURF_4_BANK));
2475
2476		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2477			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2478
2479		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2480			if (reg_offset != 7)
2481				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2482
2483		break;
2484	case CHIP_TONGA:
2485		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2487				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2488				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2489		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2491				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2492				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2493		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2495				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2496				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2497		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2498				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2499				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2500				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2501		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2504				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2505		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2506				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2508				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2509		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2512				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2513		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2515				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2516				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2517		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2518				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2519		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2520				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2522				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2523		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2526				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2527		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2528				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2530				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2531		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2534				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2535		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2536				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2538				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2539		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2542				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2543		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2544				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2546				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2547		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2548				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2550				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2551		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2552				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2553				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2554				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2555		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2556				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2558				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2559		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2560				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2562				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2563		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2564				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2566				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2567		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2568				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2570				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2571		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2572				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2574				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2575		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2576				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2577				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2578				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2579		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2580				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2582				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2583		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2584				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2586				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2587		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2588				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2590				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2591		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2594				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2598				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2599		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2602				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2603		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2604				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2606				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2607
2608		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2610				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2611				NUM_BANKS(ADDR_SURF_16_BANK));
2612		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2614				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2615				NUM_BANKS(ADDR_SURF_16_BANK));
2616		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619				NUM_BANKS(ADDR_SURF_16_BANK));
2620		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623				NUM_BANKS(ADDR_SURF_16_BANK));
2624		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627				NUM_BANKS(ADDR_SURF_16_BANK));
2628		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2631				NUM_BANKS(ADDR_SURF_16_BANK));
2632		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635				NUM_BANKS(ADDR_SURF_16_BANK));
2636		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2638				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2639				NUM_BANKS(ADDR_SURF_16_BANK));
2640		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2642				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2643				NUM_BANKS(ADDR_SURF_16_BANK));
2644		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2646				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2647				 NUM_BANKS(ADDR_SURF_16_BANK));
2648		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2651				 NUM_BANKS(ADDR_SURF_16_BANK));
2652		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2654				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2655				 NUM_BANKS(ADDR_SURF_8_BANK));
2656		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2659				 NUM_BANKS(ADDR_SURF_4_BANK));
2660		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663				 NUM_BANKS(ADDR_SURF_4_BANK));
2664
2665		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2666			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2667
2668		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2669			if (reg_offset != 7)
2670				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2671
2672		break;
2673	case CHIP_POLARIS11:
2674	case CHIP_POLARIS12:
2675		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2676				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2677				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2678				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2679		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2680				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2681				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2682				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2683		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2687		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2690				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2691		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2694				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2698				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2699		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2702				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2706				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2708				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2709		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2710				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2712				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2716				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2717		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2718				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2720				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2721		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2724				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2725		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2728				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2732				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2733		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2734				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2736				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2738				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2740				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2741		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2744				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2745		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2746				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2748				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2749		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2750				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2752				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2753		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2754				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2756				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2757		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2758				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2760				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2761		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2762				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2764				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2765		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2766				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2768				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2769		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2770				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2772				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2773		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2774				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2776				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2778				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2780				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2784				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2788				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2789		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2790				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2793		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2794				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2797
2798		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2801				NUM_BANKS(ADDR_SURF_16_BANK));
2802
2803		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2805				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2806				NUM_BANKS(ADDR_SURF_16_BANK));
2807
2808		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811				NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2815				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816				NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2820				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2821				NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2825				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2826				NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831				NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2834				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2835				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836				NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2839				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841				NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2845				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846				NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851				NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856				NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2861				NUM_BANKS(ADDR_SURF_8_BANK));
2862
2863		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2866				NUM_BANKS(ADDR_SURF_4_BANK));
2867
2868		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2869			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2870
2871		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2872			if (reg_offset != 7)
2873				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2874
2875		break;
2876	case CHIP_POLARIS10:
2877		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2879				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2880				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2881		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2883				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2884				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2885		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2888				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2889		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2892				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2893		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2896				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2897		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2900				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2901		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2902				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2904				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2907				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2908				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2910				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2911		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2912				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2915		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2920				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2922				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2923		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2925				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2926				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2927		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2935		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2936				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2940				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2943		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2945				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2946				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2948				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2950				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2951		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2952				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2954				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2955		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2956				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2958				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2959		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2960				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2962				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2963		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2964				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2966				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2967		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2968				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2969				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2970				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2971		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2972				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2976				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2980				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2984				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2986				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2988				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2990				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2991		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2992				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2994				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2995		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2996				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2997				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2998				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2999
3000		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3002				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003				NUM_BANKS(ADDR_SURF_16_BANK));
3004
3005		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3007				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3008				NUM_BANKS(ADDR_SURF_16_BANK));
3009
3010		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013				NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018				NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3022				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3023				NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3028				NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3032				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3033				NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3037				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038				NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3042				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043				NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3047				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048				NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3052				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3053				NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058				NUM_BANKS(ADDR_SURF_8_BANK));
3059
3060		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063				NUM_BANKS(ADDR_SURF_4_BANK));
3064
3065		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068				NUM_BANKS(ADDR_SURF_4_BANK));
3069
3070		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3071			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3072
3073		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3074			if (reg_offset != 7)
3075				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3076
3077		break;
3078	case CHIP_STONEY:
3079		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3080				PIPE_CONFIG(ADDR_SURF_P2) |
3081				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3082				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3083		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3084				PIPE_CONFIG(ADDR_SURF_P2) |
3085				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3086				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3087		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088				PIPE_CONFIG(ADDR_SURF_P2) |
3089				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3090				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3091		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092				PIPE_CONFIG(ADDR_SURF_P2) |
3093				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3094				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3095		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3096				PIPE_CONFIG(ADDR_SURF_P2) |
3097				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3098				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3099		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3100				PIPE_CONFIG(ADDR_SURF_P2) |
3101				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3102				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3103		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3104				PIPE_CONFIG(ADDR_SURF_P2) |
3105				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3106				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3108				PIPE_CONFIG(ADDR_SURF_P2));
3109		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110				PIPE_CONFIG(ADDR_SURF_P2) |
3111				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3112				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3113		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114				 PIPE_CONFIG(ADDR_SURF_P2) |
3115				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3116				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3117		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3118				 PIPE_CONFIG(ADDR_SURF_P2) |
3119				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3120				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3121		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3122				 PIPE_CONFIG(ADDR_SURF_P2) |
3123				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3124				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3125		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126				 PIPE_CONFIG(ADDR_SURF_P2) |
3127				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3128				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3129		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3130				 PIPE_CONFIG(ADDR_SURF_P2) |
3131				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3132				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3133		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3134				 PIPE_CONFIG(ADDR_SURF_P2) |
3135				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3136				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3137		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3138				 PIPE_CONFIG(ADDR_SURF_P2) |
3139				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3140				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3141		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3142				 PIPE_CONFIG(ADDR_SURF_P2) |
3143				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3144				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3145		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3146				 PIPE_CONFIG(ADDR_SURF_P2) |
3147				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3148				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3149		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3150				 PIPE_CONFIG(ADDR_SURF_P2) |
3151				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3152				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3153		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3154				 PIPE_CONFIG(ADDR_SURF_P2) |
3155				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3156				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3157		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3158				 PIPE_CONFIG(ADDR_SURF_P2) |
3159				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3161		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3162				 PIPE_CONFIG(ADDR_SURF_P2) |
3163				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3164				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3166				 PIPE_CONFIG(ADDR_SURF_P2) |
3167				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3168				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3170				 PIPE_CONFIG(ADDR_SURF_P2) |
3171				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3172				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3173		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174				 PIPE_CONFIG(ADDR_SURF_P2) |
3175				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3176				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3177		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3178				 PIPE_CONFIG(ADDR_SURF_P2) |
3179				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3180				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3181
3182		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3183				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3184				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185				NUM_BANKS(ADDR_SURF_8_BANK));
3186		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3187				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3188				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3189				NUM_BANKS(ADDR_SURF_8_BANK));
3190		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3192				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3193				NUM_BANKS(ADDR_SURF_8_BANK));
3194		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3197				NUM_BANKS(ADDR_SURF_8_BANK));
3198		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3199				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3200				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3201				NUM_BANKS(ADDR_SURF_8_BANK));
3202		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3203				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3204				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3205				NUM_BANKS(ADDR_SURF_8_BANK));
3206		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3208				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3209				NUM_BANKS(ADDR_SURF_8_BANK));
3210		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3211				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3212				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213				NUM_BANKS(ADDR_SURF_16_BANK));
3214		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3215				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3216				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3217				NUM_BANKS(ADDR_SURF_16_BANK));
3218		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3219				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3220				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3221				 NUM_BANKS(ADDR_SURF_16_BANK));
3222		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3223				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3224				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3225				 NUM_BANKS(ADDR_SURF_16_BANK));
3226		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3228				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3229				 NUM_BANKS(ADDR_SURF_16_BANK));
3230		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3233				 NUM_BANKS(ADDR_SURF_16_BANK));
3234		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3236				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3237				 NUM_BANKS(ADDR_SURF_8_BANK));
3238
3239		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3240			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3241			    reg_offset != 23)
3242				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3243
3244		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3245			if (reg_offset != 7)
3246				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3247
3248		break;
3249	default:
3250		dev_warn(adev->dev,
3251			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3252			 adev->asic_type);
3253		fallthrough;
3254
3255	case CHIP_CARRIZO:
3256		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3257				PIPE_CONFIG(ADDR_SURF_P2) |
3258				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3259				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3260		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3261				PIPE_CONFIG(ADDR_SURF_P2) |
3262				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3263				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3264		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3265				PIPE_CONFIG(ADDR_SURF_P2) |
3266				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3267				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3268		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3269				PIPE_CONFIG(ADDR_SURF_P2) |
3270				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3271				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3272		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3273				PIPE_CONFIG(ADDR_SURF_P2) |
3274				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3275				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3276		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3277				PIPE_CONFIG(ADDR_SURF_P2) |
3278				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3279				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3280		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3281				PIPE_CONFIG(ADDR_SURF_P2) |
3282				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3283				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3284		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3285				PIPE_CONFIG(ADDR_SURF_P2));
3286		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3287				PIPE_CONFIG(ADDR_SURF_P2) |
3288				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3289				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3290		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291				 PIPE_CONFIG(ADDR_SURF_P2) |
3292				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3293				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3294		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3295				 PIPE_CONFIG(ADDR_SURF_P2) |
3296				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3297				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3298		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299				 PIPE_CONFIG(ADDR_SURF_P2) |
3300				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3301				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3302		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3303				 PIPE_CONFIG(ADDR_SURF_P2) |
3304				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3305				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3306		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3307				 PIPE_CONFIG(ADDR_SURF_P2) |
3308				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3309				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3311				 PIPE_CONFIG(ADDR_SURF_P2) |
3312				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3313				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3314		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3315				 PIPE_CONFIG(ADDR_SURF_P2) |
3316				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3317				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3318		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3319				 PIPE_CONFIG(ADDR_SURF_P2) |
3320				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3321				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3322		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3323				 PIPE_CONFIG(ADDR_SURF_P2) |
3324				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3325				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3326		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3327				 PIPE_CONFIG(ADDR_SURF_P2) |
3328				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3329				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3330		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3331				 PIPE_CONFIG(ADDR_SURF_P2) |
3332				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3333				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3334		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3335				 PIPE_CONFIG(ADDR_SURF_P2) |
3336				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3337				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3338		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3339				 PIPE_CONFIG(ADDR_SURF_P2) |
3340				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3341				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3342		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3343				 PIPE_CONFIG(ADDR_SURF_P2) |
3344				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3345				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3346		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3347				 PIPE_CONFIG(ADDR_SURF_P2) |
3348				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3349				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3350		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3351				 PIPE_CONFIG(ADDR_SURF_P2) |
3352				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3353				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3354		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3355				 PIPE_CONFIG(ADDR_SURF_P2) |
3356				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3357				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3358
3359		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3360				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3361				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362				NUM_BANKS(ADDR_SURF_8_BANK));
3363		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3364				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3365				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366				NUM_BANKS(ADDR_SURF_8_BANK));
3367		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3369				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3370				NUM_BANKS(ADDR_SURF_8_BANK));
3371		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3373				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3374				NUM_BANKS(ADDR_SURF_8_BANK));
3375		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3378				NUM_BANKS(ADDR_SURF_8_BANK));
3379		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3381				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3382				NUM_BANKS(ADDR_SURF_8_BANK));
3383		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3385				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3386				NUM_BANKS(ADDR_SURF_8_BANK));
3387		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3388				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3389				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3390				NUM_BANKS(ADDR_SURF_16_BANK));
3391		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3392				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3393				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3394				NUM_BANKS(ADDR_SURF_16_BANK));
3395		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3396				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3397				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3398				 NUM_BANKS(ADDR_SURF_16_BANK));
3399		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3400				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3401				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3402				 NUM_BANKS(ADDR_SURF_16_BANK));
3403		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3404				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3405				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3406				 NUM_BANKS(ADDR_SURF_16_BANK));
3407		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3408				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3409				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3410				 NUM_BANKS(ADDR_SURF_16_BANK));
3411		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3412				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3413				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3414				 NUM_BANKS(ADDR_SURF_8_BANK));
3415
3416		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3417			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3418			    reg_offset != 23)
3419				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3420
3421		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3422			if (reg_offset != 7)
3423				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3424
3425		break;
3426	}
3427}
3428
3429static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3430				  u32 se_num, u32 sh_num, u32 instance)
 
3431{
3432	u32 data;
3433
3434	if (instance == 0xffffffff)
3435		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3436	else
3437		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3438
3439	if (se_num == 0xffffffff)
3440		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3441	else
3442		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3443
3444	if (sh_num == 0xffffffff)
3445		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3446	else
3447		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3448
3449	WREG32(mmGRBM_GFX_INDEX, data);
3450}
3451
3452static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3453				  u32 me, u32 pipe, u32 q, u32 vm)
3454{
3455	vi_srbm_select(adev, me, pipe, q, vm);
3456}
3457
3458static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3459{
3460	u32 data, mask;
3461
3462	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3463		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3464
3465	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3466
3467	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3468					 adev->gfx.config.max_sh_per_se);
3469
3470	return (~data) & mask;
3471}
3472
3473static void
3474gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3475{
3476	switch (adev->asic_type) {
3477	case CHIP_FIJI:
3478	case CHIP_VEGAM:
3479		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3480			  RB_XSEL2(1) | PKR_MAP(2) |
3481			  PKR_XSEL(1) | PKR_YSEL(1) |
3482			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3483		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3484			   SE_PAIR_YSEL(2);
3485		break;
3486	case CHIP_TONGA:
3487	case CHIP_POLARIS10:
3488		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3489			  SE_XSEL(1) | SE_YSEL(1);
3490		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3491			   SE_PAIR_YSEL(2);
3492		break;
3493	case CHIP_TOPAZ:
3494	case CHIP_CARRIZO:
3495		*rconf |= RB_MAP_PKR0(2);
3496		*rconf1 |= 0x0;
3497		break;
3498	case CHIP_POLARIS11:
3499	case CHIP_POLARIS12:
3500		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3501			  SE_XSEL(1) | SE_YSEL(1);
3502		*rconf1 |= 0x0;
3503		break;
3504	case CHIP_STONEY:
3505		*rconf |= 0x0;
3506		*rconf1 |= 0x0;
3507		break;
3508	default:
3509		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3510		break;
3511	}
3512}
3513
3514static void
3515gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3516					u32 raster_config, u32 raster_config_1,
3517					unsigned rb_mask, unsigned num_rb)
3518{
3519	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3520	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3521	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3522	unsigned rb_per_se = num_rb / num_se;
3523	unsigned se_mask[4];
3524	unsigned se;
3525
3526	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3527	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3528	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3529	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3530
3531	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3532	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3533	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3534
3535	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3536			     (!se_mask[2] && !se_mask[3]))) {
3537		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3538
3539		if (!se_mask[0] && !se_mask[1]) {
3540			raster_config_1 |=
3541				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3542		} else {
3543			raster_config_1 |=
3544				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3545		}
3546	}
3547
3548	for (se = 0; se < num_se; se++) {
3549		unsigned raster_config_se = raster_config;
3550		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3551		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3552		int idx = (se / 2) * 2;
3553
3554		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3555			raster_config_se &= ~SE_MAP_MASK;
3556
3557			if (!se_mask[idx]) {
3558				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3559			} else {
3560				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3561			}
3562		}
3563
3564		pkr0_mask &= rb_mask;
3565		pkr1_mask &= rb_mask;
3566		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3567			raster_config_se &= ~PKR_MAP_MASK;
3568
3569			if (!pkr0_mask) {
3570				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3571			} else {
3572				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3573			}
3574		}
3575
3576		if (rb_per_se >= 2) {
3577			unsigned rb0_mask = 1 << (se * rb_per_se);
3578			unsigned rb1_mask = rb0_mask << 1;
3579
3580			rb0_mask &= rb_mask;
3581			rb1_mask &= rb_mask;
3582			if (!rb0_mask || !rb1_mask) {
3583				raster_config_se &= ~RB_MAP_PKR0_MASK;
3584
3585				if (!rb0_mask) {
3586					raster_config_se |=
3587						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3588				} else {
3589					raster_config_se |=
3590						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3591				}
3592			}
3593
3594			if (rb_per_se > 2) {
3595				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3596				rb1_mask = rb0_mask << 1;
3597				rb0_mask &= rb_mask;
3598				rb1_mask &= rb_mask;
3599				if (!rb0_mask || !rb1_mask) {
3600					raster_config_se &= ~RB_MAP_PKR1_MASK;
3601
3602					if (!rb0_mask) {
3603						raster_config_se |=
3604							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3605					} else {
3606						raster_config_se |=
3607							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3608					}
3609				}
3610			}
3611		}
3612
3613		/* GRBM_GFX_INDEX has a different offset on VI */
3614		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3615		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3616		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3617	}
3618
3619	/* GRBM_GFX_INDEX has a different offset on VI */
3620	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3621}
3622
3623static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3624{
3625	int i, j;
3626	u32 data;
3627	u32 raster_config = 0, raster_config_1 = 0;
3628	u32 active_rbs = 0;
3629	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3630					adev->gfx.config.max_sh_per_se;
3631	unsigned num_rb_pipes;
3632
3633	mutex_lock(&adev->grbm_idx_mutex);
3634	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3635		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3636			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3637			data = gfx_v8_0_get_rb_active_bitmap(adev);
3638			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3639					       rb_bitmap_width_per_sh);
3640		}
3641	}
3642	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3643
3644	adev->gfx.config.backend_enable_mask = active_rbs;
3645	adev->gfx.config.num_rbs = hweight32(active_rbs);
3646
3647	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3648			     adev->gfx.config.max_shader_engines, 16);
3649
3650	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3651
3652	if (!adev->gfx.config.backend_enable_mask ||
3653			adev->gfx.config.num_rbs >= num_rb_pipes) {
3654		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3655		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3656	} else {
3657		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3658							adev->gfx.config.backend_enable_mask,
3659							num_rb_pipes);
3660	}
3661
3662	/* cache the values for userspace */
3663	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3664		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3665			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3666			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3667				RREG32(mmCC_RB_BACKEND_DISABLE);
3668			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3669				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3670			adev->gfx.config.rb_config[i][j].raster_config =
3671				RREG32(mmPA_SC_RASTER_CONFIG);
3672			adev->gfx.config.rb_config[i][j].raster_config_1 =
3673				RREG32(mmPA_SC_RASTER_CONFIG_1);
3674		}
3675	}
3676	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3677	mutex_unlock(&adev->grbm_idx_mutex);
3678}
3679
 
3680/**
3681 * gfx_v8_0_init_compute_vmid - gart enable
3682 *
3683 * @adev: amdgpu_device pointer
3684 *
3685 * Initialize compute vmid sh_mem registers
3686 *
3687 */
3688#define DEFAULT_SH_MEM_BASES	(0x6000)
3689static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3690{
3691	int i;
3692	uint32_t sh_mem_config;
3693	uint32_t sh_mem_bases;
3694
3695	/*
3696	 * Configure apertures:
3697	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3698	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3699	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3700	 */
3701	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3702
3703	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3704			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3705			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3706			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3707			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3708			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3709
3710	mutex_lock(&adev->srbm_mutex);
3711	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3712		vi_srbm_select(adev, 0, 0, 0, i);
3713		/* CP and shaders */
3714		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3715		WREG32(mmSH_MEM_APE1_BASE, 1);
3716		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3717		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3718	}
3719	vi_srbm_select(adev, 0, 0, 0, 0);
3720	mutex_unlock(&adev->srbm_mutex);
3721
3722	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3723	   acccess. These should be enabled by FW for target VMIDs. */
3724	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3725		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3726		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3727		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3728		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3729	}
3730}
3731
3732static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3733{
3734	int vmid;
3735
3736	/*
3737	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3738	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3739	 * the driver can enable them for graphics. VMID0 should maintain
3740	 * access so that HWS firmware can save/restore entries.
3741	 */
3742	for (vmid = 1; vmid < 16; vmid++) {
3743		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3744		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3745		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3746		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3747	}
3748}
3749
3750static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3751{
3752	switch (adev->asic_type) {
3753	default:
3754		adev->gfx.config.double_offchip_lds_buf = 1;
3755		break;
3756	case CHIP_CARRIZO:
3757	case CHIP_STONEY:
3758		adev->gfx.config.double_offchip_lds_buf = 0;
3759		break;
3760	}
3761}
3762
3763static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3764{
3765	u32 tmp, sh_static_mem_cfg;
3766	int i;
3767
3768	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3769	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3770	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3771	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3772
3773	gfx_v8_0_tiling_mode_table_init(adev);
3774	gfx_v8_0_setup_rb(adev);
3775	gfx_v8_0_get_cu_info(adev);
3776	gfx_v8_0_config_init(adev);
3777
3778	/* XXX SH_MEM regs */
3779	/* where to put LDS, scratch, GPUVM in FSA64 space */
3780	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3781				   SWIZZLE_ENABLE, 1);
3782	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3783				   ELEMENT_SIZE, 1);
3784	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3785				   INDEX_STRIDE, 3);
3786	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3787
3788	mutex_lock(&adev->srbm_mutex);
3789	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3790		vi_srbm_select(adev, 0, 0, 0, i);
3791		/* CP and shaders */
3792		if (i == 0) {
3793			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3794			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3795			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3796					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3797			WREG32(mmSH_MEM_CONFIG, tmp);
3798			WREG32(mmSH_MEM_BASES, 0);
3799		} else {
3800			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3801			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3802			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3803					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3804			WREG32(mmSH_MEM_CONFIG, tmp);
3805			tmp = adev->gmc.shared_aperture_start >> 48;
3806			WREG32(mmSH_MEM_BASES, tmp);
3807		}
3808
3809		WREG32(mmSH_MEM_APE1_BASE, 1);
3810		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3811	}
3812	vi_srbm_select(adev, 0, 0, 0, 0);
3813	mutex_unlock(&adev->srbm_mutex);
3814
3815	gfx_v8_0_init_compute_vmid(adev);
3816	gfx_v8_0_init_gds_vmid(adev);
3817
3818	mutex_lock(&adev->grbm_idx_mutex);
3819	/*
3820	 * making sure that the following register writes will be broadcasted
3821	 * to all the shaders
3822	 */
3823	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3824
3825	WREG32(mmPA_SC_FIFO_SIZE,
3826		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3827			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3828		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3829			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3830		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3831			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3832		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3833			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3834
3835	tmp = RREG32(mmSPI_ARB_PRIORITY);
3836	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3837	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3838	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3839	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3840	WREG32(mmSPI_ARB_PRIORITY, tmp);
3841
3842	mutex_unlock(&adev->grbm_idx_mutex);
3843
3844}
3845
3846static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3847{
3848	u32 i, j, k;
3849	u32 mask;
3850
3851	mutex_lock(&adev->grbm_idx_mutex);
3852	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3853		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3854			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3855			for (k = 0; k < adev->usec_timeout; k++) {
3856				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3857					break;
3858				udelay(1);
3859			}
3860			if (k == adev->usec_timeout) {
3861				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3862						      0xffffffff, 0xffffffff);
3863				mutex_unlock(&adev->grbm_idx_mutex);
3864				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3865					 i, j);
3866				return;
3867			}
3868		}
3869	}
3870	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3871	mutex_unlock(&adev->grbm_idx_mutex);
3872
3873	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3874		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3875		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3876		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3877	for (k = 0; k < adev->usec_timeout; k++) {
3878		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3879			break;
3880		udelay(1);
3881	}
3882}
3883
3884static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3885					       bool enable)
3886{
3887	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3888
3889	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3890	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3891	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3892	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3893
3894	WREG32(mmCP_INT_CNTL_RING0, tmp);
3895}
3896
3897static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3898{
3899	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3900	/* csib */
3901	WREG32(mmRLC_CSIB_ADDR_HI,
3902			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3903	WREG32(mmRLC_CSIB_ADDR_LO,
3904			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3905	WREG32(mmRLC_CSIB_LENGTH,
3906			adev->gfx.rlc.clear_state_size);
3907}
3908
/**
 * gfx_v8_0_parse_ind_reg_list - collect start offsets and unique indices
 * from a register list
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: position in the list at which parsing starts
 * @list_size: number of entries in @register_list_format
 * @unique_indices: output table of distinct index values
 * @indices_count: in/out number of used entries in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: output table of entry start positions
 * @offset_count: in/out number of used entries in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is walked from @ind_offset.  The position at which each entry
 * begins is appended to @ind_start_offsets; a 0xFFFFFFFF word ends the
 * current entry.  For every other record the word two positions further
 * on is looked up in @unique_indices (and appended if not yet present),
 * then overwritten with its position in that table.
 *
 * Exceeding either table capacity triggers BUG_ON().  A list that ends in
 * the middle of a record triggers WARN_ON() and stops the parse instead of
 * accessing memory beyond @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/*
		 * A truncated record would place the index word past the end
		 * of the list; stop rather than read and write out of bounds.
		 */
		if (WARN_ON(ind_offset >= list_size))
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw index value by its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3958
3959static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3960{
3961	int i, temp, data;
3962	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3963	int indices_count = 0;
3964	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3965	int offset_count = 0;
3966
3967	int list_size;
3968	unsigned int *register_list_format =
3969		kmemdup(adev->gfx.rlc.register_list_format,
3970			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3971	if (!register_list_format)
3972		return -ENOMEM;
3973
3974	gfx_v8_0_parse_ind_reg_list(register_list_format,
3975				RLC_FormatDirectRegListLength,
3976				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3977				unique_indices,
3978				&indices_count,
3979				ARRAY_SIZE(unique_indices),
3980				indirect_start_offsets,
3981				&offset_count,
3982				ARRAY_SIZE(indirect_start_offsets));
3983
3984	/* save and restore list */
3985	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3986
3987	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3988	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3989		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3990
3991	/* indirect list */
3992	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3993	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3994		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3995
3996	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3997	list_size = list_size >> 1;
3998	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3999	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4000
4001	/* starting offsets starts */
4002	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4003		adev->gfx.rlc.starting_offsets_start);
4004	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4005		WREG32(mmRLC_GPM_SCRATCH_DATA,
4006				indirect_start_offsets[i]);
4007
4008	/* unique indices */
4009	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4010	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4011	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4012		if (unique_indices[i] != 0) {
4013			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4014			WREG32(data + i, unique_indices[i] >> 20);
4015		}
4016	}
4017	kfree(register_list_format);
4018
4019	return 0;
4020}
4021
4022static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4023{
4024	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4025}
4026
4027static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4028{
4029	uint32_t data;
4030
4031	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4032
4033	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4034	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4035	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4036	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4037	WREG32(mmRLC_PG_DELAY, data);
4038
4039	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4040	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4041
4042}
4043
4044static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4045						bool enable)
4046{
4047	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4048}
4049
4050static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4051						  bool enable)
4052{
4053	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4054}
4055
4056static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4057{
4058	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4059}
4060
4061static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4062{
4063	if ((adev->asic_type == CHIP_CARRIZO) ||
4064	    (adev->asic_type == CHIP_STONEY)) {
4065		gfx_v8_0_init_csb(adev);
4066		gfx_v8_0_init_save_restore_list(adev);
4067		gfx_v8_0_enable_save_restore_machine(adev);
4068		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4069		gfx_v8_0_init_power_gating(adev);
4070		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4071	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4072		   (adev->asic_type == CHIP_POLARIS12) ||
4073		   (adev->asic_type == CHIP_VEGAM)) {
4074		gfx_v8_0_init_csb(adev);
4075		gfx_v8_0_init_save_restore_list(adev);
4076		gfx_v8_0_enable_save_restore_machine(adev);
4077		gfx_v8_0_init_power_gating(adev);
4078	}
4079
4080}
4081
4082static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4083{
4084	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4085
4086	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4087	gfx_v8_0_wait_for_rlc_serdes(adev);
4088}
4089
4090static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4091{
4092	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4093	udelay(50);
4094
4095	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4096	udelay(50);
4097}
4098
4099static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4100{
4101	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4102
4103	/* carrizo do enable cp interrupt after cp inited */
4104	if (!(adev->flags & AMD_IS_APU))
4105		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4106
4107	udelay(50);
4108}
4109
4110static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4111{
4112	if (amdgpu_sriov_vf(adev)) {
4113		gfx_v8_0_init_csb(adev);
4114		return 0;
4115	}
4116
4117	adev->gfx.rlc.funcs->stop(adev);
4118	adev->gfx.rlc.funcs->reset(adev);
4119	gfx_v8_0_init_pg(adev);
4120	adev->gfx.rlc.funcs->start(adev);
4121
4122	return 0;
4123}
4124
4125static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4126{
4127	u32 tmp = RREG32(mmCP_ME_CNTL);
4128
4129	if (enable) {
4130		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4131		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4132		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4133	} else {
4134		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4135		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4136		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4137	}
4138	WREG32(mmCP_ME_CNTL, tmp);
4139	udelay(50);
4140}
4141
4142static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4143{
4144	u32 count = 0;
4145	const struct cs_section_def *sect = NULL;
4146	const struct cs_extent_def *ext = NULL;
4147
4148	/* begin clear state */
4149	count += 2;
4150	/* context control state */
4151	count += 3;
4152
4153	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4154		for (ext = sect->section; ext->extent != NULL; ++ext) {
4155			if (sect->id == SECT_CONTEXT)
4156				count += 2 + ext->reg_count;
4157			else
4158				return 0;
4159		}
4160	}
4161	/* pa_sc_raster_config/pa_sc_raster_config1 */
4162	count += 4;
4163	/* end clear state */
4164	count += 2;
4165	/* clear state */
4166	count += 2;
4167
4168	return count;
4169}
4170
4171static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4172{
4173	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4174	const struct cs_section_def *sect = NULL;
4175	const struct cs_extent_def *ext = NULL;
4176	int r, i;
4177
4178	/* init the CP */
4179	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4180	WREG32(mmCP_ENDIAN_SWAP, 0);
4181	WREG32(mmCP_DEVICE_ID, 1);
4182
4183	gfx_v8_0_cp_gfx_enable(adev, true);
4184
4185	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4186	if (r) {
4187		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4188		return r;
4189	}
4190
4191	/* clear state buffer */
4192	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4193	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4194
4195	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4196	amdgpu_ring_write(ring, 0x80000000);
4197	amdgpu_ring_write(ring, 0x80000000);
4198
4199	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4200		for (ext = sect->section; ext->extent != NULL; ++ext) {
4201			if (sect->id == SECT_CONTEXT) {
4202				amdgpu_ring_write(ring,
4203				       PACKET3(PACKET3_SET_CONTEXT_REG,
4204					       ext->reg_count));
4205				amdgpu_ring_write(ring,
4206				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4207				for (i = 0; i < ext->reg_count; i++)
4208					amdgpu_ring_write(ring, ext->extent[i]);
4209			}
4210		}
4211	}
4212
4213	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4214	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4215	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4216	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4217
4218	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4219	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4220
4221	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4222	amdgpu_ring_write(ring, 0);
4223
4224	/* init the CE partitions */
4225	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4226	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4227	amdgpu_ring_write(ring, 0x8000);
4228	amdgpu_ring_write(ring, 0x8000);
4229
4230	amdgpu_ring_commit(ring);
4231
4232	return 0;
4233}
4234static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4235{
4236	u32 tmp;
4237	/* no gfx doorbells on iceland */
4238	if (adev->asic_type == CHIP_TOPAZ)
4239		return;
4240
4241	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4242
4243	if (ring->use_doorbell) {
4244		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4245				DOORBELL_OFFSET, ring->doorbell_index);
4246		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4247						DOORBELL_HIT, 0);
4248		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4249					    DOORBELL_EN, 1);
4250	} else {
4251		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4252	}
4253
4254	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4255
4256	if (adev->flags & AMD_IS_APU)
4257		return;
4258
4259	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4260					DOORBELL_RANGE_LOWER,
4261					adev->doorbell_index.gfx_ring0);
4262	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4263
4264	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4265		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4266}
4267
4268static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4269{
4270	struct amdgpu_ring *ring;
4271	u32 tmp;
4272	u32 rb_bufsz;
4273	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4274
4275	/* Set the write pointer delay */
4276	WREG32(mmCP_RB_WPTR_DELAY, 0);
4277
4278	/* set the RB to use vmid 0 */
4279	WREG32(mmCP_RB_VMID, 0);
4280
4281	/* Set ring buffer size */
4282	ring = &adev->gfx.gfx_ring[0];
4283	rb_bufsz = order_base_2(ring->ring_size / 8);
4284	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4285	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4286	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4287	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4288#ifdef __BIG_ENDIAN
4289	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4290#endif
4291	WREG32(mmCP_RB0_CNTL, tmp);
4292
4293	/* Initialize the ring buffer's read and write pointers */
4294	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4295	ring->wptr = 0;
4296	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4297
4298	/* set the wb address wether it's enabled or not */
4299	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4300	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4301	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4302
4303	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4304	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4305	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4306	mdelay(1);
4307	WREG32(mmCP_RB0_CNTL, tmp);
4308
4309	rb_addr = ring->gpu_addr >> 8;
4310	WREG32(mmCP_RB0_BASE, rb_addr);
4311	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4312
4313	gfx_v8_0_set_cpg_door_bell(adev, ring);
4314	/* start the ring */
4315	amdgpu_ring_clear_ring(ring);
4316	gfx_v8_0_cp_gfx_start(adev);
4317	ring->sched.ready = true;
4318
4319	return 0;
4320}
4321
4322static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4323{
4324	if (enable) {
4325		WREG32(mmCP_MEC_CNTL, 0);
4326	} else {
4327		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4328		adev->gfx.kiq.ring.sched.ready = false;
4329	}
4330	udelay(50);
4331}
4332
4333/* KIQ functions */
4334static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4335{
4336	uint32_t tmp;
4337	struct amdgpu_device *adev = ring->adev;
4338
4339	/* tell RLC which is KIQ queue */
4340	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4341	tmp &= 0xffffff00;
4342	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4343	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4344	tmp |= 0x80;
4345	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4346}
4347
4348static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4349{
4350	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4351	uint64_t queue_mask = 0;
4352	int r, i;
4353
4354	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4355		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4356			continue;
4357
4358		/* This situation may be hit in the future if a new HW
4359		 * generation exposes more than 64 queues. If so, the
4360		 * definition of queue_mask needs updating */
4361		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4362			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4363			break;
4364		}
4365
4366		queue_mask |= (1ull << i);
4367	}
4368
4369	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4370	if (r) {
4371		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4372		return r;
4373	}
4374	/* set resources */
4375	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4376	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4377	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4378	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4379	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4380	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4381	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4382	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4383	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4384		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4385		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4386		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4387
4388		/* map queues */
4389		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4390		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4391		amdgpu_ring_write(kiq_ring,
4392				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4393		amdgpu_ring_write(kiq_ring,
4394				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4395				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4396				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4397				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4398		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4399		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4400		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4401		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4402	}
4403
4404	amdgpu_ring_commit(kiq_ring);
4405
4406	return 0;
4407}
4408
4409static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4410{
4411	int i, r = 0;
4412
4413	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4414		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4415		for (i = 0; i < adev->usec_timeout; i++) {
4416			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4417				break;
4418			udelay(1);
4419		}
4420		if (i == adev->usec_timeout)
4421			r = -ETIMEDOUT;
4422	}
4423	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4424	WREG32(mmCP_HQD_PQ_RPTR, 0);
4425	WREG32(mmCP_HQD_PQ_WPTR, 0);
4426
4427	return r;
4428}
4429
4430static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4431{
4432	struct amdgpu_device *adev = ring->adev;
4433
4434	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4435		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
4436			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4437			mqd->cp_hqd_queue_priority =
4438				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4439		}
4440	}
4441}
4442
4443static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4444{
4445	struct amdgpu_device *adev = ring->adev;
4446	struct vi_mqd *mqd = ring->mqd_ptr;
4447	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4448	uint32_t tmp;
4449
4450	mqd->header = 0xC0310800;
4451	mqd->compute_pipelinestat_enable = 0x00000001;
4452	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4453	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4454	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4455	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4456	mqd->compute_misc_reserved = 0x00000003;
4457	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4458						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4459	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4460						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4461	eop_base_addr = ring->eop_gpu_addr >> 8;
4462	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4463	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4464
4465	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4466	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4467	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4468			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4469
4470	mqd->cp_hqd_eop_control = tmp;
4471
4472	/* enable doorbell? */
4473	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4474			    CP_HQD_PQ_DOORBELL_CONTROL,
4475			    DOORBELL_EN,
4476			    ring->use_doorbell ? 1 : 0);
4477
4478	mqd->cp_hqd_pq_doorbell_control = tmp;
4479
4480	/* set the pointer to the MQD */
4481	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4482	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4483
4484	/* set MQD vmid to 0 */
4485	tmp = RREG32(mmCP_MQD_CONTROL);
4486	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4487	mqd->cp_mqd_control = tmp;
4488
4489	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4490	hqd_gpu_addr = ring->gpu_addr >> 8;
4491	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4492	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4493
4494	/* set up the HQD, this is similar to CP_RB0_CNTL */
4495	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4496	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4497			    (order_base_2(ring->ring_size / 4) - 1));
4498	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4499			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4500#ifdef __BIG_ENDIAN
4501	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4502#endif
4503	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4504	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4505	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4506	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4507	mqd->cp_hqd_pq_control = tmp;
4508
4509	/* set the wb address whether it's enabled or not */
4510	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4511	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4512	mqd->cp_hqd_pq_rptr_report_addr_hi =
4513		upper_32_bits(wb_gpu_addr) & 0xffff;
4514
4515	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4516	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4517	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4518	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4519
4520	tmp = 0;
4521	/* enable the doorbell if requested */
4522	if (ring->use_doorbell) {
4523		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4524		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4525				DOORBELL_OFFSET, ring->doorbell_index);
4526
4527		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4528					 DOORBELL_EN, 1);
4529		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4530					 DOORBELL_SOURCE, 0);
4531		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4532					 DOORBELL_HIT, 0);
4533	}
4534
4535	mqd->cp_hqd_pq_doorbell_control = tmp;
4536
4537	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4538	ring->wptr = 0;
4539	mqd->cp_hqd_pq_wptr = ring->wptr;
4540	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4541
4542	/* set the vmid for the queue */
4543	mqd->cp_hqd_vmid = 0;
4544
4545	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4546	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4547	mqd->cp_hqd_persistent_state = tmp;
4548
4549	/* set MTYPE */
4550	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4551	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4552	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4553	mqd->cp_hqd_ib_control = tmp;
4554
4555	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4556	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4557	mqd->cp_hqd_iq_timer = tmp;
4558
4559	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4560	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4561	mqd->cp_hqd_ctx_save_control = tmp;
4562
4563	/* defaults */
4564	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4565	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4566	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4567	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4568	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4569	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4570	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4571	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4572	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4573	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4574	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4575	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4576
4577	/* set static priority for a queue/ring */
4578	gfx_v8_0_mqd_set_priority(ring, mqd);
4579	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4580
4581	/* map_queues packet doesn't need activate the queue,
4582	 * so only kiq need set this field.
4583	 */
4584	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4585		mqd->cp_hqd_active = 1;
4586
4587	return 0;
4588}
4589
4590static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4591			struct vi_mqd *mqd)
4592{
4593	uint32_t mqd_reg;
4594	uint32_t *mqd_data;
4595
4596	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4597	mqd_data = &mqd->cp_mqd_base_addr_lo;
4598
4599	/* disable wptr polling */
4600	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4601
4602	/* program all HQD registers */
4603	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4604		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4605
4606	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4607	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4608	 * on ASICs that do not support context-save.
4609	 * EOP writes/reads can start anywhere in the ring.
4610	 */
4611	if (adev->asic_type != CHIP_TONGA) {
4612		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4613		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4614		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4615	}
4616
4617	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4618		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4619
4620	/* activate the HQD */
4621	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4622		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4623
4624	return 0;
4625}
4626
4627static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4628{
4629	struct amdgpu_device *adev = ring->adev;
4630	struct vi_mqd *mqd = ring->mqd_ptr;
4631	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4632
4633	gfx_v8_0_kiq_setting(ring);
4634
4635	if (adev->in_gpu_reset) { /* for GPU_RESET case */
4636		/* reset MQD to a clean status */
4637		if (adev->gfx.mec.mqd_backup[mqd_idx])
4638			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4639
4640		/* reset ring buffer */
4641		ring->wptr = 0;
4642		amdgpu_ring_clear_ring(ring);
4643		mutex_lock(&adev->srbm_mutex);
4644		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4645		gfx_v8_0_mqd_commit(adev, mqd);
4646		vi_srbm_select(adev, 0, 0, 0, 0);
4647		mutex_unlock(&adev->srbm_mutex);
4648	} else {
4649		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4650		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4651		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
 
 
4652		mutex_lock(&adev->srbm_mutex);
4653		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4654		gfx_v8_0_mqd_init(ring);
4655		gfx_v8_0_mqd_commit(adev, mqd);
4656		vi_srbm_select(adev, 0, 0, 0, 0);
4657		mutex_unlock(&adev->srbm_mutex);
4658
4659		if (adev->gfx.mec.mqd_backup[mqd_idx])
4660			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4661	}
4662
4663	return 0;
4664}
4665
4666static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4667{
4668	struct amdgpu_device *adev = ring->adev;
4669	struct vi_mqd *mqd = ring->mqd_ptr;
4670	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4671
4672	if (!adev->in_gpu_reset && !adev->in_suspend) {
4673		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4674		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4675		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4676		mutex_lock(&adev->srbm_mutex);
4677		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4678		gfx_v8_0_mqd_init(ring);
4679		vi_srbm_select(adev, 0, 0, 0, 0);
4680		mutex_unlock(&adev->srbm_mutex);
4681
4682		if (adev->gfx.mec.mqd_backup[mqd_idx])
4683			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4684	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4685		/* reset MQD to a clean status */
4686		if (adev->gfx.mec.mqd_backup[mqd_idx])
4687			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4688		/* reset ring buffer */
4689		ring->wptr = 0;
4690		amdgpu_ring_clear_ring(ring);
4691	} else {
4692		amdgpu_ring_clear_ring(ring);
4693	}
4694	return 0;
4695}
4696
4697static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4698{
4699	if (adev->asic_type > CHIP_TONGA) {
4700		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4701		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4702	}
4703	/* enable doorbells */
4704	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4705}
4706
4707static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4708{
4709	struct amdgpu_ring *ring;
4710	int r;
4711
4712	ring = &adev->gfx.kiq.ring;
4713
4714	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4715	if (unlikely(r != 0))
4716		return r;
4717
4718	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4719	if (unlikely(r != 0))
 
4720		return r;
 
4721
4722	gfx_v8_0_kiq_init_queue(ring);
4723	amdgpu_bo_kunmap(ring->mqd_obj);
4724	ring->mqd_ptr = NULL;
4725	amdgpu_bo_unreserve(ring->mqd_obj);
4726	ring->sched.ready = true;
4727	return 0;
4728}
4729
4730static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4731{
4732	struct amdgpu_ring *ring = NULL;
4733	int r = 0, i;
4734
4735	gfx_v8_0_cp_compute_enable(adev, true);
4736
4737	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4738		ring = &adev->gfx.compute_ring[i];
4739
4740		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4741		if (unlikely(r != 0))
4742			goto done;
4743		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4744		if (!r) {
4745			r = gfx_v8_0_kcq_init_queue(ring);
4746			amdgpu_bo_kunmap(ring->mqd_obj);
4747			ring->mqd_ptr = NULL;
4748		}
4749		amdgpu_bo_unreserve(ring->mqd_obj);
4750		if (r)
4751			goto done;
4752	}
4753
4754	gfx_v8_0_set_mec_doorbell_range(adev);
4755
4756	r = gfx_v8_0_kiq_kcq_enable(adev);
4757	if (r)
4758		goto done;
4759
4760done:
4761	return r;
4762}
4763
4764static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4765{
4766	int r, i;
4767	struct amdgpu_ring *ring;
4768
4769	/* collect all the ring_tests here, gfx, kiq, compute */
4770	ring = &adev->gfx.gfx_ring[0];
4771	r = amdgpu_ring_test_helper(ring);
4772	if (r)
4773		return r;
4774
4775	ring = &adev->gfx.kiq.ring;
4776	r = amdgpu_ring_test_helper(ring);
4777	if (r)
4778		return r;
4779
4780	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4781		ring = &adev->gfx.compute_ring[i];
4782		amdgpu_ring_test_helper(ring);
4783	}
4784
4785	return 0;
4786}
4787
4788static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4789{
4790	int r;
4791
4792	if (!(adev->flags & AMD_IS_APU))
4793		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4794
4795	r = gfx_v8_0_kiq_resume(adev);
4796	if (r)
4797		return r;
4798
4799	r = gfx_v8_0_cp_gfx_resume(adev);
4800	if (r)
4801		return r;
4802
4803	r = gfx_v8_0_kcq_resume(adev);
4804	if (r)
4805		return r;
4806
4807	r = gfx_v8_0_cp_test_all_rings(adev);
4808	if (r)
4809		return r;
4810
4811	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4812
4813	return 0;
4814}
4815
4816static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4817{
4818	gfx_v8_0_cp_gfx_enable(adev, enable);
4819	gfx_v8_0_cp_compute_enable(adev, enable);
4820}
4821
4822static int gfx_v8_0_hw_init(void *handle)
4823{
4824	int r;
4825	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4826
4827	gfx_v8_0_init_golden_registers(adev);
4828	gfx_v8_0_constants_init(adev);
4829
4830	r = adev->gfx.rlc.funcs->resume(adev);
4831	if (r)
4832		return r;
4833
4834	r = gfx_v8_0_cp_resume(adev);
4835
4836	return r;
4837}
4838
4839static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4840{
4841	int r, i;
4842	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4843
4844	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4845	if (r)
4846		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4847
4848	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4849		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4850
4851		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4852		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4853						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4854						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4855						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4856						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4857		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4858		amdgpu_ring_write(kiq_ring, 0);
4859		amdgpu_ring_write(kiq_ring, 0);
4860		amdgpu_ring_write(kiq_ring, 0);
4861	}
4862	r = amdgpu_ring_test_helper(kiq_ring);
4863	if (r)
4864		DRM_ERROR("KCQ disable failed\n");
4865
4866	return r;
4867}
4868
4869static bool gfx_v8_0_is_idle(void *handle)
4870{
4871	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4872
4873	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4874		|| RREG32(mmGRBM_STATUS2) != 0x8)
4875		return false;
4876	else
4877		return true;
4878}
4879
4880static bool gfx_v8_0_rlc_is_idle(void *handle)
4881{
4882	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4883
4884	if (RREG32(mmGRBM_STATUS2) != 0x8)
4885		return false;
4886	else
4887		return true;
4888}
4889
4890static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4891{
4892	unsigned int i;
4893	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4894
4895	for (i = 0; i < adev->usec_timeout; i++) {
4896		if (gfx_v8_0_rlc_is_idle(handle))
4897			return 0;
4898
4899		udelay(1);
4900	}
4901	return -ETIMEDOUT;
4902}
4903
4904static int gfx_v8_0_wait_for_idle(void *handle)
4905{
4906	unsigned int i;
4907	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4908
4909	for (i = 0; i < adev->usec_timeout; i++) {
4910		if (gfx_v8_0_is_idle(handle))
4911			return 0;
4912
4913		udelay(1);
4914	}
4915	return -ETIMEDOUT;
4916}
4917
4918static int gfx_v8_0_hw_fini(void *handle)
4919{
4920	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4921
4922	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4923	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4924
4925	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4926
4927	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4928
4929	/* disable KCQ to avoid CPC touch memory not valid anymore */
4930	gfx_v8_0_kcq_disable(adev);
4931
4932	if (amdgpu_sriov_vf(adev)) {
4933		pr_debug("For SRIOV client, shouldn't do anything.\n");
4934		return 0;
4935	}
4936	amdgpu_gfx_rlc_enter_safe_mode(adev);
4937	if (!gfx_v8_0_wait_for_idle(adev))
4938		gfx_v8_0_cp_enable(adev, false);
4939	else
4940		pr_err("cp is busy, skip halt cp\n");
4941	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4942		adev->gfx.rlc.funcs->stop(adev);
4943	else
4944		pr_err("rlc is busy, skip halt rlc\n");
4945	amdgpu_gfx_rlc_exit_safe_mode(adev);
4946
4947	return 0;
4948}
4949
/* Suspend is the same operation as hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
4954
/* Resume is the same operation as hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
4959
4960static bool gfx_v8_0_check_soft_reset(void *handle)
4961{
4962	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4963	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4964	u32 tmp;
4965
4966	/* GRBM_STATUS */
4967	tmp = RREG32(mmGRBM_STATUS);
4968	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4969		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4970		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4971		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4972		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4973		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4974		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4975		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4976						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4977		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4978						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4979		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4980						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4981	}
4982
4983	/* GRBM_STATUS2 */
4984	tmp = RREG32(mmGRBM_STATUS2);
4985	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4986		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4987						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4988
4989	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4990	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4991	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4992		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4993						SOFT_RESET_CPF, 1);
4994		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4995						SOFT_RESET_CPC, 1);
4996		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4997						SOFT_RESET_CPG, 1);
4998		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4999						SOFT_RESET_GRBM, 1);
5000	}
5001
5002	/* SRBM_STATUS */
5003	tmp = RREG32(mmSRBM_STATUS);
5004	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5005		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5006						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5007	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5008		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5009						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5010
5011	if (grbm_soft_reset || srbm_soft_reset) {
5012		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5013		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5014		return true;
5015	} else {
5016		adev->gfx.grbm_soft_reset = 0;
5017		adev->gfx.srbm_soft_reset = 0;
5018		return false;
5019	}
5020}
5021
5022static int gfx_v8_0_pre_soft_reset(void *handle)
5023{
5024	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5025	u32 grbm_soft_reset = 0;
5026
5027	if ((!adev->gfx.grbm_soft_reset) &&
5028	    (!adev->gfx.srbm_soft_reset))
5029		return 0;
5030
5031	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5032
5033	/* stop the rlc */
5034	adev->gfx.rlc.funcs->stop(adev);
5035
5036	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5037	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5038		/* Disable GFX parsing/prefetching */
5039		gfx_v8_0_cp_gfx_enable(adev, false);
5040
5041	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5042	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5043	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5044	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5045		int i;
5046
5047		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5048			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5049
5050			mutex_lock(&adev->srbm_mutex);
5051			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5052			gfx_v8_0_deactivate_hqd(adev, 2);
5053			vi_srbm_select(adev, 0, 0, 0, 0);
5054			mutex_unlock(&adev->srbm_mutex);
5055		}
5056		/* Disable MEC parsing/prefetching */
5057		gfx_v8_0_cp_compute_enable(adev, false);
5058	}
5059
5060       return 0;
5061}
5062
5063static int gfx_v8_0_soft_reset(void *handle)
5064{
5065	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5066	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5067	u32 tmp;
5068
5069	if ((!adev->gfx.grbm_soft_reset) &&
5070	    (!adev->gfx.srbm_soft_reset))
5071		return 0;
5072
5073	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5074	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5075
5076	if (grbm_soft_reset || srbm_soft_reset) {
5077		tmp = RREG32(mmGMCON_DEBUG);
5078		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5079		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5080		WREG32(mmGMCON_DEBUG, tmp);
5081		udelay(50);
5082	}
5083
5084	if (grbm_soft_reset) {
5085		tmp = RREG32(mmGRBM_SOFT_RESET);
5086		tmp |= grbm_soft_reset;
5087		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5088		WREG32(mmGRBM_SOFT_RESET, tmp);
5089		tmp = RREG32(mmGRBM_SOFT_RESET);
5090
5091		udelay(50);
5092
5093		tmp &= ~grbm_soft_reset;
5094		WREG32(mmGRBM_SOFT_RESET, tmp);
5095		tmp = RREG32(mmGRBM_SOFT_RESET);
5096	}
5097
5098	if (srbm_soft_reset) {
5099		tmp = RREG32(mmSRBM_SOFT_RESET);
5100		tmp |= srbm_soft_reset;
5101		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5102		WREG32(mmSRBM_SOFT_RESET, tmp);
5103		tmp = RREG32(mmSRBM_SOFT_RESET);
5104
5105		udelay(50);
5106
5107		tmp &= ~srbm_soft_reset;
5108		WREG32(mmSRBM_SOFT_RESET, tmp);
5109		tmp = RREG32(mmSRBM_SOFT_RESET);
5110	}
5111
5112	if (grbm_soft_reset || srbm_soft_reset) {
5113		tmp = RREG32(mmGMCON_DEBUG);
5114		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5115		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5116		WREG32(mmGMCON_DEBUG, tmp);
5117	}
5118
5119	/* Wait a little for things to settle down */
5120	udelay(50);
5121
5122	return 0;
5123}
5124
5125static int gfx_v8_0_post_soft_reset(void *handle)
5126{
5127	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5128	u32 grbm_soft_reset = 0;
5129
5130	if ((!adev->gfx.grbm_soft_reset) &&
5131	    (!adev->gfx.srbm_soft_reset))
5132		return 0;
5133
5134	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5135
5136	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5137	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5138	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5139	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5140		int i;
5141
5142		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5143			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5144
5145			mutex_lock(&adev->srbm_mutex);
5146			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5147			gfx_v8_0_deactivate_hqd(adev, 2);
5148			vi_srbm_select(adev, 0, 0, 0, 0);
5149			mutex_unlock(&adev->srbm_mutex);
5150		}
5151		gfx_v8_0_kiq_resume(adev);
5152		gfx_v8_0_kcq_resume(adev);
5153	}
5154
5155	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5156	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5157		gfx_v8_0_cp_gfx_resume(adev);
5158
5159	gfx_v8_0_cp_test_all_rings(adev);
5160
5161	adev->gfx.rlc.funcs->start(adev);
5162
5163	return 0;
5164}
5165
5166/**
5167 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5168 *
5169 * @adev: amdgpu_device pointer
5170 *
5171 * Fetches a GPU clock counter snapshot.
5172 * Returns the 64 bit clock counter snapshot.
5173 */
5174static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5175{
5176	uint64_t clock;
5177
5178	mutex_lock(&adev->gfx.gpu_clock_mutex);
5179	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5180	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5181		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5182	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5183	return clock;
5184}
5185
5186static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5187					  uint32_t vmid,
5188					  uint32_t gds_base, uint32_t gds_size,
5189					  uint32_t gws_base, uint32_t gws_size,
5190					  uint32_t oa_base, uint32_t oa_size)
5191{
5192	/* GDS Base */
5193	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5194	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5195				WRITE_DATA_DST_SEL(0)));
5196	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5197	amdgpu_ring_write(ring, 0);
5198	amdgpu_ring_write(ring, gds_base);
5199
5200	/* GDS Size */
5201	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5202	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5203				WRITE_DATA_DST_SEL(0)));
5204	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5205	amdgpu_ring_write(ring, 0);
5206	amdgpu_ring_write(ring, gds_size);
5207
5208	/* GWS */
5209	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5210	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5211				WRITE_DATA_DST_SEL(0)));
5212	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5213	amdgpu_ring_write(ring, 0);
5214	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5215
5216	/* OA */
5217	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5218	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5219				WRITE_DATA_DST_SEL(0)));
5220	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5221	amdgpu_ring_write(ring, 0);
5222	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5223}
5224
5225static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5226{
5227	WREG32(mmSQ_IND_INDEX,
5228		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5229		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5230		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5231		(SQ_IND_INDEX__FORCE_READ_MASK));
5232	return RREG32(mmSQ_IND_DATA);
5233}
5234
5235static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5236			   uint32_t wave, uint32_t thread,
5237			   uint32_t regno, uint32_t num, uint32_t *out)
5238{
5239	WREG32(mmSQ_IND_INDEX,
5240		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5241		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5242		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5243		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5244		(SQ_IND_INDEX__FORCE_READ_MASK) |
5245		(SQ_IND_INDEX__AUTO_INCR_MASK));
5246	while (num--)
5247		*(out++) = RREG32(mmSQ_IND_DATA);
5248}
5249
5250static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5251{
5252	/* type 0 wave data */
5253	dst[(*no_fields)++] = 0;
5254	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5255	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5256	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5257	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5258	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5259	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5260	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5261	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5262	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5263	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5264	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5265	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5266	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5267	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5268	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5269	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5270	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5271	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
 
5272}
5273
5274static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5275				     uint32_t wave, uint32_t start,
5276				     uint32_t size, uint32_t *dst)
5277{
5278	wave_read_regs(
5279		adev, simd, wave, 0,
5280		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5281}
5282
5283
5284static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5285	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5286	.select_se_sh = &gfx_v8_0_select_se_sh,
5287	.read_wave_data = &gfx_v8_0_read_wave_data,
5288	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5289	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5290};
5291
5292static int gfx_v8_0_early_init(void *handle)
5293{
5294	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5295
 
5296	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5297	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 
5298	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5299	gfx_v8_0_set_ring_funcs(adev);
5300	gfx_v8_0_set_irq_funcs(adev);
5301	gfx_v8_0_set_gds_init(adev);
5302	gfx_v8_0_set_rlc_funcs(adev);
5303
5304	return 0;
5305}
5306
5307static int gfx_v8_0_late_init(void *handle)
5308{
5309	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5310	int r;
5311
5312	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5313	if (r)
5314		return r;
5315
5316	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5317	if (r)
5318		return r;
5319
5320	/* requires IBs so do in late init after IB pool is initialized */
5321	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5322	if (r)
5323		return r;
5324
5325	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5326	if (r) {
5327		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5328		return r;
5329	}
5330
5331	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5332	if (r) {
5333		DRM_ERROR(
5334			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5335			r);
5336		return r;
5337	}
5338
5339	return 0;
5340}
5341
5342static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5343						       bool enable)
5344{
5345	if (((adev->asic_type == CHIP_POLARIS11) ||
5346	    (adev->asic_type == CHIP_POLARIS12) ||
5347	    (adev->asic_type == CHIP_VEGAM)) &&
5348	    adev->powerplay.pp_funcs->set_powergating_by_smu)
5349		/* Send msg to SMU via Powerplay */
5350		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5351
5352	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5353}
5354
5355static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5356							bool enable)
5357{
5358	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5359}
5360
5361static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5362		bool enable)
5363{
5364	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5365}
5366
5367static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5368					  bool enable)
5369{
5370	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5371}
5372
5373static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5374						bool enable)
5375{
5376	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5377
5378	/* Read any GFX register to wake up GFX. */
5379	if (!enable)
5380		RREG32(mmDB_RENDER_CONTROL);
5381}
5382
5383static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5384					  bool enable)
5385{
5386	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5387		cz_enable_gfx_cg_power_gating(adev, true);
5388		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5389			cz_enable_gfx_pipeline_power_gating(adev, true);
5390	} else {
5391		cz_enable_gfx_cg_power_gating(adev, false);
5392		cz_enable_gfx_pipeline_power_gating(adev, false);
5393	}
5394}
5395
5396static int gfx_v8_0_set_powergating_state(void *handle,
5397					  enum amd_powergating_state state)
5398{
5399	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5400	bool enable = (state == AMD_PG_STATE_GATE);
5401
5402	if (amdgpu_sriov_vf(adev))
5403		return 0;
5404
5405	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5406				AMD_PG_SUPPORT_RLC_SMU_HS |
5407				AMD_PG_SUPPORT_CP |
5408				AMD_PG_SUPPORT_GFX_DMG))
5409		amdgpu_gfx_rlc_enter_safe_mode(adev);
5410	switch (adev->asic_type) {
5411	case CHIP_CARRIZO:
5412	case CHIP_STONEY:
5413
5414		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5415			cz_enable_sck_slow_down_on_power_up(adev, true);
5416			cz_enable_sck_slow_down_on_power_down(adev, true);
5417		} else {
5418			cz_enable_sck_slow_down_on_power_up(adev, false);
5419			cz_enable_sck_slow_down_on_power_down(adev, false);
5420		}
5421		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5422			cz_enable_cp_power_gating(adev, true);
5423		else
5424			cz_enable_cp_power_gating(adev, false);
5425
5426		cz_update_gfx_cg_power_gating(adev, enable);
5427
5428		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5429			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5430		else
5431			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5432
5433		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5434			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5435		else
5436			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5437		break;
5438	case CHIP_POLARIS11:
5439	case CHIP_POLARIS12:
5440	case CHIP_VEGAM:
5441		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5442			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5443		else
5444			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5445
5446		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5447			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5448		else
5449			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5450
5451		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5452			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5453		else
5454			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5455		break;
5456	default:
5457		break;
5458	}
5459	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5460				AMD_PG_SUPPORT_RLC_SMU_HS |
5461				AMD_PG_SUPPORT_CP |
5462				AMD_PG_SUPPORT_GFX_DMG))
5463		amdgpu_gfx_rlc_exit_safe_mode(adev);
5464	return 0;
5465}
5466
5467static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5468{
5469	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5470	int data;
5471
5472	if (amdgpu_sriov_vf(adev))
5473		*flags = 0;
5474
5475	/* AMD_CG_SUPPORT_GFX_MGCG */
5476	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5477	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5478		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5479
5480	/* AMD_CG_SUPPORT_GFX_CGLG */
5481	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5482	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5483		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5484
5485	/* AMD_CG_SUPPORT_GFX_CGLS */
5486	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5487		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5488
5489	/* AMD_CG_SUPPORT_GFX_CGTS */
5490	data = RREG32(mmCGTS_SM_CTRL_REG);
5491	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5492		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5493
5494	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5495	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5496		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5497
5498	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5499	data = RREG32(mmRLC_MEM_SLP_CNTL);
5500	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5501		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5502
5503	/* AMD_CG_SUPPORT_GFX_CP_LS */
5504	data = RREG32(mmCP_MEM_SLP_CNTL);
5505	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5506		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5507}
5508
5509static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5510				     uint32_t reg_addr, uint32_t cmd)
5511{
5512	uint32_t data;
5513
5514	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5515
5516	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5517	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5518
5519	data = RREG32(mmRLC_SERDES_WR_CTRL);
5520	if (adev->asic_type == CHIP_STONEY)
5521		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5522			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5523			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5524			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5525			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5526			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5527			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5528			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5529			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5530	else
5531		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5532			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5533			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5534			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5535			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5536			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5537			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5538			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5539			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5540			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5541			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5542	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5543		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5544		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5545		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5546
5547	WREG32(mmRLC_SERDES_WR_CTRL, data);
5548}
5549
/* RLC safe-mode message codes */
#define MSG_EXIT_RLC_SAFE_MODE		0
#define MSG_ENTER_RLC_SAFE_MODE		1

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE occupies bits 1..4 */
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e
5556
5557static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5558{
5559	uint32_t rlc_setting;
5560
5561	rlc_setting = RREG32(mmRLC_CNTL);
5562	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5563		return false;
5564
5565	return true;
5566}
5567
5568static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5569{
5570	uint32_t data;
5571	unsigned i;
5572	data = RREG32(mmRLC_CNTL);
5573	data |= RLC_SAFE_MODE__CMD_MASK;
5574	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5575	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5576	WREG32(mmRLC_SAFE_MODE, data);
5577
5578	/* wait for RLC_SAFE_MODE */
5579	for (i = 0; i < adev->usec_timeout; i++) {
5580		if ((RREG32(mmRLC_GPM_STAT) &
5581		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5582		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5583		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5584		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5585			break;
5586		udelay(1);
5587	}
5588	for (i = 0; i < adev->usec_timeout; i++) {
5589		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5590			break;
5591		udelay(1);
5592	}
5593}
5594
5595static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5596{
5597	uint32_t data;
5598	unsigned i;
5599
5600	data = RREG32(mmRLC_CNTL);
5601	data |= RLC_SAFE_MODE__CMD_MASK;
5602	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5603	WREG32(mmRLC_SAFE_MODE, data);
5604
5605	for (i = 0; i < adev->usec_timeout; i++) {
5606		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5607			break;
5608		udelay(1);
5609	}
5610}
5611
5612static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5613{
5614	u32 data;
5615
 
 
5616	if (amdgpu_sriov_is_pp_one_vf(adev))
5617		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5618	else
5619		data = RREG32(mmRLC_SPM_VMID);
5620
5621	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5622	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5623
5624	if (amdgpu_sriov_is_pp_one_vf(adev))
5625		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5626	else
5627		WREG32(mmRLC_SPM_VMID, data);
 
 
5628}
5629
5630static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5631	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5632	.set_safe_mode = gfx_v8_0_set_safe_mode,
5633	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5634	.init = gfx_v8_0_rlc_init,
5635	.get_csb_size = gfx_v8_0_get_csb_size,
5636	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5637	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5638	.resume = gfx_v8_0_rlc_resume,
5639	.stop = gfx_v8_0_rlc_stop,
5640	.reset = gfx_v8_0_rlc_reset,
5641	.start = gfx_v8_0_rlc_start,
5642	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5643};
5644
5645static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5646						      bool enable)
5647{
5648	uint32_t temp, data;
5649
5650	amdgpu_gfx_rlc_enter_safe_mode(adev);
5651
5652	/* It is disabled by HW by default */
5653	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5654		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5655			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5656				/* 1 - RLC memory Light sleep */
5657				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5658
5659			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5660				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5661		}
5662
5663		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5664		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5665		if (adev->flags & AMD_IS_APU)
5666			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5667				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5668				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5669		else
5670			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5671				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5672				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5673				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5674
5675		if (temp != data)
5676			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5677
5678		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5679		gfx_v8_0_wait_for_rlc_serdes(adev);
5680
5681		/* 5 - clear mgcg override */
5682		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5683
5684		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5685			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5686			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5687			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5688			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5689			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5690			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5691			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5692			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5693				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5694			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5695			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5696			if (temp != data)
5697				WREG32(mmCGTS_SM_CTRL_REG, data);
5698		}
5699		udelay(50);
5700
5701		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5702		gfx_v8_0_wait_for_rlc_serdes(adev);
5703	} else {
5704		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5705		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5706		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5707				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5708				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5709				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5710		if (temp != data)
5711			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5712
5713		/* 2 - disable MGLS in RLC */
5714		data = RREG32(mmRLC_MEM_SLP_CNTL);
5715		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5716			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5717			WREG32(mmRLC_MEM_SLP_CNTL, data);
5718		}
5719
5720		/* 3 - disable MGLS in CP */
5721		data = RREG32(mmCP_MEM_SLP_CNTL);
5722		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5723			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5724			WREG32(mmCP_MEM_SLP_CNTL, data);
5725		}
5726
5727		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5728		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5729		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5730				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5731		if (temp != data)
5732			WREG32(mmCGTS_SM_CTRL_REG, data);
5733
5734		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5735		gfx_v8_0_wait_for_rlc_serdes(adev);
5736
5737		/* 6 - set mgcg override */
5738		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5739
5740		udelay(50);
5741
5742		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743		gfx_v8_0_wait_for_rlc_serdes(adev);
5744	}
5745
5746	amdgpu_gfx_rlc_exit_safe_mode(adev);
5747}
5748
5749static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5750						      bool enable)
5751{
5752	uint32_t temp, temp1, data, data1;
5753
5754	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5755
5756	amdgpu_gfx_rlc_enter_safe_mode(adev);
5757
5758	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5759		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5760		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5761		if (temp1 != data1)
5762			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5763
5764		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5765		gfx_v8_0_wait_for_rlc_serdes(adev);
5766
5767		/* 2 - clear cgcg override */
5768		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5769
5770		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5771		gfx_v8_0_wait_for_rlc_serdes(adev);
5772
5773		/* 3 - write cmd to set CGLS */
5774		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5775
5776		/* 4 - enable cgcg */
5777		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5778
5779		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5780			/* enable cgls*/
5781			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5782
5783			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5784			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5785
5786			if (temp1 != data1)
5787				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5788		} else {
5789			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5790		}
5791
5792		if (temp != data)
5793			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5794
5795		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5796		 * Cmp_busy/GFX_Idle interrupts
5797		 */
5798		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5799	} else {
5800		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5801		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5802
5803		/* TEST CGCG */
5804		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5805		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5806				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5807		if (temp1 != data1)
5808			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5809
5810		/* read gfx register to wake up cgcg */
5811		RREG32(mmCB_CGTT_SCLK_CTRL);
5812		RREG32(mmCB_CGTT_SCLK_CTRL);
5813		RREG32(mmCB_CGTT_SCLK_CTRL);
5814		RREG32(mmCB_CGTT_SCLK_CTRL);
5815
5816		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5817		gfx_v8_0_wait_for_rlc_serdes(adev);
5818
5819		/* write cmd to Set CGCG Overrride */
5820		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5821
5822		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5823		gfx_v8_0_wait_for_rlc_serdes(adev);
5824
5825		/* write cmd to Clear CGLS */
5826		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5827
5828		/* disable cgcg, cgls should be disabled too. */
5829		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5830			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5831		if (temp != data)
5832			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5833		/* enable interrupts again for PG */
5834		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5835	}
5836
5837	gfx_v8_0_wait_for_rlc_serdes(adev);
5838
5839	amdgpu_gfx_rlc_exit_safe_mode(adev);
5840}
5841static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5842					    bool enable)
5843{
5844	if (enable) {
5845		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5846		 * ===  MGCG + MGLS + TS(CG/LS) ===
5847		 */
5848		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5849		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5850	} else {
5851		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5852		 * ===  CGCG + CGLS ===
5853		 */
5854		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5855		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5856	}
5857	return 0;
5858}
5859
5860static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5861					  enum amd_clockgating_state state)
5862{
5863	uint32_t msg_id, pp_state = 0;
5864	uint32_t pp_support_state = 0;
5865
5866	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5867		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5868			pp_support_state = PP_STATE_SUPPORT_LS;
5869			pp_state = PP_STATE_LS;
5870		}
5871		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5872			pp_support_state |= PP_STATE_SUPPORT_CG;
5873			pp_state |= PP_STATE_CG;
5874		}
5875		if (state == AMD_CG_STATE_UNGATE)
5876			pp_state = 0;
5877
5878		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5879				PP_BLOCK_GFX_CG,
5880				pp_support_state,
5881				pp_state);
5882		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5883			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884	}
5885
5886	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5887		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5888			pp_support_state = PP_STATE_SUPPORT_LS;
5889			pp_state = PP_STATE_LS;
5890		}
5891
5892		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5893			pp_support_state |= PP_STATE_SUPPORT_CG;
5894			pp_state |= PP_STATE_CG;
5895		}
5896
5897		if (state == AMD_CG_STATE_UNGATE)
5898			pp_state = 0;
5899
5900		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5901				PP_BLOCK_GFX_MG,
5902				pp_support_state,
5903				pp_state);
5904		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5905			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5906	}
5907
5908	return 0;
5909}
5910
5911static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5912					  enum amd_clockgating_state state)
5913{
5914
5915	uint32_t msg_id, pp_state = 0;
5916	uint32_t pp_support_state = 0;
5917
5918	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5919		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5920			pp_support_state = PP_STATE_SUPPORT_LS;
5921			pp_state = PP_STATE_LS;
5922		}
5923		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5924			pp_support_state |= PP_STATE_SUPPORT_CG;
5925			pp_state |= PP_STATE_CG;
5926		}
5927		if (state == AMD_CG_STATE_UNGATE)
5928			pp_state = 0;
5929
5930		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5931				PP_BLOCK_GFX_CG,
5932				pp_support_state,
5933				pp_state);
5934		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5935			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5936	}
5937
5938	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5939		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5940			pp_support_state = PP_STATE_SUPPORT_LS;
5941			pp_state = PP_STATE_LS;
5942		}
5943		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5944			pp_support_state |= PP_STATE_SUPPORT_CG;
5945			pp_state |= PP_STATE_CG;
5946		}
5947		if (state == AMD_CG_STATE_UNGATE)
5948			pp_state = 0;
5949
5950		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5951				PP_BLOCK_GFX_3D,
5952				pp_support_state,
5953				pp_state);
5954		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5955			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5956	}
5957
5958	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5959		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5960			pp_support_state = PP_STATE_SUPPORT_LS;
5961			pp_state = PP_STATE_LS;
5962		}
5963
5964		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5965			pp_support_state |= PP_STATE_SUPPORT_CG;
5966			pp_state |= PP_STATE_CG;
5967		}
5968
5969		if (state == AMD_CG_STATE_UNGATE)
5970			pp_state = 0;
5971
5972		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5973				PP_BLOCK_GFX_MG,
5974				pp_support_state,
5975				pp_state);
5976		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5977			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5978	}
5979
5980	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5981		pp_support_state = PP_STATE_SUPPORT_LS;
5982
5983		if (state == AMD_CG_STATE_UNGATE)
5984			pp_state = 0;
5985		else
5986			pp_state = PP_STATE_LS;
5987
5988		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5989				PP_BLOCK_GFX_RLC,
5990				pp_support_state,
5991				pp_state);
5992		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5993			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5994	}
5995
5996	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5997		pp_support_state = PP_STATE_SUPPORT_LS;
5998
5999		if (state == AMD_CG_STATE_UNGATE)
6000			pp_state = 0;
6001		else
6002			pp_state = PP_STATE_LS;
6003		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6004			PP_BLOCK_GFX_CP,
6005			pp_support_state,
6006			pp_state);
6007		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6008			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6009	}
6010
6011	return 0;
6012}
6013
6014static int gfx_v8_0_set_clockgating_state(void *handle,
6015					  enum amd_clockgating_state state)
6016{
6017	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6018
6019	if (amdgpu_sriov_vf(adev))
6020		return 0;
6021
6022	switch (adev->asic_type) {
6023	case CHIP_FIJI:
6024	case CHIP_CARRIZO:
6025	case CHIP_STONEY:
6026		gfx_v8_0_update_gfx_clock_gating(adev,
6027						 state == AMD_CG_STATE_GATE);
6028		break;
6029	case CHIP_TONGA:
6030		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6031		break;
6032	case CHIP_POLARIS10:
6033	case CHIP_POLARIS11:
6034	case CHIP_POLARIS12:
6035	case CHIP_VEGAM:
6036		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6037		break;
6038	default:
6039		break;
6040	}
6041	return 0;
6042}
6043
6044static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6045{
6046	return ring->adev->wb.wb[ring->rptr_offs];
6047}
6048
6049static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6050{
6051	struct amdgpu_device *adev = ring->adev;
6052
6053	if (ring->use_doorbell)
6054		/* XXX check if swapping is necessary on BE */
6055		return ring->adev->wb.wb[ring->wptr_offs];
6056	else
6057		return RREG32(mmCP_RB0_WPTR);
6058}
6059
6060static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6061{
6062	struct amdgpu_device *adev = ring->adev;
6063
6064	if (ring->use_doorbell) {
6065		/* XXX check if swapping is necessary on BE */
6066		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6067		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6068	} else {
6069		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6070		(void)RREG32(mmCP_RB0_WPTR);
6071	}
6072}
6073
6074static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6075{
6076	u32 ref_and_mask, reg_mem_engine;
6077
6078	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6079	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6080		switch (ring->me) {
6081		case 1:
6082			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6083			break;
6084		case 2:
6085			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6086			break;
6087		default:
6088			return;
6089		}
6090		reg_mem_engine = 0;
6091	} else {
6092		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6093		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6094	}
6095
6096	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6097	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6098				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6099				 reg_mem_engine));
6100	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6101	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6102	amdgpu_ring_write(ring, ref_and_mask);
6103	amdgpu_ring_write(ring, ref_and_mask);
6104	amdgpu_ring_write(ring, 0x20); /* poll interval */
6105}
6106
6107static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6108{
6109	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6110	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6111		EVENT_INDEX(4));
6112
6113	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6114	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6115		EVENT_INDEX(0));
6116}
6117
6118static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6119					struct amdgpu_job *job,
6120					struct amdgpu_ib *ib,
6121					uint32_t flags)
6122{
6123	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6124	u32 header, control = 0;
6125
6126	if (ib->flags & AMDGPU_IB_FLAG_CE)
6127		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6128	else
6129		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6130
6131	control |= ib->length_dw | (vmid << 24);
6132
6133	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6134		control |= INDIRECT_BUFFER_PRE_ENB(1);
6135
6136		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6137			gfx_v8_0_ring_emit_de_meta(ring);
6138	}
6139
6140	amdgpu_ring_write(ring, header);
6141	amdgpu_ring_write(ring,
6142#ifdef __BIG_ENDIAN
6143			  (2 << 0) |
6144#endif
6145			  (ib->gpu_addr & 0xFFFFFFFC));
6146	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6147	amdgpu_ring_write(ring, control);
6148}
6149
6150static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6151					  struct amdgpu_job *job,
6152					  struct amdgpu_ib *ib,
6153					  uint32_t flags)
6154{
6155	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6156	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6157
6158	/* Currently, there is a high possibility to get wave ID mismatch
6159	 * between ME and GDS, leading to a hw deadlock, because ME generates
6160	 * different wave IDs than the GDS expects. This situation happens
6161	 * randomly when at least 5 compute pipes use GDS ordered append.
6162	 * The wave IDs generated by ME are also wrong after suspend/resume.
6163	 * Those are probably bugs somewhere else in the kernel driver.
6164	 *
6165	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6166	 * GDS to 0 for this ring (me/pipe).
6167	 */
6168	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6169		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6170		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6171		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6172	}
6173
6174	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6175	amdgpu_ring_write(ring,
6176#ifdef __BIG_ENDIAN
6177				(2 << 0) |
6178#endif
6179				(ib->gpu_addr & 0xFFFFFFFC));
6180	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6181	amdgpu_ring_write(ring, control);
6182}
6183
6184static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6185					 u64 seq, unsigned flags)
6186{
6187	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6188	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6189
6190	/* Workaround for cache flush problems. First send a dummy EOP
6191	 * event down the pipe with seq one below.
6192	 */
6193	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6194	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6195				 EOP_TC_ACTION_EN |
6196				 EOP_TC_WB_ACTION_EN |
6197				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6198				 EVENT_INDEX(5)));
6199	amdgpu_ring_write(ring, addr & 0xfffffffc);
6200	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6201				DATA_SEL(1) | INT_SEL(0));
6202	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6203	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6204
6205	/* Then send the real EOP event down the pipe:
6206	 * EVENT_WRITE_EOP - flush caches, send int */
6207	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6208	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6209				 EOP_TC_ACTION_EN |
6210				 EOP_TC_WB_ACTION_EN |
6211				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6212				 EVENT_INDEX(5)));
6213	amdgpu_ring_write(ring, addr & 0xfffffffc);
6214	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6215			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6216	amdgpu_ring_write(ring, lower_32_bits(seq));
6217	amdgpu_ring_write(ring, upper_32_bits(seq));
6218
6219}
6220
6221static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6222{
6223	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6224	uint32_t seq = ring->fence_drv.sync_seq;
6225	uint64_t addr = ring->fence_drv.gpu_addr;
6226
6227	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6228	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6229				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6230				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6231	amdgpu_ring_write(ring, addr & 0xfffffffc);
6232	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6233	amdgpu_ring_write(ring, seq);
6234	amdgpu_ring_write(ring, 0xffffffff);
6235	amdgpu_ring_write(ring, 4); /* poll interval */
6236}
6237
6238static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6239					unsigned vmid, uint64_t pd_addr)
6240{
6241	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6242
6243	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6244
6245	/* wait for the invalidate to complete */
6246	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6247	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6248				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6249				 WAIT_REG_MEM_ENGINE(0))); /* me */
6250	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6251	amdgpu_ring_write(ring, 0);
6252	amdgpu_ring_write(ring, 0); /* ref */
6253	amdgpu_ring_write(ring, 0); /* mask */
6254	amdgpu_ring_write(ring, 0x20); /* poll interval */
6255
6256	/* compute doesn't have PFP */
6257	if (usepfp) {
6258		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6259		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6260		amdgpu_ring_write(ring, 0x0);
6261	}
6262}
6263
6264static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6265{
6266	return ring->adev->wb.wb[ring->wptr_offs];
6267}
6268
6269static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6270{
6271	struct amdgpu_device *adev = ring->adev;
6272
6273	/* XXX check if swapping is necessary on BE */
6274	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6275	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6276}
6277
6278static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6279					     u64 addr, u64 seq,
6280					     unsigned flags)
6281{
6282	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6283	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6284
6285	/* RELEASE_MEM - flush caches, send int */
6286	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6287	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6288				 EOP_TC_ACTION_EN |
6289				 EOP_TC_WB_ACTION_EN |
6290				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6291				 EVENT_INDEX(5)));
6292	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6293	amdgpu_ring_write(ring, addr & 0xfffffffc);
6294	amdgpu_ring_write(ring, upper_32_bits(addr));
6295	amdgpu_ring_write(ring, lower_32_bits(seq));
6296	amdgpu_ring_write(ring, upper_32_bits(seq));
6297}
6298
6299static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6300					 u64 seq, unsigned int flags)
6301{
6302	/* we only allocate 32bit for each seq wb address */
6303	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6304
6305	/* write fence seq to the "addr" */
6306	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6307	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6308				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6309	amdgpu_ring_write(ring, lower_32_bits(addr));
6310	amdgpu_ring_write(ring, upper_32_bits(addr));
6311	amdgpu_ring_write(ring, lower_32_bits(seq));
6312
6313	if (flags & AMDGPU_FENCE_FLAG_INT) {
6314		/* set register to trigger INT */
6315		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6316		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6317					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6318		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6319		amdgpu_ring_write(ring, 0);
6320		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6321	}
6322}
6323
6324static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6325{
6326	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6327	amdgpu_ring_write(ring, 0);
6328}
6329
6330static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6331{
6332	uint32_t dw2 = 0;
6333
6334	if (amdgpu_sriov_vf(ring->adev))
6335		gfx_v8_0_ring_emit_ce_meta(ring);
6336
6337	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6338	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6339		gfx_v8_0_ring_emit_vgt_flush(ring);
6340		/* set load_global_config & load_global_uconfig */
6341		dw2 |= 0x8001;
6342		/* set load_cs_sh_regs */
6343		dw2 |= 0x01000000;
6344		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6345		dw2 |= 0x10002;
6346
6347		/* set load_ce_ram if preamble presented */
6348		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6349			dw2 |= 0x10000000;
6350	} else {
6351		/* still load_ce_ram if this is the first time preamble presented
6352		 * although there is no context switch happens.
6353		 */
6354		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6355			dw2 |= 0x10000000;
6356	}
6357
6358	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6359	amdgpu_ring_write(ring, dw2);
6360	amdgpu_ring_write(ring, 0);
6361}
6362
6363static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
 
6364{
6365	unsigned ret;
6366
6367	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6368	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6369	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6370	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
 
6371	ret = ring->wptr & ring->buf_mask;
6372	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
 
6373	return ret;
6374}
6375
6376static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6377{
6378	unsigned cur;
6379
6380	BUG_ON(offset > ring->buf_mask);
6381	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6382
6383	cur = (ring->wptr & ring->buf_mask) - 1;
6384	if (likely(cur > offset))
6385		ring->ring[offset] = cur - offset;
6386	else
6387		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6388}
6389
6390static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6391				    uint32_t reg_val_offs)
6392{
6393	struct amdgpu_device *adev = ring->adev;
6394
6395	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6396	amdgpu_ring_write(ring, 0 |	/* src: register*/
6397				(5 << 8) |	/* dst: memory */
6398				(1 << 20));	/* write confirm */
6399	amdgpu_ring_write(ring, reg);
6400	amdgpu_ring_write(ring, 0);
6401	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6402				reg_val_offs * 4));
6403	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6404				reg_val_offs * 4));
6405}
6406
6407static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6408				  uint32_t val)
6409{
6410	uint32_t cmd;
6411
6412	switch (ring->funcs->type) {
6413	case AMDGPU_RING_TYPE_GFX:
6414		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6415		break;
6416	case AMDGPU_RING_TYPE_KIQ:
6417		cmd = 1 << 16; /* no inc addr */
6418		break;
6419	default:
6420		cmd = WR_CONFIRM;
6421		break;
6422	}
6423
6424	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6425	amdgpu_ring_write(ring, cmd);
6426	amdgpu_ring_write(ring, reg);
6427	amdgpu_ring_write(ring, 0);
6428	amdgpu_ring_write(ring, val);
6429}
6430
6431static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6432{
6433	struct amdgpu_device *adev = ring->adev;
6434	uint32_t value = 0;
6435
6436	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6437	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6438	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6439	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6440	WREG32(mmSQ_CMD, value);
6441}
6442
6443static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6444						 enum amdgpu_interrupt_state state)
6445{
6446	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6447		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6448}
6449
6450static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6451						     int me, int pipe,
6452						     enum amdgpu_interrupt_state state)
6453{
6454	u32 mec_int_cntl, mec_int_cntl_reg;
6455
6456	/*
6457	 * amdgpu controls only the first MEC. That's why this function only
6458	 * handles the setting of interrupts for this specific MEC. All other
6459	 * pipes' interrupts are set by amdkfd.
6460	 */
6461
6462	if (me == 1) {
6463		switch (pipe) {
6464		case 0:
6465			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6466			break;
6467		case 1:
6468			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6469			break;
6470		case 2:
6471			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6472			break;
6473		case 3:
6474			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6475			break;
6476		default:
6477			DRM_DEBUG("invalid pipe %d\n", pipe);
6478			return;
6479		}
6480	} else {
6481		DRM_DEBUG("invalid me %d\n", me);
6482		return;
6483	}
6484
6485	switch (state) {
6486	case AMDGPU_IRQ_STATE_DISABLE:
6487		mec_int_cntl = RREG32(mec_int_cntl_reg);
6488		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6489		WREG32(mec_int_cntl_reg, mec_int_cntl);
6490		break;
6491	case AMDGPU_IRQ_STATE_ENABLE:
6492		mec_int_cntl = RREG32(mec_int_cntl_reg);
6493		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6494		WREG32(mec_int_cntl_reg, mec_int_cntl);
6495		break;
6496	default:
6497		break;
6498	}
6499}
6500
6501static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6502					     struct amdgpu_irq_src *source,
6503					     unsigned type,
6504					     enum amdgpu_interrupt_state state)
6505{
6506	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6507		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6508
6509	return 0;
6510}
6511
6512static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6513					      struct amdgpu_irq_src *source,
6514					      unsigned type,
6515					      enum amdgpu_interrupt_state state)
6516{
6517	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6518		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6519
6520	return 0;
6521}
6522
6523static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6524					    struct amdgpu_irq_src *src,
6525					    unsigned type,
6526					    enum amdgpu_interrupt_state state)
6527{
6528	switch (type) {
6529	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6530		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6531		break;
6532	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6533		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6534		break;
6535	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6536		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6537		break;
6538	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6539		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6540		break;
6541	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6542		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6543		break;
6544	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6545		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6546		break;
6547	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6548		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6549		break;
6550	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6551		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6552		break;
6553	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6554		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6555		break;
6556	default:
6557		break;
6558	}
6559	return 0;
6560}
6561
6562static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6563					 struct amdgpu_irq_src *source,
6564					 unsigned int type,
6565					 enum amdgpu_interrupt_state state)
6566{
6567	int enable_flag;
6568
6569	switch (state) {
6570	case AMDGPU_IRQ_STATE_DISABLE:
6571		enable_flag = 0;
6572		break;
6573
6574	case AMDGPU_IRQ_STATE_ENABLE:
6575		enable_flag = 1;
6576		break;
6577
6578	default:
6579		return -EINVAL;
6580	}
6581
6582	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6583	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6584	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6585	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6586	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588		     enable_flag);
6589	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590		     enable_flag);
6591	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592		     enable_flag);
6593	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594		     enable_flag);
6595	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596		     enable_flag);
6597	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6598		     enable_flag);
6599	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6600		     enable_flag);
6601	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6602		     enable_flag);
6603
6604	return 0;
6605}
6606
6607static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6608				     struct amdgpu_irq_src *source,
6609				     unsigned int type,
6610				     enum amdgpu_interrupt_state state)
6611{
6612	int enable_flag;
6613
6614	switch (state) {
6615	case AMDGPU_IRQ_STATE_DISABLE:
6616		enable_flag = 1;
6617		break;
6618
6619	case AMDGPU_IRQ_STATE_ENABLE:
6620		enable_flag = 0;
6621		break;
6622
6623	default:
6624		return -EINVAL;
6625	}
6626
6627	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6628		     enable_flag);
6629
6630	return 0;
6631}
6632
6633static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6634			    struct amdgpu_irq_src *source,
6635			    struct amdgpu_iv_entry *entry)
6636{
6637	int i;
6638	u8 me_id, pipe_id, queue_id;
6639	struct amdgpu_ring *ring;
6640
6641	DRM_DEBUG("IH: CP EOP\n");
6642	me_id = (entry->ring_id & 0x0c) >> 2;
6643	pipe_id = (entry->ring_id & 0x03) >> 0;
6644	queue_id = (entry->ring_id & 0x70) >> 4;
6645
6646	switch (me_id) {
6647	case 0:
6648		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6649		break;
6650	case 1:
6651	case 2:
6652		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6653			ring = &adev->gfx.compute_ring[i];
6654			/* Per-queue interrupt is supported for MEC starting from VI.
6655			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6656			  */
6657			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6658				amdgpu_fence_process(ring);
6659		}
6660		break;
6661	}
6662	return 0;
6663}
6664
6665static void gfx_v8_0_fault(struct amdgpu_device *adev,
6666			   struct amdgpu_iv_entry *entry)
6667{
6668	u8 me_id, pipe_id, queue_id;
6669	struct amdgpu_ring *ring;
6670	int i;
6671
6672	me_id = (entry->ring_id & 0x0c) >> 2;
6673	pipe_id = (entry->ring_id & 0x03) >> 0;
6674	queue_id = (entry->ring_id & 0x70) >> 4;
6675
6676	switch (me_id) {
6677	case 0:
6678		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6679		break;
6680	case 1:
6681	case 2:
6682		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6683			ring = &adev->gfx.compute_ring[i];
6684			if (ring->me == me_id && ring->pipe == pipe_id &&
6685			    ring->queue == queue_id)
6686				drm_sched_fault(&ring->sched);
6687		}
6688		break;
6689	}
6690}
6691
/*
 * IRQ source .process callback for privileged register faults: logs an
 * error and forwards the entry to gfx_v8_0_fault().  @source is not used.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6700
/*
 * IRQ source .process callback for privileged instruction faults: logs an
 * error and forwards the entry to gfx_v8_0_fault().  @source is not used.
 * Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6709
/*
 * IRQ source .process callback for CP EDC/ECC errors.  Only logs the event;
 * none of the arguments are inspected.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the message with a newline so the log line is complete */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6717
6718static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
 
6719{
6720	u32 enc, se_id, sh_id, cu_id;
6721	char type[20];
6722	int sq_edc_source = -1;
6723
6724	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6725	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6726
6727	switch (enc) {
6728		case 0:
6729			DRM_INFO("SQ general purpose intr detected:"
6730					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6731					"host_cmd_overflow %d, cmd_timestamp %d,"
6732					"reg_timestamp %d, thread_trace_buff_full %d,"
6733					"wlt %d, thread_trace %d.\n",
6734					se_id,
6735					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6736					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6737					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6738					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6739					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6740					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6741					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6742					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6743					);
6744			break;
6745		case 1:
6746		case 2:
6747
6748			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6749			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6750
6751			/*
6752			 * This function can be called either directly from ISR
6753			 * or from BH in which case we can access SQ_EDC_INFO
6754			 * instance
6755			 */
6756			if (in_task()) {
6757				mutex_lock(&adev->grbm_idx_mutex);
6758				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6759
6760				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6761
6762				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6763				mutex_unlock(&adev->grbm_idx_mutex);
6764			}
6765
6766			if (enc == 1)
6767				sprintf(type, "instruction intr");
6768			else
6769				sprintf(type, "EDC/ECC error");
6770
6771			DRM_INFO(
6772				"SQ %s detected: "
6773					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6774					"trap %s, sq_ed_info.source %s.\n",
6775					type, se_id, sh_id, cu_id,
6776					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6777					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6778					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6779					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6780					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6781				);
6782			break;
6783		default:
6784			DRM_ERROR("SQ invalid encoding type\n.");
6785	}
6786}
6787
6788static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6789{
6790
6791	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6792	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6793
6794	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6795}
6796
6797static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6798			   struct amdgpu_irq_src *source,
6799			   struct amdgpu_iv_entry *entry)
6800{
6801	unsigned ih_data = entry->src_data[0];
6802
6803	/*
6804	 * Try to submit work so SQ_EDC_INFO can be accessed from
6805	 * BH. If previous work submission hasn't finished yet
6806	 * just print whatever info is possible directly from the ISR.
6807	 */
6808	if (work_pending(&adev->gfx.sq_work.work)) {
6809		gfx_v8_0_parse_sq_irq(adev, ih_data);
6810	} else {
6811		adev->gfx.sq_work.ih_data = ih_data;
6812		schedule_work(&adev->gfx.sq_work.work);
6813	}
6814
6815	return 0;
6816}
6817
6818static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6819{
6820	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6821	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6822			  PACKET3_TC_ACTION_ENA |
6823			  PACKET3_SH_KCACHE_ACTION_ENA |
6824			  PACKET3_SH_ICACHE_ACTION_ENA |
6825			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6826	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6827	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6828	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6829}
6830
6831static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6832{
6833	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6834	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6835			  PACKET3_TC_ACTION_ENA |
6836			  PACKET3_SH_KCACHE_ACTION_ENA |
6837			  PACKET3_SH_ICACHE_ACTION_ENA |
6838			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6839	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6840	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6841	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6842	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6843	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6844}
6845
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6846static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6847	.name = "gfx_v8_0",
6848	.early_init = gfx_v8_0_early_init,
6849	.late_init = gfx_v8_0_late_init,
6850	.sw_init = gfx_v8_0_sw_init,
6851	.sw_fini = gfx_v8_0_sw_fini,
6852	.hw_init = gfx_v8_0_hw_init,
6853	.hw_fini = gfx_v8_0_hw_fini,
6854	.suspend = gfx_v8_0_suspend,
6855	.resume = gfx_v8_0_resume,
6856	.is_idle = gfx_v8_0_is_idle,
6857	.wait_for_idle = gfx_v8_0_wait_for_idle,
6858	.check_soft_reset = gfx_v8_0_check_soft_reset,
6859	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6860	.soft_reset = gfx_v8_0_soft_reset,
6861	.post_soft_reset = gfx_v8_0_post_soft_reset,
6862	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6863	.set_powergating_state = gfx_v8_0_set_powergating_state,
6864	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6865};
6866
6867static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6868	.type = AMDGPU_RING_TYPE_GFX,
6869	.align_mask = 0xff,
6870	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6871	.support_64bit_ptrs = false,
6872	.get_rptr = gfx_v8_0_ring_get_rptr,
6873	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6874	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6875	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6876		5 +  /* COND_EXEC */
6877		7 +  /* PIPELINE_SYNC */
6878		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6879		12 +  /* FENCE for VM_FLUSH */
6880		20 + /* GDS switch */
6881		4 + /* double SWITCH_BUFFER,
6882		       the first COND_EXEC jump to the place just
6883			   prior to this double SWITCH_BUFFER  */
6884		5 + /* COND_EXEC */
6885		7 +	 /*	HDP_flush */
6886		4 +	 /*	VGT_flush */
6887		14 + /*	CE_META */
6888		31 + /*	DE_META */
6889		3 + /* CNTX_CTRL */
6890		5 + /* HDP_INVL */
6891		12 + 12 + /* FENCE x2 */
6892		2 + /* SWITCH_BUFFER */
6893		5, /* SURFACE_SYNC */
6894	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6895	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6896	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6897	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6898	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6899	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6900	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6901	.test_ring = gfx_v8_0_ring_test_ring,
6902	.test_ib = gfx_v8_0_ring_test_ib,
6903	.insert_nop = amdgpu_ring_insert_nop,
6904	.pad_ib = amdgpu_ring_generic_pad_ib,
6905	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6906	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6907	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6908	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6909	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6910	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6911	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
6912};
6913
6914static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6915	.type = AMDGPU_RING_TYPE_COMPUTE,
6916	.align_mask = 0xff,
6917	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6918	.support_64bit_ptrs = false,
6919	.get_rptr = gfx_v8_0_ring_get_rptr,
6920	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6921	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6922	.emit_frame_size =
6923		20 + /* gfx_v8_0_ring_emit_gds_switch */
6924		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6925		5 + /* hdp_invalidate */
6926		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6927		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6928		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6929		7, /* gfx_v8_0_emit_mem_sync_compute */
 
 
6930	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6931	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6932	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6933	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6934	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6935	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6936	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6937	.test_ring = gfx_v8_0_ring_test_ring,
6938	.test_ib = gfx_v8_0_ring_test_ib,
6939	.insert_nop = amdgpu_ring_insert_nop,
6940	.pad_ib = amdgpu_ring_generic_pad_ib,
6941	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6942	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
 
6943};
6944
6945static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6946	.type = AMDGPU_RING_TYPE_KIQ,
6947	.align_mask = 0xff,
6948	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6949	.support_64bit_ptrs = false,
6950	.get_rptr = gfx_v8_0_ring_get_rptr,
6951	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6952	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6953	.emit_frame_size =
6954		20 + /* gfx_v8_0_ring_emit_gds_switch */
6955		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6956		5 + /* hdp_invalidate */
6957		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6958		17 + /* gfx_v8_0_ring_emit_vm_flush */
6959		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6960	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6961	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6962	.test_ring = gfx_v8_0_ring_test_ring,
6963	.insert_nop = amdgpu_ring_insert_nop,
6964	.pad_ib = amdgpu_ring_generic_pad_ib,
6965	.emit_rreg = gfx_v8_0_ring_emit_rreg,
6966	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6967};
6968
6969static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6970{
6971	int i;
6972
6973	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6974
6975	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6976		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6977
6978	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6979		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6980}
6981
6982static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6983	.set = gfx_v8_0_set_eop_interrupt_state,
6984	.process = gfx_v8_0_eop_irq,
6985};
6986
6987static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6988	.set = gfx_v8_0_set_priv_reg_fault_state,
6989	.process = gfx_v8_0_priv_reg_irq,
6990};
6991
6992static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6993	.set = gfx_v8_0_set_priv_inst_fault_state,
6994	.process = gfx_v8_0_priv_inst_irq,
6995};
6996
6997static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
6998	.set = gfx_v8_0_set_cp_ecc_int_state,
6999	.process = gfx_v8_0_cp_ecc_error_irq,
7000};
7001
7002static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7003	.set = gfx_v8_0_set_sq_int_state,
7004	.process = gfx_v8_0_sq_irq,
7005};
7006
7007static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7008{
7009	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7010	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7011
7012	adev->gfx.priv_reg_irq.num_types = 1;
7013	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7014
7015	adev->gfx.priv_inst_irq.num_types = 1;
7016	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7017
7018	adev->gfx.cp_ecc_error_irq.num_types = 1;
7019	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7020
7021	adev->gfx.sq_irq.num_types = 1;
7022	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7023}
7024
7025static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7026{
7027	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7028}
7029
7030static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7031{
7032	/* init asci gds info */
7033	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7034	adev->gds.gws_size = 64;
7035	adev->gds.oa_size = 16;
7036	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7037}
7038
7039static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7040						 u32 bitmap)
7041{
7042	u32 data;
7043
7044	if (!bitmap)
7045		return;
7046
7047	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7048	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7049
7050	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7051}
7052
7053static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7054{
7055	u32 data, mask;
7056
7057	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7058		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7059
7060	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7061
7062	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7063}
7064
7065static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7066{
7067	int i, j, k, counter, active_cu_number = 0;
7068	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7069	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7070	unsigned disable_masks[4 * 2];
7071	u32 ao_cu_num;
7072
7073	memset(cu_info, 0, sizeof(*cu_info));
7074
7075	if (adev->flags & AMD_IS_APU)
7076		ao_cu_num = 2;
7077	else
7078		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7079
7080	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7081
7082	mutex_lock(&adev->grbm_idx_mutex);
7083	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7084		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7085			mask = 1;
7086			ao_bitmap = 0;
7087			counter = 0;
7088			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7089			if (i < 4 && j < 2)
7090				gfx_v8_0_set_user_cu_inactive_bitmap(
7091					adev, disable_masks[i * 2 + j]);
7092			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7093			cu_info->bitmap[i][j] = bitmap;
7094
7095			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7096				if (bitmap & mask) {
7097					if (counter < ao_cu_num)
7098						ao_bitmap |= mask;
7099					counter ++;
7100				}
7101				mask <<= 1;
7102			}
7103			active_cu_number += counter;
7104			if (i < 2 && j < 2)
7105				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7106			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7107		}
7108	}
7109	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7110	mutex_unlock(&adev->grbm_idx_mutex);
7111
7112	cu_info->number = active_cu_number;
7113	cu_info->ao_cu_mask = ao_cu_mask;
7114	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7115	cu_info->max_waves_per_simd = 10;
7116	cu_info->max_scratch_slots_per_cu = 32;
7117	cu_info->wave_front_size = 64;
7118	cu_info->lds_size = 64;
7119}
7120
7121const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7122{
7123	.type = AMD_IP_BLOCK_TYPE_GFX,
7124	.major = 8,
7125	.minor = 0,
7126	.rev = 0,
7127	.funcs = &gfx_v8_0_ip_funcs,
7128};
7129
7130const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7131{
7132	.type = AMD_IP_BLOCK_TYPE_GFX,
7133	.major = 8,
7134	.minor = 1,
7135	.rev = 0,
7136	.funcs = &gfx_v8_0_ip_funcs,
7137};
7138
7139static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7140{
7141	uint64_t ce_payload_addr;
7142	int cnt_ce;
7143	union {
7144		struct vi_ce_ib_state regular;
7145		struct vi_ce_ib_state_chained_ib chained;
7146	} ce_payload = {};
7147
7148	if (ring->adev->virt.chained_ib_support) {
7149		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7150			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7151		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7152	} else {
7153		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7154			offsetof(struct vi_gfx_meta_data, ce_payload);
7155		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7156	}
7157
7158	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7159	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7160				WRITE_DATA_DST_SEL(8) |
7161				WR_CONFIRM) |
7162				WRITE_DATA_CACHE_POLICY(0));
7163	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7164	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7165	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7166}
7167
7168static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7169{
7170	uint64_t de_payload_addr, gds_addr, csa_addr;
7171	int cnt_de;
7172	union {
7173		struct vi_de_ib_state regular;
7174		struct vi_de_ib_state_chained_ib chained;
7175	} de_payload = {};
7176
7177	csa_addr = amdgpu_csa_vaddr(ring->adev);
7178	gds_addr = csa_addr + 4096;
7179	if (ring->adev->virt.chained_ib_support) {
7180		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7181		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7182		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7183		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7184	} else {
7185		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7186		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7187		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7188		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7189	}
7190
7191	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7192	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7193				WRITE_DATA_DST_SEL(8) |
7194				WR_CONFIRM) |
7195				WRITE_DATA_CACHE_POLICY(0));
7196	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7197	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7198	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7199}