Linux Audio

Check our new training course

Loading...
v4.17
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
 
 
  23#include <linux/kernel.h>
  24#include <linux/firmware.h>
  25#include <drm/drmP.h>
 
 
  26#include "amdgpu.h"
  27#include "amdgpu_gfx.h"
 
  28#include "vi.h"
  29#include "vi_structs.h"
  30#include "vid.h"
  31#include "amdgpu_ucode.h"
  32#include "amdgpu_atombios.h"
  33#include "atombios_i2c.h"
  34#include "clearstate_vi.h"
  35
  36#include "gmc/gmc_8_2_d.h"
  37#include "gmc/gmc_8_2_sh_mask.h"
  38
  39#include "oss/oss_3_0_d.h"
  40#include "oss/oss_3_0_sh_mask.h"
  41
  42#include "bif/bif_5_0_d.h"
  43#include "bif/bif_5_0_sh_mask.h"
  44#include "gca/gfx_8_0_d.h"
  45#include "gca/gfx_8_0_enum.h"
  46#include "gca/gfx_8_0_sh_mask.h"
  47#include "gca/gfx_8_0_enum.h"
  48
  49#include "dce/dce_10_0_d.h"
  50#include "dce/dce_10_0_sh_mask.h"
  51
  52#include "smu/smu_7_1_3_d.h"
  53
 
 
  /*
   * GFX8 sizing constants: the gfx ring count and the MEC HPD size.
   * NOTE(review): the unit of GFX8_MEC_HPD_SIZE (presumably bytes) is not
   * visible here -- confirm at its users.
   */
  54#define GFX8_NUM_GFX_RINGS     1
  55#define GFX8_MEC_HPD_SIZE 2048
  56
  /*
   * Per-ASIC golden GB_ADDR_CONFIG values. Each one equals the
   * mmGB_ADDR_CONFIG entry of the matching *_golden_common_all table in this
   * file (iceland for TOPAZ, cz for CARRIZO, polaris11, tonga).
   */
  57#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  58#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  59#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  60#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  61
  /*
   * Field builders: shift x into the named field position of
   * GB_TILE_MODE0 / GB_MACROTILE_MODE0 using the register's __SHIFT constant.
   * No mask is applied, so x must already fit within the field.
   */
  62#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  63#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  64#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  65#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  66#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  67#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  68#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  69#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  70#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  71
  /*
   * Single-bit masks (bits 0 through 5) named after fields of
   * RLC_CGTT_MGCG_OVERRIDE: CPF, RLC, MGCG, CGCG, CGLS and GRBM.
   */
  72#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  73#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  74#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  75#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  76#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  77#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  78
  79/* BPM SERDES command values: 1 = set, 0 = clear ("CLE" presumably abbreviates "clear") */
  80#define SET_BPM_SERDES_CMD    1
  81#define CLE_BPM_SERDES_CMD    0
  82
  83/* BPM register addresses, numbered from 0; BPM_REG_FGCG_MAX (5) is one past the last entry */
  84enum {
  85	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  86	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  87	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  88	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  89	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  90	BPM_REG_FGCG_MAX
  91};
  92
  /* NOTE(review): presumably the length of the RLC "format direct" register list; its users are not visible here -- confirm. */
  93#define RLC_FormatDirectRegListLength        14
  94
  /*
   * Firmware image names declared for this driver, grouped per ASIC:
   * carrizo, stoney, tonga, topaz, fiji, polaris11 and polaris10.
   * Every ASIC lists ce/pfp/me/mec/rlc images; stoney and topaz have no
   * mec2 image, and the polaris parts additionally list "_2" variants of
   * every image except rlc.
   */
  95MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
  96MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
  97MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
  98MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
  99MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 100MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 101
 102MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 103MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 104MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 105MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 106MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 109MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 111MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 112MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 113MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 114
 115MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 116MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 117MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 118MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 119MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 122MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 124MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 125MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 126MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 127
 128MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 129MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 130MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 131MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 132MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 133MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 134MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 135MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 136MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 137MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 138MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 139
 140MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 144MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 145MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 146MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 147MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 148MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 149MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 150MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 151
 
 
 
 
 
 
 
 
 
 
 
 
 /*
  * Firmware image names declared for polaris12: ce/pfp/me/mec/mec2 each
  * with a "_2" variant, plus a single rlc image.
  */
 152MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 153MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 155MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 156MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 157MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 158MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 163
 
 
 
 
 
 
 
 /*
  * GDS register table with one row per VMID (0..15). Each row names that
  * VMID's BASE, SIZE, GWS and OA registers, in that order; the field names
  * come from struct amdgpu_gds_reg_offset, which is declared elsewhere.
  */
 164static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 165{
 166	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 167	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 168	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 169	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 170	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 171	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 172	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 173	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 174	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 175	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 176	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 177	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 178	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 179	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 180	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 181	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 182};
 183
 /*
  * Tonga golden settings. Each row is a register (mm*) followed by two
  * 32-bit words -- presumably a bit mask and the value to apply, as
  * interpreted by amdgpu_device_program_register_sequence(); confirm
  * against that helper.
  * gfx_v8_0_init_golden_registers() programs this table for CHIP_TONGA,
  * after tonga_mgcg_cgcg_init and before tonga_golden_common_all.
  */
 184static const u32 golden_settings_tonga_a11[] =
 185{
 186	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 187	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 188	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 189	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 190	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 191	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 192	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 193	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 194	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 195	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 196	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 197	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 198	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 199	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 200	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 201	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 202};
 203
 /*
  * Tonga common golden values. Rows are (register, word, word) triples;
  * the middle word is 0xffffffff on every row (presumably a full-register
  * mask -- confirm against amdgpu_device_program_register_sequence()).
  * Programmed last for CHIP_TONGA in gfx_v8_0_init_golden_registers().
  */
 204static const u32 tonga_golden_common_all[] =
 205{
 206	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 207	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 208	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 209	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 210	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 211	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 212	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 213	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 214};
 215
 /*
  * Tonga clock-gating (MGCG/CGCG) init sequence, as (register, word, word)
  * triples: the CGTT_*_CLK_CTRL registers first, then the per-CU CGTS
  * registers for CU0..CU7, then CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL,
  * RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL. mmGRBM_GFX_INDEX is written with
  * 0xe0000000 before each of the first two groups.
  * Programmed first for CHIP_TONGA in gfx_v8_0_init_golden_registers().
  * NOTE(review): the middle word is presumably a mask -- confirm against
  * amdgpu_device_program_register_sequence().
  */
 216static const u32 tonga_mgcg_cgcg_init[] =
 217{
 218	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 219	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 220	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 221	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 222	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 223	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 224	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 225	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 226	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 227	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 228	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 229	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 230	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 231	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 232	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 233	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 234	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 235	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 236	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 237	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 238	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 239	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 240	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 241	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 242	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 243	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 244	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 245	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 246	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 247	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 248	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 249	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 250	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 251	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 252	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 253	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 254	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 255	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 256	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 257	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 258	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 259	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 260	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 261	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 262	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 263	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 264	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 265	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 266	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 267	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 268	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 269	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 270	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 271	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 272	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 273	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 274	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 275	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 276	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 277	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 278	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 279	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 280	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 281	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 282	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 283	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 284	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 285	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 286	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 287	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 288	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 289	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 290	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 291	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 292	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 293};
 294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 /*
  * Polaris11 golden settings, as (register, word, word) triples --
  * presumably register, bit mask and value for
  * amdgpu_device_program_register_sequence(); confirm against that helper.
  * gfx_v8_0_init_golden_registers() programs this table for both
  * CHIP_POLARIS11 and CHIP_POLARIS12, before polaris11_golden_common_all.
  */
 295static const u32 golden_settings_polaris11_a11[] =
 296{
 297	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 298	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 299	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 300	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 301	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 302	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 303	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 304	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 305	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 306	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 307	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 308	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 309	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 310	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 311	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 312	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 313	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 314};
 315
 /*
  * Polaris11 common golden values, as (register, word, word) triples; the
  * middle word is 0xffffffff on every row. Unlike the other
  * *_golden_common_all tables in this file it has no PA_SC_RASTER_CONFIG
  * rows. Programmed after golden_settings_polaris11_a11 for
  * CHIP_POLARIS11 and CHIP_POLARIS12 in gfx_v8_0_init_golden_registers().
  */
 316static const u32 polaris11_golden_common_all[] =
 317{
 318	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 319	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 320	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 321	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 322	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 323	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 324};
 325
 /*
  * Polaris10 golden settings, as (register, word, word) triples --
  * presumably register, bit mask and value for
  * amdgpu_device_program_register_sequence(); confirm against that helper.
  * gfx_v8_0_init_golden_registers() programs this table for
  * CHIP_POLARIS10, before polaris10_golden_common_all.
  * Note: unlike the polaris11 table there is no mmTCP_CHAN_STEER_LO row.
  */
 326static const u32 golden_settings_polaris10_a11[] =
 327{
 328	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 329	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 330	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 331	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 332	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 333	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 334	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 335	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 336	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 337	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 338	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 339	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 340	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 341	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 342	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 343	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 344	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 345};
 346
 /*
  * Polaris10 common golden values, as (register, word, word) triples; the
  * middle word is 0xffffffff on every row. Programmed after
  * golden_settings_polaris10_a11 for CHIP_POLARIS10 in
  * gfx_v8_0_init_golden_registers().
  */
 347static const u32 polaris10_golden_common_all[] =
 348{
 349	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 350	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 351	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 352	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 353	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 354	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 355	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 356	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 357};
 358
 /*
  * Fiji common golden values, as (register, word, word) triples.
  * mmGRBM_GFX_INDEX is written a second time (same value) before the final
  * mmSPI_CONFIG_CNTL_1 row, which is the only row whose middle word is not
  * 0xffffffff. Programmed last for CHIP_FIJI in
  * gfx_v8_0_init_golden_registers().
  */
 359static const u32 fiji_golden_common_all[] =
 360{
 361	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 362	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 363	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 364	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 365	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 366	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 367	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 368	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 369	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 370	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 371};
 372
 /*
  * Fiji golden settings, as (register, word, word) triples -- presumably
  * register, bit mask and value for
  * amdgpu_device_program_register_sequence(); confirm against that helper.
  * gfx_v8_0_init_golden_registers() programs this table for CHIP_FIJI,
  * after fiji_mgcg_cgcg_init and before fiji_golden_common_all.
  */
 373static const u32 golden_settings_fiji_a10[] =
 374{
 375	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 376	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 377	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 378	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 379	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 380	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 381	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 382	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 383	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 384	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 385	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 386};
 387
 /*
  * Fiji clock-gating (MGCG/CGCG) init sequence, as (register, word, word)
  * triples. It carries the CGTT_*_CLK_CTRL rows but, unlike the tonga,
  * iceland and cz tables in this file, no per-CU CGTS_CUn_* rows.
  * Programmed first for CHIP_FIJI in gfx_v8_0_init_golden_registers().
  * NOTE(review): the middle word is presumably a mask -- confirm against
  * amdgpu_device_program_register_sequence().
  */
 388static const u32 fiji_mgcg_cgcg_init[] =
 389{
 390	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 391	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 392	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 393	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 394	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 395	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 396	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 397	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 398	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 399	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 400	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 401	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 402	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 403	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 404	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 405	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 406	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 407	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 408	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 409	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 410	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 411	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 412	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 413	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 414	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 415	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 416	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 417	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 418	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 419	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 420	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 421	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 422	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 423	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 424	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 425};
 426
 /*
  * Iceland golden settings, as (register, word, word) triples -- presumably
  * register, bit mask and value for
  * amdgpu_device_program_register_sequence(); confirm against that helper.
  * gfx_v8_0_init_golden_registers() programs this table for CHIP_TOPAZ,
  * after iceland_mgcg_cgcg_init and before iceland_golden_common_all.
  */
 427static const u32 golden_settings_iceland_a11[] =
 428{
 429	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 430	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 431	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 432	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 433	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 434	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 435	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 436	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 437	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 438	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 439	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 440	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 441	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 442	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 443	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 444	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 445};
 446
 /*
  * Iceland common golden values, as (register, word, word) triples; the
  * middle word is 0xffffffff on every row. Programmed last for CHIP_TOPAZ
  * in gfx_v8_0_init_golden_registers().
  */
 447static const u32 iceland_golden_common_all[] =
 448{
 449	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 450	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 451	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 452	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 453	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 454	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 455	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 456	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 457};
 458
 /*
  * Iceland clock-gating (MGCG/CGCG) init sequence, as (register, word,
  * word) triples: the CGTT_*_CLK_CTRL registers, then the per-CU CGTS
  * registers for CU0..CU5 only, then CGTS_SM_CTRL_REG,
  * CP_RB_WPTR_POLL_CNTL and RLC_CGCG_CGLS_CTRL. There is no
  * mmCP_MEM_SLP_CNTL row in this table.
  * Programmed first for CHIP_TOPAZ in gfx_v8_0_init_golden_registers().
  * NOTE(review): the middle word is presumably a mask -- confirm against
  * amdgpu_device_program_register_sequence().
  */
 459static const u32 iceland_mgcg_cgcg_init[] =
 460{
 461	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 462	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 463	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 464	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 465	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 466	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 467	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 468	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 469	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 470	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 471	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 472	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 473	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 474	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 475	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 476	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 477	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 478	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 479	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 480	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 481	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 482	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 483	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 484	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 485	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 486	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 487	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 488	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 489	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 490	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 491	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 492	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 493	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 494	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 495	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 496	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 497	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 498	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 499	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 500	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 501	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 502	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 503	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 504	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 505	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 506	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 507	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 508	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 509	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 510	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 511	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 512	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 513	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 514	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 515	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 516	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 517	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 518	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 519	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 520	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 521	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 522	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 523	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 524	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 525};
 526
 /*
  * "cz" golden settings, as (register, word, word) triples -- presumably
  * register, bit mask and value for
  * amdgpu_device_program_register_sequence(); confirm against that helper.
  * NOTE(review): the consumer is not visible in this part of the file;
  * presumably the CHIP_CARRIZO case of gfx_v8_0_init_golden_registers() --
  * confirm.
  */
 527static const u32 cz_golden_settings_a11[] =
 528{
 529	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 530	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 531	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 532	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 533	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 534	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 535	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 536	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 537	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 538	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 539	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 540	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 541};
 542
 /*
  * "cz" common golden values, as (register, word, word) triples; the
  * middle word is 0xffffffff on every row. The mmGB_ADDR_CONFIG value
  * equals CARRIZO_GB_ADDR_CONFIG_GOLDEN.
  * NOTE(review): the consumer is not visible in this part of the file;
  * presumably the CHIP_CARRIZO case of gfx_v8_0_init_golden_registers() --
  * confirm.
  */
 543static const u32 cz_golden_common_all[] =
 544{
 545	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 546	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 547	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 548	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 549	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 550	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 551	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 552	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 553};
 554
 /*
  * "cz" clock-gating (MGCG/CGCG) init sequence, as (register, word, word)
  * triples: the CGTT_*_CLK_CTRL registers, then the per-CU CGTS registers
  * for CU0..CU7, then CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL,
  * RLC_CGCG_CGLS_CTRL and CP_MEM_SLP_CNTL.
  * NOTE(review): the consumer is not visible in this part of the file
  * (presumably the CHIP_CARRIZO case of gfx_v8_0_init_golden_registers()),
  * and the middle word is presumably a mask -- confirm both.
  */
 555static const u32 cz_mgcg_cgcg_init[] =
 556{
 557	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 558	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 559	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 560	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 561	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 562	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 563	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 564	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 565	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 566	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 567	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 568	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 569	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 570	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 571	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 572	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 573	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 574	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 575	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 576	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 577	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 578	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 579	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 580	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 581	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 582	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 583	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 584	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 585	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 586	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 587	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 588	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 589	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 590	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 591	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 592	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 593	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 594	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 595	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 596	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 597	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 598	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 599	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 600	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 601	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 602	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 603	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 604	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 605	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 606	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 607	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 608	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 609	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 610	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 611	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 612	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 613	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 614	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 615	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 616	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 617	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 618	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 619	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 620	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 621	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 622	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 623	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 624	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 625	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 626	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 627	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 628	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 629	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 630	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 631	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 632};
 633
 /*
  * Stoney golden settings, as (register, word, word) triples -- presumably
  * register, bit mask and value for
  * amdgpu_device_program_register_sequence(); confirm against that helper.
  * NOTE(review): the consumer is not visible in this part of the file;
  * presumably a Stoney case of gfx_v8_0_init_golden_registers() -- confirm.
  */
 634static const u32 stoney_golden_settings_a11[] =
 635{
 636	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 637	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 638	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 639	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 640	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 641	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 642	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 643	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 644	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 645	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 646};
 647
 /*
  * Stoney common golden values, as (register, word, word) triples; the
  * middle word is 0xffffffff on every row.
  * NOTE(review): the consumer is not visible in this part of the file;
  * presumably a Stoney case of gfx_v8_0_init_golden_registers() -- confirm.
  */
 648static const u32 stoney_golden_common_all[] =
 649{
 650	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 651	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 652	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 653	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 654	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 655	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 656	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 657	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 658};
 659
 /*
  * Stoney clock-gating (MGCG/CGCG) init sequence, as (register, word, word)
  * triples. It is much shorter than the other *_mgcg_cgcg_init tables in
  * this file: five rows, with no CGTT_*_CLK_CTRL or per-CU CGTS rows.
  * NOTE(review): the consumer is not visible in this part of the file;
  * presumably a Stoney case of gfx_v8_0_init_golden_registers() -- confirm.
  */
 660static const u32 stoney_mgcg_cgcg_init[] =
 661{
 662	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 663	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 664	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 665	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 666	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 667};
 668
 
 
 
 
 
 
 
 
 
 
 
 /*
  * Forward declarations of file-local (static) helpers; their definitions
  * are not visible in this part of the file (presumably further down).
  */
 669static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 670static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 671static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 672static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 673static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 674static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 675static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 676static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 677
 
 
 
 678static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 679{
 
 
 680	switch (adev->asic_type) {
 681	case CHIP_TOPAZ:
 682		amdgpu_device_program_register_sequence(adev,
 683							iceland_mgcg_cgcg_init,
 684							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 685		amdgpu_device_program_register_sequence(adev,
 686							golden_settings_iceland_a11,
 687							ARRAY_SIZE(golden_settings_iceland_a11));
 688		amdgpu_device_program_register_sequence(adev,
 689							iceland_golden_common_all,
 690							ARRAY_SIZE(iceland_golden_common_all));
 691		break;
 692	case CHIP_FIJI:
 693		amdgpu_device_program_register_sequence(adev,
 694							fiji_mgcg_cgcg_init,
 695							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 696		amdgpu_device_program_register_sequence(adev,
 697							golden_settings_fiji_a10,
 698							ARRAY_SIZE(golden_settings_fiji_a10));
 699		amdgpu_device_program_register_sequence(adev,
 700							fiji_golden_common_all,
 701							ARRAY_SIZE(fiji_golden_common_all));
 702		break;
 703
 704	case CHIP_TONGA:
 705		amdgpu_device_program_register_sequence(adev,
 706							tonga_mgcg_cgcg_init,
 707							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 708		amdgpu_device_program_register_sequence(adev,
 709							golden_settings_tonga_a11,
 710							ARRAY_SIZE(golden_settings_tonga_a11));
 711		amdgpu_device_program_register_sequence(adev,
 712							tonga_golden_common_all,
 713							ARRAY_SIZE(tonga_golden_common_all));
 714		break;
 
 
 
 
 
 
 
 
 715	case CHIP_POLARIS11:
 716	case CHIP_POLARIS12:
 717		amdgpu_device_program_register_sequence(adev,
 718							golden_settings_polaris11_a11,
 719							ARRAY_SIZE(golden_settings_polaris11_a11));
 720		amdgpu_device_program_register_sequence(adev,
 721							polaris11_golden_common_all,
 722							ARRAY_SIZE(polaris11_golden_common_all));
 723		break;
 724	case CHIP_POLARIS10:
 725		amdgpu_device_program_register_sequence(adev,
 726							golden_settings_polaris10_a11,
 727							ARRAY_SIZE(golden_settings_polaris10_a11));
 728		amdgpu_device_program_register_sequence(adev,
 729							polaris10_golden_common_all,
 730							ARRAY_SIZE(polaris10_golden_common_all));
 731		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 732		if (adev->pdev->revision == 0xc7 &&
 
 
 
 733		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 734		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 735		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
 736			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 737			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 738		}
 739		break;
 740	case CHIP_CARRIZO:
 741		amdgpu_device_program_register_sequence(adev,
 742							cz_mgcg_cgcg_init,
 743							ARRAY_SIZE(cz_mgcg_cgcg_init));
 744		amdgpu_device_program_register_sequence(adev,
 745							cz_golden_settings_a11,
 746							ARRAY_SIZE(cz_golden_settings_a11));
 747		amdgpu_device_program_register_sequence(adev,
 748							cz_golden_common_all,
 749							ARRAY_SIZE(cz_golden_common_all));
 750		break;
 751	case CHIP_STONEY:
 752		amdgpu_device_program_register_sequence(adev,
 753							stoney_mgcg_cgcg_init,
 754							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 755		amdgpu_device_program_register_sequence(adev,
 756							stoney_golden_settings_a11,
 757							ARRAY_SIZE(stoney_golden_settings_a11));
 758		amdgpu_device_program_register_sequence(adev,
 759							stoney_golden_common_all,
 760							ARRAY_SIZE(stoney_golden_common_all));
 761		break;
 762	default:
 763		break;
 764	}
 765}
 766
 767static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 768{
 769	adev->gfx.scratch.num_reg = 8;
 770	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 771	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 772}
 773
 774static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 775{
 776	struct amdgpu_device *adev = ring->adev;
 777	uint32_t scratch;
 778	uint32_t tmp = 0;
 779	unsigned i;
 780	int r;
 781
 782	r = amdgpu_gfx_scratch_get(adev, &scratch);
 783	if (r) {
 784		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 785		return r;
 786	}
 787	WREG32(scratch, 0xCAFEDEAD);
 788	r = amdgpu_ring_alloc(ring, 3);
 789	if (r) {
 790		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 791			  ring->idx, r);
 792		amdgpu_gfx_scratch_free(adev, scratch);
 793		return r;
 794	}
 795	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 796	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 797	amdgpu_ring_write(ring, 0xDEADBEEF);
 798	amdgpu_ring_commit(ring);
 799
 800	for (i = 0; i < adev->usec_timeout; i++) {
 801		tmp = RREG32(scratch);
 802		if (tmp == 0xDEADBEEF)
 803			break;
 804		DRM_UDELAY(1);
 805	}
 806	if (i < adev->usec_timeout) {
 807		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
 808			 ring->idx, i);
 809	} else {
 810		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 811			  ring->idx, scratch, tmp);
 812		r = -EINVAL;
 813	}
 
 
 
 
 
 814	amdgpu_gfx_scratch_free(adev, scratch);
 815	return r;
 816}
 817
 818static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 819{
 820	struct amdgpu_device *adev = ring->adev;
 821	struct amdgpu_ib ib;
 822	struct dma_fence *f = NULL;
 823	uint32_t scratch;
 824	uint32_t tmp = 0;
 
 
 825	long r;
 826
 827	r = amdgpu_gfx_scratch_get(adev, &scratch);
 828	if (r) {
 829		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
 830		return r;
 831	}
 832	WREG32(scratch, 0xCAFEDEAD);
 
 833	memset(&ib, 0, sizeof(ib));
 834	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 835	if (r) {
 836		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 837		goto err1;
 838	}
 839	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
 840	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
 841	ib.ptr[2] = 0xDEADBEEF;
 842	ib.length_dw = 3;
 
 
 843
 844	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 845	if (r)
 846		goto err2;
 847
 848	r = dma_fence_wait_timeout(f, false, timeout);
 849	if (r == 0) {
 850		DRM_ERROR("amdgpu: IB test timed out.\n");
 851		r = -ETIMEDOUT;
 852		goto err2;
 853	} else if (r < 0) {
 854		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 855		goto err2;
 856	}
 857	tmp = RREG32(scratch);
 858	if (tmp == 0xDEADBEEF) {
 859		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
 860		r = 0;
 861	} else {
 862		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 863			  scratch, tmp);
 864		r = -EINVAL;
 865	}
 866err2:
 867	amdgpu_ib_free(adev, &ib, NULL);
 868	dma_fence_put(f);
 869err1:
 870	amdgpu_gfx_scratch_free(adev, scratch);
 871	return r;
 872}
 873
 874
 875static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 876{
 877	release_firmware(adev->gfx.pfp_fw);
 878	adev->gfx.pfp_fw = NULL;
 879	release_firmware(adev->gfx.me_fw);
 880	adev->gfx.me_fw = NULL;
 881	release_firmware(adev->gfx.ce_fw);
 882	adev->gfx.ce_fw = NULL;
 883	release_firmware(adev->gfx.rlc_fw);
 884	adev->gfx.rlc_fw = NULL;
 885	release_firmware(adev->gfx.mec_fw);
 886	adev->gfx.mec_fw = NULL;
 887	if ((adev->asic_type != CHIP_STONEY) &&
 888	    (adev->asic_type != CHIP_TOPAZ))
 889		release_firmware(adev->gfx.mec2_fw);
 890	adev->gfx.mec2_fw = NULL;
 891
 892	kfree(adev->gfx.rlc.register_list_format);
 893}
 894
 895static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 896{
 897	const char *chip_name;
 898	char fw_name[30];
 899	int err;
 900	struct amdgpu_firmware_info *info = NULL;
 901	const struct common_firmware_header *header = NULL;
 902	const struct gfx_firmware_header_v1_0 *cp_hdr;
 903	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 904	unsigned int *tmp = NULL, i;
 905
 906	DRM_DEBUG("\n");
 907
 908	switch (adev->asic_type) {
 909	case CHIP_TOPAZ:
 910		chip_name = "topaz";
 911		break;
 912	case CHIP_TONGA:
 913		chip_name = "tonga";
 914		break;
 915	case CHIP_CARRIZO:
 916		chip_name = "carrizo";
 917		break;
 918	case CHIP_FIJI:
 919		chip_name = "fiji";
 920		break;
 921	case CHIP_POLARIS11:
 922		chip_name = "polaris11";
 923		break;
 924	case CHIP_POLARIS10:
 925		chip_name = "polaris10";
 926		break;
 
 
 
 927	case CHIP_POLARIS12:
 928		chip_name = "polaris12";
 929		break;
 930	case CHIP_STONEY:
 931		chip_name = "stoney";
 932		break;
 933	default:
 934		BUG();
 935	}
 936
 937	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 938		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 939		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 940		if (err == -ENOENT) {
 941			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 942			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 943		}
 944	} else {
 945		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 946		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 947	}
 948	if (err)
 949		goto out;
 950	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 951	if (err)
 952		goto out;
 953	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 954	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 955	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 956
 957	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 958		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
 959		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 960		if (err == -ENOENT) {
 961			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 962			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 963		}
 964	} else {
 965		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 966		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 967	}
 968	if (err)
 969		goto out;
 970	err = amdgpu_ucode_validate(adev->gfx.me_fw);
 971	if (err)
 972		goto out;
 973	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 974	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 975
 976	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 977
 978	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 979		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
 980		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 981		if (err == -ENOENT) {
 982			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 983			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 984		}
 985	} else {
 986		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 987		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 988	}
 989	if (err)
 990		goto out;
 991	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 992	if (err)
 993		goto out;
 994	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 995	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 996	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 997
 998	/*
 999	 * Support for MCBP/Virtualization in combination with chained IBs is
1000	 * formal released on feature version #46
1001	 */
1002	if (adev->gfx.ce_feature_version >= 46 &&
1003	    adev->gfx.pfp_feature_version >= 46) {
1004		adev->virt.chained_ib_support = true;
1005		DRM_INFO("Chained IB support enabled!\n");
1006	} else
1007		adev->virt.chained_ib_support = false;
1008
1009	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1010	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1011	if (err)
1012		goto out;
1013	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1014	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1015	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1016	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1017
1018	adev->gfx.rlc.save_and_restore_offset =
1019			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1020	adev->gfx.rlc.clear_state_descriptor_offset =
1021			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1022	adev->gfx.rlc.avail_scratch_ram_locations =
1023			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1024	adev->gfx.rlc.reg_restore_list_size =
1025			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1026	adev->gfx.rlc.reg_list_format_start =
1027			le32_to_cpu(rlc_hdr->reg_list_format_start);
1028	adev->gfx.rlc.reg_list_format_separate_start =
1029			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1030	adev->gfx.rlc.starting_offsets_start =
1031			le32_to_cpu(rlc_hdr->starting_offsets_start);
1032	adev->gfx.rlc.reg_list_format_size_bytes =
1033			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1034	adev->gfx.rlc.reg_list_size_bytes =
1035			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1036
1037	adev->gfx.rlc.register_list_format =
1038			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1039					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1040
1041	if (!adev->gfx.rlc.register_list_format) {
1042		err = -ENOMEM;
1043		goto out;
1044	}
1045
1046	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1047			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1048	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1049		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1050
1051	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1052
1053	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1054			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1055	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1056		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1057
1058	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1059		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1060		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1061		if (err == -ENOENT) {
1062			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1063			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1064		}
1065	} else {
1066		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1067		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1068	}
1069	if (err)
1070		goto out;
1071	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1072	if (err)
1073		goto out;
1074	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1076	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1077
1078	if ((adev->asic_type != CHIP_STONEY) &&
1079	    (adev->asic_type != CHIP_TOPAZ)) {
1080		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1081			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1082			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1083			if (err == -ENOENT) {
1084				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1085				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1086			}
1087		} else {
1088			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1089			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1090		}
1091		if (!err) {
1092			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1093			if (err)
1094				goto out;
1095			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1096				adev->gfx.mec2_fw->data;
1097			adev->gfx.mec2_fw_version =
1098				le32_to_cpu(cp_hdr->header.ucode_version);
1099			adev->gfx.mec2_feature_version =
1100				le32_to_cpu(cp_hdr->ucode_feature_version);
1101		} else {
1102			err = 0;
1103			adev->gfx.mec2_fw = NULL;
1104		}
1105	}
1106
1107	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1108		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110		info->fw = adev->gfx.pfp_fw;
1111		header = (const struct common_firmware_header *)info->fw->data;
1112		adev->firmware.fw_size +=
1113			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114
1115		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117		info->fw = adev->gfx.me_fw;
1118		header = (const struct common_firmware_header *)info->fw->data;
1119		adev->firmware.fw_size +=
1120			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121
1122		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124		info->fw = adev->gfx.ce_fw;
1125		header = (const struct common_firmware_header *)info->fw->data;
1126		adev->firmware.fw_size +=
1127			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128
1129		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1130		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1131		info->fw = adev->gfx.rlc_fw;
1132		header = (const struct common_firmware_header *)info->fw->data;
1133		adev->firmware.fw_size +=
1134			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
1135
1136		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1137		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1138		info->fw = adev->gfx.mec_fw;
 
1139		header = (const struct common_firmware_header *)info->fw->data;
1140		adev->firmware.fw_size +=
1141			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1142
1143		/* we need account JT in */
1144		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1145		adev->firmware.fw_size +=
1146			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1147
1148		if (amdgpu_sriov_vf(adev)) {
1149			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1150			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1151			info->fw = adev->gfx.mec_fw;
1152			adev->firmware.fw_size +=
1153				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1154		}
1155
1156		if (adev->gfx.mec2_fw) {
1157			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1158			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1159			info->fw = adev->gfx.mec2_fw;
1160			header = (const struct common_firmware_header *)info->fw->data;
1161			adev->firmware.fw_size +=
1162				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163		}
1164
1165	}
1166
1167out:
1168	if (err) {
1169		dev_err(adev->dev,
1170			"gfx8: Failed to load firmware \"%s\"\n",
1171			fw_name);
1172		release_firmware(adev->gfx.pfp_fw);
1173		adev->gfx.pfp_fw = NULL;
1174		release_firmware(adev->gfx.me_fw);
1175		adev->gfx.me_fw = NULL;
1176		release_firmware(adev->gfx.ce_fw);
1177		adev->gfx.ce_fw = NULL;
1178		release_firmware(adev->gfx.rlc_fw);
1179		adev->gfx.rlc_fw = NULL;
1180		release_firmware(adev->gfx.mec_fw);
1181		adev->gfx.mec_fw = NULL;
1182		release_firmware(adev->gfx.mec2_fw);
1183		adev->gfx.mec2_fw = NULL;
1184	}
1185	return err;
1186}
1187
1188static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1189				    volatile u32 *buffer)
1190{
1191	u32 count = 0, i;
1192	const struct cs_section_def *sect = NULL;
1193	const struct cs_extent_def *ext = NULL;
1194
1195	if (adev->gfx.rlc.cs_data == NULL)
1196		return;
1197	if (buffer == NULL)
1198		return;
1199
1200	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1201	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1202
1203	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1204	buffer[count++] = cpu_to_le32(0x80000000);
1205	buffer[count++] = cpu_to_le32(0x80000000);
1206
1207	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1208		for (ext = sect->section; ext->extent != NULL; ++ext) {
1209			if (sect->id == SECT_CONTEXT) {
1210				buffer[count++] =
1211					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1212				buffer[count++] = cpu_to_le32(ext->reg_index -
1213						PACKET3_SET_CONTEXT_REG_START);
1214				for (i = 0; i < ext->reg_count; i++)
1215					buffer[count++] = cpu_to_le32(ext->extent[i]);
1216			} else {
1217				return;
1218			}
1219		}
1220	}
1221
1222	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1223	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1224			PACKET3_SET_CONTEXT_REG_START);
1225	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1226	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1227
1228	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1229	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1230
1231	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1232	buffer[count++] = cpu_to_le32(0);
1233}
1234
1235static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1236{
1237	const __le32 *fw_data;
1238	volatile u32 *dst_ptr;
1239	int me, i, max_me = 4;
1240	u32 bo_offset = 0;
1241	u32 table_offset, table_size;
1242
1243	if (adev->asic_type == CHIP_CARRIZO)
1244		max_me = 5;
1245
1246	/* write the cp table buffer */
1247	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1248	for (me = 0; me < max_me; me++) {
1249		if (me == 0) {
1250			const struct gfx_firmware_header_v1_0 *hdr =
1251				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1252			fw_data = (const __le32 *)
1253				(adev->gfx.ce_fw->data +
1254				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1255			table_offset = le32_to_cpu(hdr->jt_offset);
1256			table_size = le32_to_cpu(hdr->jt_size);
1257		} else if (me == 1) {
1258			const struct gfx_firmware_header_v1_0 *hdr =
1259				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1260			fw_data = (const __le32 *)
1261				(adev->gfx.pfp_fw->data +
1262				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1263			table_offset = le32_to_cpu(hdr->jt_offset);
1264			table_size = le32_to_cpu(hdr->jt_size);
1265		} else if (me == 2) {
1266			const struct gfx_firmware_header_v1_0 *hdr =
1267				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1268			fw_data = (const __le32 *)
1269				(adev->gfx.me_fw->data +
1270				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1271			table_offset = le32_to_cpu(hdr->jt_offset);
1272			table_size = le32_to_cpu(hdr->jt_size);
1273		} else if (me == 3) {
1274			const struct gfx_firmware_header_v1_0 *hdr =
1275				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1276			fw_data = (const __le32 *)
1277				(adev->gfx.mec_fw->data +
1278				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1279			table_offset = le32_to_cpu(hdr->jt_offset);
1280			table_size = le32_to_cpu(hdr->jt_size);
1281		} else  if (me == 4) {
1282			const struct gfx_firmware_header_v1_0 *hdr =
1283				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1284			fw_data = (const __le32 *)
1285				(adev->gfx.mec2_fw->data +
1286				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1287			table_offset = le32_to_cpu(hdr->jt_offset);
1288			table_size = le32_to_cpu(hdr->jt_size);
1289		}
1290
1291		for (i = 0; i < table_size; i ++) {
1292			dst_ptr[bo_offset + i] =
1293				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1294		}
1295
1296		bo_offset += table_size;
1297	}
1298}
1299
1300static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1301{
1302	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1303	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1304}
1305
1306static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1307{
1308	volatile u32 *dst_ptr;
1309	u32 dws;
1310	const struct cs_section_def *cs_data;
1311	int r;
1312
1313	adev->gfx.rlc.cs_data = vi_cs_data;
1314
1315	cs_data = adev->gfx.rlc.cs_data;
1316
1317	if (cs_data) {
1318		/* clear state block */
1319		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1320
1321		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1322					      AMDGPU_GEM_DOMAIN_VRAM,
1323					      &adev->gfx.rlc.clear_state_obj,
1324					      &adev->gfx.rlc.clear_state_gpu_addr,
1325					      (void **)&adev->gfx.rlc.cs_ptr);
1326		if (r) {
1327			dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1328			gfx_v8_0_rlc_fini(adev);
1329			return r;
1330		}
1331
1332		/* set up the cs buffer */
1333		dst_ptr = adev->gfx.rlc.cs_ptr;
1334		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1335		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1336		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1337	}
1338
1339	if ((adev->asic_type == CHIP_CARRIZO) ||
1340	    (adev->asic_type == CHIP_STONEY)) {
1341		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1342		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1343					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1344					      &adev->gfx.rlc.cp_table_obj,
1345					      &adev->gfx.rlc.cp_table_gpu_addr,
1346					      (void **)&adev->gfx.rlc.cp_table_ptr);
1347		if (r) {
1348			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1349			return r;
1350		}
1351
1352		cz_init_cp_jump_table(adev);
1353
1354		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356	}
1357
 
 
 
 
1358	return 0;
1359}
1360
1361static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1362{
1363	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1364}
1365
1366static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1367{
1368	int r;
1369	u32 *hpd;
1370	size_t mec_hpd_size;
1371
1372	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1373
1374	/* take ownership of the relevant compute queues */
1375	amdgpu_gfx_compute_queue_acquire(adev);
1376
1377	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
 
 
 
 
 
 
 
 
 
 
1378
1379	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1380				      AMDGPU_GEM_DOMAIN_GTT,
1381				      &adev->gfx.mec.hpd_eop_obj,
1382				      &adev->gfx.mec.hpd_eop_gpu_addr,
1383				      (void **)&hpd);
1384	if (r) {
1385		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1386		return r;
1387	}
1388
1389	memset(hpd, 0, mec_hpd_size);
1390
1391	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1392	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
1393
1394	return 0;
1395}
1396
1397static const u32 vgpr_init_compute_shader[] =
1398{
1399	0x7e000209, 0x7e020208,
1400	0x7e040207, 0x7e060206,
1401	0x7e080205, 0x7e0a0204,
1402	0x7e0c0203, 0x7e0e0202,
1403	0x7e100201, 0x7e120200,
1404	0x7e140209, 0x7e160208,
1405	0x7e180207, 0x7e1a0206,
1406	0x7e1c0205, 0x7e1e0204,
1407	0x7e200203, 0x7e220202,
1408	0x7e240201, 0x7e260200,
1409	0x7e280209, 0x7e2a0208,
1410	0x7e2c0207, 0x7e2e0206,
1411	0x7e300205, 0x7e320204,
1412	0x7e340203, 0x7e360202,
1413	0x7e380201, 0x7e3a0200,
1414	0x7e3c0209, 0x7e3e0208,
1415	0x7e400207, 0x7e420206,
1416	0x7e440205, 0x7e460204,
1417	0x7e480203, 0x7e4a0202,
1418	0x7e4c0201, 0x7e4e0200,
1419	0x7e500209, 0x7e520208,
1420	0x7e540207, 0x7e560206,
1421	0x7e580205, 0x7e5a0204,
1422	0x7e5c0203, 0x7e5e0202,
1423	0x7e600201, 0x7e620200,
1424	0x7e640209, 0x7e660208,
1425	0x7e680207, 0x7e6a0206,
1426	0x7e6c0205, 0x7e6e0204,
1427	0x7e700203, 0x7e720202,
1428	0x7e740201, 0x7e760200,
1429	0x7e780209, 0x7e7a0208,
1430	0x7e7c0207, 0x7e7e0206,
1431	0xbf8a0000, 0xbf810000,
1432};
1433
1434static const u32 sgpr_init_compute_shader[] =
1435{
1436	0xbe8a0100, 0xbe8c0102,
1437	0xbe8e0104, 0xbe900106,
1438	0xbe920108, 0xbe940100,
1439	0xbe960102, 0xbe980104,
1440	0xbe9a0106, 0xbe9c0108,
1441	0xbe9e0100, 0xbea00102,
1442	0xbea20104, 0xbea40106,
1443	0xbea60108, 0xbea80100,
1444	0xbeaa0102, 0xbeac0104,
1445	0xbeae0106, 0xbeb00108,
1446	0xbeb20100, 0xbeb40102,
1447	0xbeb60104, 0xbeb80106,
1448	0xbeba0108, 0xbebc0100,
1449	0xbebe0102, 0xbec00104,
1450	0xbec20106, 0xbec40108,
1451	0xbec60100, 0xbec80102,
1452	0xbee60004, 0xbee70005,
1453	0xbeea0006, 0xbeeb0007,
1454	0xbee80008, 0xbee90009,
1455	0xbefc0000, 0xbf8a0000,
1456	0xbf810000, 0x00000000,
1457};
1458
1459static const u32 vgpr_init_regs[] =
1460{
1461	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1462	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1463	mmCOMPUTE_NUM_THREAD_X, 256*4,
1464	mmCOMPUTE_NUM_THREAD_Y, 1,
1465	mmCOMPUTE_NUM_THREAD_Z, 1,
1466	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1467	mmCOMPUTE_PGM_RSRC2, 20,
1468	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1469	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1470	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1471	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1472	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1473	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1474	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1475	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1476	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1477	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1478};
1479
1480static const u32 sgpr1_init_regs[] =
1481{
1482	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1483	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1484	mmCOMPUTE_NUM_THREAD_X, 256*5,
1485	mmCOMPUTE_NUM_THREAD_Y, 1,
1486	mmCOMPUTE_NUM_THREAD_Z, 1,
1487	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1488	mmCOMPUTE_PGM_RSRC2, 20,
1489	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1490	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1491	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1492	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1493	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1494	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1495	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1496	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1497	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1498	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1499};
1500
1501static const u32 sgpr2_init_regs[] =
1502{
1503	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1504	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1505	mmCOMPUTE_NUM_THREAD_X, 256*5,
1506	mmCOMPUTE_NUM_THREAD_Y, 1,
1507	mmCOMPUTE_NUM_THREAD_Z, 1,
1508	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1509	mmCOMPUTE_PGM_RSRC2, 20,
1510	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1511	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1512	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1513	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1514	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1515	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1516	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1517	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1518	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1519	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1520};
1521
1522static const u32 sec_ded_counter_registers[] =
1523{
1524	mmCPC_EDC_ATC_CNT,
1525	mmCPC_EDC_SCRATCH_CNT,
1526	mmCPC_EDC_UCODE_CNT,
1527	mmCPF_EDC_ATC_CNT,
1528	mmCPF_EDC_ROQ_CNT,
1529	mmCPF_EDC_TAG_CNT,
1530	mmCPG_EDC_ATC_CNT,
1531	mmCPG_EDC_DMA_CNT,
1532	mmCPG_EDC_TAG_CNT,
1533	mmDC_EDC_CSINVOC_CNT,
1534	mmDC_EDC_RESTORE_CNT,
1535	mmDC_EDC_STATE_CNT,
1536	mmGDS_EDC_CNT,
1537	mmGDS_EDC_GRBM_CNT,
1538	mmGDS_EDC_OA_DED,
1539	mmSPI_EDC_CNT,
1540	mmSQC_ATC_EDC_GATCL1_CNT,
1541	mmSQC_EDC_CNT,
1542	mmSQ_EDC_DED_CNT,
1543	mmSQ_EDC_INFO,
1544	mmSQ_EDC_SEC_CNT,
1545	mmTCC_EDC_CNT,
1546	mmTCP_ATC_EDC_GATCL1_CNT,
1547	mmTCP_EDC_CNT,
1548	mmTD_EDC_CNT
1549};
1550
1551static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1552{
1553	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1554	struct amdgpu_ib ib;
1555	struct dma_fence *f = NULL;
1556	int r, i;
1557	u32 tmp;
1558	unsigned total_size, vgpr_offset, sgpr_offset;
1559	u64 gpu_addr;
1560
1561	/* only supported on CZ */
1562	if (adev->asic_type != CHIP_CARRIZO)
1563		return 0;
1564
1565	/* bail if the compute ring is not ready */
1566	if (!ring->ready)
1567		return 0;
1568
1569	tmp = RREG32(mmGB_EDC_MODE);
1570	WREG32(mmGB_EDC_MODE, 0);
1571
1572	total_size =
1573		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574	total_size +=
1575		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1576	total_size +=
1577		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1578	total_size = ALIGN(total_size, 256);
1579	vgpr_offset = total_size;
1580	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1581	sgpr_offset = total_size;
1582	total_size += sizeof(sgpr_init_compute_shader);
1583
1584	/* allocate an indirect buffer to put the commands in */
1585	memset(&ib, 0, sizeof(ib));
1586	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
 
1587	if (r) {
1588		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1589		return r;
1590	}
1591
1592	/* load the compute shaders */
1593	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1594		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1595
1596	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1597		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1598
1599	/* init the ib length to 0 */
1600	ib.length_dw = 0;
1601
1602	/* VGPR */
1603	/* write the register state for the compute dispatch */
1604	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1605		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1606		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1607		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1608	}
1609	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1610	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1611	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1612	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1613	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1614	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1615
1616	/* write dispatch packet */
1617	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1618	ib.ptr[ib.length_dw++] = 8; /* x */
1619	ib.ptr[ib.length_dw++] = 1; /* y */
1620	ib.ptr[ib.length_dw++] = 1; /* z */
1621	ib.ptr[ib.length_dw++] =
1622		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1623
1624	/* write CS partial flush packet */
1625	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1626	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1627
1628	/* SGPR1 */
1629	/* write the register state for the compute dispatch */
1630	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1631		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1632		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1633		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1634	}
1635	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1636	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1637	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1638	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1639	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1640	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1641
1642	/* write dispatch packet */
1643	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1644	ib.ptr[ib.length_dw++] = 8; /* x */
1645	ib.ptr[ib.length_dw++] = 1; /* y */
1646	ib.ptr[ib.length_dw++] = 1; /* z */
1647	ib.ptr[ib.length_dw++] =
1648		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1649
1650	/* write CS partial flush packet */
1651	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1652	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1653
1654	/* SGPR2 */
1655	/* write the register state for the compute dispatch */
1656	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1657		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1658		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1659		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1660	}
1661	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1662	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1663	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1664	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1665	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1666	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1667
1668	/* write dispatch packet */
1669	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1670	ib.ptr[ib.length_dw++] = 8; /* x */
1671	ib.ptr[ib.length_dw++] = 1; /* y */
1672	ib.ptr[ib.length_dw++] = 1; /* z */
1673	ib.ptr[ib.length_dw++] =
1674		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1675
1676	/* write CS partial flush packet */
1677	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1678	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1679
1680	/* shedule the ib on the ring */
1681	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1682	if (r) {
1683		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1684		goto fail;
1685	}
1686
1687	/* wait for the GPU to finish processing the IB */
1688	r = dma_fence_wait(f, false);
1689	if (r) {
1690		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1691		goto fail;
1692	}
1693
1694	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1695	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1696	WREG32(mmGB_EDC_MODE, tmp);
1697
1698	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1699	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1700	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1701
1702
1703	/* read back registers to clear the counters */
1704	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1705		RREG32(sec_ded_counter_registers[i]);
1706
1707fail:
1708	amdgpu_ib_free(adev, &ib, NULL);
1709	dma_fence_put(f);
1710
1711	return r;
1712}
1713
1714static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1715{
1716	u32 gb_addr_config;
1717	u32 mc_shared_chmap, mc_arb_ramcfg;
1718	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1719	u32 tmp;
1720	int ret;
1721
1722	switch (adev->asic_type) {
1723	case CHIP_TOPAZ:
1724		adev->gfx.config.max_shader_engines = 1;
1725		adev->gfx.config.max_tile_pipes = 2;
1726		adev->gfx.config.max_cu_per_sh = 6;
1727		adev->gfx.config.max_sh_per_se = 1;
1728		adev->gfx.config.max_backends_per_se = 2;
1729		adev->gfx.config.max_texture_channel_caches = 2;
1730		adev->gfx.config.max_gprs = 256;
1731		adev->gfx.config.max_gs_threads = 32;
1732		adev->gfx.config.max_hw_contexts = 8;
1733
1734		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1735		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1736		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1737		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1738		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1739		break;
1740	case CHIP_FIJI:
1741		adev->gfx.config.max_shader_engines = 4;
1742		adev->gfx.config.max_tile_pipes = 16;
1743		adev->gfx.config.max_cu_per_sh = 16;
1744		adev->gfx.config.max_sh_per_se = 1;
1745		adev->gfx.config.max_backends_per_se = 4;
1746		adev->gfx.config.max_texture_channel_caches = 16;
1747		adev->gfx.config.max_gprs = 256;
1748		adev->gfx.config.max_gs_threads = 32;
1749		adev->gfx.config.max_hw_contexts = 8;
1750
1751		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1752		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1753		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1754		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1755		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1756		break;
1757	case CHIP_POLARIS11:
1758	case CHIP_POLARIS12:
1759		ret = amdgpu_atombios_get_gfx_info(adev);
1760		if (ret)
1761			return ret;
1762		adev->gfx.config.max_gprs = 256;
1763		adev->gfx.config.max_gs_threads = 32;
1764		adev->gfx.config.max_hw_contexts = 8;
1765
1766		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1767		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1768		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1769		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1770		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1771		break;
1772	case CHIP_POLARIS10:
 
1773		ret = amdgpu_atombios_get_gfx_info(adev);
1774		if (ret)
1775			return ret;
1776		adev->gfx.config.max_gprs = 256;
1777		adev->gfx.config.max_gs_threads = 32;
1778		adev->gfx.config.max_hw_contexts = 8;
1779
1780		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1785		break;
1786	case CHIP_TONGA:
1787		adev->gfx.config.max_shader_engines = 4;
1788		adev->gfx.config.max_tile_pipes = 8;
1789		adev->gfx.config.max_cu_per_sh = 8;
1790		adev->gfx.config.max_sh_per_se = 1;
1791		adev->gfx.config.max_backends_per_se = 2;
1792		adev->gfx.config.max_texture_channel_caches = 8;
1793		adev->gfx.config.max_gprs = 256;
1794		adev->gfx.config.max_gs_threads = 32;
1795		adev->gfx.config.max_hw_contexts = 8;
1796
1797		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1802		break;
1803	case CHIP_CARRIZO:
1804		adev->gfx.config.max_shader_engines = 1;
1805		adev->gfx.config.max_tile_pipes = 2;
1806		adev->gfx.config.max_sh_per_se = 1;
1807		adev->gfx.config.max_backends_per_se = 2;
1808		adev->gfx.config.max_cu_per_sh = 8;
1809		adev->gfx.config.max_texture_channel_caches = 2;
1810		adev->gfx.config.max_gprs = 256;
1811		adev->gfx.config.max_gs_threads = 32;
1812		adev->gfx.config.max_hw_contexts = 8;
1813
1814		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1815		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1816		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1817		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1818		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1819		break;
1820	case CHIP_STONEY:
1821		adev->gfx.config.max_shader_engines = 1;
1822		adev->gfx.config.max_tile_pipes = 2;
1823		adev->gfx.config.max_sh_per_se = 1;
1824		adev->gfx.config.max_backends_per_se = 1;
1825		adev->gfx.config.max_cu_per_sh = 3;
1826		adev->gfx.config.max_texture_channel_caches = 2;
1827		adev->gfx.config.max_gprs = 256;
1828		adev->gfx.config.max_gs_threads = 16;
1829		adev->gfx.config.max_hw_contexts = 8;
1830
1831		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1832		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1833		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1834		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1835		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1836		break;
1837	default:
1838		adev->gfx.config.max_shader_engines = 2;
1839		adev->gfx.config.max_tile_pipes = 4;
1840		adev->gfx.config.max_cu_per_sh = 2;
1841		adev->gfx.config.max_sh_per_se = 1;
1842		adev->gfx.config.max_backends_per_se = 2;
1843		adev->gfx.config.max_texture_channel_caches = 4;
1844		adev->gfx.config.max_gprs = 256;
1845		adev->gfx.config.max_gs_threads = 32;
1846		adev->gfx.config.max_hw_contexts = 8;
1847
1848		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1849		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1850		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1851		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1852		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1853		break;
1854	}
1855
1856	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1857	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1858	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1859
 
 
 
 
 
1860	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1861	adev->gfx.config.mem_max_burst_length_bytes = 256;
1862	if (adev->flags & AMD_IS_APU) {
1863		/* Get memory bank mapping mode. */
1864		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1865		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1866		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1867
1868		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1869		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1870		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1871
1872		/* Validate settings in case only one DIMM installed. */
1873		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1874			dimm00_addr_map = 0;
1875		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1876			dimm01_addr_map = 0;
1877		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1878			dimm10_addr_map = 0;
1879		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1880			dimm11_addr_map = 0;
1881
1882		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1883		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1884		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1885			adev->gfx.config.mem_row_size_in_kb = 2;
1886		else
1887			adev->gfx.config.mem_row_size_in_kb = 1;
1888	} else {
1889		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1890		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1891		if (adev->gfx.config.mem_row_size_in_kb > 4)
1892			adev->gfx.config.mem_row_size_in_kb = 4;
1893	}
1894
1895	adev->gfx.config.shader_engine_tile_size = 32;
1896	adev->gfx.config.num_gpus = 1;
1897	adev->gfx.config.multi_gpu_tile_size = 64;
1898
1899	/* fix up row size */
1900	switch (adev->gfx.config.mem_row_size_in_kb) {
1901	case 1:
1902	default:
1903		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1904		break;
1905	case 2:
1906		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1907		break;
1908	case 4:
1909		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1910		break;
1911	}
1912	adev->gfx.config.gb_addr_config = gb_addr_config;
1913
1914	return 0;
1915}
1916
1917static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1918					int mec, int pipe, int queue)
1919{
1920	int r;
1921	unsigned irq_type;
1922	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
 
1923
1924	ring = &adev->gfx.compute_ring[ring_id];
1925
1926	/* mec0 is me1 */
1927	ring->me = mec + 1;
1928	ring->pipe = pipe;
1929	ring->queue = queue;
1930
1931	ring->ring_obj = NULL;
1932	ring->use_doorbell = true;
1933	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1934	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1935				+ (ring_id * GFX8_MEC_HPD_SIZE);
1936	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1937
1938	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1939		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1940		+ ring->pipe;
1941
 
 
1942	/* type-2 packets are deprecated on MEC, use type-3 instead */
1943	r = amdgpu_ring_init(adev, ring, 1024,
1944			&adev->gfx.eop_irq, irq_type);
1945	if (r)
1946		return r;
1947
1948
1949	return 0;
1950}
1951
 
 
1952static int gfx_v8_0_sw_init(void *handle)
1953{
1954	int i, j, k, r, ring_id;
1955	struct amdgpu_ring *ring;
1956	struct amdgpu_kiq *kiq;
1957	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1958
1959	switch (adev->asic_type) {
1960	case CHIP_FIJI:
1961	case CHIP_TONGA:
 
 
 
1962	case CHIP_POLARIS11:
1963	case CHIP_POLARIS12:
1964	case CHIP_POLARIS10:
1965	case CHIP_CARRIZO:
1966		adev->gfx.mec.num_mec = 2;
1967		break;
1968	case CHIP_TOPAZ:
1969	case CHIP_STONEY:
1970	default:
1971		adev->gfx.mec.num_mec = 1;
1972		break;
1973	}
1974
1975	adev->gfx.mec.num_pipe_per_mec = 4;
1976	adev->gfx.mec.num_queue_per_pipe = 8;
1977
1978	/* KIQ event */
1979	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1980	if (r)
1981		return r;
1982
1983	/* EOP Event */
1984	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1985	if (r)
1986		return r;
1987
1988	/* Privileged reg */
1989	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1990			      &adev->gfx.priv_reg_irq);
1991	if (r)
1992		return r;
1993
1994	/* Privileged inst */
1995	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1996			      &adev->gfx.priv_inst_irq);
1997	if (r)
1998		return r;
1999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2000	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2001
2002	gfx_v8_0_scratch_init(adev);
2003
2004	r = gfx_v8_0_init_microcode(adev);
2005	if (r) {
2006		DRM_ERROR("Failed to load gfx firmware!\n");
2007		return r;
2008	}
2009
2010	r = gfx_v8_0_rlc_init(adev);
2011	if (r) {
2012		DRM_ERROR("Failed to init rlc BOs!\n");
2013		return r;
2014	}
2015
2016	r = gfx_v8_0_mec_init(adev);
2017	if (r) {
2018		DRM_ERROR("Failed to init MEC BOs!\n");
2019		return r;
2020	}
2021
2022	/* set up the gfx ring */
2023	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2024		ring = &adev->gfx.gfx_ring[i];
2025		ring->ring_obj = NULL;
2026		sprintf(ring->name, "gfx");
2027		/* no gfx doorbells on iceland */
2028		if (adev->asic_type != CHIP_TOPAZ) {
2029			ring->use_doorbell = true;
2030			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2031		}
2032
2033		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2034				     AMDGPU_CP_IRQ_GFX_EOP);
 
2035		if (r)
2036			return r;
2037	}
2038
2039
2040	/* set up the compute queues - allocate horizontally across pipes */
2041	ring_id = 0;
2042	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2043		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2044			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2045				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2046					continue;
2047
2048				r = gfx_v8_0_compute_ring_init(adev,
2049								ring_id,
2050								i, k, j);
2051				if (r)
2052					return r;
2053
2054				ring_id++;
2055			}
2056		}
2057	}
2058
2059	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2060	if (r) {
2061		DRM_ERROR("Failed to init KIQ BOs!\n");
2062		return r;
2063	}
2064
2065	kiq = &adev->gfx.kiq;
2066	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2067	if (r)
2068		return r;
2069
2070	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2071	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2072	if (r)
2073		return r;
2074
2075	/* reserve GDS, GWS and OA resource for gfx */
2076	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2077				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2078				    &adev->gds.gds_gfx_bo, NULL, NULL);
2079	if (r)
2080		return r;
2081
2082	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2083				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2084				    &adev->gds.gws_gfx_bo, NULL, NULL);
2085	if (r)
2086		return r;
2087
2088	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2089				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2090				    &adev->gds.oa_gfx_bo, NULL, NULL);
2091	if (r)
2092		return r;
2093
2094	adev->gfx.ce_ram_size = 0x8000;
2095
2096	r = gfx_v8_0_gpu_early_init(adev);
2097	if (r)
2098		return r;
2099
2100	return 0;
2101}
2102
2103static int gfx_v8_0_sw_fini(void *handle)
2104{
2105	int i;
2106	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2107
2108	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2109	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2110	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2111
2112	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2113		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2114	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2115		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2116
2117	amdgpu_gfx_compute_mqd_sw_fini(adev);
2118	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2119	amdgpu_gfx_kiq_fini(adev);
2120
2121	gfx_v8_0_mec_fini(adev);
2122	gfx_v8_0_rlc_fini(adev);
2123	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2124				&adev->gfx.rlc.clear_state_gpu_addr,
2125				(void **)&adev->gfx.rlc.cs_ptr);
2126	if ((adev->asic_type == CHIP_CARRIZO) ||
2127	    (adev->asic_type == CHIP_STONEY)) {
2128		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2129				&adev->gfx.rlc.cp_table_gpu_addr,
2130				(void **)&adev->gfx.rlc.cp_table_ptr);
2131	}
2132	gfx_v8_0_free_microcode(adev);
2133
2134	return 0;
2135}
2136
2137static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2138{
2139	uint32_t *modearray, *mod2array;
2140	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2141	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2142	u32 reg_offset;
2143
2144	modearray = adev->gfx.config.tile_mode_array;
2145	mod2array = adev->gfx.config.macrotile_mode_array;
2146
2147	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2148		modearray[reg_offset] = 0;
2149
2150	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2151		mod2array[reg_offset] = 0;
2152
2153	switch (adev->asic_type) {
2154	case CHIP_TOPAZ:
2155		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156				PIPE_CONFIG(ADDR_SURF_P2) |
2157				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2158				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160				PIPE_CONFIG(ADDR_SURF_P2) |
2161				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2162				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164				PIPE_CONFIG(ADDR_SURF_P2) |
2165				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2166				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2168				PIPE_CONFIG(ADDR_SURF_P2) |
2169				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2170				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172				PIPE_CONFIG(ADDR_SURF_P2) |
2173				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2174				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2175		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176				PIPE_CONFIG(ADDR_SURF_P2) |
2177				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2178				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2179		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2180				PIPE_CONFIG(ADDR_SURF_P2) |
2181				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2182				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2183		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2184				PIPE_CONFIG(ADDR_SURF_P2));
2185		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2186				PIPE_CONFIG(ADDR_SURF_P2) |
2187				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2188				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190				 PIPE_CONFIG(ADDR_SURF_P2) |
2191				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2192				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2194				 PIPE_CONFIG(ADDR_SURF_P2) |
2195				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2196				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2197		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198				 PIPE_CONFIG(ADDR_SURF_P2) |
2199				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202				 PIPE_CONFIG(ADDR_SURF_P2) |
2203				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2206				 PIPE_CONFIG(ADDR_SURF_P2) |
2207				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2208				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210				 PIPE_CONFIG(ADDR_SURF_P2) |
2211				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2212				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2213		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2214				 PIPE_CONFIG(ADDR_SURF_P2) |
2215				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2216				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2218				 PIPE_CONFIG(ADDR_SURF_P2) |
2219				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2222				 PIPE_CONFIG(ADDR_SURF_P2) |
2223				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2226				 PIPE_CONFIG(ADDR_SURF_P2) |
2227				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2228				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2230				 PIPE_CONFIG(ADDR_SURF_P2) |
2231				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2234				 PIPE_CONFIG(ADDR_SURF_P2) |
2235				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2236				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2237		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2238				 PIPE_CONFIG(ADDR_SURF_P2) |
2239				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2240				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2241		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2242				 PIPE_CONFIG(ADDR_SURF_P2) |
2243				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2244				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2245		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246				 PIPE_CONFIG(ADDR_SURF_P2) |
2247				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2248				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2249		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250				 PIPE_CONFIG(ADDR_SURF_P2) |
2251				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2252				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2254				 PIPE_CONFIG(ADDR_SURF_P2) |
2255				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2256				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2257
2258		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2259				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261				NUM_BANKS(ADDR_SURF_8_BANK));
2262		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2263				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265				NUM_BANKS(ADDR_SURF_8_BANK));
2266		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2267				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2268				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269				NUM_BANKS(ADDR_SURF_8_BANK));
2270		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2272				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273				NUM_BANKS(ADDR_SURF_8_BANK));
2274		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2276				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277				NUM_BANKS(ADDR_SURF_8_BANK));
2278		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2281				NUM_BANKS(ADDR_SURF_8_BANK));
2282		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2284				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2285				NUM_BANKS(ADDR_SURF_8_BANK));
2286		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2287				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2288				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289				NUM_BANKS(ADDR_SURF_16_BANK));
2290		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2291				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2292				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293				NUM_BANKS(ADDR_SURF_16_BANK));
2294		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2295				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297				 NUM_BANKS(ADDR_SURF_16_BANK));
2298		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2299				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2300				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2301				 NUM_BANKS(ADDR_SURF_16_BANK));
2302		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2304				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2305				 NUM_BANKS(ADDR_SURF_16_BANK));
2306		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2309				 NUM_BANKS(ADDR_SURF_16_BANK));
2310		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2311				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2312				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2313				 NUM_BANKS(ADDR_SURF_8_BANK));
2314
2315		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2316			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2317			    reg_offset != 23)
2318				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2319
2320		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2321			if (reg_offset != 7)
2322				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2323
2324		break;
2325	case CHIP_FIJI:
 
2326		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2329				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2333				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2337				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2341				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2349				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2350		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2351				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2353				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2354		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2355				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2356				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2357				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2358		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2359				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2360		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2361				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2365				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2367				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2375				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2377				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2384		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2385				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2389				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2391				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2392		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2394				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2396		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2397				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2399				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2401				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2405				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2409				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2413				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2415				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2417				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2418				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2421				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2424		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2425				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2427				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2428		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2429				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2431				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2432		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2439				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2443				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448
2449		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452				NUM_BANKS(ADDR_SURF_8_BANK));
2453		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456				NUM_BANKS(ADDR_SURF_8_BANK));
2457		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460				NUM_BANKS(ADDR_SURF_8_BANK));
2461		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464				NUM_BANKS(ADDR_SURF_8_BANK));
2465		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468				NUM_BANKS(ADDR_SURF_8_BANK));
2469		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472				NUM_BANKS(ADDR_SURF_8_BANK));
2473		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476				NUM_BANKS(ADDR_SURF_8_BANK));
2477		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2479				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480				NUM_BANKS(ADDR_SURF_8_BANK));
2481		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2483				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484				NUM_BANKS(ADDR_SURF_8_BANK));
2485		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2487				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488				 NUM_BANKS(ADDR_SURF_8_BANK));
2489		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492				 NUM_BANKS(ADDR_SURF_8_BANK));
2493		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2495				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496				 NUM_BANKS(ADDR_SURF_8_BANK));
2497		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2500				 NUM_BANKS(ADDR_SURF_8_BANK));
2501		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504				 NUM_BANKS(ADDR_SURF_4_BANK));
2505
2506		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2508
2509		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2510			if (reg_offset != 7)
2511				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2512
2513		break;
2514	case CHIP_TONGA:
2515		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2522				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2526				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2528				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2530				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2536				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2538				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2539		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2540				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2542				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2543		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2544				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2545				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2546				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2547		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2548				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2549		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2550				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2560				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2564				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2566				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2573		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2574				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2580				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2581		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2584				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2585		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2586				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2588				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2590				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2594				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2598				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2602				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2604				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2606				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2610				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2613		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2614				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2616				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2617		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2618				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2620				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2621		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2622				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2632				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2636				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2637
2638		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641				NUM_BANKS(ADDR_SURF_16_BANK));
2642		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2644				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2645				NUM_BANKS(ADDR_SURF_16_BANK));
2646		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649				NUM_BANKS(ADDR_SURF_16_BANK));
2650		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653				NUM_BANKS(ADDR_SURF_16_BANK));
2654		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657				NUM_BANKS(ADDR_SURF_16_BANK));
2658		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2661				NUM_BANKS(ADDR_SURF_16_BANK));
2662		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665				NUM_BANKS(ADDR_SURF_16_BANK));
2666		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2668				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2669				NUM_BANKS(ADDR_SURF_16_BANK));
2670		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2672				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2673				NUM_BANKS(ADDR_SURF_16_BANK));
2674		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2677				 NUM_BANKS(ADDR_SURF_16_BANK));
2678		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2681				 NUM_BANKS(ADDR_SURF_16_BANK));
2682		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2684				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2685				 NUM_BANKS(ADDR_SURF_8_BANK));
2686		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2689				 NUM_BANKS(ADDR_SURF_4_BANK));
2690		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693				 NUM_BANKS(ADDR_SURF_4_BANK));
2694
2695		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2696			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2697
2698		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2699			if (reg_offset != 7)
2700				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2701
2702		break;
2703	case CHIP_POLARIS11:
2704	case CHIP_POLARIS12:
2705		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2708				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2712				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2716				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2720				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2721		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2724				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2725		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2728				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2729		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2730				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2732				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2733		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2734				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2736				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2737		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2738				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2739		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2740				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2746				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2750				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2754				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2756				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2760				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2762				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2764				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2767		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2770				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2772				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2774				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2775		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2776				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2778				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2780				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2784				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2788				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2792				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2794				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2795		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2796				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2798				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2799		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2800				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2802				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2803		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2804				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2806				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2807		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2808				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2810				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2811		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2814				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2818				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2822				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2826				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2827
2828		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831				NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2835				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836				NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841				NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846				NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851				NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856				NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2861				NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2864				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2865				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2866				NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2869				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871				NUM_BANKS(ADDR_SURF_16_BANK));
2872
2873		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2876				NUM_BANKS(ADDR_SURF_16_BANK));
2877
2878		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2880				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2881				NUM_BANKS(ADDR_SURF_16_BANK));
2882
2883		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2885				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2886				NUM_BANKS(ADDR_SURF_16_BANK));
2887
2888		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2891				NUM_BANKS(ADDR_SURF_8_BANK));
2892
2893		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2896				NUM_BANKS(ADDR_SURF_4_BANK));
2897
2898		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2899			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2900
2901		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2902			if (reg_offset != 7)
2903				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2904
2905		break;
2906	case CHIP_POLARIS10:
2907		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2910				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2914				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2918				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2922				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2924				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2930				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2931		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2934				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2935		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2938				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2939		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2941		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2942				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2952				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2958				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2962				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2966				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2975				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2977		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2978				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2982				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2986				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2990				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2994				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2998				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2999				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3000				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3002				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3004				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3005		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3006				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3008				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3009		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3010				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3012				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3013		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3018				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3024				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3028				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029
3030		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3032				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033				NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3037				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038				NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3042				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043				NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3047				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048				NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3052				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3053				NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058				NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063				NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068				NUM_BANKS(ADDR_SURF_16_BANK));
3069
3070		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073				NUM_BANKS(ADDR_SURF_16_BANK));
3074
3075		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3077				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3078				NUM_BANKS(ADDR_SURF_16_BANK));
3079
3080		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083				NUM_BANKS(ADDR_SURF_16_BANK));
3084
3085		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3088				NUM_BANKS(ADDR_SURF_8_BANK));
3089
3090		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3092				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3093				NUM_BANKS(ADDR_SURF_4_BANK));
3094
3095		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3098				NUM_BANKS(ADDR_SURF_4_BANK));
3099
3100		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3101			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3102
3103		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3104			if (reg_offset != 7)
3105				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3106
3107		break;
3108	case CHIP_STONEY:
3109		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110				PIPE_CONFIG(ADDR_SURF_P2) |
3111				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3112				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114				PIPE_CONFIG(ADDR_SURF_P2) |
3115				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3116				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118				PIPE_CONFIG(ADDR_SURF_P2) |
3119				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3120				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122				PIPE_CONFIG(ADDR_SURF_P2) |
3123				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3124				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126				PIPE_CONFIG(ADDR_SURF_P2) |
3127				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3128				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3130				PIPE_CONFIG(ADDR_SURF_P2) |
3131				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3132				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3134				PIPE_CONFIG(ADDR_SURF_P2) |
3135				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3136				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3137		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3138				PIPE_CONFIG(ADDR_SURF_P2));
3139		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3140				PIPE_CONFIG(ADDR_SURF_P2) |
3141				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3142				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144				 PIPE_CONFIG(ADDR_SURF_P2) |
3145				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3146				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3147		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3148				 PIPE_CONFIG(ADDR_SURF_P2) |
3149				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3150				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3151		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3152				 PIPE_CONFIG(ADDR_SURF_P2) |
3153				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3156				 PIPE_CONFIG(ADDR_SURF_P2) |
3157				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3160				 PIPE_CONFIG(ADDR_SURF_P2) |
3161				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164				 PIPE_CONFIG(ADDR_SURF_P2) |
3165				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3168				 PIPE_CONFIG(ADDR_SURF_P2) |
3169				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3172				 PIPE_CONFIG(ADDR_SURF_P2) |
3173				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3176				 PIPE_CONFIG(ADDR_SURF_P2) |
3177				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3180				 PIPE_CONFIG(ADDR_SURF_P2) |
3181				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3184				 PIPE_CONFIG(ADDR_SURF_P2) |
3185				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3186				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3188				 PIPE_CONFIG(ADDR_SURF_P2) |
3189				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3190				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3192				 PIPE_CONFIG(ADDR_SURF_P2) |
3193				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3196				 PIPE_CONFIG(ADDR_SURF_P2) |
3197				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3198				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3199		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200				 PIPE_CONFIG(ADDR_SURF_P2) |
3201				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204				 PIPE_CONFIG(ADDR_SURF_P2) |
3205				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208				 PIPE_CONFIG(ADDR_SURF_P2) |
3209				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3210				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211
3212		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215				NUM_BANKS(ADDR_SURF_8_BANK));
3216		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219				NUM_BANKS(ADDR_SURF_8_BANK));
3220		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223				NUM_BANKS(ADDR_SURF_8_BANK));
3224		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227				NUM_BANKS(ADDR_SURF_8_BANK));
3228		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231				NUM_BANKS(ADDR_SURF_8_BANK));
3232		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3235				NUM_BANKS(ADDR_SURF_8_BANK));
3236		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3238				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3239				NUM_BANKS(ADDR_SURF_8_BANK));
3240		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3241				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3242				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243				NUM_BANKS(ADDR_SURF_16_BANK));
3244		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3245				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247				NUM_BANKS(ADDR_SURF_16_BANK));
3248		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3249				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3250				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251				 NUM_BANKS(ADDR_SURF_16_BANK));
3252		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255				 NUM_BANKS(ADDR_SURF_16_BANK));
3256		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3258				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259				 NUM_BANKS(ADDR_SURF_16_BANK));
3260		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3262				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263				 NUM_BANKS(ADDR_SURF_16_BANK));
3264		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3267				 NUM_BANKS(ADDR_SURF_8_BANK));
3268
3269		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3270			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3271			    reg_offset != 23)
3272				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3273
3274		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3275			if (reg_offset != 7)
3276				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3277
3278		break;
3279	default:
3280		dev_warn(adev->dev,
3281			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3282			 adev->asic_type);
 
3283
3284	case CHIP_CARRIZO:
3285		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286				PIPE_CONFIG(ADDR_SURF_P2) |
3287				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3288				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290				PIPE_CONFIG(ADDR_SURF_P2) |
3291				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3292				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294				PIPE_CONFIG(ADDR_SURF_P2) |
3295				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3296				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298				PIPE_CONFIG(ADDR_SURF_P2) |
3299				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3300				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3302				PIPE_CONFIG(ADDR_SURF_P2) |
3303				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3304				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306				PIPE_CONFIG(ADDR_SURF_P2) |
3307				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3308				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3310				PIPE_CONFIG(ADDR_SURF_P2) |
3311				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3312				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3313		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3314				PIPE_CONFIG(ADDR_SURF_P2));
3315		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3316				PIPE_CONFIG(ADDR_SURF_P2) |
3317				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3318				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320				 PIPE_CONFIG(ADDR_SURF_P2) |
3321				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3322				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3323		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3324				 PIPE_CONFIG(ADDR_SURF_P2) |
3325				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3326				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3327		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328				 PIPE_CONFIG(ADDR_SURF_P2) |
3329				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3332				 PIPE_CONFIG(ADDR_SURF_P2) |
3333				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3336				 PIPE_CONFIG(ADDR_SURF_P2) |
3337				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340				 PIPE_CONFIG(ADDR_SURF_P2) |
3341				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3342				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3344				 PIPE_CONFIG(ADDR_SURF_P2) |
3345				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3348				 PIPE_CONFIG(ADDR_SURF_P2) |
3349				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3352				 PIPE_CONFIG(ADDR_SURF_P2) |
3353				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3356				 PIPE_CONFIG(ADDR_SURF_P2) |
3357				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3360				 PIPE_CONFIG(ADDR_SURF_P2) |
3361				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3362				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3364				 PIPE_CONFIG(ADDR_SURF_P2) |
3365				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3366				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3368				 PIPE_CONFIG(ADDR_SURF_P2) |
3369				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3372				 PIPE_CONFIG(ADDR_SURF_P2) |
3373				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3374				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3375		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3376				 PIPE_CONFIG(ADDR_SURF_P2) |
3377				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3378				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380				 PIPE_CONFIG(ADDR_SURF_P2) |
3381				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3382				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3384				 PIPE_CONFIG(ADDR_SURF_P2) |
3385				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3386				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3387
3388		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3390				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391				NUM_BANKS(ADDR_SURF_8_BANK));
3392		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3394				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395				NUM_BANKS(ADDR_SURF_8_BANK));
3396		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399				NUM_BANKS(ADDR_SURF_8_BANK));
3400		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403				NUM_BANKS(ADDR_SURF_8_BANK));
3404		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407				NUM_BANKS(ADDR_SURF_8_BANK));
3408		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3411				NUM_BANKS(ADDR_SURF_8_BANK));
3412		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415				NUM_BANKS(ADDR_SURF_8_BANK));
3416		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3417				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3418				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419				NUM_BANKS(ADDR_SURF_16_BANK));
3420		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3421				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3422				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423				NUM_BANKS(ADDR_SURF_16_BANK));
3424		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3425				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3426				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427				 NUM_BANKS(ADDR_SURF_16_BANK));
3428		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3429				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3430				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3431				 NUM_BANKS(ADDR_SURF_16_BANK));
3432		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3433				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3434				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435				 NUM_BANKS(ADDR_SURF_16_BANK));
3436		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3438				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439				 NUM_BANKS(ADDR_SURF_16_BANK));
3440		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3442				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3443				 NUM_BANKS(ADDR_SURF_8_BANK));
3444
3445		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3446			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3447			    reg_offset != 23)
3448				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3449
3450		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3451			if (reg_offset != 7)
3452				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3453
3454		break;
3455	}
3456}
3457
3458static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3459				  u32 se_num, u32 sh_num, u32 instance)
3460{
3461	u32 data;
3462
3463	if (instance == 0xffffffff)
3464		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3465	else
3466		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3467
3468	if (se_num == 0xffffffff)
3469		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3470	else
3471		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3472
3473	if (sh_num == 0xffffffff)
3474		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3475	else
3476		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3477
3478	WREG32(mmGRBM_GFX_INDEX, data);
3479}
3480
3481static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3482				  u32 me, u32 pipe, u32 q)
3483{
3484	vi_srbm_select(adev, me, pipe, q, 0);
3485}
3486
3487static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3488{
3489	u32 data, mask;
3490
3491	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3492		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3493
3494	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3495
3496	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3497					 adev->gfx.config.max_sh_per_se);
3498
3499	return (~data) & mask;
3500}
3501
3502static void
3503gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3504{
3505	switch (adev->asic_type) {
3506	case CHIP_FIJI:
 
3507		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3508			  RB_XSEL2(1) | PKR_MAP(2) |
3509			  PKR_XSEL(1) | PKR_YSEL(1) |
3510			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3511		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3512			   SE_PAIR_YSEL(2);
3513		break;
3514	case CHIP_TONGA:
3515	case CHIP_POLARIS10:
3516		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3517			  SE_XSEL(1) | SE_YSEL(1);
3518		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3519			   SE_PAIR_YSEL(2);
3520		break;
3521	case CHIP_TOPAZ:
3522	case CHIP_CARRIZO:
3523		*rconf |= RB_MAP_PKR0(2);
3524		*rconf1 |= 0x0;
3525		break;
3526	case CHIP_POLARIS11:
3527	case CHIP_POLARIS12:
3528		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3529			  SE_XSEL(1) | SE_YSEL(1);
3530		*rconf1 |= 0x0;
3531		break;
3532	case CHIP_STONEY:
3533		*rconf |= 0x0;
3534		*rconf1 |= 0x0;
3535		break;
3536	default:
3537		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3538		break;
3539	}
3540}
3541
3542static void
3543gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3544					u32 raster_config, u32 raster_config_1,
3545					unsigned rb_mask, unsigned num_rb)
3546{
3547	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3548	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3549	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3550	unsigned rb_per_se = num_rb / num_se;
3551	unsigned se_mask[4];
3552	unsigned se;
3553
3554	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3555	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3556	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3557	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3558
3559	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3560	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3561	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3562
3563	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3564			     (!se_mask[2] && !se_mask[3]))) {
3565		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3566
3567		if (!se_mask[0] && !se_mask[1]) {
3568			raster_config_1 |=
3569				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3570		} else {
3571			raster_config_1 |=
3572				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3573		}
3574	}
3575
3576	for (se = 0; se < num_se; se++) {
3577		unsigned raster_config_se = raster_config;
3578		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3579		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3580		int idx = (se / 2) * 2;
3581
3582		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3583			raster_config_se &= ~SE_MAP_MASK;
3584
3585			if (!se_mask[idx]) {
3586				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3587			} else {
3588				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3589			}
3590		}
3591
3592		pkr0_mask &= rb_mask;
3593		pkr1_mask &= rb_mask;
3594		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3595			raster_config_se &= ~PKR_MAP_MASK;
3596
3597			if (!pkr0_mask) {
3598				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3599			} else {
3600				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3601			}
3602		}
3603
3604		if (rb_per_se >= 2) {
3605			unsigned rb0_mask = 1 << (se * rb_per_se);
3606			unsigned rb1_mask = rb0_mask << 1;
3607
3608			rb0_mask &= rb_mask;
3609			rb1_mask &= rb_mask;
3610			if (!rb0_mask || !rb1_mask) {
3611				raster_config_se &= ~RB_MAP_PKR0_MASK;
3612
3613				if (!rb0_mask) {
3614					raster_config_se |=
3615						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3616				} else {
3617					raster_config_se |=
3618						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3619				}
3620			}
3621
3622			if (rb_per_se > 2) {
3623				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3624				rb1_mask = rb0_mask << 1;
3625				rb0_mask &= rb_mask;
3626				rb1_mask &= rb_mask;
3627				if (!rb0_mask || !rb1_mask) {
3628					raster_config_se &= ~RB_MAP_PKR1_MASK;
3629
3630					if (!rb0_mask) {
3631						raster_config_se |=
3632							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3633					} else {
3634						raster_config_se |=
3635							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3636					}
3637				}
3638			}
3639		}
3640
3641		/* GRBM_GFX_INDEX has a different offset on VI */
3642		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3643		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3644		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3645	}
3646
3647	/* GRBM_GFX_INDEX has a different offset on VI */
3648	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3649}
3650
3651static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3652{
3653	int i, j;
3654	u32 data;
3655	u32 raster_config = 0, raster_config_1 = 0;
3656	u32 active_rbs = 0;
3657	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3658					adev->gfx.config.max_sh_per_se;
3659	unsigned num_rb_pipes;
3660
3661	mutex_lock(&adev->grbm_idx_mutex);
3662	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3663		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3664			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3665			data = gfx_v8_0_get_rb_active_bitmap(adev);
3666			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3667					       rb_bitmap_width_per_sh);
3668		}
3669	}
3670	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671
3672	adev->gfx.config.backend_enable_mask = active_rbs;
3673	adev->gfx.config.num_rbs = hweight32(active_rbs);
3674
3675	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3676			     adev->gfx.config.max_shader_engines, 16);
3677
3678	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3679
3680	if (!adev->gfx.config.backend_enable_mask ||
3681			adev->gfx.config.num_rbs >= num_rb_pipes) {
3682		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3683		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3684	} else {
3685		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3686							adev->gfx.config.backend_enable_mask,
3687							num_rb_pipes);
3688	}
3689
3690	/* cache the values for userspace */
3691	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3692		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3693			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3694			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3695				RREG32(mmCC_RB_BACKEND_DISABLE);
3696			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3697				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3698			adev->gfx.config.rb_config[i][j].raster_config =
3699				RREG32(mmPA_SC_RASTER_CONFIG);
3700			adev->gfx.config.rb_config[i][j].raster_config_1 =
3701				RREG32(mmPA_SC_RASTER_CONFIG_1);
3702		}
3703	}
3704	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3705	mutex_unlock(&adev->grbm_idx_mutex);
3706}
3707
 
3708/**
3709 * gfx_v8_0_init_compute_vmid - gart enable
3710 *
3711 * @adev: amdgpu_device pointer
3712 *
3713 * Initialize compute vmid sh_mem registers
3714 *
3715 */
3716#define DEFAULT_SH_MEM_BASES	(0x6000)
3717#define FIRST_COMPUTE_VMID	(8)
3718#define LAST_COMPUTE_VMID	(16)
3719static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3720{
3721	int i;
3722	uint32_t sh_mem_config;
3723	uint32_t sh_mem_bases;
3724
3725	/*
3726	 * Configure apertures:
3727	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3728	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3729	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3730	 */
3731	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3732
3733	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3734			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3735			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3736			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3737			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3738			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3739
3740	mutex_lock(&adev->srbm_mutex);
3741	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3742		vi_srbm_select(adev, 0, 0, 0, i);
3743		/* CP and shaders */
3744		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3745		WREG32(mmSH_MEM_APE1_BASE, 1);
3746		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3747		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3748	}
3749	vi_srbm_select(adev, 0, 0, 0, 0);
3750	mutex_unlock(&adev->srbm_mutex);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3751}
3752
3753static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3754{
3755	switch (adev->asic_type) {
3756	default:
3757		adev->gfx.config.double_offchip_lds_buf = 1;
3758		break;
3759	case CHIP_CARRIZO:
3760	case CHIP_STONEY:
3761		adev->gfx.config.double_offchip_lds_buf = 0;
3762		break;
3763	}
3764}
3765
3766static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3767{
3768	u32 tmp, sh_static_mem_cfg;
3769	int i;
3770
3771	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3772	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3773	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3774	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3775
3776	gfx_v8_0_tiling_mode_table_init(adev);
3777	gfx_v8_0_setup_rb(adev);
3778	gfx_v8_0_get_cu_info(adev);
3779	gfx_v8_0_config_init(adev);
3780
3781	/* XXX SH_MEM regs */
3782	/* where to put LDS, scratch, GPUVM in FSA64 space */
3783	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3784				   SWIZZLE_ENABLE, 1);
3785	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3786				   ELEMENT_SIZE, 1);
3787	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3788				   INDEX_STRIDE, 3);
3789	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3790
3791	mutex_lock(&adev->srbm_mutex);
3792	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3793		vi_srbm_select(adev, 0, 0, 0, i);
3794		/* CP and shaders */
3795		if (i == 0) {
3796			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3797			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3798			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3799					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3800			WREG32(mmSH_MEM_CONFIG, tmp);
3801			WREG32(mmSH_MEM_BASES, 0);
3802		} else {
3803			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3804			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3805			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3806					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3807			WREG32(mmSH_MEM_CONFIG, tmp);
3808			tmp = adev->gmc.shared_aperture_start >> 48;
3809			WREG32(mmSH_MEM_BASES, tmp);
3810		}
3811
3812		WREG32(mmSH_MEM_APE1_BASE, 1);
3813		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3814	}
3815	vi_srbm_select(adev, 0, 0, 0, 0);
3816	mutex_unlock(&adev->srbm_mutex);
3817
3818	gfx_v8_0_init_compute_vmid(adev);
 
3819
3820	mutex_lock(&adev->grbm_idx_mutex);
3821	/*
3822	 * making sure that the following register writes will be broadcasted
3823	 * to all the shaders
3824	 */
3825	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3826
3827	WREG32(mmPA_SC_FIFO_SIZE,
3828		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3829			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3830		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3831			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3832		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3833			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3834		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3835			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3836
3837	tmp = RREG32(mmSPI_ARB_PRIORITY);
3838	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3839	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3840	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3841	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3842	WREG32(mmSPI_ARB_PRIORITY, tmp);
3843
3844	mutex_unlock(&adev->grbm_idx_mutex);
3845
3846}
3847
3848static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3849{
3850	u32 i, j, k;
3851	u32 mask;
3852
3853	mutex_lock(&adev->grbm_idx_mutex);
3854	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3855		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3856			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3857			for (k = 0; k < adev->usec_timeout; k++) {
3858				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3859					break;
3860				udelay(1);
3861			}
3862			if (k == adev->usec_timeout) {
3863				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3864						      0xffffffff, 0xffffffff);
3865				mutex_unlock(&adev->grbm_idx_mutex);
3866				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3867					 i, j);
3868				return;
3869			}
3870		}
3871	}
3872	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3873	mutex_unlock(&adev->grbm_idx_mutex);
3874
3875	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3876		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3877		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3878		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3879	for (k = 0; k < adev->usec_timeout; k++) {
3880		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3881			break;
3882		udelay(1);
3883	}
3884}
3885
3886static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3887					       bool enable)
3888{
3889	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3890
3891	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3892	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3893	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3894	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3895
3896	WREG32(mmCP_INT_CNTL_RING0, tmp);
3897}
3898
3899static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3900{
 
3901	/* csib */
3902	WREG32(mmRLC_CSIB_ADDR_HI,
3903			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3904	WREG32(mmRLC_CSIB_ADDR_LO,
3905			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3906	WREG32(mmRLC_CSIB_LENGTH,
3907			adev->gfx.rlc.clear_state_size);
3908}
3909
/*
 * Scan the register list format from @ind_offset up to @list_size (both in
 * array elements) and rewrite it in place.
 *
 * The list is treated as a sequence of entries separated by 0xFFFFFFFF
 * markers.  The offset at which each entry starts is appended to
 * @ind_start_offsets (count kept in @offset_count).  Inside an entry, the
 * element two positions after each visited element is looked up in
 * @unique_indices; unknown values are appended there (count kept in
 * @indices_count) and the element is replaced by the position of its value
 * in @unique_indices.
 *
 * BUG()s when @offset_count reaches @max_offset or @indices_count reaches
 * @max_indices.  A list that ends in the middle of a group (fewer than two
 * elements left after the current one) terminates the scan instead of
 * accessing memory past @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* end-of-entry marker: the next element starts a new entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* truncated list: never read or write beyond list_size */
		if (ind_offset >= list_size)
			break;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: remember it as a new unique value */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the value by its position in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3959
3960static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3961{
3962	int i, temp, data;
3963	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3964	int indices_count = 0;
3965	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3966	int offset_count = 0;
3967
3968	int list_size;
3969	unsigned int *register_list_format =
3970		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
 
3971	if (!register_list_format)
3972		return -ENOMEM;
3973	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3974			adev->gfx.rlc.reg_list_format_size_bytes);
3975
3976	gfx_v8_0_parse_ind_reg_list(register_list_format,
3977				RLC_FormatDirectRegListLength,
3978				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3979				unique_indices,
3980				&indices_count,
3981				ARRAY_SIZE(unique_indices),
3982				indirect_start_offsets,
3983				&offset_count,
3984				ARRAY_SIZE(indirect_start_offsets));
3985
3986	/* save and restore list */
3987	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3988
3989	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3990	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3991		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3992
3993	/* indirect list */
3994	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3995	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3996		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3997
3998	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3999	list_size = list_size >> 1;
4000	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4001	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4002
4003	/* starting offsets starts */
4004	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4005		adev->gfx.rlc.starting_offsets_start);
4006	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4007		WREG32(mmRLC_GPM_SCRATCH_DATA,
4008				indirect_start_offsets[i]);
4009
4010	/* unique indices */
4011	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4012	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4013	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4014		if (unique_indices[i] != 0) {
4015			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4016			WREG32(data + i, unique_indices[i] >> 20);
4017		}
4018	}
4019	kfree(register_list_format);
4020
4021	return 0;
4022}
4023
4024static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4025{
4026	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4027}
4028
4029static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4030{
4031	uint32_t data;
4032
4033	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4034
4035	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4036	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4037	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4038	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4039	WREG32(mmRLC_PG_DELAY, data);
4040
4041	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4042	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4043
4044}
4045
4046static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4047						bool enable)
4048{
4049	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4050}
4051
4052static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4053						  bool enable)
4054{
4055	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4056}
4057
4058static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4059{
4060	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4061}
4062
4063static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4064{
4065	if ((adev->asic_type == CHIP_CARRIZO) ||
4066	    (adev->asic_type == CHIP_STONEY)) {
4067		gfx_v8_0_init_csb(adev);
4068		gfx_v8_0_init_save_restore_list(adev);
4069		gfx_v8_0_enable_save_restore_machine(adev);
4070		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4071		gfx_v8_0_init_power_gating(adev);
4072		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4073	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4074		   (adev->asic_type == CHIP_POLARIS12)) {
 
4075		gfx_v8_0_init_csb(adev);
4076		gfx_v8_0_init_save_restore_list(adev);
4077		gfx_v8_0_enable_save_restore_machine(adev);
4078		gfx_v8_0_init_power_gating(adev);
4079	}
4080
4081}
4082
4083static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4084{
4085	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4086
4087	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4088	gfx_v8_0_wait_for_rlc_serdes(adev);
4089}
4090
4091static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4092{
4093	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4094	udelay(50);
4095
4096	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4097	udelay(50);
4098}
4099
4100static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4101{
4102	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4103
4104	/* carrizo do enable cp interrupt after cp inited */
4105	if (!(adev->flags & AMD_IS_APU))
4106		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4107
4108	udelay(50);
4109}
4110
4111static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4112{
4113	const struct rlc_firmware_header_v2_0 *hdr;
4114	const __le32 *fw_data;
4115	unsigned i, fw_size;
4116
4117	if (!adev->gfx.rlc_fw)
4118		return -EINVAL;
4119
4120	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4121	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4122
4123	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4124			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4125	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4126
4127	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4128	for (i = 0; i < fw_size; i++)
4129		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4130	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4131
4132	return 0;
4133}
4134
4135static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4136{
4137	int r;
4138	u32 tmp;
4139
4140	gfx_v8_0_rlc_stop(adev);
4141
4142	/* disable CG */
4143	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4144	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4145		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4146	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4147	if (adev->asic_type == CHIP_POLARIS11 ||
4148	    adev->asic_type == CHIP_POLARIS10 ||
4149	    adev->asic_type == CHIP_POLARIS12) {
4150		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4151		tmp &= ~0x3;
4152		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4153	}
4154
4155	/* disable PG */
4156	WREG32(mmRLC_PG_CNTL, 0);
4157
4158	gfx_v8_0_rlc_reset(adev);
4159	gfx_v8_0_init_pg(adev);
4160
4161
4162	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4163		/* legacy rlc firmware loading */
4164		r = gfx_v8_0_rlc_load_microcode(adev);
4165		if (r)
4166			return r;
4167	}
4168
4169	gfx_v8_0_rlc_start(adev);
4170
4171	return 0;
4172}
4173
4174static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4175{
4176	int i;
4177	u32 tmp = RREG32(mmCP_ME_CNTL);
4178
4179	if (enable) {
4180		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4181		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4182		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4183	} else {
4184		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4185		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4186		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4187		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4188			adev->gfx.gfx_ring[i].ready = false;
4189	}
4190	WREG32(mmCP_ME_CNTL, tmp);
4191	udelay(50);
4192}
4193
4194static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4195{
4196	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4197	const struct gfx_firmware_header_v1_0 *ce_hdr;
4198	const struct gfx_firmware_header_v1_0 *me_hdr;
4199	const __le32 *fw_data;
4200	unsigned i, fw_size;
4201
4202	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4203		return -EINVAL;
4204
4205	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4206		adev->gfx.pfp_fw->data;
4207	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4208		adev->gfx.ce_fw->data;
4209	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4210		adev->gfx.me_fw->data;
4211
4212	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4213	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4214	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4215
4216	gfx_v8_0_cp_gfx_enable(adev, false);
4217
4218	/* PFP */
4219	fw_data = (const __le32 *)
4220		(adev->gfx.pfp_fw->data +
4221		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4222	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4223	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4224	for (i = 0; i < fw_size; i++)
4225		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4226	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4227
4228	/* CE */
4229	fw_data = (const __le32 *)
4230		(adev->gfx.ce_fw->data +
4231		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4232	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4233	WREG32(mmCP_CE_UCODE_ADDR, 0);
4234	for (i = 0; i < fw_size; i++)
4235		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4236	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4237
4238	/* ME */
4239	fw_data = (const __le32 *)
4240		(adev->gfx.me_fw->data +
4241		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4242	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4243	WREG32(mmCP_ME_RAM_WADDR, 0);
4244	for (i = 0; i < fw_size; i++)
4245		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4246	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4247
4248	return 0;
4249}
4250
4251static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4252{
4253	u32 count = 0;
4254	const struct cs_section_def *sect = NULL;
4255	const struct cs_extent_def *ext = NULL;
4256
4257	/* begin clear state */
4258	count += 2;
4259	/* context control state */
4260	count += 3;
4261
4262	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4263		for (ext = sect->section; ext->extent != NULL; ++ext) {
4264			if (sect->id == SECT_CONTEXT)
4265				count += 2 + ext->reg_count;
4266			else
4267				return 0;
4268		}
4269	}
4270	/* pa_sc_raster_config/pa_sc_raster_config1 */
4271	count += 4;
4272	/* end clear state */
4273	count += 2;
4274	/* clear state */
4275	count += 2;
4276
4277	return count;
4278}
4279
4280static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4281{
4282	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4283	const struct cs_section_def *sect = NULL;
4284	const struct cs_extent_def *ext = NULL;
4285	int r, i;
4286
4287	/* init the CP */
4288	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4289	WREG32(mmCP_ENDIAN_SWAP, 0);
4290	WREG32(mmCP_DEVICE_ID, 1);
4291
4292	gfx_v8_0_cp_gfx_enable(adev, true);
4293
4294	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4295	if (r) {
4296		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4297		return r;
4298	}
4299
4300	/* clear state buffer */
4301	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4302	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4303
4304	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4305	amdgpu_ring_write(ring, 0x80000000);
4306	amdgpu_ring_write(ring, 0x80000000);
4307
4308	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4309		for (ext = sect->section; ext->extent != NULL; ++ext) {
4310			if (sect->id == SECT_CONTEXT) {
4311				amdgpu_ring_write(ring,
4312				       PACKET3(PACKET3_SET_CONTEXT_REG,
4313					       ext->reg_count));
4314				amdgpu_ring_write(ring,
4315				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4316				for (i = 0; i < ext->reg_count; i++)
4317					amdgpu_ring_write(ring, ext->extent[i]);
4318			}
4319		}
4320	}
4321
4322	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4323	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4324	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4325	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4326
4327	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4328	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4329
4330	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4331	amdgpu_ring_write(ring, 0);
4332
4333	/* init the CE partitions */
4334	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336	amdgpu_ring_write(ring, 0x8000);
4337	amdgpu_ring_write(ring, 0x8000);
4338
4339	amdgpu_ring_commit(ring);
4340
4341	return 0;
4342}
4343static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4344{
4345	u32 tmp;
4346	/* no gfx doorbells on iceland */
4347	if (adev->asic_type == CHIP_TOPAZ)
4348		return;
4349
4350	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4351
4352	if (ring->use_doorbell) {
4353		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4354				DOORBELL_OFFSET, ring->doorbell_index);
4355		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4356						DOORBELL_HIT, 0);
4357		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4358					    DOORBELL_EN, 1);
4359	} else {
4360		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4361	}
4362
4363	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4364
4365	if (adev->flags & AMD_IS_APU)
4366		return;
4367
4368	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4369					DOORBELL_RANGE_LOWER,
4370					AMDGPU_DOORBELL_GFX_RING0);
4371	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4372
4373	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4374		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4375}
4376
4377static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4378{
4379	struct amdgpu_ring *ring;
4380	u32 tmp;
4381	u32 rb_bufsz;
4382	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4383	int r;
4384
4385	/* Set the write pointer delay */
4386	WREG32(mmCP_RB_WPTR_DELAY, 0);
4387
4388	/* set the RB to use vmid 0 */
4389	WREG32(mmCP_RB_VMID, 0);
4390
4391	/* Set ring buffer size */
4392	ring = &adev->gfx.gfx_ring[0];
4393	rb_bufsz = order_base_2(ring->ring_size / 8);
4394	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4395	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4396	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4397	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4398#ifdef __BIG_ENDIAN
4399	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4400#endif
4401	WREG32(mmCP_RB0_CNTL, tmp);
4402
4403	/* Initialize the ring buffer's read and write pointers */
4404	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4405	ring->wptr = 0;
4406	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4407
4408	/* set the wb address wether it's enabled or not */
4409	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4410	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4411	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4412
4413	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4414	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4415	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4416	mdelay(1);
4417	WREG32(mmCP_RB0_CNTL, tmp);
4418
4419	rb_addr = ring->gpu_addr >> 8;
4420	WREG32(mmCP_RB0_BASE, rb_addr);
4421	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4422
4423	gfx_v8_0_set_cpg_door_bell(adev, ring);
4424	/* start the ring */
4425	amdgpu_ring_clear_ring(ring);
4426	gfx_v8_0_cp_gfx_start(adev);
4427	ring->ready = true;
4428	r = amdgpu_ring_test_ring(ring);
4429	if (r)
4430		ring->ready = false;
4431
4432	return r;
4433}
4434
4435static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4436{
4437	int i;
4438
4439	if (enable) {
4440		WREG32(mmCP_MEC_CNTL, 0);
4441	} else {
4442		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4443		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4444			adev->gfx.compute_ring[i].ready = false;
4445		adev->gfx.kiq.ring.ready = false;
4446	}
4447	udelay(50);
4448}
4449
4450static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4451{
4452	const struct gfx_firmware_header_v1_0 *mec_hdr;
4453	const __le32 *fw_data;
4454	unsigned i, fw_size;
4455
4456	if (!adev->gfx.mec_fw)
4457		return -EINVAL;
4458
4459	gfx_v8_0_cp_compute_enable(adev, false);
4460
4461	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4462	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4463
4464	fw_data = (const __le32 *)
4465		(adev->gfx.mec_fw->data +
4466		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4467	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4468
4469	/* MEC1 */
4470	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4471	for (i = 0; i < fw_size; i++)
4472		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4473	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4474
4475	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4476	if (adev->gfx.mec2_fw) {
4477		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4478
4479		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4480		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4481
4482		fw_data = (const __le32 *)
4483			(adev->gfx.mec2_fw->data +
4484			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4485		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4486
4487		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4488		for (i = 0; i < fw_size; i++)
4489			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4490		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4491	}
4492
4493	return 0;
4494}
4495
4496/* KIQ functions */
4497static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4498{
4499	uint32_t tmp;
4500	struct amdgpu_device *adev = ring->adev;
4501
4502	/* tell RLC which is KIQ queue */
4503	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4504	tmp &= 0xffffff00;
4505	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4506	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4507	tmp |= 0x80;
4508	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4509}
4510
4511static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4512{
4513	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4514	uint32_t scratch, tmp = 0;
4515	uint64_t queue_mask = 0;
4516	int r, i;
4517
4518	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4519		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4520			continue;
4521
4522		/* This situation may be hit in the future if a new HW
4523		 * generation exposes more than 64 queues. If so, the
4524		 * definition of queue_mask needs updating */
4525		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4526			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4527			break;
4528		}
4529
4530		queue_mask |= (1ull << i);
4531	}
4532
4533	r = amdgpu_gfx_scratch_get(adev, &scratch);
4534	if (r) {
4535		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4536		return r;
4537	}
4538	WREG32(scratch, 0xCAFEDEAD);
4539
4540	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4541	if (r) {
4542		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4543		amdgpu_gfx_scratch_free(adev, scratch);
4544		return r;
4545	}
4546	/* set resources */
4547	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4548	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4549	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4550	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4551	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4552	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4553	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4554	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4555	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4556		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4557		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4558		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4559
4560		/* map queues */
4561		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4562		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4563		amdgpu_ring_write(kiq_ring,
4564				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4565		amdgpu_ring_write(kiq_ring,
4566				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4567				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4568				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4569				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4570		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4571		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4572		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4573		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4574	}
4575	/* write to scratch for completion */
4576	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4577	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4578	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4579	amdgpu_ring_commit(kiq_ring);
4580
4581	for (i = 0; i < adev->usec_timeout; i++) {
4582		tmp = RREG32(scratch);
4583		if (tmp == 0xDEADBEEF)
4584			break;
4585		DRM_UDELAY(1);
4586	}
4587	if (i >= adev->usec_timeout) {
4588		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4589			  scratch, tmp);
4590		r = -EINVAL;
4591	}
4592	amdgpu_gfx_scratch_free(adev, scratch);
4593
4594	return r;
4595}
4596
4597static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4598{
4599	int i, r = 0;
4600
4601	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4602		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4603		for (i = 0; i < adev->usec_timeout; i++) {
4604			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4605				break;
4606			udelay(1);
4607		}
4608		if (i == adev->usec_timeout)
4609			r = -ETIMEDOUT;
4610	}
4611	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4612	WREG32(mmCP_HQD_PQ_RPTR, 0);
4613	WREG32(mmCP_HQD_PQ_WPTR, 0);
4614
4615	return r;
4616}
4617
 
 
 
 
 
 
 
 
 
 
 
 
 
4618static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4619{
4620	struct amdgpu_device *adev = ring->adev;
4621	struct vi_mqd *mqd = ring->mqd_ptr;
4622	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4623	uint32_t tmp;
4624
4625	mqd->header = 0xC0310800;
4626	mqd->compute_pipelinestat_enable = 0x00000001;
4627	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4628	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4629	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4630	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4631	mqd->compute_misc_reserved = 0x00000003;
4632	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4633						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4634	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4635						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4636	eop_base_addr = ring->eop_gpu_addr >> 8;
4637	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4638	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4639
4640	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4641	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4642	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4643			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4644
4645	mqd->cp_hqd_eop_control = tmp;
4646
4647	/* enable doorbell? */
4648	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4649			    CP_HQD_PQ_DOORBELL_CONTROL,
4650			    DOORBELL_EN,
4651			    ring->use_doorbell ? 1 : 0);
4652
4653	mqd->cp_hqd_pq_doorbell_control = tmp;
4654
4655	/* set the pointer to the MQD */
4656	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4657	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4658
4659	/* set MQD vmid to 0 */
4660	tmp = RREG32(mmCP_MQD_CONTROL);
4661	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4662	mqd->cp_mqd_control = tmp;
4663
4664	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4665	hqd_gpu_addr = ring->gpu_addr >> 8;
4666	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4667	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668
4669	/* set up the HQD, this is similar to CP_RB0_CNTL */
4670	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4671	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4672			    (order_base_2(ring->ring_size / 4) - 1));
4673	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4674			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4675#ifdef __BIG_ENDIAN
4676	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4677#endif
4678	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4679	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4680	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4681	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4682	mqd->cp_hqd_pq_control = tmp;
4683
4684	/* set the wb address whether it's enabled or not */
4685	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4686	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4687	mqd->cp_hqd_pq_rptr_report_addr_hi =
4688		upper_32_bits(wb_gpu_addr) & 0xffff;
4689
4690	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4691	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4692	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4693	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4694
4695	tmp = 0;
4696	/* enable the doorbell if requested */
4697	if (ring->use_doorbell) {
4698		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4699		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4700				DOORBELL_OFFSET, ring->doorbell_index);
4701
4702		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4703					 DOORBELL_EN, 1);
4704		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4705					 DOORBELL_SOURCE, 0);
4706		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4707					 DOORBELL_HIT, 0);
4708	}
4709
4710	mqd->cp_hqd_pq_doorbell_control = tmp;
4711
4712	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4713	ring->wptr = 0;
4714	mqd->cp_hqd_pq_wptr = ring->wptr;
4715	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4716
4717	/* set the vmid for the queue */
4718	mqd->cp_hqd_vmid = 0;
4719
4720	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4721	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4722	mqd->cp_hqd_persistent_state = tmp;
4723
4724	/* set MTYPE */
4725	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4726	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4727	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4728	mqd->cp_hqd_ib_control = tmp;
4729
4730	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4731	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4732	mqd->cp_hqd_iq_timer = tmp;
4733
4734	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4735	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4736	mqd->cp_hqd_ctx_save_control = tmp;
4737
4738	/* defaults */
4739	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4740	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4741	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4742	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4743	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4744	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4745	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4746	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4747	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4748	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4749	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4750	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4751	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4752	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4753	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4754
4755	/* activate the queue */
4756	mqd->cp_hqd_active = 1;
 
 
 
 
 
 
 
4757
4758	return 0;
4759}
4760
4761int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4762			struct vi_mqd *mqd)
4763{
4764	uint32_t mqd_reg;
4765	uint32_t *mqd_data;
4766
4767	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4768	mqd_data = &mqd->cp_mqd_base_addr_lo;
4769
4770	/* disable wptr polling */
4771	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4772
4773	/* program all HQD registers */
4774	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4775		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4776
4777	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4778	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4779	 * on ASICs that do not support context-save.
4780	 * EOP writes/reads can start anywhere in the ring.
4781	 */
4782	if (adev->asic_type != CHIP_TONGA) {
4783		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4784		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4785		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4786	}
4787
4788	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4789		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4790
4791	/* activate the HQD */
4792	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4793		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4794
4795	return 0;
4796}
4797
4798static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4799{
4800	struct amdgpu_device *adev = ring->adev;
4801	struct vi_mqd *mqd = ring->mqd_ptr;
4802	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4803
4804	gfx_v8_0_kiq_setting(ring);
4805
4806	if (adev->in_gpu_reset) { /* for GPU_RESET case */
4807		/* reset MQD to a clean status */
4808		if (adev->gfx.mec.mqd_backup[mqd_idx])
4809			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4810
4811		/* reset ring buffer */
4812		ring->wptr = 0;
4813		amdgpu_ring_clear_ring(ring);
4814		mutex_lock(&adev->srbm_mutex);
4815		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4816		gfx_v8_0_mqd_commit(adev, mqd);
4817		vi_srbm_select(adev, 0, 0, 0, 0);
4818		mutex_unlock(&adev->srbm_mutex);
4819	} else {
4820		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4821		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4822		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4823		mutex_lock(&adev->srbm_mutex);
4824		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4825		gfx_v8_0_mqd_init(ring);
4826		gfx_v8_0_mqd_commit(adev, mqd);
4827		vi_srbm_select(adev, 0, 0, 0, 0);
4828		mutex_unlock(&adev->srbm_mutex);
4829
4830		if (adev->gfx.mec.mqd_backup[mqd_idx])
4831			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4832	}
4833
4834	return 0;
4835}
4836
4837static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4838{
4839	struct amdgpu_device *adev = ring->adev;
4840	struct vi_mqd *mqd = ring->mqd_ptr;
4841	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4842
4843	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4844		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4845		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4846		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4847		mutex_lock(&adev->srbm_mutex);
4848		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4849		gfx_v8_0_mqd_init(ring);
4850		vi_srbm_select(adev, 0, 0, 0, 0);
4851		mutex_unlock(&adev->srbm_mutex);
4852
4853		if (adev->gfx.mec.mqd_backup[mqd_idx])
4854			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4855	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4856		/* reset MQD to a clean status */
4857		if (adev->gfx.mec.mqd_backup[mqd_idx])
4858			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4859		/* reset ring buffer */
4860		ring->wptr = 0;
4861		amdgpu_ring_clear_ring(ring);
4862	} else {
4863		amdgpu_ring_clear_ring(ring);
4864	}
4865	return 0;
4866}
4867
4868static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4869{
4870	if (adev->asic_type > CHIP_TONGA) {
4871		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4872		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4873	}
4874	/* enable doorbells */
4875	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4876}
4877
4878static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4879{
4880	struct amdgpu_ring *ring = NULL;
4881	int r = 0, i;
4882
4883	gfx_v8_0_cp_compute_enable(adev, true);
4884
4885	ring = &adev->gfx.kiq.ring;
4886
4887	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4888	if (unlikely(r != 0))
4889		goto done;
4890
4891	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4892	if (!r) {
4893		r = gfx_v8_0_kiq_init_queue(ring);
4894		amdgpu_bo_kunmap(ring->mqd_obj);
4895		ring->mqd_ptr = NULL;
4896	}
 
4897	amdgpu_bo_unreserve(ring->mqd_obj);
4898	if (r)
4899		goto done;
 
 
 
 
 
 
 
 
4900
4901	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4902		ring = &adev->gfx.compute_ring[i];
4903
4904		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4905		if (unlikely(r != 0))
4906			goto done;
4907		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4908		if (!r) {
4909			r = gfx_v8_0_kcq_init_queue(ring);
4910			amdgpu_bo_kunmap(ring->mqd_obj);
4911			ring->mqd_ptr = NULL;
4912		}
4913		amdgpu_bo_unreserve(ring->mqd_obj);
4914		if (r)
4915			goto done;
4916	}
4917
4918	gfx_v8_0_set_mec_doorbell_range(adev);
4919
4920	r = gfx_v8_0_kiq_kcq_enable(adev);
4921	if (r)
4922		goto done;
4923
4924	/* Test KIQ */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4925	ring = &adev->gfx.kiq.ring;
4926	ring->ready = true;
4927	r = amdgpu_ring_test_ring(ring);
4928	if (r) {
4929		ring->ready = false;
4930		goto done;
4931	}
4932
4933	/* Test KCQs */
4934	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4935		ring = &adev->gfx.compute_ring[i];
4936		ring->ready = true;
4937		r = amdgpu_ring_test_ring(ring);
4938		if (r)
4939			ring->ready = false;
4940	}
4941
4942done:
4943	return r;
4944}
4945
4946static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4947{
4948	int r;
4949
4950	if (!(adev->flags & AMD_IS_APU))
4951		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4952
4953	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4954			/* legacy firmware loading */
4955		r = gfx_v8_0_cp_gfx_load_microcode(adev);
4956		if (r)
4957			return r;
4958
4959		r = gfx_v8_0_cp_compute_load_microcode(adev);
4960		if (r)
4961			return r;
4962	}
4963
4964	r = gfx_v8_0_cp_gfx_resume(adev);
4965	if (r)
4966		return r;
4967
4968	r = gfx_v8_0_kiq_resume(adev);
 
 
 
 
4969	if (r)
4970		return r;
4971
4972	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4973
4974	return 0;
4975}
4976
4977static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4978{
4979	gfx_v8_0_cp_gfx_enable(adev, enable);
4980	gfx_v8_0_cp_compute_enable(adev, enable);
4981}
4982
/*
 * Hardware init entry point: apply golden registers, run the GPU init
 * sequence, then resume the RLC and finally the CP.
 * Returns 0 on success or the error from the RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	return gfx_v8_0_cp_resume(adev);
}
4999
5000static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
5001{
5002	struct amdgpu_device *adev = kiq_ring->adev;
5003	uint32_t scratch, tmp = 0;
5004	int r, i;
 
5005
5006	r = amdgpu_gfx_scratch_get(adev, &scratch);
5007	if (r) {
5008		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5009		return r;
5010	}
5011	WREG32(scratch, 0xCAFEDEAD);
5012
5013	r = amdgpu_ring_alloc(kiq_ring, 10);
5014	if (r) {
5015		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5016		amdgpu_gfx_scratch_free(adev, scratch);
5017		return r;
5018	}
5019
5020	/* unmap queues */
5021	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5022	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 
 
5023						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5024						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5025						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5026						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5027	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5028	amdgpu_ring_write(kiq_ring, 0);
5029	amdgpu_ring_write(kiq_ring, 0);
5030	amdgpu_ring_write(kiq_ring, 0);
5031	/* write to scratch for completion */
5032	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5033	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5034	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5035	amdgpu_ring_commit(kiq_ring);
5036
5037	for (i = 0; i < adev->usec_timeout; i++) {
5038		tmp = RREG32(scratch);
5039		if (tmp == 0xDEADBEEF)
5040			break;
5041		DRM_UDELAY(1);
5042	}
5043	if (i >= adev->usec_timeout) {
5044		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5045		r = -EINVAL;
5046	}
5047	amdgpu_gfx_scratch_free(adev, scratch);
5048	return r;
5049}
5050
5051static int gfx_v8_0_hw_fini(void *handle)
5052{
5053	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5054	int i;
5055
5056	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5057	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5058
5059	/* disable KCQ to avoid CPC touch memory not valid anymore */
5060	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5061		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5062
5063	if (amdgpu_sriov_vf(adev)) {
5064		pr_debug("For SRIOV client, shouldn't do anything.\n");
5065		return 0;
5066	}
5067	gfx_v8_0_cp_enable(adev, false);
5068	gfx_v8_0_rlc_stop(adev);
5069
5070	amdgpu_device_ip_set_powergating_state(adev,
5071					       AMD_IP_BLOCK_TYPE_GFX,
5072					       AMD_PG_STATE_UNGATE);
5073
5074	return 0;
5075}
5076
5077static int gfx_v8_0_suspend(void *handle)
5078{
5079	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5080	adev->gfx.in_suspend = true;
5081	return gfx_v8_0_hw_fini(adev);
5082}
5083
5084static int gfx_v8_0_resume(void *handle)
5085{
5086	int r;
5087	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5088
5089	r = gfx_v8_0_hw_init(adev);
5090	adev->gfx.in_suspend = false;
5091	return r;
5092}
5093
5094static bool gfx_v8_0_is_idle(void *handle)
5095{
 
5096	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5097
5098	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5099		return false;
5100	else
5101		return true;
 
 
 
5102}
5103
5104static int gfx_v8_0_wait_for_idle(void *handle)
5105{
5106	unsigned i;
5107	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5108
5109	for (i = 0; i < adev->usec_timeout; i++) {
5110		if (gfx_v8_0_is_idle(handle))
5111			return 0;
5112
5113		udelay(1);
5114	}
5115	return -ETIMEDOUT;
5116}
5117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5118static bool gfx_v8_0_check_soft_reset(void *handle)
5119{
5120	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5121	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5122	u32 tmp;
5123
5124	/* GRBM_STATUS */
5125	tmp = RREG32(mmGRBM_STATUS);
5126	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5127		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5128		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5129		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5130		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5131		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5132		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5133		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5134						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5135		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5136						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5137		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5138						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5139	}
5140
5141	/* GRBM_STATUS2 */
5142	tmp = RREG32(mmGRBM_STATUS2);
5143	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5144		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5145						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5146
5147	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5148	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5149	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5150		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5151						SOFT_RESET_CPF, 1);
5152		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5153						SOFT_RESET_CPC, 1);
5154		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5155						SOFT_RESET_CPG, 1);
5156		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5157						SOFT_RESET_GRBM, 1);
5158	}
5159
5160	/* SRBM_STATUS */
5161	tmp = RREG32(mmSRBM_STATUS);
5162	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5163		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5164						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5165	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5166		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5167						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5168
5169	if (grbm_soft_reset || srbm_soft_reset) {
5170		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5171		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5172		return true;
5173	} else {
5174		adev->gfx.grbm_soft_reset = 0;
5175		adev->gfx.srbm_soft_reset = 0;
5176		return false;
5177	}
5178}
5179
5180static int gfx_v8_0_pre_soft_reset(void *handle)
5181{
5182	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5183	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5184
5185	if ((!adev->gfx.grbm_soft_reset) &&
5186	    (!adev->gfx.srbm_soft_reset))
5187		return 0;
5188
5189	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5190	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5191
5192	/* stop the rlc */
5193	gfx_v8_0_rlc_stop(adev);
5194
5195	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5196	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5197		/* Disable GFX parsing/prefetching */
5198		gfx_v8_0_cp_gfx_enable(adev, false);
5199
5200	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5201	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5202	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5203	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5204		int i;
5205
5206		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5207			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5208
5209			mutex_lock(&adev->srbm_mutex);
5210			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5211			gfx_v8_0_deactivate_hqd(adev, 2);
5212			vi_srbm_select(adev, 0, 0, 0, 0);
5213			mutex_unlock(&adev->srbm_mutex);
5214		}
5215		/* Disable MEC parsing/prefetching */
5216		gfx_v8_0_cp_compute_enable(adev, false);
5217	}
5218
5219       return 0;
5220}
5221
5222static int gfx_v8_0_soft_reset(void *handle)
5223{
5224	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5225	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5226	u32 tmp;
5227
5228	if ((!adev->gfx.grbm_soft_reset) &&
5229	    (!adev->gfx.srbm_soft_reset))
5230		return 0;
5231
5232	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5233	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5234
5235	if (grbm_soft_reset || srbm_soft_reset) {
5236		tmp = RREG32(mmGMCON_DEBUG);
5237		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5238		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5239		WREG32(mmGMCON_DEBUG, tmp);
5240		udelay(50);
5241	}
5242
5243	if (grbm_soft_reset) {
5244		tmp = RREG32(mmGRBM_SOFT_RESET);
5245		tmp |= grbm_soft_reset;
5246		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5247		WREG32(mmGRBM_SOFT_RESET, tmp);
5248		tmp = RREG32(mmGRBM_SOFT_RESET);
5249
5250		udelay(50);
5251
5252		tmp &= ~grbm_soft_reset;
5253		WREG32(mmGRBM_SOFT_RESET, tmp);
5254		tmp = RREG32(mmGRBM_SOFT_RESET);
5255	}
5256
5257	if (srbm_soft_reset) {
5258		tmp = RREG32(mmSRBM_SOFT_RESET);
5259		tmp |= srbm_soft_reset;
5260		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5261		WREG32(mmSRBM_SOFT_RESET, tmp);
5262		tmp = RREG32(mmSRBM_SOFT_RESET);
5263
5264		udelay(50);
5265
5266		tmp &= ~srbm_soft_reset;
5267		WREG32(mmSRBM_SOFT_RESET, tmp);
5268		tmp = RREG32(mmSRBM_SOFT_RESET);
5269	}
5270
5271	if (grbm_soft_reset || srbm_soft_reset) {
5272		tmp = RREG32(mmGMCON_DEBUG);
5273		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5274		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5275		WREG32(mmGMCON_DEBUG, tmp);
5276	}
5277
5278	/* Wait a little for things to settle down */
5279	udelay(50);
5280
5281	return 0;
5282}
5283
5284static int gfx_v8_0_post_soft_reset(void *handle)
5285{
5286	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5287	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5288
5289	if ((!adev->gfx.grbm_soft_reset) &&
5290	    (!adev->gfx.srbm_soft_reset))
5291		return 0;
5292
5293	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5294	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5295
5296	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5297	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5298		gfx_v8_0_cp_gfx_resume(adev);
5299
5300	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5301	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5302	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5303	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5304		int i;
5305
5306		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5307			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5308
5309			mutex_lock(&adev->srbm_mutex);
5310			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5311			gfx_v8_0_deactivate_hqd(adev, 2);
5312			vi_srbm_select(adev, 0, 0, 0, 0);
5313			mutex_unlock(&adev->srbm_mutex);
5314		}
5315		gfx_v8_0_kiq_resume(adev);
 
5316	}
5317	gfx_v8_0_rlc_start(adev);
 
 
 
 
 
 
 
5318
5319	return 0;
5320}
5321
5322/**
5323 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5324 *
5325 * @adev: amdgpu_device pointer
5326 *
5327 * Fetches a GPU clock counter snapshot.
5328 * Returns the 64 bit clock counter snapshot.
5329 */
5330static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5331{
5332	uint64_t clock;
5333
5334	mutex_lock(&adev->gfx.gpu_clock_mutex);
5335	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5336	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5337		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5338	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5339	return clock;
5340}
5341
5342static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5343					  uint32_t vmid,
5344					  uint32_t gds_base, uint32_t gds_size,
5345					  uint32_t gws_base, uint32_t gws_size,
5346					  uint32_t oa_base, uint32_t oa_size)
5347{
5348	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5349	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5350
5351	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5352	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5353
5354	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5355	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5356
5357	/* GDS Base */
5358	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5359	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5360				WRITE_DATA_DST_SEL(0)));
5361	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5362	amdgpu_ring_write(ring, 0);
5363	amdgpu_ring_write(ring, gds_base);
5364
5365	/* GDS Size */
5366	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5367	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5368				WRITE_DATA_DST_SEL(0)));
5369	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5370	amdgpu_ring_write(ring, 0);
5371	amdgpu_ring_write(ring, gds_size);
5372
5373	/* GWS */
5374	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5375	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5376				WRITE_DATA_DST_SEL(0)));
5377	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5378	amdgpu_ring_write(ring, 0);
5379	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5380
5381	/* OA */
5382	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5383	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5384				WRITE_DATA_DST_SEL(0)));
5385	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5386	amdgpu_ring_write(ring, 0);
5387	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5388}
5389
5390static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5391{
5392	WREG32(mmSQ_IND_INDEX,
5393		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5394		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5395		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5396		(SQ_IND_INDEX__FORCE_READ_MASK));
5397	return RREG32(mmSQ_IND_DATA);
5398}
5399
5400static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5401			   uint32_t wave, uint32_t thread,
5402			   uint32_t regno, uint32_t num, uint32_t *out)
5403{
5404	WREG32(mmSQ_IND_INDEX,
5405		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5406		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5407		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5408		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5409		(SQ_IND_INDEX__FORCE_READ_MASK) |
5410		(SQ_IND_INDEX__AUTO_INCR_MASK));
5411	while (num--)
5412		*(out++) = RREG32(mmSQ_IND_DATA);
5413}
5414
5415static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5416{
5417	/* type 0 wave data */
5418	dst[(*no_fields)++] = 0;
5419	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5420	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5421	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5422	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5423	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5424	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5425	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5426	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5427	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5428	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5429	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5430	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5431	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5432	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5433	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5434	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5435	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5436	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5437}
5438
5439static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5440				     uint32_t wave, uint32_t start,
5441				     uint32_t size, uint32_t *dst)
5442{
5443	wave_read_regs(
5444		adev, simd, wave, 0,
5445		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5446}
5447
5448
5449static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5450	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5451	.select_se_sh = &gfx_v8_0_select_se_sh,
5452	.read_wave_data = &gfx_v8_0_read_wave_data,
5453	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5454	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5455};
5456
5457static int gfx_v8_0_early_init(void *handle)
5458{
5459	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5460
5461	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5462	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 
5463	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5464	gfx_v8_0_set_ring_funcs(adev);
5465	gfx_v8_0_set_irq_funcs(adev);
5466	gfx_v8_0_set_gds_init(adev);
5467	gfx_v8_0_set_rlc_funcs(adev);
5468
5469	return 0;
5470}
5471
5472static int gfx_v8_0_late_init(void *handle)
5473{
5474	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5475	int r;
5476
5477	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5478	if (r)
5479		return r;
5480
5481	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5482	if (r)
5483		return r;
5484
5485	/* requires IBs so do in late init after IB pool is initialized */
5486	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5487	if (r)
5488		return r;
5489
5490	amdgpu_device_ip_set_powergating_state(adev,
5491					       AMD_IP_BLOCK_TYPE_GFX,
5492					       AMD_PG_STATE_GATE);
 
 
 
 
 
 
 
 
 
 
5493
5494	return 0;
5495}
5496
5497static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5498						       bool enable)
5499{
5500	if ((adev->asic_type == CHIP_POLARIS11) ||
5501	    (adev->asic_type == CHIP_POLARIS12))
 
5502		/* Send msg to SMU via Powerplay */
5503		amdgpu_device_ip_set_powergating_state(adev,
5504						       AMD_IP_BLOCK_TYPE_SMC,
5505						       enable ?
5506						       AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5507
5508	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5509}
5510
5511static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5512							bool enable)
5513{
5514	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5515}
5516
5517static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5518		bool enable)
5519{
5520	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5521}
5522
5523static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5524					  bool enable)
5525{
5526	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5527}
5528
5529static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5530						bool enable)
5531{
5532	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5533
5534	/* Read any GFX register to wake up GFX. */
5535	if (!enable)
5536		RREG32(mmDB_RENDER_CONTROL);
5537}
5538
5539static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5540					  bool enable)
5541{
5542	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5543		cz_enable_gfx_cg_power_gating(adev, true);
5544		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5545			cz_enable_gfx_pipeline_power_gating(adev, true);
5546	} else {
5547		cz_enable_gfx_cg_power_gating(adev, false);
5548		cz_enable_gfx_pipeline_power_gating(adev, false);
5549	}
5550}
5551
5552static int gfx_v8_0_set_powergating_state(void *handle,
5553					  enum amd_powergating_state state)
5554{
5555	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5556	bool enable = (state == AMD_PG_STATE_GATE);
5557
5558	if (amdgpu_sriov_vf(adev))
5559		return 0;
5560
 
 
 
 
 
5561	switch (adev->asic_type) {
5562	case CHIP_CARRIZO:
5563	case CHIP_STONEY:
5564
5565		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5566			cz_enable_sck_slow_down_on_power_up(adev, true);
5567			cz_enable_sck_slow_down_on_power_down(adev, true);
5568		} else {
5569			cz_enable_sck_slow_down_on_power_up(adev, false);
5570			cz_enable_sck_slow_down_on_power_down(adev, false);
5571		}
5572		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5573			cz_enable_cp_power_gating(adev, true);
5574		else
5575			cz_enable_cp_power_gating(adev, false);
5576
5577		cz_update_gfx_cg_power_gating(adev, enable);
5578
5579		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5580			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5581		else
5582			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5583
5584		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5585			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5586		else
5587			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5588		break;
5589	case CHIP_POLARIS11:
5590	case CHIP_POLARIS12:
 
5591		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5592			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5593		else
5594			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5595
5596		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5597			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5598		else
5599			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5600
5601		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5602			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5603		else
5604			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5605		break;
5606	default:
5607		break;
5608	}
5609
 
 
 
 
5610	return 0;
5611}
5612
5613static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5614{
5615	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5616	int data;
5617
5618	if (amdgpu_sriov_vf(adev))
5619		*flags = 0;
5620
5621	/* AMD_CG_SUPPORT_GFX_MGCG */
5622	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5623	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5624		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5625
5626	/* AMD_CG_SUPPORT_GFX_CGLG */
5627	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5628	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5629		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5630
5631	/* AMD_CG_SUPPORT_GFX_CGLS */
5632	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5633		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5634
5635	/* AMD_CG_SUPPORT_GFX_CGTS */
5636	data = RREG32(mmCGTS_SM_CTRL_REG);
5637	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5638		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5639
5640	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5641	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5642		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5643
5644	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5645	data = RREG32(mmRLC_MEM_SLP_CNTL);
5646	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5647		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5648
5649	/* AMD_CG_SUPPORT_GFX_CP_LS */
5650	data = RREG32(mmCP_MEM_SLP_CNTL);
5651	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5652		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5653}
5654
5655static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5656				     uint32_t reg_addr, uint32_t cmd)
5657{
5658	uint32_t data;
5659
5660	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5661
5662	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5663	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5664
5665	data = RREG32(mmRLC_SERDES_WR_CTRL);
5666	if (adev->asic_type == CHIP_STONEY)
5667		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5668			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5669			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5670			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5671			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5672			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5673			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5674			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5675			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5676	else
5677		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5678			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5679			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5680			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5681			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5682			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5683			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5684			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5685			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5686			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5687			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5688	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5689		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5690		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5691		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5692
5693	WREG32(mmRLC_SERDES_WR_CTRL, data);
5694}
5695
/* RLC safe-mode message codes */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE occupies bits 1..4 */
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e

5703static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5704{
5705	u32 data;
5706	unsigned i;
5707
5708	data = RREG32(mmRLC_CNTL);
5709	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5710		return;
5711
5712	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5713		data |= RLC_SAFE_MODE__CMD_MASK;
5714		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5715		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5716		WREG32(mmRLC_SAFE_MODE, data);
5717
5718		for (i = 0; i < adev->usec_timeout; i++) {
5719			if ((RREG32(mmRLC_GPM_STAT) &
5720			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5721			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5722			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5723			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5724				break;
5725			udelay(1);
5726		}
5727
5728		for (i = 0; i < adev->usec_timeout; i++) {
5729			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5730				break;
5731			udelay(1);
5732		}
5733		adev->gfx.rlc.in_safe_mode = true;
 
 
 
 
 
 
 
 
5734	}
5735}
5736
5737static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5738{
5739	u32 data = 0;
5740	unsigned i;
5741
5742	data = RREG32(mmRLC_CNTL);
5743	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5744		return;
5745
5746	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5747		if (adev->gfx.rlc.in_safe_mode) {
5748			data |= RLC_SAFE_MODE__CMD_MASK;
5749			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5750			WREG32(mmRLC_SAFE_MODE, data);
5751			adev->gfx.rlc.in_safe_mode = false;
5752		}
5753	}
5754
5755	for (i = 0; i < adev->usec_timeout; i++) {
5756		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5757			break;
5758		udelay(1);
5759	}
5760}
5761
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5762static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5763	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5764	.exit_safe_mode = iceland_exit_rlc_safe_mode
 
 
 
 
 
 
 
 
 
 
5765};
5766
5767static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5768						      bool enable)
5769{
5770	uint32_t temp, data;
5771
5772	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5773
5774	/* It is disabled by HW by default */
5775	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5776		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5777			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5778				/* 1 - RLC memory Light sleep */
5779				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5780
5781			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5782				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5783		}
5784
5785		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5786		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5787		if (adev->flags & AMD_IS_APU)
5788			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5789				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5790				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5791		else
5792			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5793				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5794				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5795				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5796
5797		if (temp != data)
5798			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5799
5800		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5801		gfx_v8_0_wait_for_rlc_serdes(adev);
5802
5803		/* 5 - clear mgcg override */
5804		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5805
5806		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5807			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5808			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5809			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5810			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5811			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5812			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5813			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5814			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5815				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5816			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5817			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5818			if (temp != data)
5819				WREG32(mmCGTS_SM_CTRL_REG, data);
5820		}
5821		udelay(50);
5822
5823		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5824		gfx_v8_0_wait_for_rlc_serdes(adev);
5825	} else {
5826		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5827		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5828		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5829				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5830				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5831				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5832		if (temp != data)
5833			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5834
5835		/* 2 - disable MGLS in RLC */
5836		data = RREG32(mmRLC_MEM_SLP_CNTL);
5837		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5838			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5839			WREG32(mmRLC_MEM_SLP_CNTL, data);
5840		}
5841
5842		/* 3 - disable MGLS in CP */
5843		data = RREG32(mmCP_MEM_SLP_CNTL);
5844		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5845			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5846			WREG32(mmCP_MEM_SLP_CNTL, data);
5847		}
5848
5849		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5850		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5851		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5852				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5853		if (temp != data)
5854			WREG32(mmCGTS_SM_CTRL_REG, data);
5855
5856		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5857		gfx_v8_0_wait_for_rlc_serdes(adev);
5858
5859		/* 6 - set mgcg override */
5860		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5861
5862		udelay(50);
5863
5864		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5865		gfx_v8_0_wait_for_rlc_serdes(adev);
5866	}
5867
5868	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5869}
5870
5871static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5872						      bool enable)
5873{
5874	uint32_t temp, temp1, data, data1;
5875
5876	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5877
5878	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5879
5880	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5881		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5882		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5883		if (temp1 != data1)
5884			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5885
5886		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5887		gfx_v8_0_wait_for_rlc_serdes(adev);
5888
5889		/* 2 - clear cgcg override */
5890		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5891
5892		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5893		gfx_v8_0_wait_for_rlc_serdes(adev);
5894
5895		/* 3 - write cmd to set CGLS */
5896		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5897
5898		/* 4 - enable cgcg */
5899		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5900
5901		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5902			/* enable cgls*/
5903			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5904
5905			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5906			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5907
5908			if (temp1 != data1)
5909				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5910		} else {
5911			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5912		}
5913
5914		if (temp != data)
5915			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5916
5917		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5918		 * Cmp_busy/GFX_Idle interrupts
5919		 */
5920		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5921	} else {
5922		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5923		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5924
5925		/* TEST CGCG */
5926		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5927		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5928				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5929		if (temp1 != data1)
5930			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5931
5932		/* read gfx register to wake up cgcg */
5933		RREG32(mmCB_CGTT_SCLK_CTRL);
5934		RREG32(mmCB_CGTT_SCLK_CTRL);
5935		RREG32(mmCB_CGTT_SCLK_CTRL);
5936		RREG32(mmCB_CGTT_SCLK_CTRL);
5937
5938		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5939		gfx_v8_0_wait_for_rlc_serdes(adev);
5940
5941		/* write cmd to Set CGCG Overrride */
5942		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5943
5944		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5945		gfx_v8_0_wait_for_rlc_serdes(adev);
5946
5947		/* write cmd to Clear CGLS */
5948		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5949
5950		/* disable cgcg, cgls should be disabled too. */
5951		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5952			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5953		if (temp != data)
5954			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5955		/* enable interrupts again for PG */
5956		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5957	}
5958
5959	gfx_v8_0_wait_for_rlc_serdes(adev);
5960
5961	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5962}
5963static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5964					    bool enable)
5965{
5966	if (enable) {
5967		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5968		 * ===  MGCG + MGLS + TS(CG/LS) ===
5969		 */
5970		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5971		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5972	} else {
5973		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5974		 * ===  CGCG + CGLS ===
5975		 */
5976		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5977		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5978	}
5979	return 0;
5980}
5981
5982static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5983					  enum amd_clockgating_state state)
5984{
5985	uint32_t msg_id, pp_state = 0;
5986	uint32_t pp_support_state = 0;
5987
5988	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5989		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5990			pp_support_state = PP_STATE_SUPPORT_LS;
5991			pp_state = PP_STATE_LS;
5992		}
5993		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5994			pp_support_state |= PP_STATE_SUPPORT_CG;
5995			pp_state |= PP_STATE_CG;
5996		}
5997		if (state == AMD_CG_STATE_UNGATE)
5998			pp_state = 0;
5999
6000		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6001				PP_BLOCK_GFX_CG,
6002				pp_support_state,
6003				pp_state);
6004		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6005			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6006	}
6007
6008	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6009		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6010			pp_support_state = PP_STATE_SUPPORT_LS;
6011			pp_state = PP_STATE_LS;
6012		}
6013
6014		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6015			pp_support_state |= PP_STATE_SUPPORT_CG;
6016			pp_state |= PP_STATE_CG;
6017		}
6018
6019		if (state == AMD_CG_STATE_UNGATE)
6020			pp_state = 0;
6021
6022		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6023				PP_BLOCK_GFX_MG,
6024				pp_support_state,
6025				pp_state);
6026		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6027			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6028	}
6029
6030	return 0;
6031}
6032
6033static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6034					  enum amd_clockgating_state state)
6035{
6036
6037	uint32_t msg_id, pp_state = 0;
6038	uint32_t pp_support_state = 0;
6039
6040	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6041		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6042			pp_support_state = PP_STATE_SUPPORT_LS;
6043			pp_state = PP_STATE_LS;
6044		}
6045		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6046			pp_support_state |= PP_STATE_SUPPORT_CG;
6047			pp_state |= PP_STATE_CG;
6048		}
6049		if (state == AMD_CG_STATE_UNGATE)
6050			pp_state = 0;
6051
6052		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6053				PP_BLOCK_GFX_CG,
6054				pp_support_state,
6055				pp_state);
6056		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6057			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6058	}
6059
6060	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6061		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6062			pp_support_state = PP_STATE_SUPPORT_LS;
6063			pp_state = PP_STATE_LS;
6064		}
6065		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6066			pp_support_state |= PP_STATE_SUPPORT_CG;
6067			pp_state |= PP_STATE_CG;
6068		}
6069		if (state == AMD_CG_STATE_UNGATE)
6070			pp_state = 0;
6071
6072		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6073				PP_BLOCK_GFX_3D,
6074				pp_support_state,
6075				pp_state);
6076		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6077			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6078	}
6079
6080	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6081		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6082			pp_support_state = PP_STATE_SUPPORT_LS;
6083			pp_state = PP_STATE_LS;
6084		}
6085
6086		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6087			pp_support_state |= PP_STATE_SUPPORT_CG;
6088			pp_state |= PP_STATE_CG;
6089		}
6090
6091		if (state == AMD_CG_STATE_UNGATE)
6092			pp_state = 0;
6093
6094		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6095				PP_BLOCK_GFX_MG,
6096				pp_support_state,
6097				pp_state);
6098		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6099			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6100	}
6101
6102	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6103		pp_support_state = PP_STATE_SUPPORT_LS;
6104
6105		if (state == AMD_CG_STATE_UNGATE)
6106			pp_state = 0;
6107		else
6108			pp_state = PP_STATE_LS;
6109
6110		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6111				PP_BLOCK_GFX_RLC,
6112				pp_support_state,
6113				pp_state);
6114		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6115			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6116	}
6117
6118	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6119		pp_support_state = PP_STATE_SUPPORT_LS;
6120
6121		if (state == AMD_CG_STATE_UNGATE)
6122			pp_state = 0;
6123		else
6124			pp_state = PP_STATE_LS;
6125		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6126			PP_BLOCK_GFX_CP,
6127			pp_support_state,
6128			pp_state);
6129		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6130			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6131	}
6132
6133	return 0;
6134}
6135
6136static int gfx_v8_0_set_clockgating_state(void *handle,
6137					  enum amd_clockgating_state state)
6138{
6139	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6140
6141	if (amdgpu_sriov_vf(adev))
6142		return 0;
6143
6144	switch (adev->asic_type) {
6145	case CHIP_FIJI:
6146	case CHIP_CARRIZO:
6147	case CHIP_STONEY:
6148		gfx_v8_0_update_gfx_clock_gating(adev,
6149						 state == AMD_CG_STATE_GATE);
6150		break;
6151	case CHIP_TONGA:
6152		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6153		break;
6154	case CHIP_POLARIS10:
6155	case CHIP_POLARIS11:
6156	case CHIP_POLARIS12:
 
6157		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6158		break;
6159	default:
6160		break;
6161	}
6162	return 0;
6163}
6164
6165static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6166{
6167	return ring->adev->wb.wb[ring->rptr_offs];
6168}
6169
6170static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6171{
6172	struct amdgpu_device *adev = ring->adev;
6173
6174	if (ring->use_doorbell)
6175		/* XXX check if swapping is necessary on BE */
6176		return ring->adev->wb.wb[ring->wptr_offs];
6177	else
6178		return RREG32(mmCP_RB0_WPTR);
6179}
6180
6181static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6182{
6183	struct amdgpu_device *adev = ring->adev;
6184
6185	if (ring->use_doorbell) {
6186		/* XXX check if swapping is necessary on BE */
6187		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6188		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6189	} else {
6190		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6191		(void)RREG32(mmCP_RB0_WPTR);
6192	}
6193}
6194
6195static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6196{
6197	u32 ref_and_mask, reg_mem_engine;
6198
6199	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6200	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6201		switch (ring->me) {
6202		case 1:
6203			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6204			break;
6205		case 2:
6206			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6207			break;
6208		default:
6209			return;
6210		}
6211		reg_mem_engine = 0;
6212	} else {
6213		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6214		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6215	}
6216
6217	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6218	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6219				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6220				 reg_mem_engine));
6221	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6222	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6223	amdgpu_ring_write(ring, ref_and_mask);
6224	amdgpu_ring_write(ring, ref_and_mask);
6225	amdgpu_ring_write(ring, 0x20); /* poll interval */
6226}
6227
6228static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6229{
6230	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6231	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6232		EVENT_INDEX(4));
6233
6234	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6235	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6236		EVENT_INDEX(0));
6237}
6238
6239static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6240				      struct amdgpu_ib *ib,
6241				      unsigned vmid, bool ctx_switch)
 
6242{
 
6243	u32 header, control = 0;
6244
6245	if (ib->flags & AMDGPU_IB_FLAG_CE)
6246		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6247	else
6248		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6249
6250	control |= ib->length_dw | (vmid << 24);
6251
6252	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6253		control |= INDIRECT_BUFFER_PRE_ENB(1);
6254
6255		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6256			gfx_v8_0_ring_emit_de_meta(ring);
6257	}
6258
6259	amdgpu_ring_write(ring, header);
6260	amdgpu_ring_write(ring,
6261#ifdef __BIG_ENDIAN
6262			  (2 << 0) |
6263#endif
6264			  (ib->gpu_addr & 0xFFFFFFFC));
6265	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6266	amdgpu_ring_write(ring, control);
6267}
6268
6269static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 
6270					  struct amdgpu_ib *ib,
6271					  unsigned vmid, bool ctx_switch)
6272{
 
6273	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6275	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6276	amdgpu_ring_write(ring,
6277#ifdef __BIG_ENDIAN
6278				(2 << 0) |
6279#endif
6280				(ib->gpu_addr & 0xFFFFFFFC));
6281	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6282	amdgpu_ring_write(ring, control);
6283}
6284
6285static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6286					 u64 seq, unsigned flags)
6287{
6288	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6289	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6290
6291	/* EVENT_WRITE_EOP - flush caches, send int */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6292	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6293	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6294				 EOP_TC_ACTION_EN |
6295				 EOP_TC_WB_ACTION_EN |
6296				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6297				 EVENT_INDEX(5)));
6298	amdgpu_ring_write(ring, addr & 0xfffffffc);
6299	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6300			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6301	amdgpu_ring_write(ring, lower_32_bits(seq));
6302	amdgpu_ring_write(ring, upper_32_bits(seq));
6303
6304}
6305
6306static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6307{
6308	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6309	uint32_t seq = ring->fence_drv.sync_seq;
6310	uint64_t addr = ring->fence_drv.gpu_addr;
6311
6312	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6313	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6314				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6315				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6316	amdgpu_ring_write(ring, addr & 0xfffffffc);
6317	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6318	amdgpu_ring_write(ring, seq);
6319	amdgpu_ring_write(ring, 0xffffffff);
6320	amdgpu_ring_write(ring, 4); /* poll interval */
6321}
6322
6323static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6324					unsigned vmid, uint64_t pd_addr)
6325{
6326	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6327
6328	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6329
6330	/* wait for the invalidate to complete */
6331	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6332	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6333				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6334				 WAIT_REG_MEM_ENGINE(0))); /* me */
6335	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6336	amdgpu_ring_write(ring, 0);
6337	amdgpu_ring_write(ring, 0); /* ref */
6338	amdgpu_ring_write(ring, 0); /* mask */
6339	amdgpu_ring_write(ring, 0x20); /* poll interval */
6340
6341	/* compute doesn't have PFP */
6342	if (usepfp) {
6343		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6344		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6345		amdgpu_ring_write(ring, 0x0);
6346	}
6347}
6348
6349static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6350{
6351	return ring->adev->wb.wb[ring->wptr_offs];
6352}
6353
6354static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6355{
6356	struct amdgpu_device *adev = ring->adev;
6357
6358	/* XXX check if swapping is necessary on BE */
6359	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6360	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6361}
6362
6363static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6364					   bool acquire)
6365{
6366	struct amdgpu_device *adev = ring->adev;
6367	int pipe_num, tmp, reg;
6368	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6369
6370	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6371
6372	/* first me only has 2 entries, GFX and HP3D */
6373	if (ring->me > 0)
6374		pipe_num -= 2;
6375
6376	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6377	tmp = RREG32(reg);
6378	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6379	WREG32(reg, tmp);
6380}
6381
6382static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6383					    struct amdgpu_ring *ring,
6384					    bool acquire)
6385{
6386	int i, pipe;
6387	bool reserve;
6388	struct amdgpu_ring *iring;
6389
6390	mutex_lock(&adev->gfx.pipe_reserve_mutex);
6391	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6392	if (acquire)
6393		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6394	else
6395		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6396
6397	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6398		/* Clear all reservations - everyone reacquires all resources */
6399		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6400			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6401						       true);
6402
6403		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6404			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6405						       true);
6406	} else {
6407		/* Lower all pipes without a current reservation */
6408		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6409			iring = &adev->gfx.gfx_ring[i];
6410			pipe = amdgpu_gfx_queue_to_bit(adev,
6411						       iring->me,
6412						       iring->pipe,
6413						       0);
6414			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6415			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6416		}
6417
6418		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6419			iring = &adev->gfx.compute_ring[i];
6420			pipe = amdgpu_gfx_queue_to_bit(adev,
6421						       iring->me,
6422						       iring->pipe,
6423						       0);
6424			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6425			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6426		}
6427	}
6428
6429	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6430}
6431
6432static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6433				      struct amdgpu_ring *ring,
6434				      bool acquire)
6435{
6436	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6437	uint32_t queue_priority = acquire ? 0xf : 0x0;
6438
6439	mutex_lock(&adev->srbm_mutex);
6440	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6441
6442	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6443	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6444
6445	vi_srbm_select(adev, 0, 0, 0, 0);
6446	mutex_unlock(&adev->srbm_mutex);
6447}
6448static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6449					       enum drm_sched_priority priority)
6450{
6451	struct amdgpu_device *adev = ring->adev;
6452	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6453
6454	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6455		return;
6456
6457	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6458	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6459}
6460
6461static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6462					     u64 addr, u64 seq,
6463					     unsigned flags)
6464{
6465	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6466	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6467
6468	/* RELEASE_MEM - flush caches, send int */
6469	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6470	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6471				 EOP_TC_ACTION_EN |
6472				 EOP_TC_WB_ACTION_EN |
6473				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6474				 EVENT_INDEX(5)));
6475	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6476	amdgpu_ring_write(ring, addr & 0xfffffffc);
6477	amdgpu_ring_write(ring, upper_32_bits(addr));
6478	amdgpu_ring_write(ring, lower_32_bits(seq));
6479	amdgpu_ring_write(ring, upper_32_bits(seq));
6480}
6481
6482static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6483					 u64 seq, unsigned int flags)
6484{
6485	/* we only allocate 32bit for each seq wb address */
6486	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6487
6488	/* write fence seq to the "addr" */
6489	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6490	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6491				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6492	amdgpu_ring_write(ring, lower_32_bits(addr));
6493	amdgpu_ring_write(ring, upper_32_bits(addr));
6494	amdgpu_ring_write(ring, lower_32_bits(seq));
6495
6496	if (flags & AMDGPU_FENCE_FLAG_INT) {
6497		/* set register to trigger INT */
6498		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6499		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6500					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6501		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6502		amdgpu_ring_write(ring, 0);
6503		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6504	}
6505}
6506
6507static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6508{
6509	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6510	amdgpu_ring_write(ring, 0);
6511}
6512
6513static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6514{
6515	uint32_t dw2 = 0;
6516
6517	if (amdgpu_sriov_vf(ring->adev))
6518		gfx_v8_0_ring_emit_ce_meta(ring);
6519
6520	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6521	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6522		gfx_v8_0_ring_emit_vgt_flush(ring);
6523		/* set load_global_config & load_global_uconfig */
6524		dw2 |= 0x8001;
6525		/* set load_cs_sh_regs */
6526		dw2 |= 0x01000000;
6527		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6528		dw2 |= 0x10002;
6529
6530		/* set load_ce_ram if preamble presented */
6531		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6532			dw2 |= 0x10000000;
6533	} else {
6534		/* still load_ce_ram if this is the first time preamble presented
6535		 * although there is no context switch happens.
6536		 */
6537		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6538			dw2 |= 0x10000000;
6539	}
6540
6541	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6542	amdgpu_ring_write(ring, dw2);
6543	amdgpu_ring_write(ring, 0);
6544}
6545
6546static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6547{
6548	unsigned ret;
6549
6550	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6551	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6552	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6553	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6554	ret = ring->wptr & ring->buf_mask;
6555	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6556	return ret;
6557}
6558
6559static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6560{
6561	unsigned cur;
6562
6563	BUG_ON(offset > ring->buf_mask);
6564	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6565
6566	cur = (ring->wptr & ring->buf_mask) - 1;
6567	if (likely(cur > offset))
6568		ring->ring[offset] = cur - offset;
6569	else
6570		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6571}
6572
6573static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 
6574{
6575	struct amdgpu_device *adev = ring->adev;
6576
6577	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6578	amdgpu_ring_write(ring, 0 |	/* src: register*/
6579				(5 << 8) |	/* dst: memory */
6580				(1 << 20));	/* write confirm */
6581	amdgpu_ring_write(ring, reg);
6582	amdgpu_ring_write(ring, 0);
6583	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6584				adev->virt.reg_val_offs * 4));
6585	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6586				adev->virt.reg_val_offs * 4));
6587}
6588
6589static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6590				  uint32_t val)
6591{
6592	uint32_t cmd;
6593
6594	switch (ring->funcs->type) {
6595	case AMDGPU_RING_TYPE_GFX:
6596		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6597		break;
6598	case AMDGPU_RING_TYPE_KIQ:
6599		cmd = 1 << 16; /* no inc addr */
6600		break;
6601	default:
6602		cmd = WR_CONFIRM;
6603		break;
6604	}
6605
6606	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6607	amdgpu_ring_write(ring, cmd);
6608	amdgpu_ring_write(ring, reg);
6609	amdgpu_ring_write(ring, 0);
6610	amdgpu_ring_write(ring, val);
6611}
6612
 
 
 
 
 
 
 
 
 
 
 
 
6613static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6614						 enum amdgpu_interrupt_state state)
6615{
6616	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6617		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6618}
6619
6620static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6621						     int me, int pipe,
6622						     enum amdgpu_interrupt_state state)
6623{
6624	u32 mec_int_cntl, mec_int_cntl_reg;
6625
6626	/*
6627	 * amdgpu controls only the first MEC. That's why this function only
6628	 * handles the setting of interrupts for this specific MEC. All other
6629	 * pipes' interrupts are set by amdkfd.
6630	 */
6631
6632	if (me == 1) {
6633		switch (pipe) {
6634		case 0:
6635			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6636			break;
6637		case 1:
6638			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6639			break;
6640		case 2:
6641			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6642			break;
6643		case 3:
6644			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6645			break;
6646		default:
6647			DRM_DEBUG("invalid pipe %d\n", pipe);
6648			return;
6649		}
6650	} else {
6651		DRM_DEBUG("invalid me %d\n", me);
6652		return;
6653	}
6654
6655	switch (state) {
6656	case AMDGPU_IRQ_STATE_DISABLE:
6657		mec_int_cntl = RREG32(mec_int_cntl_reg);
6658		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6659		WREG32(mec_int_cntl_reg, mec_int_cntl);
6660		break;
6661	case AMDGPU_IRQ_STATE_ENABLE:
6662		mec_int_cntl = RREG32(mec_int_cntl_reg);
6663		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6664		WREG32(mec_int_cntl_reg, mec_int_cntl);
6665		break;
6666	default:
6667		break;
6668	}
6669}
6670
6671static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6672					     struct amdgpu_irq_src *source,
6673					     unsigned type,
6674					     enum amdgpu_interrupt_state state)
6675{
6676	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6677		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6678
6679	return 0;
6680}
6681
6682static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6683					      struct amdgpu_irq_src *source,
6684					      unsigned type,
6685					      enum amdgpu_interrupt_state state)
6686{
6687	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6688		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6689
6690	return 0;
6691}
6692
6693static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6694					    struct amdgpu_irq_src *src,
6695					    unsigned type,
6696					    enum amdgpu_interrupt_state state)
6697{
6698	switch (type) {
6699	case AMDGPU_CP_IRQ_GFX_EOP:
6700		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6701		break;
6702	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6703		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6704		break;
6705	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6706		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6707		break;
6708	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6709		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6710		break;
6711	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6712		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6713		break;
6714	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6715		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6716		break;
6717	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6718		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6719		break;
6720	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6721		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6722		break;
6723	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6724		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6725		break;
6726	default:
6727		break;
6728	}
6729	return 0;
6730}
6731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6732static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6733			    struct amdgpu_irq_src *source,
6734			    struct amdgpu_iv_entry *entry)
6735{
6736	int i;
6737	u8 me_id, pipe_id, queue_id;
6738	struct amdgpu_ring *ring;
6739
6740	DRM_DEBUG("IH: CP EOP\n");
6741	me_id = (entry->ring_id & 0x0c) >> 2;
6742	pipe_id = (entry->ring_id & 0x03) >> 0;
6743	queue_id = (entry->ring_id & 0x70) >> 4;
6744
6745	switch (me_id) {
6746	case 0:
6747		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6748		break;
6749	case 1:
6750	case 2:
6751		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6752			ring = &adev->gfx.compute_ring[i];
6753			/* Per-queue interrupt is supported for MEC starting from VI.
6754			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6755			  */
6756			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6757				amdgpu_fence_process(ring);
6758		}
6759		break;
6760	}
6761	return 0;
6762}
6763
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6764static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6765				 struct amdgpu_irq_src *source,
6766				 struct amdgpu_iv_entry *entry)
6767{
6768	DRM_ERROR("Illegal register access in command stream\n");
6769	schedule_work(&adev->reset_work);
6770	return 0;
6771}
6772
6773static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6774				  struct amdgpu_irq_src *source,
6775				  struct amdgpu_iv_entry *entry)
6776{
6777	DRM_ERROR("Illegal instruction in command stream\n");
6778	schedule_work(&adev->reset_work);
6779	return 0;
6780}
6781
6782static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6783					    struct amdgpu_irq_src *src,
6784					    unsigned int type,
6785					    enum amdgpu_interrupt_state state)
6786{
6787	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
 
6788
6789	switch (type) {
6790	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6791		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6792			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6793		if (ring->me == 1)
6794			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6795				     ring->pipe,
6796				     GENERIC2_INT_ENABLE,
6797				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6798		else
6799			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6800				     ring->pipe,
6801				     GENERIC2_INT_ENABLE,
6802				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6803		break;
6804	default:
6805		BUG(); /* kiq only support GENERIC2_INT now */
 
 
 
 
 
 
6806		break;
 
 
 
6807	}
6808	return 0;
 
 
6809}
6810
6811static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6812			    struct amdgpu_irq_src *source,
6813			    struct amdgpu_iv_entry *entry)
6814{
6815	u8 me_id, pipe_id, queue_id;
6816	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
6817
6818	me_id = (entry->ring_id & 0x0c) >> 2;
6819	pipe_id = (entry->ring_id & 0x03) >> 0;
6820	queue_id = (entry->ring_id & 0x70) >> 4;
6821	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6822		   me_id, pipe_id, queue_id);
 
 
 
 
 
 
 
 
 
 
 
 
6823
6824	amdgpu_fence_process(ring);
6825	return 0;
6826}
6827
6828static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6829	.name = "gfx_v8_0",
6830	.early_init = gfx_v8_0_early_init,
6831	.late_init = gfx_v8_0_late_init,
6832	.sw_init = gfx_v8_0_sw_init,
6833	.sw_fini = gfx_v8_0_sw_fini,
6834	.hw_init = gfx_v8_0_hw_init,
6835	.hw_fini = gfx_v8_0_hw_fini,
6836	.suspend = gfx_v8_0_suspend,
6837	.resume = gfx_v8_0_resume,
6838	.is_idle = gfx_v8_0_is_idle,
6839	.wait_for_idle = gfx_v8_0_wait_for_idle,
6840	.check_soft_reset = gfx_v8_0_check_soft_reset,
6841	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6842	.soft_reset = gfx_v8_0_soft_reset,
6843	.post_soft_reset = gfx_v8_0_post_soft_reset,
6844	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6845	.set_powergating_state = gfx_v8_0_set_powergating_state,
6846	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6847};
6848
6849static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6850	.type = AMDGPU_RING_TYPE_GFX,
6851	.align_mask = 0xff,
6852	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6853	.support_64bit_ptrs = false,
6854	.get_rptr = gfx_v8_0_ring_get_rptr,
6855	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6856	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6857	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6858		5 +  /* COND_EXEC */
6859		7 +  /* PIPELINE_SYNC */
6860		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6861		8 +  /* FENCE for VM_FLUSH */
6862		20 + /* GDS switch */
6863		4 + /* double SWITCH_BUFFER,
6864		       the first COND_EXEC jump to the place just
6865			   prior to this double SWITCH_BUFFER  */
6866		5 + /* COND_EXEC */
6867		7 +	 /*	HDP_flush */
6868		4 +	 /*	VGT_flush */
6869		14 + /*	CE_META */
6870		31 + /*	DE_META */
6871		3 + /* CNTX_CTRL */
6872		5 + /* HDP_INVL */
6873		8 + 8 + /* FENCE x2 */
6874		2, /* SWITCH_BUFFER */
 
6875	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6876	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6877	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6878	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6879	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6880	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6881	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6882	.test_ring = gfx_v8_0_ring_test_ring,
6883	.test_ib = gfx_v8_0_ring_test_ib,
6884	.insert_nop = amdgpu_ring_insert_nop,
6885	.pad_ib = amdgpu_ring_generic_pad_ib,
6886	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6887	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6888	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6889	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6890	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 
 
6891};
6892
6893static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6894	.type = AMDGPU_RING_TYPE_COMPUTE,
6895	.align_mask = 0xff,
6896	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6897	.support_64bit_ptrs = false,
6898	.get_rptr = gfx_v8_0_ring_get_rptr,
6899	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6900	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6901	.emit_frame_size =
6902		20 + /* gfx_v8_0_ring_emit_gds_switch */
6903		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6904		5 + /* hdp_invalidate */
6905		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6906		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6907		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6908	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
 
 
 
6909	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6910	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6911	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6912	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6913	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6914	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6915	.test_ring = gfx_v8_0_ring_test_ring,
6916	.test_ib = gfx_v8_0_ring_test_ib,
6917	.insert_nop = amdgpu_ring_insert_nop,
6918	.pad_ib = amdgpu_ring_generic_pad_ib,
6919	.set_priority = gfx_v8_0_ring_set_priority_compute,
6920	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 
 
6921};
6922
6923static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6924	.type = AMDGPU_RING_TYPE_KIQ,
6925	.align_mask = 0xff,
6926	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6927	.support_64bit_ptrs = false,
6928	.get_rptr = gfx_v8_0_ring_get_rptr,
6929	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6930	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6931	.emit_frame_size =
6932		20 + /* gfx_v8_0_ring_emit_gds_switch */
6933		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6934		5 + /* hdp_invalidate */
6935		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6936		17 + /* gfx_v8_0_ring_emit_vm_flush */
6937		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6938	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6939	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6940	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6941	.test_ring = gfx_v8_0_ring_test_ring,
6942	.test_ib = gfx_v8_0_ring_test_ib,
6943	.insert_nop = amdgpu_ring_insert_nop,
6944	.pad_ib = amdgpu_ring_generic_pad_ib,
6945	.emit_rreg = gfx_v8_0_ring_emit_rreg,
6946	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6947};
6948
6949static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6950{
6951	int i;
6952
6953	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6954
6955	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6956		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6957
6958	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6959		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6960}
6961
6962static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6963	.set = gfx_v8_0_set_eop_interrupt_state,
6964	.process = gfx_v8_0_eop_irq,
6965};
6966
6967static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6968	.set = gfx_v8_0_set_priv_reg_fault_state,
6969	.process = gfx_v8_0_priv_reg_irq,
6970};
6971
6972static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6973	.set = gfx_v8_0_set_priv_inst_fault_state,
6974	.process = gfx_v8_0_priv_inst_irq,
6975};
6976
6977static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
6978	.set = gfx_v8_0_kiq_set_interrupt_state,
6979	.process = gfx_v8_0_kiq_irq,
 
 
 
 
 
6980};
6981
6982static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6983{
6984	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6985	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6986
6987	adev->gfx.priv_reg_irq.num_types = 1;
6988	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6989
6990	adev->gfx.priv_inst_irq.num_types = 1;
6991	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6992
6993	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6994	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
 
 
 
6995}
6996
6997static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6998{
6999	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7000}
7001
7002static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7003{
7004	/* init asci gds info */
7005	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7006	adev->gds.gws.total_size = 64;
7007	adev->gds.oa.total_size = 16;
7008
7009	if (adev->gds.mem.total_size == 64 * 1024) {
7010		adev->gds.mem.gfx_partition_size = 4096;
7011		adev->gds.mem.cs_partition_size = 4096;
7012
7013		adev->gds.gws.gfx_partition_size = 4;
7014		adev->gds.gws.cs_partition_size = 4;
7015
7016		adev->gds.oa.gfx_partition_size = 4;
7017		adev->gds.oa.cs_partition_size = 1;
7018	} else {
7019		adev->gds.mem.gfx_partition_size = 1024;
7020		adev->gds.mem.cs_partition_size = 1024;
7021
7022		adev->gds.gws.gfx_partition_size = 16;
7023		adev->gds.gws.cs_partition_size = 16;
7024
7025		adev->gds.oa.gfx_partition_size = 4;
7026		adev->gds.oa.cs_partition_size = 4;
7027	}
7028}
7029
7030static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7031						 u32 bitmap)
7032{
7033	u32 data;
7034
7035	if (!bitmap)
7036		return;
7037
7038	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7039	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7040
7041	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7042}
7043
7044static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7045{
7046	u32 data, mask;
7047
7048	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7049		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7050
7051	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7052
7053	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7054}
7055
7056static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7057{
7058	int i, j, k, counter, active_cu_number = 0;
7059	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7060	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7061	unsigned disable_masks[4 * 2];
7062	u32 ao_cu_num;
7063
7064	memset(cu_info, 0, sizeof(*cu_info));
7065
7066	if (adev->flags & AMD_IS_APU)
7067		ao_cu_num = 2;
7068	else
7069		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7070
7071	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7072
7073	mutex_lock(&adev->grbm_idx_mutex);
7074	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7075		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7076			mask = 1;
7077			ao_bitmap = 0;
7078			counter = 0;
7079			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7080			if (i < 4 && j < 2)
7081				gfx_v8_0_set_user_cu_inactive_bitmap(
7082					adev, disable_masks[i * 2 + j]);
7083			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7084			cu_info->bitmap[i][j] = bitmap;
7085
7086			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7087				if (bitmap & mask) {
7088					if (counter < ao_cu_num)
7089						ao_bitmap |= mask;
7090					counter ++;
7091				}
7092				mask <<= 1;
7093			}
7094			active_cu_number += counter;
7095			if (i < 2 && j < 2)
7096				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7097			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7098		}
7099	}
7100	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7101	mutex_unlock(&adev->grbm_idx_mutex);
7102
7103	cu_info->number = active_cu_number;
7104	cu_info->ao_cu_mask = ao_cu_mask;
7105	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7106	cu_info->max_waves_per_simd = 10;
7107	cu_info->max_scratch_slots_per_cu = 32;
7108	cu_info->wave_front_size = 64;
7109	cu_info->lds_size = 64;
7110}
7111
7112const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7113{
7114	.type = AMD_IP_BLOCK_TYPE_GFX,
7115	.major = 8,
7116	.minor = 0,
7117	.rev = 0,
7118	.funcs = &gfx_v8_0_ip_funcs,
7119};
7120
7121const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7122{
7123	.type = AMD_IP_BLOCK_TYPE_GFX,
7124	.major = 8,
7125	.minor = 1,
7126	.rev = 0,
7127	.funcs = &gfx_v8_0_ip_funcs,
7128};
7129
7130static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7131{
7132	uint64_t ce_payload_addr;
7133	int cnt_ce;
7134	union {
7135		struct vi_ce_ib_state regular;
7136		struct vi_ce_ib_state_chained_ib chained;
7137	} ce_payload = {};
7138
7139	if (ring->adev->virt.chained_ib_support) {
7140		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7141			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7142		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7143	} else {
7144		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7145			offsetof(struct vi_gfx_meta_data, ce_payload);
7146		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7147	}
7148
7149	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7150	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7151				WRITE_DATA_DST_SEL(8) |
7152				WR_CONFIRM) |
7153				WRITE_DATA_CACHE_POLICY(0));
7154	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7155	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7156	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7157}
7158
7159static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7160{
7161	uint64_t de_payload_addr, gds_addr, csa_addr;
7162	int cnt_de;
7163	union {
7164		struct vi_de_ib_state regular;
7165		struct vi_de_ib_state_chained_ib chained;
7166	} de_payload = {};
7167
7168	csa_addr = amdgpu_csa_vaddr(ring->adev);
7169	gds_addr = csa_addr + 4096;
7170	if (ring->adev->virt.chained_ib_support) {
7171		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7172		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7173		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7174		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7175	} else {
7176		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7177		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7178		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7179		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7180	}
7181
7182	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7183	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7184				WRITE_DATA_DST_SEL(8) |
7185				WR_CONFIRM) |
7186				WRITE_DATA_CACHE_POLICY(0));
7187	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7188	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7189	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7190}
v5.14.15
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "amdgpu_ring.h"
  33#include "vi.h"
  34#include "vi_structs.h"
  35#include "vid.h"
  36#include "amdgpu_ucode.h"
  37#include "amdgpu_atombios.h"
  38#include "atombios_i2c.h"
  39#include "clearstate_vi.h"
  40
  41#include "gmc/gmc_8_2_d.h"
  42#include "gmc/gmc_8_2_sh_mask.h"
  43
  44#include "oss/oss_3_0_d.h"
  45#include "oss/oss_3_0_sh_mask.h"
  46
  47#include "bif/bif_5_0_d.h"
  48#include "bif/bif_5_0_sh_mask.h"
  49#include "gca/gfx_8_0_d.h"
  50#include "gca/gfx_8_0_enum.h"
  51#include "gca/gfx_8_0_sh_mask.h"
 
  52
  53#include "dce/dce_10_0_d.h"
  54#include "dce/dce_10_0_sh_mask.h"
  55
  56#include "smu/smu_7_1_3_d.h"
  57
  58#include "ivsrcid/ivsrcid_vislands30.h"
  59
  60#define GFX8_NUM_GFX_RINGS     1
  61#define GFX8_MEC_HPD_SIZE 4096
  62
  63#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  65#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  66#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  67
  68#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  69#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  70#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  71#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  72#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  73#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  74#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  75#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  76#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  77
  78#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  79#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  80#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  81#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  82#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  83#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  84
  85/* BPM SERDES CMD */
  86#define SET_BPM_SERDES_CMD    1
  87#define CLE_BPM_SERDES_CMD    0
  88
  89/* BPM Register Address*/
  90enum {
  91	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  92	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  93	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  94	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  95	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  96	BPM_REG_FGCG_MAX
  97};
  98
  99#define RLC_FormatDirectRegListLength        14
 100
 101MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 102MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 103MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 104MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 105MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 106MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 109MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 111MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 112MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 113
 114MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 115MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 116MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 117MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 118MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 119MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 122MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 124MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 125MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 126
 127MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 128MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 129MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 130MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 131MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 132MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 133
 
 
 
 
 
 
 
 
 
 
 
 
 134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 135MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 144MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 145
 146MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 147MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 148MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 149MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 150MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 151MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 152MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 153MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 155MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 156MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 157
 158MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 163MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 164MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 165MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 166MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 167MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 168MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 169
 170MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 171MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 172MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 173MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 174MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 175MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 176
 177static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 178{
 179	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 180	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 181	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 182	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 183	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 184	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 185	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 186	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 187	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 188	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 189	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 190	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 191	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 192	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 193	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 194	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 195};
 196
 197static const u32 golden_settings_tonga_a11[] =
 198{
 199	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 200	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 201	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 202	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 203	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 204	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 205	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 206	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 207	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 208	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 209	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 210	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 211	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 212	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 213	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 214	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 215};
 216
 217static const u32 tonga_golden_common_all[] =
 218{
 219	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 220	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 221	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 222	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 223	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 224	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 225	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 226	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 227};
 228
 /*
  * First register table programmed for CHIP_TONGA by
  * gfx_v8_0_init_golden_registers().  Covers the CGTT_* / CGTS_CU0..CU7
  * registers plus a few RLC/CP entries; three u32 words per entry.
  * NOTE(review): the name suggests clock-gating (MGCG/CGCG) defaults -
  * confirm against the hardware documentation.
  */
 229static const u32 tonga_mgcg_cgcg_init[] =
 230{
 231	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 232	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 233	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 234	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 235	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 236	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 237	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 238	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 239	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 240	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 241	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 242	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 243	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 244	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 245	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 246	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 247	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 248	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 249	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 250	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 251	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 252	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 253	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 254	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 255	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 256	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 257	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 258	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 259	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 260	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 261	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 262	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 263	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 264	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 265	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 266	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 267	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 268	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 269	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 270	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 271	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 272	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 273	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 274	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 275	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 276	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 277	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 278	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 279	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 280	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 281	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 282	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 283	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 284	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 285	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 286	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 287	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 288	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 289	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 290	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 291	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 292	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 293	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 294	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 295	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 296	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 297	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 298	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 299	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 300	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 301	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 302	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 303	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 304	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 305	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 306};
 307
 /*
  * First of the two register tables programmed for CHIP_VEGAM by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 308static const u32 golden_settings_vegam_a11[] =
 309{
 310	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 311	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
 312	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 313	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 314	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 315	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 316	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
 317	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
 318	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 319	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 320	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 321	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 322	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 323	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 324	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 325	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
 326	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 327};
 328
 /*
  * Second register table programmed for CHIP_VEGAM by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 329static const u32 vegam_golden_common_all[] =
 330{
 331	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 332	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 333	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 334	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 335	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 336	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 337};
 338
 /*
  * First of the two register tables programmed by
  * gfx_v8_0_init_golden_registers() for both CHIP_POLARIS11 and
  * CHIP_POLARIS12; three u32 words per entry.
  */
 339static const u32 golden_settings_polaris11_a11[] =
 340{
 341	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 342	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 343	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 344	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 345	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 346	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 347	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 348	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 349	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 350	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 351	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 352	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 353	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 354	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 355	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 356	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 357	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 358};
 359
 /*
  * Second register table programmed by gfx_v8_0_init_golden_registers()
  * for CHIP_POLARIS11 and CHIP_POLARIS12; three u32 words per entry.
  */
 360static const u32 polaris11_golden_common_all[] =
 361{
 362	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 363	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 364	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 365	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 366	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 367	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 368};
 369
 /*
  * First of the two register tables programmed for CHIP_POLARIS10 by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 370static const u32 golden_settings_polaris10_a11[] =
 371{
 372	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 373	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 374	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 375	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 376	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 377	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 378	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 379	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 380	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 381	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 382	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 383	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 384	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 385	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 386	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 387	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 388	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 389};
 390
 /*
  * Second register table programmed for CHIP_POLARIS10 by
  * gfx_v8_0_init_golden_registers(), before the CG_ACLK_CNTL divider
  * update; three u32 words per entry.
  */
 391static const u32 polaris10_golden_common_all[] =
 392{
 393	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 394	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 395	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 396	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 397	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 398	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 399	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 400	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 401};
 402
 /*
  * Last of the three register tables programmed for CHIP_FIJI by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  * mmGRBM_GFX_INDEX appears twice, the second time ahead of the
  * mmSPI_CONFIG_CNTL_1 entry.
  */
 403static const u32 fiji_golden_common_all[] =
 404{
 405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 406	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 407	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 408	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 409	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 410	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 411	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 412	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 413	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 414	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 415};
 416
 /*
  * Second of the three register tables programmed for CHIP_FIJI by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 417static const u32 golden_settings_fiji_a10[] =
 418{
 419	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 421	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 422	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 423	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 425	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 426	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 427	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 428	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 429	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 430};
 431
 /*
  * First register table programmed for CHIP_FIJI by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.  Unlike
  * the Tonga/Iceland/Carrizo variants it carries no per-CU CGTS_CUx_*
  * entries.
  */
 432static const u32 fiji_mgcg_cgcg_init[] =
 433{
 434	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 435	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 436	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 437	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 438	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 439	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 440	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 441	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 442	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 443	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 444	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 445	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 446	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 447	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 451	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 452	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 453	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 454	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 455	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 456	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 459	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 460	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 461	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 462	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 463	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 464	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 465	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 466	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 467	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 468	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 469};
 470
 /*
  * Second of the three register tables programmed for CHIP_TOPAZ by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 471static const u32 golden_settings_iceland_a11[] =
 472{
 473	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 474	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 475	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 476	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 477	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 478	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 479	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 480	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 481	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 482	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 483	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 484	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 485	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 486	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 487	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 488	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 489};
 490
 /*
  * Last of the three register tables programmed for CHIP_TOPAZ by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 491static const u32 iceland_golden_common_all[] =
 492{
 493	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 494	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 495	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 496	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 497	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 498	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 499	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 500	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 501};
 502
 /*
  * First register table programmed for CHIP_TOPAZ by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.  The
  * per-CU section only covers CGTS_CU0..CU5, and there is no
  * mmCP_MEM_SLP_CNTL entry at the end.
  */
 503static const u32 iceland_mgcg_cgcg_init[] =
 504{
 505	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 506	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 507	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 508	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 509	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 510	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 511	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 512	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 513	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 514	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 515	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 516	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 517	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 518	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 519	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 520	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 521	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 522	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 523	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 524	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 525	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 526	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 527	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 528	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 529	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 530	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 531	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 532	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 533	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 534	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 535	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 536	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 537	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 538	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 539	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 540	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 541	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 542	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 543	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 544	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 545	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 546	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 547	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 548	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 549	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 550	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 551	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 552	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 553	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 554	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 555	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 556	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 557	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 558	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 559	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 560	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 561	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 562	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 563	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 564	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 565	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 566	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 567	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 568	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 569};
 570
 /*
  * Second of the three register tables programmed for CHIP_CARRIZO by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 571static const u32 cz_golden_settings_a11[] =
 572{
 573	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 574	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 575	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 576	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 577	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 578	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 579	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 580	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 581	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 582	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 583	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 584	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 585};
 586
 /*
  * Last of the three register tables programmed for CHIP_CARRIZO by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 587static const u32 cz_golden_common_all[] =
 588{
 589	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 590	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 591	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 592	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 593	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 594	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 595	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 596	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 597};
 598
 /*
  * First register table programmed for CHIP_CARRIZO by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.  Covers
  * the CGTT_* registers and the per-CU CGTS_CU0..CU7 registers.
  */
 599static const u32 cz_mgcg_cgcg_init[] =
 600{
 601	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 602	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 603	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 604	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 605	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 606	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 607	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 608	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 609	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 610	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 611	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 612	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 613	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 614	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 615	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 616	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 617	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 618	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 619	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 620	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 621	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 622	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 623	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 624	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 625	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 626	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 627	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 628	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 630	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 631	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 632	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 633	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 634	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 635	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 636	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 637	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 638	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 639	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 640	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 641	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 642	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 643	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 644	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 645	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 646	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 647	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 648	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 649	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 650	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 651	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 652	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 653	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 654	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 655	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 656	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 657	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 658	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 659	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 660	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 661	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 662	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 663	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 664	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 665	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 666	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 667	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 668	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 669	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 670	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 671	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 672	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 673	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 674	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 675	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 676};
 677
 /*
  * Second of the three register tables programmed for CHIP_STONEY by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 678static const u32 stoney_golden_settings_a11[] =
 679{
 680	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 681	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 682	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 683	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 684	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 685	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 686	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 687	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 688	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 689	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 690};
 691
 /*
  * Last of the three register tables programmed for CHIP_STONEY by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.
  */
 692static const u32 stoney_golden_common_all[] =
 693{
 694	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 695	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 696	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 697	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 698	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 699	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 700	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 701	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 702};
 703
 /*
  * First register table programmed for CHIP_STONEY by
  * gfx_v8_0_init_golden_registers(); three u32 words per entry.  Much
  * shorter than the other *_mgcg_cgcg_init tables: five entries only.
  */
 704static const u32 stoney_mgcg_cgcg_init[] =
 705{
 706	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 707	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 708	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 709	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 710	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 711};
 712
 713
 /*
  * Human-readable descriptions of the seven SQ EDC error sources.
  * NOTE(review): presumably indexed by the source field decoded from the
  * SQ EDC info register - confirm at the use site.
  */
 714static const char * const sq_edc_source_names[] = {
 715	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
 716	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
 717	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
 718	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
 719	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
 720	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
 721	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
 722};
 723
 /* Forward declarations of file-local (static) gfx v8.0 helpers. */
 724static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 725static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 726static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 727static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 728static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 729static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 730static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 731static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 732
 /*
  * Mask and shift of the ACLK_DIVIDER field in CG_ACLK_CNTL; used by
  * gfx_v8_0_init_golden_registers() when it rewrites the divider on
  * CHIP_POLARIS10.
  */
 733#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
 734#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
 735
 736static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 737{
 738	uint32_t data;
 739
 740	switch (adev->asic_type) {
 741	case CHIP_TOPAZ:
 742		amdgpu_device_program_register_sequence(adev,
 743							iceland_mgcg_cgcg_init,
 744							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 745		amdgpu_device_program_register_sequence(adev,
 746							golden_settings_iceland_a11,
 747							ARRAY_SIZE(golden_settings_iceland_a11));
 748		amdgpu_device_program_register_sequence(adev,
 749							iceland_golden_common_all,
 750							ARRAY_SIZE(iceland_golden_common_all));
 751		break;
 752	case CHIP_FIJI:
 753		amdgpu_device_program_register_sequence(adev,
 754							fiji_mgcg_cgcg_init,
 755							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 756		amdgpu_device_program_register_sequence(adev,
 757							golden_settings_fiji_a10,
 758							ARRAY_SIZE(golden_settings_fiji_a10));
 759		amdgpu_device_program_register_sequence(adev,
 760							fiji_golden_common_all,
 761							ARRAY_SIZE(fiji_golden_common_all));
 762		break;
 763
 764	case CHIP_TONGA:
 765		amdgpu_device_program_register_sequence(adev,
 766							tonga_mgcg_cgcg_init,
 767							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 768		amdgpu_device_program_register_sequence(adev,
 769							golden_settings_tonga_a11,
 770							ARRAY_SIZE(golden_settings_tonga_a11));
 771		amdgpu_device_program_register_sequence(adev,
 772							tonga_golden_common_all,
 773							ARRAY_SIZE(tonga_golden_common_all));
 774		break;
 775	case CHIP_VEGAM:
 776		amdgpu_device_program_register_sequence(adev,
 777							golden_settings_vegam_a11,
 778							ARRAY_SIZE(golden_settings_vegam_a11));
 779		amdgpu_device_program_register_sequence(adev,
 780							vegam_golden_common_all,
 781							ARRAY_SIZE(vegam_golden_common_all));
 782		break;
 783	case CHIP_POLARIS11:
 784	case CHIP_POLARIS12:
 785		amdgpu_device_program_register_sequence(adev,
 786							golden_settings_polaris11_a11,
 787							ARRAY_SIZE(golden_settings_polaris11_a11));
 788		amdgpu_device_program_register_sequence(adev,
 789							polaris11_golden_common_all,
 790							ARRAY_SIZE(polaris11_golden_common_all));
 791		break;
 792	case CHIP_POLARIS10:
 793		amdgpu_device_program_register_sequence(adev,
 794							golden_settings_polaris10_a11,
 795							ARRAY_SIZE(golden_settings_polaris10_a11));
 796		amdgpu_device_program_register_sequence(adev,
 797							polaris10_golden_common_all,
 798							ARRAY_SIZE(polaris10_golden_common_all));
 799		data = RREG32_SMC(ixCG_ACLK_CNTL);
 800		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
 801		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
 802		WREG32_SMC(ixCG_ACLK_CNTL, data);
 803		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
 804		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 805		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 806		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
 807			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 808			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 809		}
 810		break;
 811	case CHIP_CARRIZO:
 812		amdgpu_device_program_register_sequence(adev,
 813							cz_mgcg_cgcg_init,
 814							ARRAY_SIZE(cz_mgcg_cgcg_init));
 815		amdgpu_device_program_register_sequence(adev,
 816							cz_golden_settings_a11,
 817							ARRAY_SIZE(cz_golden_settings_a11));
 818		amdgpu_device_program_register_sequence(adev,
 819							cz_golden_common_all,
 820							ARRAY_SIZE(cz_golden_common_all));
 821		break;
 822	case CHIP_STONEY:
 823		amdgpu_device_program_register_sequence(adev,
 824							stoney_mgcg_cgcg_init,
 825							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 826		amdgpu_device_program_register_sequence(adev,
 827							stoney_golden_settings_a11,
 828							ARRAY_SIZE(stoney_golden_settings_a11));
 829		amdgpu_device_program_register_sequence(adev,
 830							stoney_golden_common_all,
 831							ARRAY_SIZE(stoney_golden_common_all));
 832		break;
 833	default:
 834		break;
 835	}
 836}
 837
 838static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 839{
 840	adev->gfx.scratch.num_reg = 8;
 841	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 842	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 843}
 844
 845static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 846{
 847	struct amdgpu_device *adev = ring->adev;
 848	uint32_t scratch;
 849	uint32_t tmp = 0;
 850	unsigned i;
 851	int r;
 852
 853	r = amdgpu_gfx_scratch_get(adev, &scratch);
 854	if (r)
 
 855		return r;
 856
 857	WREG32(scratch, 0xCAFEDEAD);
 858	r = amdgpu_ring_alloc(ring, 3);
 859	if (r)
 860		goto error_free_scratch;
 861
 
 
 
 862	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 863	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 864	amdgpu_ring_write(ring, 0xDEADBEEF);
 865	amdgpu_ring_commit(ring);
 866
 867	for (i = 0; i < adev->usec_timeout; i++) {
 868		tmp = RREG32(scratch);
 869		if (tmp == 0xDEADBEEF)
 870			break;
 871		udelay(1);
 
 
 
 
 
 
 
 
 872	}
 873
 874	if (i >= adev->usec_timeout)
 875		r = -ETIMEDOUT;
 876
 877error_free_scratch:
 878	amdgpu_gfx_scratch_free(adev, scratch);
 879	return r;
 880}
 881
 882static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 883{
 884	struct amdgpu_device *adev = ring->adev;
 885	struct amdgpu_ib ib;
 886	struct dma_fence *f = NULL;
 887
 888	unsigned int index;
 889	uint64_t gpu_addr;
 890	uint32_t tmp;
 891	long r;
 892
 893	r = amdgpu_device_wb_get(adev, &index);
 894	if (r)
 
 895		return r;
 896
 897	gpu_addr = adev->wb.gpu_addr + (index * 4);
 898	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 899	memset(&ib, 0, sizeof(ib));
 900	r = amdgpu_ib_get(adev, NULL, 16,
 901					AMDGPU_IB_POOL_DIRECT, &ib);
 902	if (r)
 903		goto err1;
 904
 905	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 906	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 907	ib.ptr[2] = lower_32_bits(gpu_addr);
 908	ib.ptr[3] = upper_32_bits(gpu_addr);
 909	ib.ptr[4] = 0xDEADBEEF;
 910	ib.length_dw = 5;
 911
 912	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 913	if (r)
 914		goto err2;
 915
 916	r = dma_fence_wait_timeout(f, false, timeout);
 917	if (r == 0) {
 
 918		r = -ETIMEDOUT;
 919		goto err2;
 920	} else if (r < 0) {
 
 921		goto err2;
 922	}
 923
 924	tmp = adev->wb.wb[index];
 925	if (tmp == 0xDEADBEEF)
 926		r = 0;
 927	else
 
 
 928		r = -EINVAL;
 929
 930err2:
 931	amdgpu_ib_free(adev, &ib, NULL);
 932	dma_fence_put(f);
 933err1:
 934	amdgpu_device_wb_free(adev, index);
 935	return r;
 936}
 937
 938
 939static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 940{
 941	release_firmware(adev->gfx.pfp_fw);
 942	adev->gfx.pfp_fw = NULL;
 943	release_firmware(adev->gfx.me_fw);
 944	adev->gfx.me_fw = NULL;
 945	release_firmware(adev->gfx.ce_fw);
 946	adev->gfx.ce_fw = NULL;
 947	release_firmware(adev->gfx.rlc_fw);
 948	adev->gfx.rlc_fw = NULL;
 949	release_firmware(adev->gfx.mec_fw);
 950	adev->gfx.mec_fw = NULL;
 951	if ((adev->asic_type != CHIP_STONEY) &&
 952	    (adev->asic_type != CHIP_TOPAZ))
 953		release_firmware(adev->gfx.mec2_fw);
 954	adev->gfx.mec2_fw = NULL;
 955
 956	kfree(adev->gfx.rlc.register_list_format);
 957}
 958
 959static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 960{
 961	const char *chip_name;
 962	char fw_name[30];
 963	int err;
 964	struct amdgpu_firmware_info *info = NULL;
 965	const struct common_firmware_header *header = NULL;
 966	const struct gfx_firmware_header_v1_0 *cp_hdr;
 967	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 968	unsigned int *tmp = NULL, i;
 969
 970	DRM_DEBUG("\n");
 971
 972	switch (adev->asic_type) {
 973	case CHIP_TOPAZ:
 974		chip_name = "topaz";
 975		break;
 976	case CHIP_TONGA:
 977		chip_name = "tonga";
 978		break;
 979	case CHIP_CARRIZO:
 980		chip_name = "carrizo";
 981		break;
 982	case CHIP_FIJI:
 983		chip_name = "fiji";
 984		break;
 985	case CHIP_STONEY:
 986		chip_name = "stoney";
 987		break;
 988	case CHIP_POLARIS10:
 989		chip_name = "polaris10";
 990		break;
 991	case CHIP_POLARIS11:
 992		chip_name = "polaris11";
 993		break;
 994	case CHIP_POLARIS12:
 995		chip_name = "polaris12";
 996		break;
 997	case CHIP_VEGAM:
 998		chip_name = "vegam";
 999		break;
1000	default:
1001		BUG();
1002	}
1003
1004	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1005		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1006		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1007		if (err == -ENOENT) {
1008			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1009			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1010		}
1011	} else {
1012		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1013		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1014	}
1015	if (err)
1016		goto out;
1017	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1018	if (err)
1019		goto out;
1020	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1021	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1022	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1023
1024	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1025		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1026		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1027		if (err == -ENOENT) {
1028			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1029			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1030		}
1031	} else {
1032		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1033		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1034	}
1035	if (err)
1036		goto out;
1037	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1038	if (err)
1039		goto out;
1040	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1041	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1042
1043	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1044
1045	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1046		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1047		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1048		if (err == -ENOENT) {
1049			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1050			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1051		}
1052	} else {
1053		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1054		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1055	}
1056	if (err)
1057		goto out;
1058	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1059	if (err)
1060		goto out;
1061	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1062	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1063	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1064
1065	/*
1066	 * Support for MCBP/Virtualization in combination with chained IBs is
1067	 * formal released on feature version #46
1068	 */
1069	if (adev->gfx.ce_feature_version >= 46 &&
1070	    adev->gfx.pfp_feature_version >= 46) {
1071		adev->virt.chained_ib_support = true;
1072		DRM_INFO("Chained IB support enabled!\n");
1073	} else
1074		adev->virt.chained_ib_support = false;
1075
1076	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1077	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1078	if (err)
1079		goto out;
1080	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1081	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1082	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1083	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1084
1085	adev->gfx.rlc.save_and_restore_offset =
1086			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1087	adev->gfx.rlc.clear_state_descriptor_offset =
1088			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1089	adev->gfx.rlc.avail_scratch_ram_locations =
1090			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1091	adev->gfx.rlc.reg_restore_list_size =
1092			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1093	adev->gfx.rlc.reg_list_format_start =
1094			le32_to_cpu(rlc_hdr->reg_list_format_start);
1095	adev->gfx.rlc.reg_list_format_separate_start =
1096			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1097	adev->gfx.rlc.starting_offsets_start =
1098			le32_to_cpu(rlc_hdr->starting_offsets_start);
1099	adev->gfx.rlc.reg_list_format_size_bytes =
1100			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1101	adev->gfx.rlc.reg_list_size_bytes =
1102			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1103
1104	adev->gfx.rlc.register_list_format =
1105			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1106					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1107
1108	if (!adev->gfx.rlc.register_list_format) {
1109		err = -ENOMEM;
1110		goto out;
1111	}
1112
1113	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1114			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1115	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1116		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1117
1118	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1119
1120	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1121			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1122	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1123		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1124
1125	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1126		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1127		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1128		if (err == -ENOENT) {
1129			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1130			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1131		}
1132	} else {
1133		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1134		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1135	}
1136	if (err)
1137		goto out;
1138	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1139	if (err)
1140		goto out;
1141	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1142	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1143	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1144
1145	if ((adev->asic_type != CHIP_STONEY) &&
1146	    (adev->asic_type != CHIP_TOPAZ)) {
1147		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1148			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1149			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1150			if (err == -ENOENT) {
1151				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1152				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1153			}
1154		} else {
1155			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1156			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1157		}
1158		if (!err) {
1159			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1160			if (err)
1161				goto out;
1162			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1163				adev->gfx.mec2_fw->data;
1164			adev->gfx.mec2_fw_version =
1165				le32_to_cpu(cp_hdr->header.ucode_version);
1166			adev->gfx.mec2_feature_version =
1167				le32_to_cpu(cp_hdr->ucode_feature_version);
1168		} else {
1169			err = 0;
1170			adev->gfx.mec2_fw = NULL;
1171		}
1172	}
1173
1174	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1175	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1176	info->fw = adev->gfx.pfp_fw;
1177	header = (const struct common_firmware_header *)info->fw->data;
1178	adev->firmware.fw_size +=
1179		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1182	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1183	info->fw = adev->gfx.me_fw;
1184	header = (const struct common_firmware_header *)info->fw->data;
1185	adev->firmware.fw_size +=
1186		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1189	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1190	info->fw = adev->gfx.ce_fw;
1191	header = (const struct common_firmware_header *)info->fw->data;
1192	adev->firmware.fw_size +=
1193		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1196	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1197	info->fw = adev->gfx.rlc_fw;
1198	header = (const struct common_firmware_header *)info->fw->data;
1199	adev->firmware.fw_size +=
1200		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1201
1202	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1203	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1204	info->fw = adev->gfx.mec_fw;
1205	header = (const struct common_firmware_header *)info->fw->data;
1206	adev->firmware.fw_size +=
1207		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1208
1209	/* we need account JT in */
1210	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1211	adev->firmware.fw_size +=
1212		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
 
 
1213
1214	if (amdgpu_sriov_vf(adev)) {
1215		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1216		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1217		info->fw = adev->gfx.mec_fw;
1218		adev->firmware.fw_size +=
1219			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1220	}
1221
1222	if (adev->gfx.mec2_fw) {
1223		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1224		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1225		info->fw = adev->gfx.mec2_fw;
1226		header = (const struct common_firmware_header *)info->fw->data;
1227		adev->firmware.fw_size +=
1228			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1229	}
1230
1231out:
1232	if (err) {
1233		dev_err(adev->dev,
1234			"gfx8: Failed to load firmware \"%s\"\n",
1235			fw_name);
1236		release_firmware(adev->gfx.pfp_fw);
1237		adev->gfx.pfp_fw = NULL;
1238		release_firmware(adev->gfx.me_fw);
1239		adev->gfx.me_fw = NULL;
1240		release_firmware(adev->gfx.ce_fw);
1241		adev->gfx.ce_fw = NULL;
1242		release_firmware(adev->gfx.rlc_fw);
1243		adev->gfx.rlc_fw = NULL;
1244		release_firmware(adev->gfx.mec_fw);
1245		adev->gfx.mec_fw = NULL;
1246		release_firmware(adev->gfx.mec2_fw);
1247		adev->gfx.mec2_fw = NULL;
1248	}
1249	return err;
1250}
1251
1252static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1253				    volatile u32 *buffer)
1254{
1255	u32 count = 0, i;
1256	const struct cs_section_def *sect = NULL;
1257	const struct cs_extent_def *ext = NULL;
1258
1259	if (adev->gfx.rlc.cs_data == NULL)
1260		return;
1261	if (buffer == NULL)
1262		return;
1263
1264	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1265	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1266
1267	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1268	buffer[count++] = cpu_to_le32(0x80000000);
1269	buffer[count++] = cpu_to_le32(0x80000000);
1270
1271	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1272		for (ext = sect->section; ext->extent != NULL; ++ext) {
1273			if (sect->id == SECT_CONTEXT) {
1274				buffer[count++] =
1275					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1276				buffer[count++] = cpu_to_le32(ext->reg_index -
1277						PACKET3_SET_CONTEXT_REG_START);
1278				for (i = 0; i < ext->reg_count; i++)
1279					buffer[count++] = cpu_to_le32(ext->extent[i]);
1280			} else {
1281				return;
1282			}
1283		}
1284	}
1285
1286	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1287	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1288			PACKET3_SET_CONTEXT_REG_START);
1289	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1290	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1291
1292	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1293	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1294
1295	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1296	buffer[count++] = cpu_to_le32(0);
1297}
1298
1299static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1300{
 
 
 
 
 
 
1301	if (adev->asic_type == CHIP_CARRIZO)
1302		return 5;
1303	else
1304		return 4;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1305}
1306
1307static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1308{
 
 
1309	const struct cs_section_def *cs_data;
1310	int r;
1311
1312	adev->gfx.rlc.cs_data = vi_cs_data;
1313
1314	cs_data = adev->gfx.rlc.cs_data;
1315
1316	if (cs_data) {
1317		/* init clear state block */
1318		r = amdgpu_gfx_rlc_init_csb(adev);
1319		if (r)
 
 
 
 
 
 
 
 
1320			return r;
 
 
 
 
 
 
 
1321	}
1322
1323	if ((adev->asic_type == CHIP_CARRIZO) ||
1324	    (adev->asic_type == CHIP_STONEY)) {
1325		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1326		r = amdgpu_gfx_rlc_init_cpt(adev);
1327		if (r)
 
 
 
 
 
1328			return r;
 
 
 
 
 
 
1329	}
1330
1331	/* init spm vmid with 0xf */
1332	if (adev->gfx.rlc.funcs->update_spm_vmid)
1333		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1334
1335	return 0;
1336}
1337
1338static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1339{
1340	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1341}
1342
1343static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1344{
1345	int r;
1346	u32 *hpd;
1347	size_t mec_hpd_size;
1348
1349	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1350
1351	/* take ownership of the relevant compute queues */
1352	amdgpu_gfx_compute_queue_acquire(adev);
1353
1354	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1355	if (mec_hpd_size) {
1356		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1357					      AMDGPU_GEM_DOMAIN_VRAM,
1358					      &adev->gfx.mec.hpd_eop_obj,
1359					      &adev->gfx.mec.hpd_eop_gpu_addr,
1360					      (void **)&hpd);
1361		if (r) {
1362			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1363			return r;
1364		}
1365
1366		memset(hpd, 0, mec_hpd_size);
 
 
 
 
 
 
 
 
 
 
1367
1368		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1369		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1370	}
1371
1372	return 0;
1373}
1374
1375static const u32 vgpr_init_compute_shader[] =
1376{
1377	0x7e000209, 0x7e020208,
1378	0x7e040207, 0x7e060206,
1379	0x7e080205, 0x7e0a0204,
1380	0x7e0c0203, 0x7e0e0202,
1381	0x7e100201, 0x7e120200,
1382	0x7e140209, 0x7e160208,
1383	0x7e180207, 0x7e1a0206,
1384	0x7e1c0205, 0x7e1e0204,
1385	0x7e200203, 0x7e220202,
1386	0x7e240201, 0x7e260200,
1387	0x7e280209, 0x7e2a0208,
1388	0x7e2c0207, 0x7e2e0206,
1389	0x7e300205, 0x7e320204,
1390	0x7e340203, 0x7e360202,
1391	0x7e380201, 0x7e3a0200,
1392	0x7e3c0209, 0x7e3e0208,
1393	0x7e400207, 0x7e420206,
1394	0x7e440205, 0x7e460204,
1395	0x7e480203, 0x7e4a0202,
1396	0x7e4c0201, 0x7e4e0200,
1397	0x7e500209, 0x7e520208,
1398	0x7e540207, 0x7e560206,
1399	0x7e580205, 0x7e5a0204,
1400	0x7e5c0203, 0x7e5e0202,
1401	0x7e600201, 0x7e620200,
1402	0x7e640209, 0x7e660208,
1403	0x7e680207, 0x7e6a0206,
1404	0x7e6c0205, 0x7e6e0204,
1405	0x7e700203, 0x7e720202,
1406	0x7e740201, 0x7e760200,
1407	0x7e780209, 0x7e7a0208,
1408	0x7e7c0207, 0x7e7e0206,
1409	0xbf8a0000, 0xbf810000,
1410};
1411
1412static const u32 sgpr_init_compute_shader[] =
1413{
1414	0xbe8a0100, 0xbe8c0102,
1415	0xbe8e0104, 0xbe900106,
1416	0xbe920108, 0xbe940100,
1417	0xbe960102, 0xbe980104,
1418	0xbe9a0106, 0xbe9c0108,
1419	0xbe9e0100, 0xbea00102,
1420	0xbea20104, 0xbea40106,
1421	0xbea60108, 0xbea80100,
1422	0xbeaa0102, 0xbeac0104,
1423	0xbeae0106, 0xbeb00108,
1424	0xbeb20100, 0xbeb40102,
1425	0xbeb60104, 0xbeb80106,
1426	0xbeba0108, 0xbebc0100,
1427	0xbebe0102, 0xbec00104,
1428	0xbec20106, 0xbec40108,
1429	0xbec60100, 0xbec80102,
1430	0xbee60004, 0xbee70005,
1431	0xbeea0006, 0xbeeb0007,
1432	0xbee80008, 0xbee90009,
1433	0xbefc0000, 0xbf8a0000,
1434	0xbf810000, 0x00000000,
1435};
1436
1437static const u32 vgpr_init_regs[] =
1438{
1439	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1440	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1441	mmCOMPUTE_NUM_THREAD_X, 256*4,
1442	mmCOMPUTE_NUM_THREAD_Y, 1,
1443	mmCOMPUTE_NUM_THREAD_Z, 1,
1444	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1445	mmCOMPUTE_PGM_RSRC2, 20,
1446	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1447	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1448	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1449	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1450	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1451	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1452	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1453	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1454	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1455	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1456};
1457
1458static const u32 sgpr1_init_regs[] =
1459{
1460	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1461	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1462	mmCOMPUTE_NUM_THREAD_X, 256*5,
1463	mmCOMPUTE_NUM_THREAD_Y, 1,
1464	mmCOMPUTE_NUM_THREAD_Z, 1,
1465	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1466	mmCOMPUTE_PGM_RSRC2, 20,
1467	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1468	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1469	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1470	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1471	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1472	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1473	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1474	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1475	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1476	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1477};
1478
1479static const u32 sgpr2_init_regs[] =
1480{
1481	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1482	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1483	mmCOMPUTE_NUM_THREAD_X, 256*5,
1484	mmCOMPUTE_NUM_THREAD_Y, 1,
1485	mmCOMPUTE_NUM_THREAD_Z, 1,
1486	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1487	mmCOMPUTE_PGM_RSRC2, 20,
1488	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1489	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1490	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1491	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1492	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1493	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1494	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1495	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1496	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1497	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1498};
1499
1500static const u32 sec_ded_counter_registers[] =
1501{
1502	mmCPC_EDC_ATC_CNT,
1503	mmCPC_EDC_SCRATCH_CNT,
1504	mmCPC_EDC_UCODE_CNT,
1505	mmCPF_EDC_ATC_CNT,
1506	mmCPF_EDC_ROQ_CNT,
1507	mmCPF_EDC_TAG_CNT,
1508	mmCPG_EDC_ATC_CNT,
1509	mmCPG_EDC_DMA_CNT,
1510	mmCPG_EDC_TAG_CNT,
1511	mmDC_EDC_CSINVOC_CNT,
1512	mmDC_EDC_RESTORE_CNT,
1513	mmDC_EDC_STATE_CNT,
1514	mmGDS_EDC_CNT,
1515	mmGDS_EDC_GRBM_CNT,
1516	mmGDS_EDC_OA_DED,
1517	mmSPI_EDC_CNT,
1518	mmSQC_ATC_EDC_GATCL1_CNT,
1519	mmSQC_EDC_CNT,
1520	mmSQ_EDC_DED_CNT,
1521	mmSQ_EDC_INFO,
1522	mmSQ_EDC_SEC_CNT,
1523	mmTCC_EDC_CNT,
1524	mmTCP_ATC_EDC_GATCL1_CNT,
1525	mmTCP_EDC_CNT,
1526	mmTD_EDC_CNT
1527};
1528
1529static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1530{
1531	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1532	struct amdgpu_ib ib;
1533	struct dma_fence *f = NULL;
1534	int r, i;
1535	u32 tmp;
1536	unsigned total_size, vgpr_offset, sgpr_offset;
1537	u64 gpu_addr;
1538
1539	/* only supported on CZ */
1540	if (adev->asic_type != CHIP_CARRIZO)
1541		return 0;
1542
1543	/* bail if the compute ring is not ready */
1544	if (!ring->sched.ready)
1545		return 0;
1546
1547	tmp = RREG32(mmGB_EDC_MODE);
1548	WREG32(mmGB_EDC_MODE, 0);
1549
1550	total_size =
1551		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1552	total_size +=
1553		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1554	total_size +=
1555		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1556	total_size = ALIGN(total_size, 256);
1557	vgpr_offset = total_size;
1558	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1559	sgpr_offset = total_size;
1560	total_size += sizeof(sgpr_init_compute_shader);
1561
1562	/* allocate an indirect buffer to put the commands in */
1563	memset(&ib, 0, sizeof(ib));
1564	r = amdgpu_ib_get(adev, NULL, total_size,
1565					AMDGPU_IB_POOL_DIRECT, &ib);
1566	if (r) {
1567		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1568		return r;
1569	}
1570
1571	/* load the compute shaders */
1572	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1573		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1574
1575	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1576		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1577
1578	/* init the ib length to 0 */
1579	ib.length_dw = 0;
1580
1581	/* VGPR */
1582	/* write the register state for the compute dispatch */
1583	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1584		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1585		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1586		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1587	}
1588	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1589	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1590	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1591	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1592	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1593	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1594
1595	/* write dispatch packet */
1596	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1597	ib.ptr[ib.length_dw++] = 8; /* x */
1598	ib.ptr[ib.length_dw++] = 1; /* y */
1599	ib.ptr[ib.length_dw++] = 1; /* z */
1600	ib.ptr[ib.length_dw++] =
1601		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1602
1603	/* write CS partial flush packet */
1604	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1605	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1606
1607	/* SGPR1 */
1608	/* write the register state for the compute dispatch */
1609	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1610		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1611		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1612		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1613	}
1614	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1615	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1616	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1617	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1618	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1619	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1620
1621	/* write dispatch packet */
1622	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1623	ib.ptr[ib.length_dw++] = 8; /* x */
1624	ib.ptr[ib.length_dw++] = 1; /* y */
1625	ib.ptr[ib.length_dw++] = 1; /* z */
1626	ib.ptr[ib.length_dw++] =
1627		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1628
1629	/* write CS partial flush packet */
1630	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1631	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1632
1633	/* SGPR2 */
1634	/* write the register state for the compute dispatch */
1635	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1636		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1637		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1638		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1639	}
1640	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1641	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1642	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1643	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1644	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1645	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1646
1647	/* write dispatch packet */
1648	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1649	ib.ptr[ib.length_dw++] = 8; /* x */
1650	ib.ptr[ib.length_dw++] = 1; /* y */
1651	ib.ptr[ib.length_dw++] = 1; /* z */
1652	ib.ptr[ib.length_dw++] =
1653		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1654
1655	/* write CS partial flush packet */
1656	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1657	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1658
1659	/* shedule the ib on the ring */
1660	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1661	if (r) {
1662		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1663		goto fail;
1664	}
1665
1666	/* wait for the GPU to finish processing the IB */
1667	r = dma_fence_wait(f, false);
1668	if (r) {
1669		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1670		goto fail;
1671	}
1672
1673	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1674	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1675	WREG32(mmGB_EDC_MODE, tmp);
1676
1677	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1678	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1679	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1680
1681
1682	/* read back registers to clear the counters */
1683	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1684		RREG32(sec_ded_counter_registers[i]);
1685
1686fail:
1687	amdgpu_ib_free(adev, &ib, NULL);
1688	dma_fence_put(f);
1689
1690	return r;
1691}
1692
1693static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1694{
1695	u32 gb_addr_config;
1696	u32 mc_arb_ramcfg;
1697	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1698	u32 tmp;
1699	int ret;
1700
1701	switch (adev->asic_type) {
1702	case CHIP_TOPAZ:
1703		adev->gfx.config.max_shader_engines = 1;
1704		adev->gfx.config.max_tile_pipes = 2;
1705		adev->gfx.config.max_cu_per_sh = 6;
1706		adev->gfx.config.max_sh_per_se = 1;
1707		adev->gfx.config.max_backends_per_se = 2;
1708		adev->gfx.config.max_texture_channel_caches = 2;
1709		adev->gfx.config.max_gprs = 256;
1710		adev->gfx.config.max_gs_threads = 32;
1711		adev->gfx.config.max_hw_contexts = 8;
1712
1713		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1714		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1715		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1716		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1717		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1718		break;
1719	case CHIP_FIJI:
1720		adev->gfx.config.max_shader_engines = 4;
1721		adev->gfx.config.max_tile_pipes = 16;
1722		adev->gfx.config.max_cu_per_sh = 16;
1723		adev->gfx.config.max_sh_per_se = 1;
1724		adev->gfx.config.max_backends_per_se = 4;
1725		adev->gfx.config.max_texture_channel_caches = 16;
1726		adev->gfx.config.max_gprs = 256;
1727		adev->gfx.config.max_gs_threads = 32;
1728		adev->gfx.config.max_hw_contexts = 8;
1729
1730		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1735		break;
1736	case CHIP_POLARIS11:
1737	case CHIP_POLARIS12:
1738		ret = amdgpu_atombios_get_gfx_info(adev);
1739		if (ret)
1740			return ret;
1741		adev->gfx.config.max_gprs = 256;
1742		adev->gfx.config.max_gs_threads = 32;
1743		adev->gfx.config.max_hw_contexts = 8;
1744
1745		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1746		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1747		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1748		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1749		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1750		break;
1751	case CHIP_POLARIS10:
1752	case CHIP_VEGAM:
1753		ret = amdgpu_atombios_get_gfx_info(adev);
1754		if (ret)
1755			return ret;
1756		adev->gfx.config.max_gprs = 256;
1757		adev->gfx.config.max_gs_threads = 32;
1758		adev->gfx.config.max_hw_contexts = 8;
1759
1760		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1761		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1762		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1763		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1764		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1765		break;
1766	case CHIP_TONGA:
1767		adev->gfx.config.max_shader_engines = 4;
1768		adev->gfx.config.max_tile_pipes = 8;
1769		adev->gfx.config.max_cu_per_sh = 8;
1770		adev->gfx.config.max_sh_per_se = 1;
1771		adev->gfx.config.max_backends_per_se = 2;
1772		adev->gfx.config.max_texture_channel_caches = 8;
1773		adev->gfx.config.max_gprs = 256;
1774		adev->gfx.config.max_gs_threads = 32;
1775		adev->gfx.config.max_hw_contexts = 8;
1776
1777		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782		break;
1783	case CHIP_CARRIZO:
1784		adev->gfx.config.max_shader_engines = 1;
1785		adev->gfx.config.max_tile_pipes = 2;
1786		adev->gfx.config.max_sh_per_se = 1;
1787		adev->gfx.config.max_backends_per_se = 2;
1788		adev->gfx.config.max_cu_per_sh = 8;
1789		adev->gfx.config.max_texture_channel_caches = 2;
1790		adev->gfx.config.max_gprs = 256;
1791		adev->gfx.config.max_gs_threads = 32;
1792		adev->gfx.config.max_hw_contexts = 8;
1793
1794		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1799		break;
1800	case CHIP_STONEY:
1801		adev->gfx.config.max_shader_engines = 1;
1802		adev->gfx.config.max_tile_pipes = 2;
1803		adev->gfx.config.max_sh_per_se = 1;
1804		adev->gfx.config.max_backends_per_se = 1;
1805		adev->gfx.config.max_cu_per_sh = 3;
1806		adev->gfx.config.max_texture_channel_caches = 2;
1807		adev->gfx.config.max_gprs = 256;
1808		adev->gfx.config.max_gs_threads = 16;
1809		adev->gfx.config.max_hw_contexts = 8;
1810
1811		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816		break;
1817	default:
1818		adev->gfx.config.max_shader_engines = 2;
1819		adev->gfx.config.max_tile_pipes = 4;
1820		adev->gfx.config.max_cu_per_sh = 2;
1821		adev->gfx.config.max_sh_per_se = 1;
1822		adev->gfx.config.max_backends_per_se = 2;
1823		adev->gfx.config.max_texture_channel_caches = 4;
1824		adev->gfx.config.max_gprs = 256;
1825		adev->gfx.config.max_gs_threads = 32;
1826		adev->gfx.config.max_hw_contexts = 8;
1827
1828		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1833		break;
1834	}
1835
 
1836	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1837	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1838
1839	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1840				MC_ARB_RAMCFG, NOOFBANK);
1841	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1842				MC_ARB_RAMCFG, NOOFRANKS);
1843
1844	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1845	adev->gfx.config.mem_max_burst_length_bytes = 256;
1846	if (adev->flags & AMD_IS_APU) {
1847		/* Get memory bank mapping mode. */
1848		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1849		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1850		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1851
1852		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1853		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1854		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1855
1856		/* Validate settings in case only one DIMM installed. */
1857		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1858			dimm00_addr_map = 0;
1859		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1860			dimm01_addr_map = 0;
1861		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1862			dimm10_addr_map = 0;
1863		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1864			dimm11_addr_map = 0;
1865
1866		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1867		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1868		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1869			adev->gfx.config.mem_row_size_in_kb = 2;
1870		else
1871			adev->gfx.config.mem_row_size_in_kb = 1;
1872	} else {
1873		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1874		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1875		if (adev->gfx.config.mem_row_size_in_kb > 4)
1876			adev->gfx.config.mem_row_size_in_kb = 4;
1877	}
1878
1879	adev->gfx.config.shader_engine_tile_size = 32;
1880	adev->gfx.config.num_gpus = 1;
1881	adev->gfx.config.multi_gpu_tile_size = 64;
1882
1883	/* fix up row size */
1884	switch (adev->gfx.config.mem_row_size_in_kb) {
1885	case 1:
1886	default:
1887		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1888		break;
1889	case 2:
1890		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1891		break;
1892	case 4:
1893		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1894		break;
1895	}
1896	adev->gfx.config.gb_addr_config = gb_addr_config;
1897
1898	return 0;
1899}
1900
1901static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1902					int mec, int pipe, int queue)
1903{
1904	int r;
1905	unsigned irq_type;
1906	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1907	unsigned int hw_prio;
1908
1909	ring = &adev->gfx.compute_ring[ring_id];
1910
1911	/* mec0 is me1 */
1912	ring->me = mec + 1;
1913	ring->pipe = pipe;
1914	ring->queue = queue;
1915
1916	ring->ring_obj = NULL;
1917	ring->use_doorbell = true;
1918	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1919	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1920				+ (ring_id * GFX8_MEC_HPD_SIZE);
1921	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1922
1923	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1924		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1925		+ ring->pipe;
1926
1927	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1928			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
1929	/* type-2 packets are deprecated on MEC, use type-3 instead */
1930	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1931			     hw_prio, NULL);
1932	if (r)
1933		return r;
1934
1935
1936	return 0;
1937}
1938
1939static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1940
1941static int gfx_v8_0_sw_init(void *handle)
1942{
1943	int i, j, k, r, ring_id;
1944	struct amdgpu_ring *ring;
1945	struct amdgpu_kiq *kiq;
1946	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1947
1948	switch (adev->asic_type) {
 
1949	case CHIP_TONGA:
1950	case CHIP_CARRIZO:
1951	case CHIP_FIJI:
1952	case CHIP_POLARIS10:
1953	case CHIP_POLARIS11:
1954	case CHIP_POLARIS12:
1955	case CHIP_VEGAM:
 
1956		adev->gfx.mec.num_mec = 2;
1957		break;
1958	case CHIP_TOPAZ:
1959	case CHIP_STONEY:
1960	default:
1961		adev->gfx.mec.num_mec = 1;
1962		break;
1963	}
1964
1965	adev->gfx.mec.num_pipe_per_mec = 4;
1966	adev->gfx.mec.num_queue_per_pipe = 8;
1967
 
 
 
 
 
1968	/* EOP Event */
1969	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1970	if (r)
1971		return r;
1972
1973	/* Privileged reg */
1974	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1975			      &adev->gfx.priv_reg_irq);
1976	if (r)
1977		return r;
1978
1979	/* Privileged inst */
1980	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1981			      &adev->gfx.priv_inst_irq);
1982	if (r)
1983		return r;
1984
1985	/* Add CP EDC/ECC irq  */
1986	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1987			      &adev->gfx.cp_ecc_error_irq);
1988	if (r)
1989		return r;
1990
1991	/* SQ interrupts. */
1992	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1993			      &adev->gfx.sq_irq);
1994	if (r) {
1995		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1996		return r;
1997	}
1998
1999	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
2000
2001	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2002
2003	gfx_v8_0_scratch_init(adev);
2004
2005	r = gfx_v8_0_init_microcode(adev);
2006	if (r) {
2007		DRM_ERROR("Failed to load gfx firmware!\n");
2008		return r;
2009	}
2010
2011	r = adev->gfx.rlc.funcs->init(adev);
2012	if (r) {
2013		DRM_ERROR("Failed to init rlc BOs!\n");
2014		return r;
2015	}
2016
2017	r = gfx_v8_0_mec_init(adev);
2018	if (r) {
2019		DRM_ERROR("Failed to init MEC BOs!\n");
2020		return r;
2021	}
2022
2023	/* set up the gfx ring */
2024	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2025		ring = &adev->gfx.gfx_ring[i];
2026		ring->ring_obj = NULL;
2027		sprintf(ring->name, "gfx");
2028		/* no gfx doorbells on iceland */
2029		if (adev->asic_type != CHIP_TOPAZ) {
2030			ring->use_doorbell = true;
2031			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2032		}
2033
2034		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2035				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2036				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2037		if (r)
2038			return r;
2039	}
2040
2041
2042	/* set up the compute queues - allocate horizontally across pipes */
2043	ring_id = 0;
2044	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2045		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2046			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2047				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2048					continue;
2049
2050				r = gfx_v8_0_compute_ring_init(adev,
2051								ring_id,
2052								i, k, j);
2053				if (r)
2054					return r;
2055
2056				ring_id++;
2057			}
2058		}
2059	}
2060
2061	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2062	if (r) {
2063		DRM_ERROR("Failed to init KIQ BOs!\n");
2064		return r;
2065	}
2066
2067	kiq = &adev->gfx.kiq;
2068	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2069	if (r)
2070		return r;
2071
2072	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2073	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2074	if (r)
2075		return r;
2076
2077	adev->gfx.ce_ram_size = 0x8000;
2078
2079	r = gfx_v8_0_gpu_early_init(adev);
2080	if (r)
2081		return r;
2082
2083	return 0;
2084}
2085
2086static int gfx_v8_0_sw_fini(void *handle)
2087{
 
2088	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2089	int i;
 
 
 
2090
2091	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2092		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2093	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2094		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2095
2096	amdgpu_gfx_mqd_sw_fini(adev);
2097	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2098	amdgpu_gfx_kiq_fini(adev);
2099
2100	gfx_v8_0_mec_fini(adev);
2101	amdgpu_gfx_rlc_fini(adev);
2102	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2103				&adev->gfx.rlc.clear_state_gpu_addr,
2104				(void **)&adev->gfx.rlc.cs_ptr);
2105	if ((adev->asic_type == CHIP_CARRIZO) ||
2106	    (adev->asic_type == CHIP_STONEY)) {
2107		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2108				&adev->gfx.rlc.cp_table_gpu_addr,
2109				(void **)&adev->gfx.rlc.cp_table_ptr);
2110	}
2111	gfx_v8_0_free_microcode(adev);
2112
2113	return 0;
2114}
2115
2116static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2117{
2118	uint32_t *modearray, *mod2array;
2119	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2120	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2121	u32 reg_offset;
2122
2123	modearray = adev->gfx.config.tile_mode_array;
2124	mod2array = adev->gfx.config.macrotile_mode_array;
2125
2126	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2127		modearray[reg_offset] = 0;
2128
2129	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2130		mod2array[reg_offset] = 0;
2131
2132	switch (adev->asic_type) {
2133	case CHIP_TOPAZ:
2134		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135				PIPE_CONFIG(ADDR_SURF_P2) |
2136				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2137				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2139				PIPE_CONFIG(ADDR_SURF_P2) |
2140				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2141				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2143				PIPE_CONFIG(ADDR_SURF_P2) |
2144				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2145				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2147				PIPE_CONFIG(ADDR_SURF_P2) |
2148				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2149				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2150		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151				PIPE_CONFIG(ADDR_SURF_P2) |
2152				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2153				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2155				PIPE_CONFIG(ADDR_SURF_P2) |
2156				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2157				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2159				PIPE_CONFIG(ADDR_SURF_P2) |
2160				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2161				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2163				PIPE_CONFIG(ADDR_SURF_P2));
2164		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2165				PIPE_CONFIG(ADDR_SURF_P2) |
2166				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169				 PIPE_CONFIG(ADDR_SURF_P2) |
2170				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2171				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173				 PIPE_CONFIG(ADDR_SURF_P2) |
2174				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2175				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2177				 PIPE_CONFIG(ADDR_SURF_P2) |
2178				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2180		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181				 PIPE_CONFIG(ADDR_SURF_P2) |
2182				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2185				 PIPE_CONFIG(ADDR_SURF_P2) |
2186				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189				 PIPE_CONFIG(ADDR_SURF_P2) |
2190				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2193				 PIPE_CONFIG(ADDR_SURF_P2) |
2194				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2197				 PIPE_CONFIG(ADDR_SURF_P2) |
2198				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2201				 PIPE_CONFIG(ADDR_SURF_P2) |
2202				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2205				 PIPE_CONFIG(ADDR_SURF_P2) |
2206				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2209				 PIPE_CONFIG(ADDR_SURF_P2) |
2210				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2211				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2213				 PIPE_CONFIG(ADDR_SURF_P2) |
2214				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2215				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2217				 PIPE_CONFIG(ADDR_SURF_P2) |
2218				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2221				 PIPE_CONFIG(ADDR_SURF_P2) |
2222				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225				 PIPE_CONFIG(ADDR_SURF_P2) |
2226				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2227				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2228		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229				 PIPE_CONFIG(ADDR_SURF_P2) |
2230				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2231				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2232		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233				 PIPE_CONFIG(ADDR_SURF_P2) |
2234				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2235				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2236
2237		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2238				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240				NUM_BANKS(ADDR_SURF_8_BANK));
2241		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244				NUM_BANKS(ADDR_SURF_8_BANK));
2245		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2246				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248				NUM_BANKS(ADDR_SURF_8_BANK));
2249		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252				NUM_BANKS(ADDR_SURF_8_BANK));
2253		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256				NUM_BANKS(ADDR_SURF_8_BANK));
2257		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2259				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260				NUM_BANKS(ADDR_SURF_8_BANK));
2261		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264				NUM_BANKS(ADDR_SURF_8_BANK));
2265		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2266				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2267				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268				NUM_BANKS(ADDR_SURF_16_BANK));
2269		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2270				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2271				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272				NUM_BANKS(ADDR_SURF_16_BANK));
2273		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2275				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276				 NUM_BANKS(ADDR_SURF_16_BANK));
2277		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2278				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2279				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280				 NUM_BANKS(ADDR_SURF_16_BANK));
2281		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2282				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2283				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284				 NUM_BANKS(ADDR_SURF_16_BANK));
2285		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288				 NUM_BANKS(ADDR_SURF_16_BANK));
2289		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2291				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2292				 NUM_BANKS(ADDR_SURF_8_BANK));
2293
2294		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2295			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2296			    reg_offset != 23)
2297				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2298
2299		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2300			if (reg_offset != 7)
2301				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2302
2303		break;
2304	case CHIP_FIJI:
2305	case CHIP_VEGAM:
2306		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2309				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2313				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2317				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2321				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2325				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2329				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2333				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2335				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2336				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2339				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2340		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2343				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2357				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2365				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2367				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2377				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2381				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2389				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2393				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2397				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2398				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2401				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2403				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2405				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2409				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2421				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2424		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2426				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2428
2429		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432				NUM_BANKS(ADDR_SURF_8_BANK));
2433		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2435				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436				NUM_BANKS(ADDR_SURF_8_BANK));
2437		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440				NUM_BANKS(ADDR_SURF_8_BANK));
2441		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444				NUM_BANKS(ADDR_SURF_8_BANK));
2445		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2447				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448				NUM_BANKS(ADDR_SURF_8_BANK));
2449		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452				NUM_BANKS(ADDR_SURF_8_BANK));
2453		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456				NUM_BANKS(ADDR_SURF_8_BANK));
2457		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2459				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460				NUM_BANKS(ADDR_SURF_8_BANK));
2461		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464				NUM_BANKS(ADDR_SURF_8_BANK));
2465		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468				 NUM_BANKS(ADDR_SURF_8_BANK));
2469		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472				 NUM_BANKS(ADDR_SURF_8_BANK));
2473		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2475				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476				 NUM_BANKS(ADDR_SURF_8_BANK));
2477		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2479				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480				 NUM_BANKS(ADDR_SURF_8_BANK));
2481		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484				 NUM_BANKS(ADDR_SURF_4_BANK));
2485
2486		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2487			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2488
2489		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2490			if (reg_offset != 7)
2491				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2492
2493		break;
2494	case CHIP_TONGA:
2495		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2498				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2502				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2506				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2510				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2514				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2516				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2518				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2520				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2522				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2524				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2525				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2528				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2529		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2532				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2536				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2540				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2546				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2554				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2556				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2566				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2570				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2578				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2582				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2586				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2590				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2592				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2594				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2598				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2602				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2605		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2608				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2612				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2613		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2617
2618		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621				NUM_BANKS(ADDR_SURF_16_BANK));
2622		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625				NUM_BANKS(ADDR_SURF_16_BANK));
2626		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629				NUM_BANKS(ADDR_SURF_16_BANK));
2630		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633				NUM_BANKS(ADDR_SURF_16_BANK));
2634		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637				NUM_BANKS(ADDR_SURF_16_BANK));
2638		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641				NUM_BANKS(ADDR_SURF_16_BANK));
2642		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645				NUM_BANKS(ADDR_SURF_16_BANK));
2646		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2648				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649				NUM_BANKS(ADDR_SURF_16_BANK));
2650		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653				NUM_BANKS(ADDR_SURF_16_BANK));
2654		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657				 NUM_BANKS(ADDR_SURF_16_BANK));
2658		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2661				 NUM_BANKS(ADDR_SURF_16_BANK));
2662		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665				 NUM_BANKS(ADDR_SURF_8_BANK));
2666		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2669				 NUM_BANKS(ADDR_SURF_4_BANK));
2670		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2673				 NUM_BANKS(ADDR_SURF_4_BANK));
2674
2675		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2676			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2677
2678		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2679			if (reg_offset != 7)
2680				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2681
2682		break;
2683	case CHIP_POLARIS11:
2684	case CHIP_POLARIS12:
2685		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2688				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2692				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2700				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2704				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2708				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2710				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2712				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2714				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2716				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2718				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2719		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2722				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2726				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2730				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2734				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2744				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2756				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2760				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2768				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2772				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2776				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2782				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2784				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2788				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2794				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2795		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2798				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2799		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2800				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2802				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2803		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2806				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2807
2808		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2810				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811				NUM_BANKS(ADDR_SURF_16_BANK));
2812
2813		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816				NUM_BANKS(ADDR_SURF_16_BANK));
2817
2818		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2820				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2821				NUM_BANKS(ADDR_SURF_16_BANK));
2822
2823		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2825				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2826				NUM_BANKS(ADDR_SURF_16_BANK));
2827
2828		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2831				NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836				NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841				NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2844				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2845				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846				NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2851				NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856				NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2860				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861				NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2864				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2865				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2866				NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2870				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2871				NUM_BANKS(ADDR_SURF_8_BANK));
2872
2873		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2875				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2876				NUM_BANKS(ADDR_SURF_4_BANK));
2877
2878		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2879			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2880
2881		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2882			if (reg_offset != 7)
2883				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2884
2885		break;
2886	case CHIP_POLARIS10:
2887		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009
3010		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013				NUM_BANKS(ADDR_SURF_16_BANK));
3014
3015		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018				NUM_BANKS(ADDR_SURF_16_BANK));
3019
3020		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023				NUM_BANKS(ADDR_SURF_16_BANK));
3024
3025		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028				NUM_BANKS(ADDR_SURF_16_BANK));
3029
3030		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033				NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038				NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043				NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048				NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053				NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058				NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063				NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068				NUM_BANKS(ADDR_SURF_8_BANK));
3069
3070		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073				NUM_BANKS(ADDR_SURF_4_BANK));
3074
3075		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078				NUM_BANKS(ADDR_SURF_4_BANK));
3079
3080		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082
3083		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084			if (reg_offset != 7)
3085				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086
3087		break;
3088	case CHIP_STONEY:
3089		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090				PIPE_CONFIG(ADDR_SURF_P2) |
3091				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094				PIPE_CONFIG(ADDR_SURF_P2) |
3095				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098				PIPE_CONFIG(ADDR_SURF_P2) |
3099				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102				PIPE_CONFIG(ADDR_SURF_P2) |
3103				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106				PIPE_CONFIG(ADDR_SURF_P2) |
3107				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110				PIPE_CONFIG(ADDR_SURF_P2) |
3111				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114				PIPE_CONFIG(ADDR_SURF_P2) |
3115				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118				PIPE_CONFIG(ADDR_SURF_P2));
3119		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120				PIPE_CONFIG(ADDR_SURF_P2) |
3121				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124				 PIPE_CONFIG(ADDR_SURF_P2) |
3125				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128				 PIPE_CONFIG(ADDR_SURF_P2) |
3129				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132				 PIPE_CONFIG(ADDR_SURF_P2) |
3133				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136				 PIPE_CONFIG(ADDR_SURF_P2) |
3137				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140				 PIPE_CONFIG(ADDR_SURF_P2) |
3141				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144				 PIPE_CONFIG(ADDR_SURF_P2) |
3145				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148				 PIPE_CONFIG(ADDR_SURF_P2) |
3149				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152				 PIPE_CONFIG(ADDR_SURF_P2) |
3153				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156				 PIPE_CONFIG(ADDR_SURF_P2) |
3157				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160				 PIPE_CONFIG(ADDR_SURF_P2) |
3161				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164				 PIPE_CONFIG(ADDR_SURF_P2) |
3165				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168				 PIPE_CONFIG(ADDR_SURF_P2) |
3169				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172				 PIPE_CONFIG(ADDR_SURF_P2) |
3173				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176				 PIPE_CONFIG(ADDR_SURF_P2) |
3177				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180				 PIPE_CONFIG(ADDR_SURF_P2) |
3181				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184				 PIPE_CONFIG(ADDR_SURF_P2) |
3185				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188				 PIPE_CONFIG(ADDR_SURF_P2) |
3189				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191
3192		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195				NUM_BANKS(ADDR_SURF_8_BANK));
3196		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199				NUM_BANKS(ADDR_SURF_8_BANK));
3200		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203				NUM_BANKS(ADDR_SURF_8_BANK));
3204		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207				NUM_BANKS(ADDR_SURF_8_BANK));
3208		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211				NUM_BANKS(ADDR_SURF_8_BANK));
3212		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215				NUM_BANKS(ADDR_SURF_8_BANK));
3216		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219				NUM_BANKS(ADDR_SURF_8_BANK));
3220		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223				NUM_BANKS(ADDR_SURF_16_BANK));
3224		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227				NUM_BANKS(ADDR_SURF_16_BANK));
3228		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231				 NUM_BANKS(ADDR_SURF_16_BANK));
3232		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235				 NUM_BANKS(ADDR_SURF_16_BANK));
3236		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239				 NUM_BANKS(ADDR_SURF_16_BANK));
3240		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243				 NUM_BANKS(ADDR_SURF_16_BANK));
3244		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247				 NUM_BANKS(ADDR_SURF_8_BANK));
3248
3249		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251			    reg_offset != 23)
3252				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253
3254		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255			if (reg_offset != 7)
3256				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257
3258		break;
3259	default:
3260		dev_warn(adev->dev,
3261			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3262			 adev->asic_type);
3263		fallthrough;
3264
3265	case CHIP_CARRIZO:
3266		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267				PIPE_CONFIG(ADDR_SURF_P2) |
3268				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3269				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3271				PIPE_CONFIG(ADDR_SURF_P2) |
3272				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3273				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3275				PIPE_CONFIG(ADDR_SURF_P2) |
3276				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3277				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279				PIPE_CONFIG(ADDR_SURF_P2) |
3280				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3281				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283				PIPE_CONFIG(ADDR_SURF_P2) |
3284				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3285				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3287				PIPE_CONFIG(ADDR_SURF_P2) |
3288				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3289				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3291				PIPE_CONFIG(ADDR_SURF_P2) |
3292				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3293				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3295				PIPE_CONFIG(ADDR_SURF_P2));
3296		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297				PIPE_CONFIG(ADDR_SURF_P2) |
3298				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3299				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3301				 PIPE_CONFIG(ADDR_SURF_P2) |
3302				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3303				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305				 PIPE_CONFIG(ADDR_SURF_P2) |
3306				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3307				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309				 PIPE_CONFIG(ADDR_SURF_P2) |
3310				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313				 PIPE_CONFIG(ADDR_SURF_P2) |
3314				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3315				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3317				 PIPE_CONFIG(ADDR_SURF_P2) |
3318				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3319				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3321				 PIPE_CONFIG(ADDR_SURF_P2) |
3322				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3324		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3325				 PIPE_CONFIG(ADDR_SURF_P2) |
3326				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3329				 PIPE_CONFIG(ADDR_SURF_P2) |
3330				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3331				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3333				 PIPE_CONFIG(ADDR_SURF_P2) |
3334				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3337				 PIPE_CONFIG(ADDR_SURF_P2) |
3338				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3341				 PIPE_CONFIG(ADDR_SURF_P2) |
3342				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345				 PIPE_CONFIG(ADDR_SURF_P2) |
3346				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3349				 PIPE_CONFIG(ADDR_SURF_P2) |
3350				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3353				 PIPE_CONFIG(ADDR_SURF_P2) |
3354				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3357				 PIPE_CONFIG(ADDR_SURF_P2) |
3358				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3359				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3360		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3361				 PIPE_CONFIG(ADDR_SURF_P2) |
3362				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3363				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3364		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3365				 PIPE_CONFIG(ADDR_SURF_P2) |
3366				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3367				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3368
3369		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3371				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3372				NUM_BANKS(ADDR_SURF_8_BANK));
3373		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3375				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376				NUM_BANKS(ADDR_SURF_8_BANK));
3377		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380				NUM_BANKS(ADDR_SURF_8_BANK));
3381		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3383				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3384				NUM_BANKS(ADDR_SURF_8_BANK));
3385		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3388				NUM_BANKS(ADDR_SURF_8_BANK));
3389		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392				NUM_BANKS(ADDR_SURF_8_BANK));
3393		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396				NUM_BANKS(ADDR_SURF_8_BANK));
3397		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3398				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3399				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400				NUM_BANKS(ADDR_SURF_16_BANK));
3401		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3402				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3403				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404				NUM_BANKS(ADDR_SURF_16_BANK));
3405		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3406				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3407				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3408				 NUM_BANKS(ADDR_SURF_16_BANK));
3409		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3410				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3411				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412				 NUM_BANKS(ADDR_SURF_16_BANK));
3413		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3414				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3415				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416				 NUM_BANKS(ADDR_SURF_16_BANK));
3417		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3418				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3419				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420				 NUM_BANKS(ADDR_SURF_16_BANK));
3421		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3422				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3423				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3424				 NUM_BANKS(ADDR_SURF_8_BANK));
3425
3426		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3427			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3428			    reg_offset != 23)
3429				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3430
3431		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3432			if (reg_offset != 7)
3433				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3434
3435		break;
3436	}
3437}
3438
3439static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3440				  u32 se_num, u32 sh_num, u32 instance)
3441{
3442	u32 data;
3443
3444	if (instance == 0xffffffff)
3445		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3446	else
3447		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3448
3449	if (se_num == 0xffffffff)
3450		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3451	else
3452		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3453
3454	if (sh_num == 0xffffffff)
3455		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3456	else
3457		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3458
3459	WREG32(mmGRBM_GFX_INDEX, data);
3460}
3461
3462static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3463				  u32 me, u32 pipe, u32 q, u32 vm)
3464{
3465	vi_srbm_select(adev, me, pipe, q, vm);
3466}
3467
3468static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3469{
3470	u32 data, mask;
3471
3472	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3473		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3474
3475	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3476
3477	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3478					 adev->gfx.config.max_sh_per_se);
3479
3480	return (~data) & mask;
3481}
3482
3483static void
3484gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3485{
3486	switch (adev->asic_type) {
3487	case CHIP_FIJI:
3488	case CHIP_VEGAM:
3489		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3490			  RB_XSEL2(1) | PKR_MAP(2) |
3491			  PKR_XSEL(1) | PKR_YSEL(1) |
3492			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3493		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3494			   SE_PAIR_YSEL(2);
3495		break;
3496	case CHIP_TONGA:
3497	case CHIP_POLARIS10:
3498		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3499			  SE_XSEL(1) | SE_YSEL(1);
3500		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3501			   SE_PAIR_YSEL(2);
3502		break;
3503	case CHIP_TOPAZ:
3504	case CHIP_CARRIZO:
3505		*rconf |= RB_MAP_PKR0(2);
3506		*rconf1 |= 0x0;
3507		break;
3508	case CHIP_POLARIS11:
3509	case CHIP_POLARIS12:
3510		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3511			  SE_XSEL(1) | SE_YSEL(1);
3512		*rconf1 |= 0x0;
3513		break;
3514	case CHIP_STONEY:
3515		*rconf |= 0x0;
3516		*rconf1 |= 0x0;
3517		break;
3518	default:
3519		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3520		break;
3521	}
3522}
3523
3524static void
3525gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3526					u32 raster_config, u32 raster_config_1,
3527					unsigned rb_mask, unsigned num_rb)
3528{
3529	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3530	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3531	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3532	unsigned rb_per_se = num_rb / num_se;
3533	unsigned se_mask[4];
3534	unsigned se;
3535
3536	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3537	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3538	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3539	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3540
3541	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3542	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3543	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3544
3545	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3546			     (!se_mask[2] && !se_mask[3]))) {
3547		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3548
3549		if (!se_mask[0] && !se_mask[1]) {
3550			raster_config_1 |=
3551				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3552		} else {
3553			raster_config_1 |=
3554				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3555		}
3556	}
3557
3558	for (se = 0; se < num_se; se++) {
3559		unsigned raster_config_se = raster_config;
3560		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3561		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3562		int idx = (se / 2) * 2;
3563
3564		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3565			raster_config_se &= ~SE_MAP_MASK;
3566
3567			if (!se_mask[idx]) {
3568				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3569			} else {
3570				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3571			}
3572		}
3573
3574		pkr0_mask &= rb_mask;
3575		pkr1_mask &= rb_mask;
3576		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3577			raster_config_se &= ~PKR_MAP_MASK;
3578
3579			if (!pkr0_mask) {
3580				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3581			} else {
3582				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3583			}
3584		}
3585
3586		if (rb_per_se >= 2) {
3587			unsigned rb0_mask = 1 << (se * rb_per_se);
3588			unsigned rb1_mask = rb0_mask << 1;
3589
3590			rb0_mask &= rb_mask;
3591			rb1_mask &= rb_mask;
3592			if (!rb0_mask || !rb1_mask) {
3593				raster_config_se &= ~RB_MAP_PKR0_MASK;
3594
3595				if (!rb0_mask) {
3596					raster_config_se |=
3597						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3598				} else {
3599					raster_config_se |=
3600						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3601				}
3602			}
3603
3604			if (rb_per_se > 2) {
3605				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3606				rb1_mask = rb0_mask << 1;
3607				rb0_mask &= rb_mask;
3608				rb1_mask &= rb_mask;
3609				if (!rb0_mask || !rb1_mask) {
3610					raster_config_se &= ~RB_MAP_PKR1_MASK;
3611
3612					if (!rb0_mask) {
3613						raster_config_se |=
3614							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3615					} else {
3616						raster_config_se |=
3617							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3618					}
3619				}
3620			}
3621		}
3622
3623		/* GRBM_GFX_INDEX has a different offset on VI */
3624		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3625		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3626		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3627	}
3628
3629	/* GRBM_GFX_INDEX has a different offset on VI */
3630	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3631}
3632
3633static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3634{
3635	int i, j;
3636	u32 data;
3637	u32 raster_config = 0, raster_config_1 = 0;
3638	u32 active_rbs = 0;
3639	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3640					adev->gfx.config.max_sh_per_se;
3641	unsigned num_rb_pipes;
3642
3643	mutex_lock(&adev->grbm_idx_mutex);
3644	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3645		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3646			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3647			data = gfx_v8_0_get_rb_active_bitmap(adev);
3648			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3649					       rb_bitmap_width_per_sh);
3650		}
3651	}
3652	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3653
3654	adev->gfx.config.backend_enable_mask = active_rbs;
3655	adev->gfx.config.num_rbs = hweight32(active_rbs);
3656
3657	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3658			     adev->gfx.config.max_shader_engines, 16);
3659
3660	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3661
3662	if (!adev->gfx.config.backend_enable_mask ||
3663			adev->gfx.config.num_rbs >= num_rb_pipes) {
3664		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3665		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3666	} else {
3667		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3668							adev->gfx.config.backend_enable_mask,
3669							num_rb_pipes);
3670	}
3671
3672	/* cache the values for userspace */
3673	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3674		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3675			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3676			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3677				RREG32(mmCC_RB_BACKEND_DISABLE);
3678			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3679				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3680			adev->gfx.config.rb_config[i][j].raster_config =
3681				RREG32(mmPA_SC_RASTER_CONFIG);
3682			adev->gfx.config.rb_config[i][j].raster_config_1 =
3683				RREG32(mmPA_SC_RASTER_CONFIG_1);
3684		}
3685	}
3686	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3687	mutex_unlock(&adev->grbm_idx_mutex);
3688}
3689
3690#define DEFAULT_SH_MEM_BASES	(0x6000)
3691/**
3692 * gfx_v8_0_init_compute_vmid - gart enable
3693 *
3694 * @adev: amdgpu_device pointer
3695 *
3696 * Initialize compute vmid sh_mem registers
3697 *
3698 */
 
 
 
3699static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3700{
3701	int i;
3702	uint32_t sh_mem_config;
3703	uint32_t sh_mem_bases;
3704
3705	/*
3706	 * Configure apertures:
3707	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3708	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3709	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3710	 */
3711	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3712
3713	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3714			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3715			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3716			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3717			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3718			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3719
3720	mutex_lock(&adev->srbm_mutex);
3721	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3722		vi_srbm_select(adev, 0, 0, 0, i);
3723		/* CP and shaders */
3724		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3725		WREG32(mmSH_MEM_APE1_BASE, 1);
3726		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3727		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3728	}
3729	vi_srbm_select(adev, 0, 0, 0, 0);
3730	mutex_unlock(&adev->srbm_mutex);
3731
3732	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3733	   acccess. These should be enabled by FW for target VMIDs. */
3734	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3735		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3736		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3737		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3738		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3739	}
3740}
3741
3742static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3743{
3744	int vmid;
3745
3746	/*
3747	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3748	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3749	 * the driver can enable them for graphics. VMID0 should maintain
3750	 * access so that HWS firmware can save/restore entries.
3751	 */
3752	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3753		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3754		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3755		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3756		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3757	}
3758}
3759
3760static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3761{
3762	switch (adev->asic_type) {
3763	default:
3764		adev->gfx.config.double_offchip_lds_buf = 1;
3765		break;
3766	case CHIP_CARRIZO:
3767	case CHIP_STONEY:
3768		adev->gfx.config.double_offchip_lds_buf = 0;
3769		break;
3770	}
3771}
3772
3773static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3774{
3775	u32 tmp, sh_static_mem_cfg;
3776	int i;
3777
3778	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3779	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3780	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3781	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3782
3783	gfx_v8_0_tiling_mode_table_init(adev);
3784	gfx_v8_0_setup_rb(adev);
3785	gfx_v8_0_get_cu_info(adev);
3786	gfx_v8_0_config_init(adev);
3787
3788	/* XXX SH_MEM regs */
3789	/* where to put LDS, scratch, GPUVM in FSA64 space */
3790	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3791				   SWIZZLE_ENABLE, 1);
3792	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3793				   ELEMENT_SIZE, 1);
3794	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3795				   INDEX_STRIDE, 3);
3796	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3797
3798	mutex_lock(&adev->srbm_mutex);
3799	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3800		vi_srbm_select(adev, 0, 0, 0, i);
3801		/* CP and shaders */
3802		if (i == 0) {
3803			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3804			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3805			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3806					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3807			WREG32(mmSH_MEM_CONFIG, tmp);
3808			WREG32(mmSH_MEM_BASES, 0);
3809		} else {
3810			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3811			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3812			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3813					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3814			WREG32(mmSH_MEM_CONFIG, tmp);
3815			tmp = adev->gmc.shared_aperture_start >> 48;
3816			WREG32(mmSH_MEM_BASES, tmp);
3817		}
3818
3819		WREG32(mmSH_MEM_APE1_BASE, 1);
3820		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3821	}
3822	vi_srbm_select(adev, 0, 0, 0, 0);
3823	mutex_unlock(&adev->srbm_mutex);
3824
3825	gfx_v8_0_init_compute_vmid(adev);
3826	gfx_v8_0_init_gds_vmid(adev);
3827
3828	mutex_lock(&adev->grbm_idx_mutex);
3829	/*
3830	 * making sure that the following register writes will be broadcasted
3831	 * to all the shaders
3832	 */
3833	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3834
3835	WREG32(mmPA_SC_FIFO_SIZE,
3836		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3837			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3838		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3839			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3840		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3841			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3842		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3843			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3844
3845	tmp = RREG32(mmSPI_ARB_PRIORITY);
3846	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3847	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3848	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3849	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3850	WREG32(mmSPI_ARB_PRIORITY, tmp);
3851
3852	mutex_unlock(&adev->grbm_idx_mutex);
3853
3854}
3855
3856static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3857{
3858	u32 i, j, k;
3859	u32 mask;
3860
3861	mutex_lock(&adev->grbm_idx_mutex);
3862	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3863		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3864			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3865			for (k = 0; k < adev->usec_timeout; k++) {
3866				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3867					break;
3868				udelay(1);
3869			}
3870			if (k == adev->usec_timeout) {
3871				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3872						      0xffffffff, 0xffffffff);
3873				mutex_unlock(&adev->grbm_idx_mutex);
3874				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3875					 i, j);
3876				return;
3877			}
3878		}
3879	}
3880	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3881	mutex_unlock(&adev->grbm_idx_mutex);
3882
3883	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3884		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3885		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3886		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3887	for (k = 0; k < adev->usec_timeout; k++) {
3888		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3889			break;
3890		udelay(1);
3891	}
3892}
3893
3894static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3895					       bool enable)
3896{
3897	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3898
3899	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3900	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3901	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3902	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3903
3904	WREG32(mmCP_INT_CNTL_RING0, tmp);
3905}
3906
3907static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3908{
3909	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3910	/* csib */
3911	WREG32(mmRLC_CSIB_ADDR_HI,
3912			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3913	WREG32(mmRLC_CSIB_ADDR_LO,
3914			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3915	WREG32(mmRLC_CSIB_LENGTH,
3916			adev->gfx.rlc.clear_state_size);
3917}
3918
/*
 * Walk the indirect part of an RLC register list format array.
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset:           index of the first dword to look at
 * @list_size:            number of dwords in @register_list_format
 * @unique_indices:       out: distinct index values found so far
 * @indices_count:        in/out: number of valid @unique_indices entries
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    out: offset at which each list entry starts
 * @offset_count:         in/out: number of valid @ind_start_offsets entries
 * @max_offset:           capacity of @ind_start_offsets
 *
 * A dword equal to 0xFFFFFFFF ends an entry; the next dword starts a new
 * one and its offset is recorded.  For any other dword, the value two
 * dwords further on is looked up in @unique_indices (and appended if
 * absent), and that dword is overwritten with the position of the value
 * in @unique_indices.
 *
 * The function BUG()s as soon as either output array becomes full.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/*
		 * A list that ends in the middle of a record would make the
		 * accesses below run past the end of the buffer; stop
		 * parsing instead of reading and writing out of bounds.
		 */
		if (WARN_ON(ind_offset >= list_size))
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its position in the table */
		register_list_format[ind_offset] = indices;
	}
}
3968
3969static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3970{
3971	int i, temp, data;
3972	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3973	int indices_count = 0;
3974	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3975	int offset_count = 0;
3976
3977	int list_size;
3978	unsigned int *register_list_format =
3979		kmemdup(adev->gfx.rlc.register_list_format,
3980			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3981	if (!register_list_format)
3982		return -ENOMEM;
 
 
3983
3984	gfx_v8_0_parse_ind_reg_list(register_list_format,
3985				RLC_FormatDirectRegListLength,
3986				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3987				unique_indices,
3988				&indices_count,
3989				ARRAY_SIZE(unique_indices),
3990				indirect_start_offsets,
3991				&offset_count,
3992				ARRAY_SIZE(indirect_start_offsets));
3993
3994	/* save and restore list */
3995	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3996
3997	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3998	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3999		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4000
4001	/* indirect list */
4002	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4003	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4004		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4005
4006	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4007	list_size = list_size >> 1;
4008	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4009	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4010
4011	/* starting offsets starts */
4012	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4013		adev->gfx.rlc.starting_offsets_start);
4014	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4015		WREG32(mmRLC_GPM_SCRATCH_DATA,
4016				indirect_start_offsets[i]);
4017
4018	/* unique indices */
4019	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4020	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4021	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4022		if (unique_indices[i] != 0) {
4023			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4024			WREG32(data + i, unique_indices[i] >> 20);
4025		}
4026	}
4027	kfree(register_list_format);
4028
4029	return 0;
4030}
4031
4032static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4033{
4034	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4035}
4036
4037static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4038{
4039	uint32_t data;
4040
4041	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4042
4043	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4044	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4045	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4046	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4047	WREG32(mmRLC_PG_DELAY, data);
4048
4049	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4050	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4051
4052}
4053
4054static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4055						bool enable)
4056{
4057	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4058}
4059
4060static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4061						  bool enable)
4062{
4063	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4064}
4065
4066static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4067{
4068	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4069}
4070
4071static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4072{
4073	if ((adev->asic_type == CHIP_CARRIZO) ||
4074	    (adev->asic_type == CHIP_STONEY)) {
4075		gfx_v8_0_init_csb(adev);
4076		gfx_v8_0_init_save_restore_list(adev);
4077		gfx_v8_0_enable_save_restore_machine(adev);
4078		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4079		gfx_v8_0_init_power_gating(adev);
4080		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4081	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4082		   (adev->asic_type == CHIP_POLARIS12) ||
4083		   (adev->asic_type == CHIP_VEGAM)) {
4084		gfx_v8_0_init_csb(adev);
4085		gfx_v8_0_init_save_restore_list(adev);
4086		gfx_v8_0_enable_save_restore_machine(adev);
4087		gfx_v8_0_init_power_gating(adev);
4088	}
4089
4090}
4091
4092static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4093{
4094	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4095
4096	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4097	gfx_v8_0_wait_for_rlc_serdes(adev);
4098}
4099
4100static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4101{
4102	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4103	udelay(50);
4104
4105	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4106	udelay(50);
4107}
4108
4109static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4110{
4111	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4112
4113	/* carrizo do enable cp interrupt after cp inited */
4114	if (!(adev->flags & AMD_IS_APU))
4115		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4116
4117	udelay(50);
4118}
4119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4120static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4121{
4122	if (amdgpu_sriov_vf(adev)) {
4123		gfx_v8_0_init_csb(adev);
4124		return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
4125	}
4126
4127	adev->gfx.rlc.funcs->stop(adev);
4128	adev->gfx.rlc.funcs->reset(adev);
 
 
4129	gfx_v8_0_init_pg(adev);
4130	adev->gfx.rlc.funcs->start(adev);
 
 
 
 
 
 
 
 
 
4131
4132	return 0;
4133}
4134
4135static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4136{
 
4137	u32 tmp = RREG32(mmCP_ME_CNTL);
4138
4139	if (enable) {
4140		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4141		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4142		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4143	} else {
4144		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4145		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4146		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
 
 
4147	}
4148	WREG32(mmCP_ME_CNTL, tmp);
4149	udelay(50);
4150}
4151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4152static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4153{
4154	u32 count = 0;
4155	const struct cs_section_def *sect = NULL;
4156	const struct cs_extent_def *ext = NULL;
4157
4158	/* begin clear state */
4159	count += 2;
4160	/* context control state */
4161	count += 3;
4162
4163	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4164		for (ext = sect->section; ext->extent != NULL; ++ext) {
4165			if (sect->id == SECT_CONTEXT)
4166				count += 2 + ext->reg_count;
4167			else
4168				return 0;
4169		}
4170	}
4171	/* pa_sc_raster_config/pa_sc_raster_config1 */
4172	count += 4;
4173	/* end clear state */
4174	count += 2;
4175	/* clear state */
4176	count += 2;
4177
4178	return count;
4179}
4180
4181static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4182{
4183	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4184	const struct cs_section_def *sect = NULL;
4185	const struct cs_extent_def *ext = NULL;
4186	int r, i;
4187
4188	/* init the CP */
4189	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4190	WREG32(mmCP_ENDIAN_SWAP, 0);
4191	WREG32(mmCP_DEVICE_ID, 1);
4192
4193	gfx_v8_0_cp_gfx_enable(adev, true);
4194
4195	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4196	if (r) {
4197		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4198		return r;
4199	}
4200
4201	/* clear state buffer */
4202	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4203	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4204
4205	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4206	amdgpu_ring_write(ring, 0x80000000);
4207	amdgpu_ring_write(ring, 0x80000000);
4208
4209	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4210		for (ext = sect->section; ext->extent != NULL; ++ext) {
4211			if (sect->id == SECT_CONTEXT) {
4212				amdgpu_ring_write(ring,
4213				       PACKET3(PACKET3_SET_CONTEXT_REG,
4214					       ext->reg_count));
4215				amdgpu_ring_write(ring,
4216				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4217				for (i = 0; i < ext->reg_count; i++)
4218					amdgpu_ring_write(ring, ext->extent[i]);
4219			}
4220		}
4221	}
4222
4223	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4224	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4225	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4226	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4227
4228	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4229	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4230
4231	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4232	amdgpu_ring_write(ring, 0);
4233
4234	/* init the CE partitions */
4235	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4236	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4237	amdgpu_ring_write(ring, 0x8000);
4238	amdgpu_ring_write(ring, 0x8000);
4239
4240	amdgpu_ring_commit(ring);
4241
4242	return 0;
4243}
4244static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4245{
4246	u32 tmp;
4247	/* no gfx doorbells on iceland */
4248	if (adev->asic_type == CHIP_TOPAZ)
4249		return;
4250
4251	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4252
4253	if (ring->use_doorbell) {
4254		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4255				DOORBELL_OFFSET, ring->doorbell_index);
4256		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4257						DOORBELL_HIT, 0);
4258		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4259					    DOORBELL_EN, 1);
4260	} else {
4261		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4262	}
4263
4264	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4265
4266	if (adev->flags & AMD_IS_APU)
4267		return;
4268
4269	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4270					DOORBELL_RANGE_LOWER,
4271					adev->doorbell_index.gfx_ring0);
4272	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4273
4274	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4275		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4276}
4277
4278static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4279{
4280	struct amdgpu_ring *ring;
4281	u32 tmp;
4282	u32 rb_bufsz;
4283	u64 rb_addr, rptr_addr, wptr_gpu_addr;
 
4284
4285	/* Set the write pointer delay */
4286	WREG32(mmCP_RB_WPTR_DELAY, 0);
4287
4288	/* set the RB to use vmid 0 */
4289	WREG32(mmCP_RB_VMID, 0);
4290
4291	/* Set ring buffer size */
4292	ring = &adev->gfx.gfx_ring[0];
4293	rb_bufsz = order_base_2(ring->ring_size / 8);
4294	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4295	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4296	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4297	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4298#ifdef __BIG_ENDIAN
4299	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4300#endif
4301	WREG32(mmCP_RB0_CNTL, tmp);
4302
4303	/* Initialize the ring buffer's read and write pointers */
4304	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4305	ring->wptr = 0;
4306	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4307
4308	/* set the wb address wether it's enabled or not */
4309	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4310	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4311	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4312
4313	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4314	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4315	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4316	mdelay(1);
4317	WREG32(mmCP_RB0_CNTL, tmp);
4318
4319	rb_addr = ring->gpu_addr >> 8;
4320	WREG32(mmCP_RB0_BASE, rb_addr);
4321	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4322
4323	gfx_v8_0_set_cpg_door_bell(adev, ring);
4324	/* start the ring */
4325	amdgpu_ring_clear_ring(ring);
4326	gfx_v8_0_cp_gfx_start(adev);
4327	ring->sched.ready = true;
 
 
 
4328
4329	return 0;
4330}
4331
4332static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4333{
 
 
4334	if (enable) {
4335		WREG32(mmCP_MEC_CNTL, 0);
4336	} else {
4337		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4338		adev->gfx.kiq.ring.sched.ready = false;
 
 
4339	}
4340	udelay(50);
4341}
4342
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4343/* KIQ functions */
4344static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4345{
4346	uint32_t tmp;
4347	struct amdgpu_device *adev = ring->adev;
4348
4349	/* tell RLC which is KIQ queue */
4350	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4351	tmp &= 0xffffff00;
4352	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4353	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4354	tmp |= 0x80;
4355	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4356}
4357
4358static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4359{
4360	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
 
4361	uint64_t queue_mask = 0;
4362	int r, i;
4363
4364	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4365		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4366			continue;
4367
4368		/* This situation may be hit in the future if a new HW
4369		 * generation exposes more than 64 queues. If so, the
4370		 * definition of queue_mask needs updating */
4371		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4372			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4373			break;
4374		}
4375
4376		queue_mask |= (1ull << i);
4377	}
4378
4379	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
 
 
 
 
 
 
 
4380	if (r) {
4381		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 
4382		return r;
4383	}
4384	/* set resources */
4385	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4386	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4387	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4388	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4389	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4390	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4391	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4392	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4393	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4394		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4395		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4396		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4397
4398		/* map queues */
4399		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4400		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4401		amdgpu_ring_write(kiq_ring,
4402				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4403		amdgpu_ring_write(kiq_ring,
4404				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4405				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4406				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4407				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4408		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4409		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4410		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4411		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4412	}
 
 
 
 
 
4413
4414	amdgpu_ring_commit(kiq_ring);
 
 
 
 
 
 
 
 
 
 
 
4415
4416	return 0;
4417}
4418
4419static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4420{
4421	int i, r = 0;
4422
4423	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4424		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4425		for (i = 0; i < adev->usec_timeout; i++) {
4426			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4427				break;
4428			udelay(1);
4429		}
4430		if (i == adev->usec_timeout)
4431			r = -ETIMEDOUT;
4432	}
4433	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4434	WREG32(mmCP_HQD_PQ_RPTR, 0);
4435	WREG32(mmCP_HQD_PQ_WPTR, 0);
4436
4437	return r;
4438}
4439
4440static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4441{
4442	struct amdgpu_device *adev = ring->adev;
4443
4444	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4445		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4446			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4447			mqd->cp_hqd_queue_priority =
4448				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4449		}
4450	}
4451}
4452
/*
 * gfx_v8_0_mqd_init - fill in the memory queue descriptor (MQD) of a ring
 * @ring: ring whose MQD (ring->mqd_ptr) is populated
 *
 * Populates the struct vi_mqd at ring->mqd_ptr from ring state (ring, MQD,
 * EOP and writeback GPU addresses, ring size, doorbell index) combined with
 * values read back from the CP_HQD_* / CP_MQD_* registers.  Registers are
 * only read here, never written; ring->wptr is reset to 0.  The callers
 * visible in this file hold adev->srbm_mutex and select the ring's
 * me/pipe/queue with vi_srbm_select() around this call.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of the dynamic_cu_mask member inside the MQD allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* the EOP base is stored shifted right by 8 */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	/* this value is overwritten further down, before the function returns */
	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	/*
	 * NOTE(review): the value handed to REG_SET_FIELD is already shifted
	 * left by 8.  If REG_SET_FIELD applies the field's own shift and mask
	 * (its definition is not visible here), the extra shift may push the
	 * value outside the RPTR_BLOCK_SIZE field - confirm this is intended.
	 */
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* doorbell control stays 0 unless the ring uses a doorbell */
	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	/* final doorbell control value; replaces the one stored earlier */
	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: the remaining fields mirror the current register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
4599
4600static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4601			struct vi_mqd *mqd)
4602{
4603	uint32_t mqd_reg;
4604	uint32_t *mqd_data;
4605
4606	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4607	mqd_data = &mqd->cp_mqd_base_addr_lo;
4608
4609	/* disable wptr polling */
4610	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4611
4612	/* program all HQD registers */
4613	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4614		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4615
4616	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4617	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4618	 * on ASICs that do not support context-save.
4619	 * EOP writes/reads can start anywhere in the ring.
4620	 */
4621	if (adev->asic_type != CHIP_TONGA) {
4622		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4623		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4624		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4625	}
4626
4627	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4628		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4629
4630	/* activate the HQD */
4631	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4632		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4633
4634	return 0;
4635}
4636
4637static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4638{
4639	struct amdgpu_device *adev = ring->adev;
4640	struct vi_mqd *mqd = ring->mqd_ptr;
4641	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4642
4643	gfx_v8_0_kiq_setting(ring);
4644
4645	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4646		/* reset MQD to a clean status */
4647		if (adev->gfx.mec.mqd_backup[mqd_idx])
4648			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4649
4650		/* reset ring buffer */
4651		ring->wptr = 0;
4652		amdgpu_ring_clear_ring(ring);
4653		mutex_lock(&adev->srbm_mutex);
4654		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4655		gfx_v8_0_mqd_commit(adev, mqd);
4656		vi_srbm_select(adev, 0, 0, 0, 0);
4657		mutex_unlock(&adev->srbm_mutex);
4658	} else {
4659		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4660		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4661		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4662		mutex_lock(&adev->srbm_mutex);
4663		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4664		gfx_v8_0_mqd_init(ring);
4665		gfx_v8_0_mqd_commit(adev, mqd);
4666		vi_srbm_select(adev, 0, 0, 0, 0);
4667		mutex_unlock(&adev->srbm_mutex);
4668
4669		if (adev->gfx.mec.mqd_backup[mqd_idx])
4670			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4671	}
4672
4673	return 0;
4674}
4675
4676static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4677{
4678	struct amdgpu_device *adev = ring->adev;
4679	struct vi_mqd *mqd = ring->mqd_ptr;
4680	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4681
4682	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4683		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4684		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4685		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4686		mutex_lock(&adev->srbm_mutex);
4687		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4688		gfx_v8_0_mqd_init(ring);
4689		vi_srbm_select(adev, 0, 0, 0, 0);
4690		mutex_unlock(&adev->srbm_mutex);
4691
4692		if (adev->gfx.mec.mqd_backup[mqd_idx])
4693			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4694	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4695		/* reset MQD to a clean status */
4696		if (adev->gfx.mec.mqd_backup[mqd_idx])
4697			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4698		/* reset ring buffer */
4699		ring->wptr = 0;
4700		amdgpu_ring_clear_ring(ring);
4701	} else {
4702		amdgpu_ring_clear_ring(ring);
4703	}
4704	return 0;
4705}
4706
4707static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4708{
4709	if (adev->asic_type > CHIP_TONGA) {
4710		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4711		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4712	}
4713	/* enable doorbells */
4714	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4715}
4716
4717static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4718{
4719	struct amdgpu_ring *ring;
4720	int r;
 
 
4721
4722	ring = &adev->gfx.kiq.ring;
4723
4724	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4725	if (unlikely(r != 0))
4726		return r;
4727
4728	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4729	if (unlikely(r != 0))
4730		return r;
4731
4732	gfx_v8_0_kiq_init_queue(ring);
4733	amdgpu_bo_kunmap(ring->mqd_obj);
4734	ring->mqd_ptr = NULL;
4735	amdgpu_bo_unreserve(ring->mqd_obj);
4736	ring->sched.ready = true;
4737	return 0;
4738}
4739
4740static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4741{
4742	struct amdgpu_ring *ring = NULL;
4743	int r = 0, i;
4744
4745	gfx_v8_0_cp_compute_enable(adev, true);
4746
4747	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748		ring = &adev->gfx.compute_ring[i];
4749
4750		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4751		if (unlikely(r != 0))
4752			goto done;
4753		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4754		if (!r) {
4755			r = gfx_v8_0_kcq_init_queue(ring);
4756			amdgpu_bo_kunmap(ring->mqd_obj);
4757			ring->mqd_ptr = NULL;
4758		}
4759		amdgpu_bo_unreserve(ring->mqd_obj);
4760		if (r)
4761			goto done;
4762	}
4763
4764	gfx_v8_0_set_mec_doorbell_range(adev);
4765
4766	r = gfx_v8_0_kiq_kcq_enable(adev);
4767	if (r)
4768		goto done;
4769
4770done:
4771	return r;
4772}
4773
4774static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4775{
4776	int r, i;
4777	struct amdgpu_ring *ring;
4778
4779	/* collect all the ring_tests here, gfx, kiq, compute */
4780	ring = &adev->gfx.gfx_ring[0];
4781	r = amdgpu_ring_test_helper(ring);
4782	if (r)
4783		return r;
4784
4785	ring = &adev->gfx.kiq.ring;
4786	r = amdgpu_ring_test_helper(ring);
4787	if (r)
4788		return r;
 
 
 
4789
 
4790	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4791		ring = &adev->gfx.compute_ring[i];
4792		amdgpu_ring_test_helper(ring);
 
 
 
4793	}
4794
4795	return 0;
 
4796}
4797
4798static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4799{
4800	int r;
4801
4802	if (!(adev->flags & AMD_IS_APU))
4803		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4804
4805	r = gfx_v8_0_kiq_resume(adev);
4806	if (r)
4807		return r;
 
 
 
 
 
 
 
4808
4809	r = gfx_v8_0_cp_gfx_resume(adev);
4810	if (r)
4811		return r;
4812
4813	r = gfx_v8_0_kcq_resume(adev);
4814	if (r)
4815		return r;
4816
4817	r = gfx_v8_0_cp_test_all_rings(adev);
4818	if (r)
4819		return r;
4820
4821	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4822
4823	return 0;
4824}
4825
4826static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4827{
4828	gfx_v8_0_cp_gfx_enable(adev, enable);
4829	gfx_v8_0_cp_compute_enable(adev, enable);
4830}
4831
4832static int gfx_v8_0_hw_init(void *handle)
4833{
4834	int r;
4835	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4836
4837	gfx_v8_0_init_golden_registers(adev);
4838	gfx_v8_0_constants_init(adev);
4839
4840	r = adev->gfx.rlc.funcs->resume(adev);
4841	if (r)
4842		return r;
4843
4844	r = gfx_v8_0_cp_resume(adev);
4845
4846	return r;
4847}
4848
4849static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4850{
 
 
4851	int r, i;
4852	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4853
4854	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4855	if (r)
 
 
 
 
 
 
 
4856		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 
 
 
4857
4858	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4859		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4860
4861		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4862		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4863						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4864						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4865						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4866						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4867		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4868		amdgpu_ring_write(kiq_ring, 0);
4869		amdgpu_ring_write(kiq_ring, 0);
4870		amdgpu_ring_write(kiq_ring, 0);
 
 
 
 
 
 
 
 
 
 
 
4871	}
4872	r = amdgpu_ring_test_helper(kiq_ring);
4873	if (r)
4874		DRM_ERROR("KCQ disable failed\n");
4875
 
4876	return r;
4877}
4878
4879static bool gfx_v8_0_is_idle(void *handle)
4880{
4881	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
4882
4883	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4884		|| RREG32(mmGRBM_STATUS2) != 0x8)
4885		return false;
4886	else
4887		return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4888}
4889
4890static bool gfx_v8_0_rlc_is_idle(void *handle)
4891{
4892	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
 
4893
4894	if (RREG32(mmGRBM_STATUS2) != 0x8)
4895		return false;
4896	else
4897		return true;
 
 
 
 
4898}
4899
4900static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4901{
4902	unsigned int i;
4903	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4904
4905	for (i = 0; i < adev->usec_timeout; i++) {
4906		if (gfx_v8_0_rlc_is_idle(handle))
4907			return 0;
4908
4909		udelay(1);
4910	}
4911	return -ETIMEDOUT;
4912}
4913
4914static int gfx_v8_0_wait_for_idle(void *handle)
4915{
4916	unsigned int i;
4917	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4918
4919	for (i = 0; i < adev->usec_timeout; i++) {
4920		if (gfx_v8_0_is_idle(handle))
4921			return 0;
4922
4923		udelay(1);
4924	}
4925	return -ETIMEDOUT;
4926}
4927
4928static int gfx_v8_0_hw_fini(void *handle)
4929{
4930	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4931
4932	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4933	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4934
4935	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4936
4937	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4938
4939	/* disable KCQ to avoid CPC touch memory not valid anymore */
4940	gfx_v8_0_kcq_disable(adev);
4941
4942	if (amdgpu_sriov_vf(adev)) {
4943		pr_debug("For SRIOV client, shouldn't do anything.\n");
4944		return 0;
4945	}
4946	amdgpu_gfx_rlc_enter_safe_mode(adev);
4947	if (!gfx_v8_0_wait_for_idle(adev))
4948		gfx_v8_0_cp_enable(adev, false);
4949	else
4950		pr_err("cp is busy, skip halt cp\n");
4951	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4952		adev->gfx.rlc.funcs->stop(adev);
4953	else
4954		pr_err("rlc is busy, skip halt rlc\n");
4955	amdgpu_gfx_rlc_exit_safe_mode(adev);
4956
4957	return 0;
4958}
4959
/*
 * gfx_v8_0_suspend - suspend callback for the gfx block
 * @handle: struct amdgpu_device pointer passed as void *
 *
 * Suspend is the same as hardware teardown; returns whatever
 * gfx_v8_0_hw_fini() returns.
 */
static int gfx_v8_0_suspend(void *handle)
{
	int ret = gfx_v8_0_hw_fini(handle);

	return ret;
}
4964
/*
 * gfx_v8_0_resume - resume callback for the gfx block
 * @handle: struct amdgpu_device pointer passed as void *
 *
 * Resume is the same as hardware init; returns whatever
 * gfx_v8_0_hw_init() returns.
 */
static int gfx_v8_0_resume(void *handle)
{
	int ret = gfx_v8_0_hw_init(handle);

	return ret;
}
4969
4970static bool gfx_v8_0_check_soft_reset(void *handle)
4971{
4972	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4973	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4974	u32 tmp;
4975
4976	/* GRBM_STATUS */
4977	tmp = RREG32(mmGRBM_STATUS);
4978	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4979		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4980		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4981		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4982		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4983		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4984		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4985		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4986						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4987		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4988						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4989		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4990						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4991	}
4992
4993	/* GRBM_STATUS2 */
4994	tmp = RREG32(mmGRBM_STATUS2);
4995	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4996		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4997						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4998
4999	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5000	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5001	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5002		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5003						SOFT_RESET_CPF, 1);
5004		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5005						SOFT_RESET_CPC, 1);
5006		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5007						SOFT_RESET_CPG, 1);
5008		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5009						SOFT_RESET_GRBM, 1);
5010	}
5011
5012	/* SRBM_STATUS */
5013	tmp = RREG32(mmSRBM_STATUS);
5014	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5015		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5016						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5017	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5018		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5019						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5020
5021	if (grbm_soft_reset || srbm_soft_reset) {
5022		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5023		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5024		return true;
5025	} else {
5026		adev->gfx.grbm_soft_reset = 0;
5027		adev->gfx.srbm_soft_reset = 0;
5028		return false;
5029	}
5030}
5031
5032static int gfx_v8_0_pre_soft_reset(void *handle)
5033{
5034	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5035	u32 grbm_soft_reset = 0;
5036
5037	if ((!adev->gfx.grbm_soft_reset) &&
5038	    (!adev->gfx.srbm_soft_reset))
5039		return 0;
5040
5041	grbm_soft_reset = adev->gfx.grbm_soft_reset;
 
5042
5043	/* stop the rlc */
5044	adev->gfx.rlc.funcs->stop(adev);
5045
5046	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5047	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5048		/* Disable GFX parsing/prefetching */
5049		gfx_v8_0_cp_gfx_enable(adev, false);
5050
5051	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5052	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5053	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5054	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5055		int i;
5056
5057		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5058			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5059
5060			mutex_lock(&adev->srbm_mutex);
5061			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5062			gfx_v8_0_deactivate_hqd(adev, 2);
5063			vi_srbm_select(adev, 0, 0, 0, 0);
5064			mutex_unlock(&adev->srbm_mutex);
5065		}
5066		/* Disable MEC parsing/prefetching */
5067		gfx_v8_0_cp_compute_enable(adev, false);
5068	}
5069
5070	return 0;
5071}
5072
5073static int gfx_v8_0_soft_reset(void *handle)
5074{
5075	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5076	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5077	u32 tmp;
5078
5079	if ((!adev->gfx.grbm_soft_reset) &&
5080	    (!adev->gfx.srbm_soft_reset))
5081		return 0;
5082
5083	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5084	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5085
5086	if (grbm_soft_reset || srbm_soft_reset) {
5087		tmp = RREG32(mmGMCON_DEBUG);
5088		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5089		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5090		WREG32(mmGMCON_DEBUG, tmp);
5091		udelay(50);
5092	}
5093
5094	if (grbm_soft_reset) {
5095		tmp = RREG32(mmGRBM_SOFT_RESET);
5096		tmp |= grbm_soft_reset;
5097		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5098		WREG32(mmGRBM_SOFT_RESET, tmp);
5099		tmp = RREG32(mmGRBM_SOFT_RESET);
5100
5101		udelay(50);
5102
5103		tmp &= ~grbm_soft_reset;
5104		WREG32(mmGRBM_SOFT_RESET, tmp);
5105		tmp = RREG32(mmGRBM_SOFT_RESET);
5106	}
5107
5108	if (srbm_soft_reset) {
5109		tmp = RREG32(mmSRBM_SOFT_RESET);
5110		tmp |= srbm_soft_reset;
5111		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5112		WREG32(mmSRBM_SOFT_RESET, tmp);
5113		tmp = RREG32(mmSRBM_SOFT_RESET);
5114
5115		udelay(50);
5116
5117		tmp &= ~srbm_soft_reset;
5118		WREG32(mmSRBM_SOFT_RESET, tmp);
5119		tmp = RREG32(mmSRBM_SOFT_RESET);
5120	}
5121
5122	if (grbm_soft_reset || srbm_soft_reset) {
5123		tmp = RREG32(mmGMCON_DEBUG);
5124		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5125		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5126		WREG32(mmGMCON_DEBUG, tmp);
5127	}
5128
5129	/* Wait a little for things to settle down */
5130	udelay(50);
5131
5132	return 0;
5133}
5134
5135static int gfx_v8_0_post_soft_reset(void *handle)
5136{
5137	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5138	u32 grbm_soft_reset = 0;
5139
5140	if ((!adev->gfx.grbm_soft_reset) &&
5141	    (!adev->gfx.srbm_soft_reset))
5142		return 0;
5143
5144	grbm_soft_reset = adev->gfx.grbm_soft_reset;
 
 
 
 
 
5145
5146	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5147	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5148	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5149	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5150		int i;
5151
5152		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5153			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5154
5155			mutex_lock(&adev->srbm_mutex);
5156			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5157			gfx_v8_0_deactivate_hqd(adev, 2);
5158			vi_srbm_select(adev, 0, 0, 0, 0);
5159			mutex_unlock(&adev->srbm_mutex);
5160		}
5161		gfx_v8_0_kiq_resume(adev);
5162		gfx_v8_0_kcq_resume(adev);
5163	}
5164
5165	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5166	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5167		gfx_v8_0_cp_gfx_resume(adev);
5168
5169	gfx_v8_0_cp_test_all_rings(adev);
5170
5171	adev->gfx.rlc.funcs->start(adev);
5172
5173	return 0;
5174}
5175
5176/**
5177 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5178 *
5179 * @adev: amdgpu_device pointer
5180 *
5181 * Fetches a GPU clock counter snapshot.
5182 * Returns the 64 bit clock counter snapshot.
5183 */
5184static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5185{
5186	uint64_t clock;
5187
5188	mutex_lock(&adev->gfx.gpu_clock_mutex);
5189	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5190	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5191		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5192	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5193	return clock;
5194}
5195
5196static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5197					  uint32_t vmid,
5198					  uint32_t gds_base, uint32_t gds_size,
5199					  uint32_t gws_base, uint32_t gws_size,
5200					  uint32_t oa_base, uint32_t oa_size)
5201{
 
 
 
 
 
 
 
 
 
5202	/* GDS Base */
5203	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5204	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5205				WRITE_DATA_DST_SEL(0)));
5206	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5207	amdgpu_ring_write(ring, 0);
5208	amdgpu_ring_write(ring, gds_base);
5209
5210	/* GDS Size */
5211	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5212	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5213				WRITE_DATA_DST_SEL(0)));
5214	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5215	amdgpu_ring_write(ring, 0);
5216	amdgpu_ring_write(ring, gds_size);
5217
5218	/* GWS */
5219	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5220	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5221				WRITE_DATA_DST_SEL(0)));
5222	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5223	amdgpu_ring_write(ring, 0);
5224	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5225
5226	/* OA */
5227	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5228	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5229				WRITE_DATA_DST_SEL(0)));
5230	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5231	amdgpu_ring_write(ring, 0);
5232	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5233}
5234
5235static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5236{
5237	WREG32(mmSQ_IND_INDEX,
5238		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5239		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5240		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5241		(SQ_IND_INDEX__FORCE_READ_MASK));
5242	return RREG32(mmSQ_IND_DATA);
5243}
5244
5245static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5246			   uint32_t wave, uint32_t thread,
5247			   uint32_t regno, uint32_t num, uint32_t *out)
5248{
5249	WREG32(mmSQ_IND_INDEX,
5250		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5251		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5252		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5253		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5254		(SQ_IND_INDEX__FORCE_READ_MASK) |
5255		(SQ_IND_INDEX__AUTO_INCR_MASK));
5256	while (num--)
5257		*(out++) = RREG32(mmSQ_IND_DATA);
5258}
5259
5260static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5261{
5262	/* type 0 wave data */
5263	dst[(*no_fields)++] = 0;
5264	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5265	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5266	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5267	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5268	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5269	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5270	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5271	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5272	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5273	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5274	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5275	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5276	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5277	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5278	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5279	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5280	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5281	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5282}
5283
5284static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5285				     uint32_t wave, uint32_t start,
5286				     uint32_t size, uint32_t *dst)
5287{
5288	wave_read_regs(
5289		adev, simd, wave, 0,
5290		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5291}
5292
5293
5294static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5295	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5296	.select_se_sh = &gfx_v8_0_select_se_sh,
5297	.read_wave_data = &gfx_v8_0_read_wave_data,
5298	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5299	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5300};
5301
5302static int gfx_v8_0_early_init(void *handle)
5303{
5304	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5305
5306	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5307	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5308					  AMDGPU_MAX_COMPUTE_RINGS);
5309	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5310	gfx_v8_0_set_ring_funcs(adev);
5311	gfx_v8_0_set_irq_funcs(adev);
5312	gfx_v8_0_set_gds_init(adev);
5313	gfx_v8_0_set_rlc_funcs(adev);
5314
5315	return 0;
5316}
5317
5318static int gfx_v8_0_late_init(void *handle)
5319{
5320	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5321	int r;
5322
5323	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5324	if (r)
5325		return r;
5326
5327	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5328	if (r)
5329		return r;
5330
5331	/* requires IBs so do in late init after IB pool is initialized */
5332	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5333	if (r)
5334		return r;
5335
5336	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5337	if (r) {
5338		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5339		return r;
5340	}
5341
5342	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5343	if (r) {
5344		DRM_ERROR(
5345			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5346			r);
5347		return r;
5348	}
5349
5350	return 0;
5351}
5352
5353static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5354						       bool enable)
5355{
5356	if ((adev->asic_type == CHIP_POLARIS11) ||
5357	    (adev->asic_type == CHIP_POLARIS12) ||
5358	    (adev->asic_type == CHIP_VEGAM))
5359		/* Send msg to SMU via Powerplay */
5360		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
 
 
 
5361
5362	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5363}
5364
5365static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5366							bool enable)
5367{
5368	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5369}
5370
5371static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5372		bool enable)
5373{
5374	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5375}
5376
5377static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5378					  bool enable)
5379{
5380	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5381}
5382
5383static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5384						bool enable)
5385{
5386	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5387
5388	/* Read any GFX register to wake up GFX. */
5389	if (!enable)
5390		RREG32(mmDB_RENDER_CONTROL);
5391}
5392
5393static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5394					  bool enable)
5395{
5396	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5397		cz_enable_gfx_cg_power_gating(adev, true);
5398		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5399			cz_enable_gfx_pipeline_power_gating(adev, true);
5400	} else {
5401		cz_enable_gfx_cg_power_gating(adev, false);
5402		cz_enable_gfx_pipeline_power_gating(adev, false);
5403	}
5404}
5405
5406static int gfx_v8_0_set_powergating_state(void *handle,
5407					  enum amd_powergating_state state)
5408{
5409	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5410	bool enable = (state == AMD_PG_STATE_GATE);
5411
5412	if (amdgpu_sriov_vf(adev))
5413		return 0;
5414
5415	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5416				AMD_PG_SUPPORT_RLC_SMU_HS |
5417				AMD_PG_SUPPORT_CP |
5418				AMD_PG_SUPPORT_GFX_DMG))
5419		amdgpu_gfx_rlc_enter_safe_mode(adev);
5420	switch (adev->asic_type) {
5421	case CHIP_CARRIZO:
5422	case CHIP_STONEY:
5423
5424		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5425			cz_enable_sck_slow_down_on_power_up(adev, true);
5426			cz_enable_sck_slow_down_on_power_down(adev, true);
5427		} else {
5428			cz_enable_sck_slow_down_on_power_up(adev, false);
5429			cz_enable_sck_slow_down_on_power_down(adev, false);
5430		}
5431		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5432			cz_enable_cp_power_gating(adev, true);
5433		else
5434			cz_enable_cp_power_gating(adev, false);
5435
5436		cz_update_gfx_cg_power_gating(adev, enable);
5437
5438		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5439			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5440		else
5441			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5442
5443		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5444			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5445		else
5446			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5447		break;
5448	case CHIP_POLARIS11:
5449	case CHIP_POLARIS12:
5450	case CHIP_VEGAM:
5451		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5452			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5453		else
5454			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5455
5456		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5457			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5458		else
5459			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5460
5461		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5462			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5463		else
5464			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5465		break;
5466	default:
5467		break;
5468	}
5469	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5470				AMD_PG_SUPPORT_RLC_SMU_HS |
5471				AMD_PG_SUPPORT_CP |
5472				AMD_PG_SUPPORT_GFX_DMG))
5473		amdgpu_gfx_rlc_exit_safe_mode(adev);
5474	return 0;
5475}
5476
5477static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5478{
5479	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5480	int data;
5481
5482	if (amdgpu_sriov_vf(adev))
5483		*flags = 0;
5484
5485	/* AMD_CG_SUPPORT_GFX_MGCG */
5486	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5487	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5488		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5489
5490	/* AMD_CG_SUPPORT_GFX_CGLG */
5491	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5492	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5493		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5494
5495	/* AMD_CG_SUPPORT_GFX_CGLS */
5496	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5497		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5498
5499	/* AMD_CG_SUPPORT_GFX_CGTS */
5500	data = RREG32(mmCGTS_SM_CTRL_REG);
5501	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5502		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5503
5504	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5505	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5506		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5507
5508	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5509	data = RREG32(mmRLC_MEM_SLP_CNTL);
5510	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5511		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5512
5513	/* AMD_CG_SUPPORT_GFX_CP_LS */
5514	data = RREG32(mmCP_MEM_SLP_CNTL);
5515	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5516		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5517}
5518
5519static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5520				     uint32_t reg_addr, uint32_t cmd)
5521{
5522	uint32_t data;
5523
5524	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5525
5526	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5527	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5528
5529	data = RREG32(mmRLC_SERDES_WR_CTRL);
5530	if (adev->asic_type == CHIP_STONEY)
5531		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5532			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5533			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5534			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5535			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5536			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5537			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5538			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5539			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5540	else
5541		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5542			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5543			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5544			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5545			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5546			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5547			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5548			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5549			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5550			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5551			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5552	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5553		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5554		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5555		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5556
5557	WREG32(mmRLC_SERDES_WR_CTRL, data);
5558}
5559
/* message codes for entering (1) and leaving (0) RLC safe mode */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE occupies bits 1..4 */
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e
5566
5567static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5568{
5569	uint32_t rlc_setting;
 
5570
5571	rlc_setting = RREG32(mmRLC_CNTL);
5572	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5573		return false;
5574
5575	return true;
5576}
 
 
 
5577
5578static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5579{
5580	uint32_t data;
5581	unsigned i;
5582	data = RREG32(mmRLC_CNTL);
5583	data |= RLC_SAFE_MODE__CMD_MASK;
5584	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5585	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5586	WREG32(mmRLC_SAFE_MODE, data);
5587
5588	/* wait for RLC_SAFE_MODE */
5589	for (i = 0; i < adev->usec_timeout; i++) {
5590		if ((RREG32(mmRLC_GPM_STAT) &
5591		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5592		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5593		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5594		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5595			break;
5596		udelay(1);
5597	}
5598	for (i = 0; i < adev->usec_timeout; i++) {
5599		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5600			break;
5601		udelay(1);
5602	}
5603}
5604
5605static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5606{
5607	uint32_t data;
5608	unsigned i;
5609
5610	data = RREG32(mmRLC_CNTL);
5611	data |= RLC_SAFE_MODE__CMD_MASK;
5612	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5613	WREG32(mmRLC_SAFE_MODE, data);
 
 
 
 
 
 
 
 
5614
5615	for (i = 0; i < adev->usec_timeout; i++) {
5616		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5617			break;
5618		udelay(1);
5619	}
5620}
5621
5622static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5623{
5624	u32 data;
5625
5626	if (amdgpu_sriov_is_pp_one_vf(adev))
5627		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5628	else
5629		data = RREG32(mmRLC_SPM_VMID);
5630
5631	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5632	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5633
5634	if (amdgpu_sriov_is_pp_one_vf(adev))
5635		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5636	else
5637		WREG32(mmRLC_SPM_VMID, data);
5638}
5639
5640static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5641	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5642	.set_safe_mode = gfx_v8_0_set_safe_mode,
5643	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5644	.init = gfx_v8_0_rlc_init,
5645	.get_csb_size = gfx_v8_0_get_csb_size,
5646	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5647	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5648	.resume = gfx_v8_0_rlc_resume,
5649	.stop = gfx_v8_0_rlc_stop,
5650	.reset = gfx_v8_0_rlc_reset,
5651	.start = gfx_v8_0_rlc_start,
5652	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5653};
5654
5655static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5656						      bool enable)
5657{
5658	uint32_t temp, data;
5659
5660	amdgpu_gfx_rlc_enter_safe_mode(adev);
5661
5662	/* It is disabled by HW by default */
5663	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5664		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5665			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5666				/* 1 - RLC memory Light sleep */
5667				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5668
5669			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5670				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5671		}
5672
5673		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5674		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5675		if (adev->flags & AMD_IS_APU)
5676			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5677				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5678				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5679		else
5680			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5681				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5682				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5683				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5684
5685		if (temp != data)
5686			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5687
5688		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5689		gfx_v8_0_wait_for_rlc_serdes(adev);
5690
5691		/* 5 - clear mgcg override */
5692		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5693
5694		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5695			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5696			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5697			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5698			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5699			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5700			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5701			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5702			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5703				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5704			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5705			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5706			if (temp != data)
5707				WREG32(mmCGTS_SM_CTRL_REG, data);
5708		}
5709		udelay(50);
5710
5711		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5712		gfx_v8_0_wait_for_rlc_serdes(adev);
5713	} else {
5714		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5715		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5716		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5717				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5718				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5719				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5720		if (temp != data)
5721			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5722
5723		/* 2 - disable MGLS in RLC */
5724		data = RREG32(mmRLC_MEM_SLP_CNTL);
5725		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5726			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5727			WREG32(mmRLC_MEM_SLP_CNTL, data);
5728		}
5729
5730		/* 3 - disable MGLS in CP */
5731		data = RREG32(mmCP_MEM_SLP_CNTL);
5732		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5733			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5734			WREG32(mmCP_MEM_SLP_CNTL, data);
5735		}
5736
5737		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5738		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5739		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5740				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5741		if (temp != data)
5742			WREG32(mmCGTS_SM_CTRL_REG, data);
5743
5744		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5745		gfx_v8_0_wait_for_rlc_serdes(adev);
5746
5747		/* 6 - set mgcg override */
5748		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5749
5750		udelay(50);
5751
5752		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5753		gfx_v8_0_wait_for_rlc_serdes(adev);
5754	}
5755
5756	amdgpu_gfx_rlc_exit_safe_mode(adev);
5757}
5758
5759static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5760						      bool enable)
5761{
5762	uint32_t temp, temp1, data, data1;
5763
5764	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5765
5766	amdgpu_gfx_rlc_enter_safe_mode(adev);
5767
5768	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5769		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5770		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5771		if (temp1 != data1)
5772			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5773
5774		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5775		gfx_v8_0_wait_for_rlc_serdes(adev);
5776
5777		/* 2 - clear cgcg override */
5778		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5779
5780		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5781		gfx_v8_0_wait_for_rlc_serdes(adev);
5782
5783		/* 3 - write cmd to set CGLS */
5784		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5785
5786		/* 4 - enable cgcg */
5787		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5788
5789		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5790			/* enable cgls*/
5791			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5792
5793			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5794			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5795
5796			if (temp1 != data1)
5797				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5798		} else {
5799			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5800		}
5801
5802		if (temp != data)
5803			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5804
5805		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5806		 * Cmp_busy/GFX_Idle interrupts
5807		 */
5808		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5809	} else {
5810		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5811		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5812
5813		/* TEST CGCG */
5814		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5815		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5816				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5817		if (temp1 != data1)
5818			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5819
5820		/* read gfx register to wake up cgcg */
5821		RREG32(mmCB_CGTT_SCLK_CTRL);
5822		RREG32(mmCB_CGTT_SCLK_CTRL);
5823		RREG32(mmCB_CGTT_SCLK_CTRL);
5824		RREG32(mmCB_CGTT_SCLK_CTRL);
5825
5826		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5827		gfx_v8_0_wait_for_rlc_serdes(adev);
5828
5829		/* write cmd to Set CGCG Overrride */
5830		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5831
5832		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5833		gfx_v8_0_wait_for_rlc_serdes(adev);
5834
5835		/* write cmd to Clear CGLS */
5836		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5837
5838		/* disable cgcg, cgls should be disabled too. */
5839		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5840			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5841		if (temp != data)
5842			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5843		/* enable interrupts again for PG */
5844		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5845	}
5846
5847	gfx_v8_0_wait_for_rlc_serdes(adev);
5848
5849	amdgpu_gfx_rlc_exit_safe_mode(adev);
5850}
5851static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5852					    bool enable)
5853{
5854	if (enable) {
5855		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5856		 * ===  MGCG + MGLS + TS(CG/LS) ===
5857		 */
5858		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5859		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5860	} else {
5861		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5862		 * ===  CGCG + CGLS ===
5863		 */
5864		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5865		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5866	}
5867	return 0;
5868}
5869
5870static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5871					  enum amd_clockgating_state state)
5872{
5873	uint32_t msg_id, pp_state = 0;
5874	uint32_t pp_support_state = 0;
5875
5876	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5877		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5878			pp_support_state = PP_STATE_SUPPORT_LS;
5879			pp_state = PP_STATE_LS;
5880		}
5881		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5882			pp_support_state |= PP_STATE_SUPPORT_CG;
5883			pp_state |= PP_STATE_CG;
5884		}
5885		if (state == AMD_CG_STATE_UNGATE)
5886			pp_state = 0;
5887
5888		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5889				PP_BLOCK_GFX_CG,
5890				pp_support_state,
5891				pp_state);
5892		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5893	}
5894
5895	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5896		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5897			pp_support_state = PP_STATE_SUPPORT_LS;
5898			pp_state = PP_STATE_LS;
5899		}
5900
5901		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5902			pp_support_state |= PP_STATE_SUPPORT_CG;
5903			pp_state |= PP_STATE_CG;
5904		}
5905
5906		if (state == AMD_CG_STATE_UNGATE)
5907			pp_state = 0;
5908
5909		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5910				PP_BLOCK_GFX_MG,
5911				pp_support_state,
5912				pp_state);
5913		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5914	}
5915
5916	return 0;
5917}
5918
5919static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5920					  enum amd_clockgating_state state)
5921{
5922
5923	uint32_t msg_id, pp_state = 0;
5924	uint32_t pp_support_state = 0;
5925
5926	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5927		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5928			pp_support_state = PP_STATE_SUPPORT_LS;
5929			pp_state = PP_STATE_LS;
5930		}
5931		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5932			pp_support_state |= PP_STATE_SUPPORT_CG;
5933			pp_state |= PP_STATE_CG;
5934		}
5935		if (state == AMD_CG_STATE_UNGATE)
5936			pp_state = 0;
5937
5938		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5939				PP_BLOCK_GFX_CG,
5940				pp_support_state,
5941				pp_state);
5942		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5943	}
5944
5945	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5946		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5947			pp_support_state = PP_STATE_SUPPORT_LS;
5948			pp_state = PP_STATE_LS;
5949		}
5950		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5951			pp_support_state |= PP_STATE_SUPPORT_CG;
5952			pp_state |= PP_STATE_CG;
5953		}
5954		if (state == AMD_CG_STATE_UNGATE)
5955			pp_state = 0;
5956
5957		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5958				PP_BLOCK_GFX_3D,
5959				pp_support_state,
5960				pp_state);
5961		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5962	}
5963
5964	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5965		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5966			pp_support_state = PP_STATE_SUPPORT_LS;
5967			pp_state = PP_STATE_LS;
5968		}
5969
5970		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5971			pp_support_state |= PP_STATE_SUPPORT_CG;
5972			pp_state |= PP_STATE_CG;
5973		}
5974
5975		if (state == AMD_CG_STATE_UNGATE)
5976			pp_state = 0;
5977
5978		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5979				PP_BLOCK_GFX_MG,
5980				pp_support_state,
5981				pp_state);
5982		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5983	}
5984
5985	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5986		pp_support_state = PP_STATE_SUPPORT_LS;
5987
5988		if (state == AMD_CG_STATE_UNGATE)
5989			pp_state = 0;
5990		else
5991			pp_state = PP_STATE_LS;
5992
5993		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5994				PP_BLOCK_GFX_RLC,
5995				pp_support_state,
5996				pp_state);
5997		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
5998	}
5999
6000	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6001		pp_support_state = PP_STATE_SUPPORT_LS;
6002
6003		if (state == AMD_CG_STATE_UNGATE)
6004			pp_state = 0;
6005		else
6006			pp_state = PP_STATE_LS;
6007		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6008			PP_BLOCK_GFX_CP,
6009			pp_support_state,
6010			pp_state);
6011		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
 
6012	}
6013
6014	return 0;
6015}
6016
6017static int gfx_v8_0_set_clockgating_state(void *handle,
6018					  enum amd_clockgating_state state)
6019{
6020	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6021
6022	if (amdgpu_sriov_vf(adev))
6023		return 0;
6024
6025	switch (adev->asic_type) {
6026	case CHIP_FIJI:
6027	case CHIP_CARRIZO:
6028	case CHIP_STONEY:
6029		gfx_v8_0_update_gfx_clock_gating(adev,
6030						 state == AMD_CG_STATE_GATE);
6031		break;
6032	case CHIP_TONGA:
6033		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6034		break;
6035	case CHIP_POLARIS10:
6036	case CHIP_POLARIS11:
6037	case CHIP_POLARIS12:
6038	case CHIP_VEGAM:
6039		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6040		break;
6041	default:
6042		break;
6043	}
6044	return 0;
6045}
6046
6047static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6048{
6049	return ring->adev->wb.wb[ring->rptr_offs];
6050}
6051
6052static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6053{
6054	struct amdgpu_device *adev = ring->adev;
6055
6056	if (ring->use_doorbell)
6057		/* XXX check if swapping is necessary on BE */
6058		return ring->adev->wb.wb[ring->wptr_offs];
6059	else
6060		return RREG32(mmCP_RB0_WPTR);
6061}
6062
6063static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6064{
6065	struct amdgpu_device *adev = ring->adev;
6066
6067	if (ring->use_doorbell) {
6068		/* XXX check if swapping is necessary on BE */
6069		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6070		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6071	} else {
6072		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6073		(void)RREG32(mmCP_RB0_WPTR);
6074	}
6075}
6076
6077static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6078{
6079	u32 ref_and_mask, reg_mem_engine;
6080
6081	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6082	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6083		switch (ring->me) {
6084		case 1:
6085			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6086			break;
6087		case 2:
6088			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6089			break;
6090		default:
6091			return;
6092		}
6093		reg_mem_engine = 0;
6094	} else {
6095		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6096		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6097	}
6098
6099	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6100	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6101				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6102				 reg_mem_engine));
6103	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6104	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6105	amdgpu_ring_write(ring, ref_and_mask);
6106	amdgpu_ring_write(ring, ref_and_mask);
6107	amdgpu_ring_write(ring, 0x20); /* poll interval */
6108}
6109
6110static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6111{
6112	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6113	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6114		EVENT_INDEX(4));
6115
6116	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6117	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6118		EVENT_INDEX(0));
6119}
6120
6121static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6122					struct amdgpu_job *job,
6123					struct amdgpu_ib *ib,
6124					uint32_t flags)
6125{
6126	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6127	u32 header, control = 0;
6128
6129	if (ib->flags & AMDGPU_IB_FLAG_CE)
6130		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6131	else
6132		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6133
6134	control |= ib->length_dw | (vmid << 24);
6135
6136	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6137		control |= INDIRECT_BUFFER_PRE_ENB(1);
6138
6139		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6140			gfx_v8_0_ring_emit_de_meta(ring);
6141	}
6142
6143	amdgpu_ring_write(ring, header);
6144	amdgpu_ring_write(ring,
6145#ifdef __BIG_ENDIAN
6146			  (2 << 0) |
6147#endif
6148			  (ib->gpu_addr & 0xFFFFFFFC));
6149	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6150	amdgpu_ring_write(ring, control);
6151}
6152
6153static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6154					  struct amdgpu_job *job,
6155					  struct amdgpu_ib *ib,
6156					  uint32_t flags)
6157{
6158	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6159	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6160
6161	/* Currently, there is a high possibility to get wave ID mismatch
6162	 * between ME and GDS, leading to a hw deadlock, because ME generates
6163	 * different wave IDs than the GDS expects. This situation happens
6164	 * randomly when at least 5 compute pipes use GDS ordered append.
6165	 * The wave IDs generated by ME are also wrong after suspend/resume.
6166	 * Those are probably bugs somewhere else in the kernel driver.
6167	 *
6168	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6169	 * GDS to 0 for this ring (me/pipe).
6170	 */
6171	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6172		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6173		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6174		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6175	}
6176
6177	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6178	amdgpu_ring_write(ring,
6179#ifdef __BIG_ENDIAN
6180				(2 << 0) |
6181#endif
6182				(ib->gpu_addr & 0xFFFFFFFC));
6183	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6184	amdgpu_ring_write(ring, control);
6185}
6186
6187static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6188					 u64 seq, unsigned flags)
6189{
6190	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6191	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6192
6193	/* Workaround for cache flush problems. First send a dummy EOP
6194	 * event down the pipe with seq one below.
6195	 */
6196	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6197	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6198				 EOP_TC_ACTION_EN |
6199				 EOP_TC_WB_ACTION_EN |
6200				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6201				 EVENT_INDEX(5)));
6202	amdgpu_ring_write(ring, addr & 0xfffffffc);
6203	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6204				DATA_SEL(1) | INT_SEL(0));
6205	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6206	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6207
6208	/* Then send the real EOP event down the pipe:
6209	 * EVENT_WRITE_EOP - flush caches, send int */
6210	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6211	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6212				 EOP_TC_ACTION_EN |
6213				 EOP_TC_WB_ACTION_EN |
6214				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6215				 EVENT_INDEX(5)));
6216	amdgpu_ring_write(ring, addr & 0xfffffffc);
6217	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6218			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6219	amdgpu_ring_write(ring, lower_32_bits(seq));
6220	amdgpu_ring_write(ring, upper_32_bits(seq));
6221
6222}
6223
6224static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6225{
6226	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6227	uint32_t seq = ring->fence_drv.sync_seq;
6228	uint64_t addr = ring->fence_drv.gpu_addr;
6229
6230	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6231	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6232				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6233				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6234	amdgpu_ring_write(ring, addr & 0xfffffffc);
6235	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6236	amdgpu_ring_write(ring, seq);
6237	amdgpu_ring_write(ring, 0xffffffff);
6238	amdgpu_ring_write(ring, 4); /* poll interval */
6239}
6240
6241static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6242					unsigned vmid, uint64_t pd_addr)
6243{
6244	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6245
6246	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6247
6248	/* wait for the invalidate to complete */
6249	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6250	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6251				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6252				 WAIT_REG_MEM_ENGINE(0))); /* me */
6253	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6254	amdgpu_ring_write(ring, 0);
6255	amdgpu_ring_write(ring, 0); /* ref */
6256	amdgpu_ring_write(ring, 0); /* mask */
6257	amdgpu_ring_write(ring, 0x20); /* poll interval */
6258
6259	/* compute doesn't have PFP */
6260	if (usepfp) {
6261		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6262		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6263		amdgpu_ring_write(ring, 0x0);
6264	}
6265}
6266
6267static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6268{
6269	return ring->adev->wb.wb[ring->wptr_offs];
6270}
6271
6272static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6273{
6274	struct amdgpu_device *adev = ring->adev;
6275
6276	/* XXX check if swapping is necessary on BE */
6277	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6278	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6279}
6280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6281static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6282					     u64 addr, u64 seq,
6283					     unsigned flags)
6284{
6285	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6286	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6287
6288	/* RELEASE_MEM - flush caches, send int */
6289	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6290	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6291				 EOP_TC_ACTION_EN |
6292				 EOP_TC_WB_ACTION_EN |
6293				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6294				 EVENT_INDEX(5)));
6295	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6296	amdgpu_ring_write(ring, addr & 0xfffffffc);
6297	amdgpu_ring_write(ring, upper_32_bits(addr));
6298	amdgpu_ring_write(ring, lower_32_bits(seq));
6299	amdgpu_ring_write(ring, upper_32_bits(seq));
6300}
6301
6302static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6303					 u64 seq, unsigned int flags)
6304{
6305	/* we only allocate 32bit for each seq wb address */
6306	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6307
6308	/* write fence seq to the "addr" */
6309	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6310	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6311				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6312	amdgpu_ring_write(ring, lower_32_bits(addr));
6313	amdgpu_ring_write(ring, upper_32_bits(addr));
6314	amdgpu_ring_write(ring, lower_32_bits(seq));
6315
6316	if (flags & AMDGPU_FENCE_FLAG_INT) {
6317		/* set register to trigger INT */
6318		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6319		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6320					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6321		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6322		amdgpu_ring_write(ring, 0);
6323		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6324	}
6325}
6326
6327static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6328{
6329	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6330	amdgpu_ring_write(ring, 0);
6331}
6332
6333static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6334{
6335	uint32_t dw2 = 0;
6336
6337	if (amdgpu_sriov_vf(ring->adev))
6338		gfx_v8_0_ring_emit_ce_meta(ring);
6339
6340	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6341	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6342		gfx_v8_0_ring_emit_vgt_flush(ring);
6343		/* set load_global_config & load_global_uconfig */
6344		dw2 |= 0x8001;
6345		/* set load_cs_sh_regs */
6346		dw2 |= 0x01000000;
6347		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6348		dw2 |= 0x10002;
6349
6350		/* set load_ce_ram if preamble presented */
6351		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6352			dw2 |= 0x10000000;
6353	} else {
6354		/* still load_ce_ram if this is the first time preamble presented
6355		 * although there is no context switch happens.
6356		 */
6357		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6358			dw2 |= 0x10000000;
6359	}
6360
6361	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6362	amdgpu_ring_write(ring, dw2);
6363	amdgpu_ring_write(ring, 0);
6364}
6365
6366static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6367{
6368	unsigned ret;
6369
6370	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6371	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6372	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6373	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6374	ret = ring->wptr & ring->buf_mask;
6375	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6376	return ret;
6377}
6378
6379static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6380{
6381	unsigned cur;
6382
6383	BUG_ON(offset > ring->buf_mask);
6384	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6385
6386	cur = (ring->wptr & ring->buf_mask) - 1;
6387	if (likely(cur > offset))
6388		ring->ring[offset] = cur - offset;
6389	else
6390		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6391}
6392
6393static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6394				    uint32_t reg_val_offs)
6395{
6396	struct amdgpu_device *adev = ring->adev;
6397
6398	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6399	amdgpu_ring_write(ring, 0 |	/* src: register*/
6400				(5 << 8) |	/* dst: memory */
6401				(1 << 20));	/* write confirm */
6402	amdgpu_ring_write(ring, reg);
6403	amdgpu_ring_write(ring, 0);
6404	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6405				reg_val_offs * 4));
6406	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6407				reg_val_offs * 4));
6408}
6409
6410static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6411				  uint32_t val)
6412{
6413	uint32_t cmd;
6414
6415	switch (ring->funcs->type) {
6416	case AMDGPU_RING_TYPE_GFX:
6417		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6418		break;
6419	case AMDGPU_RING_TYPE_KIQ:
6420		cmd = 1 << 16; /* no inc addr */
6421		break;
6422	default:
6423		cmd = WR_CONFIRM;
6424		break;
6425	}
6426
6427	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6428	amdgpu_ring_write(ring, cmd);
6429	amdgpu_ring_write(ring, reg);
6430	amdgpu_ring_write(ring, 0);
6431	amdgpu_ring_write(ring, val);
6432}
6433
6434static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6435{
6436	struct amdgpu_device *adev = ring->adev;
6437	uint32_t value = 0;
6438
6439	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6440	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6441	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6442	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6443	WREG32(mmSQ_CMD, value);
6444}
6445
6446static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6447						 enum amdgpu_interrupt_state state)
6448{
6449	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6450		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6451}
6452
6453static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6454						     int me, int pipe,
6455						     enum amdgpu_interrupt_state state)
6456{
6457	u32 mec_int_cntl, mec_int_cntl_reg;
6458
6459	/*
6460	 * amdgpu controls only the first MEC. That's why this function only
6461	 * handles the setting of interrupts for this specific MEC. All other
6462	 * pipes' interrupts are set by amdkfd.
6463	 */
6464
6465	if (me == 1) {
6466		switch (pipe) {
6467		case 0:
6468			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6469			break;
6470		case 1:
6471			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6472			break;
6473		case 2:
6474			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6475			break;
6476		case 3:
6477			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6478			break;
6479		default:
6480			DRM_DEBUG("invalid pipe %d\n", pipe);
6481			return;
6482		}
6483	} else {
6484		DRM_DEBUG("invalid me %d\n", me);
6485		return;
6486	}
6487
6488	switch (state) {
6489	case AMDGPU_IRQ_STATE_DISABLE:
6490		mec_int_cntl = RREG32(mec_int_cntl_reg);
6491		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6492		WREG32(mec_int_cntl_reg, mec_int_cntl);
6493		break;
6494	case AMDGPU_IRQ_STATE_ENABLE:
6495		mec_int_cntl = RREG32(mec_int_cntl_reg);
6496		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6497		WREG32(mec_int_cntl_reg, mec_int_cntl);
6498		break;
6499	default:
6500		break;
6501	}
6502}
6503
6504static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6505					     struct amdgpu_irq_src *source,
6506					     unsigned type,
6507					     enum amdgpu_interrupt_state state)
6508{
6509	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6510		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6511
6512	return 0;
6513}
6514
6515static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6516					      struct amdgpu_irq_src *source,
6517					      unsigned type,
6518					      enum amdgpu_interrupt_state state)
6519{
6520	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6521		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6522
6523	return 0;
6524}
6525
6526static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6527					    struct amdgpu_irq_src *src,
6528					    unsigned type,
6529					    enum amdgpu_interrupt_state state)
6530{
6531	switch (type) {
6532	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6533		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6534		break;
6535	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6536		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6537		break;
6538	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6539		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6540		break;
6541	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6542		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6543		break;
6544	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6545		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6546		break;
6547	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6548		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6549		break;
6550	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6551		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6552		break;
6553	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6554		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6555		break;
6556	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6557		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6558		break;
6559	default:
6560		break;
6561	}
6562	return 0;
6563}
6564
6565static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6566					 struct amdgpu_irq_src *source,
6567					 unsigned int type,
6568					 enum amdgpu_interrupt_state state)
6569{
6570	int enable_flag;
6571
6572	switch (state) {
6573	case AMDGPU_IRQ_STATE_DISABLE:
6574		enable_flag = 0;
6575		break;
6576
6577	case AMDGPU_IRQ_STATE_ENABLE:
6578		enable_flag = 1;
6579		break;
6580
6581	default:
6582		return -EINVAL;
6583	}
6584
6585	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6586	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6587	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6588	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6589	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6590	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6591		     enable_flag);
6592	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6593		     enable_flag);
6594	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6595		     enable_flag);
6596	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6597		     enable_flag);
6598	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6599		     enable_flag);
6600	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6601		     enable_flag);
6602	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6603		     enable_flag);
6604	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6605		     enable_flag);
6606
6607	return 0;
6608}
6609
6610static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6611				     struct amdgpu_irq_src *source,
6612				     unsigned int type,
6613				     enum amdgpu_interrupt_state state)
6614{
6615	int enable_flag;
6616
6617	switch (state) {
6618	case AMDGPU_IRQ_STATE_DISABLE:
6619		enable_flag = 1;
6620		break;
6621
6622	case AMDGPU_IRQ_STATE_ENABLE:
6623		enable_flag = 0;
6624		break;
6625
6626	default:
6627		return -EINVAL;
6628	}
6629
6630	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6631		     enable_flag);
6632
6633	return 0;
6634}
6635
6636static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6637			    struct amdgpu_irq_src *source,
6638			    struct amdgpu_iv_entry *entry)
6639{
6640	int i;
6641	u8 me_id, pipe_id, queue_id;
6642	struct amdgpu_ring *ring;
6643
6644	DRM_DEBUG("IH: CP EOP\n");
6645	me_id = (entry->ring_id & 0x0c) >> 2;
6646	pipe_id = (entry->ring_id & 0x03) >> 0;
6647	queue_id = (entry->ring_id & 0x70) >> 4;
6648
6649	switch (me_id) {
6650	case 0:
6651		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6652		break;
6653	case 1:
6654	case 2:
6655		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6656			ring = &adev->gfx.compute_ring[i];
6657			/* Per-queue interrupt is supported for MEC starting from VI.
6658			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6659			  */
6660			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6661				amdgpu_fence_process(ring);
6662		}
6663		break;
6664	}
6665	return 0;
6666}
6667
6668static void gfx_v8_0_fault(struct amdgpu_device *adev,
6669			   struct amdgpu_iv_entry *entry)
6670{
6671	u8 me_id, pipe_id, queue_id;
6672	struct amdgpu_ring *ring;
6673	int i;
6674
6675	me_id = (entry->ring_id & 0x0c) >> 2;
6676	pipe_id = (entry->ring_id & 0x03) >> 0;
6677	queue_id = (entry->ring_id & 0x70) >> 4;
6678
6679	switch (me_id) {
6680	case 0:
6681		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6682		break;
6683	case 1:
6684	case 2:
6685		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6686			ring = &adev->gfx.compute_ring[i];
6687			if (ring->me == me_id && ring->pipe == pipe_id &&
6688			    ring->queue == queue_id)
6689				drm_sched_fault(&ring->sched);
6690		}
6691		break;
6692	}
6693}
6694
/*
 * Privileged-register interrupt handler: log the violation and hand the
 * IV entry to gfx_v8_0_fault().  @source is unused.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6703
/*
 * Privileged-instruction interrupt handler: log the violation and hand the
 * IV entry to gfx_v8_0_fault().  @source is unused.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6712
/*
 * CP EDC/ECC error interrupt handler: only logs the event.  None of the
 * parameters are used.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* terminate the record with a newline like the other handlers do */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6720
6721static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6722				  bool from_wq)
6723{
6724	u32 enc, se_id, sh_id, cu_id;
6725	char type[20];
6726	int sq_edc_source = -1;
6727
6728	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6729	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6730
6731	switch (enc) {
6732		case 0:
6733			DRM_INFO("SQ general purpose intr detected:"
6734					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6735					"host_cmd_overflow %d, cmd_timestamp %d,"
6736					"reg_timestamp %d, thread_trace_buff_full %d,"
6737					"wlt %d, thread_trace %d.\n",
6738					se_id,
6739					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6740					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6741					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6742					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6743					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6744					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6745					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6746					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6747					);
6748			break;
6749		case 1:
6750		case 2:
6751
6752			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6753			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6754
6755			/*
6756			 * This function can be called either directly from ISR
6757			 * or from BH in which case we can access SQ_EDC_INFO
6758			 * instance
6759			 */
6760			if (from_wq) {
6761				mutex_lock(&adev->grbm_idx_mutex);
6762				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6763
6764				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6765
6766				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6767				mutex_unlock(&adev->grbm_idx_mutex);
6768			}
6769
6770			if (enc == 1)
6771				sprintf(type, "instruction intr");
6772			else
6773				sprintf(type, "EDC/ECC error");
6774
6775			DRM_INFO(
6776				"SQ %s detected: "
6777					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6778					"trap %s, sq_ed_info.source %s.\n",
6779					type, se_id, sh_id, cu_id,
6780					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6781					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6782					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6783					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6784					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6785				);
6786			break;
6787		default:
6788			DRM_ERROR("SQ invalid encoding type\n.");
6789	}
6790}
6791
6792static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6793{
6794
6795	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6796	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6797
6798	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6799}
6800
6801static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6802			   struct amdgpu_irq_src *source,
6803			   struct amdgpu_iv_entry *entry)
6804{
6805	unsigned ih_data = entry->src_data[0];
6806
6807	/*
6808	 * Try to submit work so SQ_EDC_INFO can be accessed from
6809	 * BH. If previous work submission hasn't finished yet
6810	 * just print whatever info is possible directly from the ISR.
6811	 */
6812	if (work_pending(&adev->gfx.sq_work.work)) {
6813		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6814	} else {
6815		adev->gfx.sq_work.ih_data = ih_data;
6816		schedule_work(&adev->gfx.sq_work.work);
6817	}
6818
6819	return 0;
6820}
6821
6822static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6823{
6824	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6825	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6826			  PACKET3_TC_ACTION_ENA |
6827			  PACKET3_SH_KCACHE_ACTION_ENA |
6828			  PACKET3_SH_ICACHE_ACTION_ENA |
6829			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6830	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6831	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6832	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6833}
6834
6835static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6836{
6837	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6838	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6839			  PACKET3_TC_ACTION_ENA |
6840			  PACKET3_SH_KCACHE_ACTION_ENA |
6841			  PACKET3_SH_ICACHE_ACTION_ENA |
6842			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6843	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6844	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6845	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6846	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6847	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6848}
6849
6850
6851/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6852#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
6853static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6854					uint32_t pipe, bool enable)
6855{
6856	uint32_t val;
6857	uint32_t wcl_cs_reg;
6858
6859	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6860
6861	switch (pipe) {
6862	case 0:
6863		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6864		break;
6865	case 1:
6866		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6867		break;
6868	case 2:
6869		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6870		break;
6871	case 3:
6872		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6873		break;
6874	default:
6875		DRM_DEBUG("invalid pipe %d\n", pipe);
6876		return;
6877	}
6878
6879	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6880
6881}
6882
6883#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
6884static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
 
6885{
6886	struct amdgpu_device *adev = ring->adev;
6887	uint32_t val;
6888	int i;
6889
6890	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6891	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6892	 * around 25% of gpu resources.
6893	 */
6894	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6895	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6896
6897	/* Restrict waves for normal/low priority compute queues as well
6898	 * to get best QoS for high priority compute jobs.
6899	 *
6900	 * amdgpu controls only 1st ME(0-3 CS pipes).
6901	 */
6902	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6903		if (i != ring->pipe)
6904			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6905
6906	}
6907
 
 
6908}
6909
6910static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6911	.name = "gfx_v8_0",
6912	.early_init = gfx_v8_0_early_init,
6913	.late_init = gfx_v8_0_late_init,
6914	.sw_init = gfx_v8_0_sw_init,
6915	.sw_fini = gfx_v8_0_sw_fini,
6916	.hw_init = gfx_v8_0_hw_init,
6917	.hw_fini = gfx_v8_0_hw_fini,
6918	.suspend = gfx_v8_0_suspend,
6919	.resume = gfx_v8_0_resume,
6920	.is_idle = gfx_v8_0_is_idle,
6921	.wait_for_idle = gfx_v8_0_wait_for_idle,
6922	.check_soft_reset = gfx_v8_0_check_soft_reset,
6923	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6924	.soft_reset = gfx_v8_0_soft_reset,
6925	.post_soft_reset = gfx_v8_0_post_soft_reset,
6926	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6927	.set_powergating_state = gfx_v8_0_set_powergating_state,
6928	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6929};
6930
6931static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6932	.type = AMDGPU_RING_TYPE_GFX,
6933	.align_mask = 0xff,
6934	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6935	.support_64bit_ptrs = false,
6936	.get_rptr = gfx_v8_0_ring_get_rptr,
6937	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6938	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6939	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6940		5 +  /* COND_EXEC */
6941		7 +  /* PIPELINE_SYNC */
6942		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6943		12 +  /* FENCE for VM_FLUSH */
6944		20 + /* GDS switch */
6945		4 + /* double SWITCH_BUFFER,
6946		       the first COND_EXEC jump to the place just
6947			   prior to this double SWITCH_BUFFER  */
6948		5 + /* COND_EXEC */
6949		7 +	 /*	HDP_flush */
6950		4 +	 /*	VGT_flush */
6951		14 + /*	CE_META */
6952		31 + /*	DE_META */
6953		3 + /* CNTX_CTRL */
6954		5 + /* HDP_INVL */
6955		12 + 12 + /* FENCE x2 */
6956		2 + /* SWITCH_BUFFER */
6957		5, /* SURFACE_SYNC */
6958	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6959	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6960	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6961	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6962	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6963	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6964	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6965	.test_ring = gfx_v8_0_ring_test_ring,
6966	.test_ib = gfx_v8_0_ring_test_ib,
6967	.insert_nop = amdgpu_ring_insert_nop,
6968	.pad_ib = amdgpu_ring_generic_pad_ib,
6969	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6970	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6971	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6972	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6973	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6974	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6975	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
6976};
6977
6978static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6979	.type = AMDGPU_RING_TYPE_COMPUTE,
6980	.align_mask = 0xff,
6981	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6982	.support_64bit_ptrs = false,
6983	.get_rptr = gfx_v8_0_ring_get_rptr,
6984	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6985	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6986	.emit_frame_size =
6987		20 + /* gfx_v8_0_ring_emit_gds_switch */
6988		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6989		5 + /* hdp_invalidate */
6990		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6991		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6992		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6993		7 + /* gfx_v8_0_emit_mem_sync_compute */
6994		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6995		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6996	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6997	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6998	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6999	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7000	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7001	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7002	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7003	.test_ring = gfx_v8_0_ring_test_ring,
7004	.test_ib = gfx_v8_0_ring_test_ib,
7005	.insert_nop = amdgpu_ring_insert_nop,
7006	.pad_ib = amdgpu_ring_generic_pad_ib,
 
7007	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7008	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7009	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
7010};
7011
7012static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7013	.type = AMDGPU_RING_TYPE_KIQ,
7014	.align_mask = 0xff,
7015	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7016	.support_64bit_ptrs = false,
7017	.get_rptr = gfx_v8_0_ring_get_rptr,
7018	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7019	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7020	.emit_frame_size =
7021		20 + /* gfx_v8_0_ring_emit_gds_switch */
7022		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7023		5 + /* hdp_invalidate */
7024		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7025		17 + /* gfx_v8_0_ring_emit_vm_flush */
7026		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7027	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
 
7028	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7029	.test_ring = gfx_v8_0_ring_test_ring,
 
7030	.insert_nop = amdgpu_ring_insert_nop,
7031	.pad_ib = amdgpu_ring_generic_pad_ib,
7032	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7033	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7034};
7035
7036static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7037{
7038	int i;
7039
7040	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7041
7042	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7043		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7044
7045	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7046		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7047}
7048
7049static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7050	.set = gfx_v8_0_set_eop_interrupt_state,
7051	.process = gfx_v8_0_eop_irq,
7052};
7053
7054static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7055	.set = gfx_v8_0_set_priv_reg_fault_state,
7056	.process = gfx_v8_0_priv_reg_irq,
7057};
7058
7059static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7060	.set = gfx_v8_0_set_priv_inst_fault_state,
7061	.process = gfx_v8_0_priv_inst_irq,
7062};
7063
7064static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7065	.set = gfx_v8_0_set_cp_ecc_int_state,
7066	.process = gfx_v8_0_cp_ecc_error_irq,
7067};
7068
7069static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7070	.set = gfx_v8_0_set_sq_int_state,
7071	.process = gfx_v8_0_sq_irq,
7072};
7073
7074static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7075{
7076	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7077	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7078
7079	adev->gfx.priv_reg_irq.num_types = 1;
7080	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7081
7082	adev->gfx.priv_inst_irq.num_types = 1;
7083	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7084
7085	adev->gfx.cp_ecc_error_irq.num_types = 1;
7086	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7087
7088	adev->gfx.sq_irq.num_types = 1;
7089	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7090}
7091
7092static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7093{
7094	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7095}
7096
7097static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7098{
7099	/* init asci gds info */
7100	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7101	adev->gds.gws_size = 64;
7102	adev->gds.oa_size = 16;
7103	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7104}
7105
7106static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7107						 u32 bitmap)
7108{
7109	u32 data;
7110
7111	if (!bitmap)
7112		return;
7113
7114	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7115	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7116
7117	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7118}
7119
7120static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7121{
7122	u32 data, mask;
7123
7124	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7125		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7126
7127	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7128
7129	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7130}
7131
7132static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7133{
7134	int i, j, k, counter, active_cu_number = 0;
7135	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7136	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7137	unsigned disable_masks[4 * 2];
7138	u32 ao_cu_num;
7139
7140	memset(cu_info, 0, sizeof(*cu_info));
7141
7142	if (adev->flags & AMD_IS_APU)
7143		ao_cu_num = 2;
7144	else
7145		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7146
7147	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7148
7149	mutex_lock(&adev->grbm_idx_mutex);
7150	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7151		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7152			mask = 1;
7153			ao_bitmap = 0;
7154			counter = 0;
7155			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7156			if (i < 4 && j < 2)
7157				gfx_v8_0_set_user_cu_inactive_bitmap(
7158					adev, disable_masks[i * 2 + j]);
7159			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7160			cu_info->bitmap[i][j] = bitmap;
7161
7162			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7163				if (bitmap & mask) {
7164					if (counter < ao_cu_num)
7165						ao_bitmap |= mask;
7166					counter ++;
7167				}
7168				mask <<= 1;
7169			}
7170			active_cu_number += counter;
7171			if (i < 2 && j < 2)
7172				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7173			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7174		}
7175	}
7176	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7177	mutex_unlock(&adev->grbm_idx_mutex);
7178
7179	cu_info->number = active_cu_number;
7180	cu_info->ao_cu_mask = ao_cu_mask;
7181	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7182	cu_info->max_waves_per_simd = 10;
7183	cu_info->max_scratch_slots_per_cu = 32;
7184	cu_info->wave_front_size = 64;
7185	cu_info->lds_size = 64;
7186}
7187
7188const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7189{
7190	.type = AMD_IP_BLOCK_TYPE_GFX,
7191	.major = 8,
7192	.minor = 0,
7193	.rev = 0,
7194	.funcs = &gfx_v8_0_ip_funcs,
7195};
7196
7197const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7198{
7199	.type = AMD_IP_BLOCK_TYPE_GFX,
7200	.major = 8,
7201	.minor = 1,
7202	.rev = 0,
7203	.funcs = &gfx_v8_0_ip_funcs,
7204};
7205
7206static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7207{
7208	uint64_t ce_payload_addr;
7209	int cnt_ce;
7210	union {
7211		struct vi_ce_ib_state regular;
7212		struct vi_ce_ib_state_chained_ib chained;
7213	} ce_payload = {};
7214
7215	if (ring->adev->virt.chained_ib_support) {
7216		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7217			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7218		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7219	} else {
7220		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7221			offsetof(struct vi_gfx_meta_data, ce_payload);
7222		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7223	}
7224
7225	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7226	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7227				WRITE_DATA_DST_SEL(8) |
7228				WR_CONFIRM) |
7229				WRITE_DATA_CACHE_POLICY(0));
7230	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7231	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7232	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7233}
7234
7235static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7236{
7237	uint64_t de_payload_addr, gds_addr, csa_addr;
7238	int cnt_de;
7239	union {
7240		struct vi_de_ib_state regular;
7241		struct vi_de_ib_state_chained_ib chained;
7242	} de_payload = {};
7243
7244	csa_addr = amdgpu_csa_vaddr(ring->adev);
7245	gds_addr = csa_addr + 4096;
7246	if (ring->adev->virt.chained_ib_support) {
7247		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7248		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7249		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7250		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7251	} else {
7252		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7253		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7254		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7255		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7256	}
7257
7258	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7259	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7260				WRITE_DATA_DST_SEL(8) |
7261				WR_CONFIRM) |
7262				WRITE_DATA_CACHE_POLICY(0));
7263	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7264	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7265	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7266}