Linux Audio

Check our new training course

Embedded Linux training

Mar 10-20, 2025, special US time zones
Register
Loading...
v6.2
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "amdgpu_ring.h"
  33#include "vi.h"
  34#include "vi_structs.h"
  35#include "vid.h"
  36#include "amdgpu_ucode.h"
  37#include "amdgpu_atombios.h"
  38#include "atombios_i2c.h"
  39#include "clearstate_vi.h"
  40
  41#include "gmc/gmc_8_2_d.h"
  42#include "gmc/gmc_8_2_sh_mask.h"
  43
  44#include "oss/oss_3_0_d.h"
  45#include "oss/oss_3_0_sh_mask.h"
  46
  47#include "bif/bif_5_0_d.h"
  48#include "bif/bif_5_0_sh_mask.h"
 
  49#include "gca/gfx_8_0_d.h"
  50#include "gca/gfx_8_0_enum.h"
  51#include "gca/gfx_8_0_sh_mask.h"
 
  52
  53#include "dce/dce_10_0_d.h"
  54#include "dce/dce_10_0_sh_mask.h"
  55
  56#include "smu/smu_7_1_3_d.h"
  57
  58#include "ivsrcid/ivsrcid_vislands30.h"
  59
  60#define GFX8_NUM_GFX_RINGS     1
  61#define GFX8_MEC_HPD_SIZE 4096
  62
  63#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  65#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  66#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  67
  68#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  69#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  70#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  71#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  72#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  73#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  74#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  75#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  76#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  77
  78#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  79#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  80#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  81#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  82#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  83#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  84
  85/* BPM SERDES CMD */
  86#define SET_BPM_SERDES_CMD    1
  87#define CLE_BPM_SERDES_CMD    0
  88
  89/* BPM Register Address*/
  90enum {
  91	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  92	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  93	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  94	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  95	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  96	BPM_REG_FGCG_MAX
  97};
  98
  99#define RLC_FormatDirectRegListLength        14
 100
 101MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 102MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 103MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 104MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 105MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 106MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 109MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 111MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 112MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 113
 114MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 115MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 116MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 117MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 118MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 119MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 122MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 124MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 125MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 126
 127MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 128MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 129MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 130MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 131MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 132MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 133
 134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 135MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 144MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 145
 146MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 147MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 148MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 149MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 150MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 151MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 152MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 153MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 155MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 156MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 157
 
 
 
 
 
 
 
 158MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 163MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 164MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 165MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 166MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 167MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 168MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 169
 170MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 171MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 172MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 173MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 174MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 175MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 176
 177static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 178{
 179	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 180	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 181	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 182	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 183	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 184	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 185	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 186	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 187	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 188	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 189	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 190	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 191	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 192	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 193	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 194	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 195};
 196
 197static const u32 golden_settings_tonga_a11[] =
 198{
 199	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 200	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 201	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 202	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 203	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 204	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 205	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 206	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 207	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 208	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 209	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 210	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 211	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 212	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 213	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 214	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 215};
 216
 217static const u32 tonga_golden_common_all[] =
 218{
 219	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 220	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 221	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 222	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 223	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 224	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 225	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 226	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 227};
 228
 229static const u32 tonga_mgcg_cgcg_init[] =
 230{
 231	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 232	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 233	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 234	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 235	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 236	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 237	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 238	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 239	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 240	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 241	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 242	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 243	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 244	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 245	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 246	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 247	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 248	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 249	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 250	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 251	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 252	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 253	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 254	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 255	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 256	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 257	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 258	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 259	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 260	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 261	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 262	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 263	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 264	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 265	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 266	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 267	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 268	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 269	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 270	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 271	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 272	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 273	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 274	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 275	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 276	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 277	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 278	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 279	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 280	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 281	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 282	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 283	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 284	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 285	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 286	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 287	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 288	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 289	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 290	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 291	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 292	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 293	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 294	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 295	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 296	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 297	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 298	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 299	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 300	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 301	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 302	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 303	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 304	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 305	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 306};
 307
 308static const u32 golden_settings_vegam_a11[] =
 309{
 310	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 311	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
 312	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 313	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 314	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 315	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 316	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
 317	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
 318	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 319	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 320	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 321	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 322	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 323	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 324	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 325	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
 326	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 327};
 328
 329static const u32 vegam_golden_common_all[] =
 330{
 331	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 332	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 333	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 334	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 335	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 336	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 337};
 338
 339static const u32 golden_settings_polaris11_a11[] =
 340{
 341	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 342	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 343	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 344	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 345	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 346	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 347	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 348	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 349	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 350	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 351	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 352	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 353	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 354	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 355	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 356	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 357	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 358};
 359
 360static const u32 polaris11_golden_common_all[] =
 361{
 362	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 363	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 364	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 365	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 366	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 367	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 368};
 369
 370static const u32 golden_settings_polaris10_a11[] =
 371{
 372	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 373	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 374	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 375	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 376	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 377	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 378	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 379	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 380	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 381	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 382	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 383	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 384	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 385	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 386	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 387	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 388	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 389};
 390
 391static const u32 polaris10_golden_common_all[] =
 392{
 393	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 394	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 395	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 396	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 397	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 398	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 399	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 400	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 401};
 402
 403static const u32 fiji_golden_common_all[] =
 404{
 405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 406	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 407	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 408	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 409	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 410	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 411	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 412	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 413	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 414	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 415};
 416
 417static const u32 golden_settings_fiji_a10[] =
 418{
 419	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 421	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 422	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 423	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 425	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 426	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 427	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 428	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 429	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 430};
 431
 432static const u32 fiji_mgcg_cgcg_init[] =
 433{
 434	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 435	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 436	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 437	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 438	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 439	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 440	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 441	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 442	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 443	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 444	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 445	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 446	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 447	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 451	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 452	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 453	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 454	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 455	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 456	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 459	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 460	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 461	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 462	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 463	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 464	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 465	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 466	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 467	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 468	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 469};
 470
 471static const u32 golden_settings_iceland_a11[] =
 472{
 473	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 474	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 475	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 476	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 477	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 478	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 479	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 480	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 481	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 482	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 483	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 484	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 485	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 486	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 487	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 488	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 489};
 490
 491static const u32 iceland_golden_common_all[] =
 492{
 493	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 494	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 495	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 496	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 497	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 498	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 499	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 500	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 501};
 502
 503static const u32 iceland_mgcg_cgcg_init[] =
 504{
 505	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 506	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 507	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 508	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 509	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 510	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 511	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 512	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 513	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 514	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 515	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 516	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 517	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 518	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 519	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 520	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 521	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 522	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 523	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 524	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 525	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 526	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 527	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 528	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 529	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 530	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 531	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 532	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 533	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 534	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 535	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 536	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 537	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 538	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 539	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 540	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 541	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 542	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 543	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 544	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 545	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 546	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 547	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 548	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 549	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 550	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 551	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 552	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 553	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 554	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 555	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 556	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 557	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 558	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 559	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 560	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 561	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 562	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 563	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 564	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 565	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 566	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 567	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 568	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 569};
 570
 571static const u32 cz_golden_settings_a11[] =
 572{
 573	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 574	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 575	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 576	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 577	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 578	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 579	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 580	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 581	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 582	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 583	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 584	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 585};
 586
 587static const u32 cz_golden_common_all[] =
 588{
 589	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 590	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 591	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 592	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 593	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 594	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 595	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 596	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 597};
 598
 599static const u32 cz_mgcg_cgcg_init[] =
 600{
 601	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 602	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 603	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 604	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 605	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 606	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 607	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 608	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 609	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 610	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 611	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 612	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 613	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 614	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 615	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 616	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 617	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 618	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 619	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 620	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 621	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 622	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 623	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 624	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 625	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 626	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 627	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 628	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 630	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 631	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 632	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 633	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 634	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 635	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 636	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 637	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 638	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 639	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 640	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 641	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 642	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 643	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 644	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 645	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 646	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 647	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 648	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 649	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 650	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 651	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 652	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 653	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 654	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 655	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 656	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 657	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 658	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 659	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 660	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 661	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 662	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 663	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 664	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 665	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 666	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 667	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 668	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 669	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 670	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 671	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 672	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 673	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 674	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 675	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 676};
 677
 678static const u32 stoney_golden_settings_a11[] =
 679{
 680	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 681	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 682	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 683	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 684	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 685	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 686	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 687	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 688	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 689	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 690};
 691
 692static const u32 stoney_golden_common_all[] =
 693{
 694	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 695	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 696	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 697	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 698	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 699	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 700	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 701	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 702};
 703
 704static const u32 stoney_mgcg_cgcg_init[] =
 705{
 706	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 707	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 708	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 709	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 710	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 711};
 712
 713
 714static const char * const sq_edc_source_names[] = {
 715	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
 716	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
 717	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
 718	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
 719	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
 720	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
 721	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
 722};
 723
 724static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 725static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 726static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 727static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 728static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 729static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 730static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 731static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 732
 733#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
 734#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
 735
 736static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 737{
 738	uint32_t data;
 739
 740	switch (adev->asic_type) {
 741	case CHIP_TOPAZ:
 742		amdgpu_device_program_register_sequence(adev,
 743							iceland_mgcg_cgcg_init,
 744							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 745		amdgpu_device_program_register_sequence(adev,
 746							golden_settings_iceland_a11,
 747							ARRAY_SIZE(golden_settings_iceland_a11));
 748		amdgpu_device_program_register_sequence(adev,
 749							iceland_golden_common_all,
 750							ARRAY_SIZE(iceland_golden_common_all));
 751		break;
 752	case CHIP_FIJI:
 753		amdgpu_device_program_register_sequence(adev,
 754							fiji_mgcg_cgcg_init,
 755							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 756		amdgpu_device_program_register_sequence(adev,
 757							golden_settings_fiji_a10,
 758							ARRAY_SIZE(golden_settings_fiji_a10));
 759		amdgpu_device_program_register_sequence(adev,
 760							fiji_golden_common_all,
 761							ARRAY_SIZE(fiji_golden_common_all));
 762		break;
 763
 764	case CHIP_TONGA:
 765		amdgpu_device_program_register_sequence(adev,
 766							tonga_mgcg_cgcg_init,
 767							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 768		amdgpu_device_program_register_sequence(adev,
 769							golden_settings_tonga_a11,
 770							ARRAY_SIZE(golden_settings_tonga_a11));
 771		amdgpu_device_program_register_sequence(adev,
 772							tonga_golden_common_all,
 773							ARRAY_SIZE(tonga_golden_common_all));
 774		break;
 775	case CHIP_VEGAM:
 776		amdgpu_device_program_register_sequence(adev,
 777							golden_settings_vegam_a11,
 778							ARRAY_SIZE(golden_settings_vegam_a11));
 779		amdgpu_device_program_register_sequence(adev,
 780							vegam_golden_common_all,
 781							ARRAY_SIZE(vegam_golden_common_all));
 782		break;
 783	case CHIP_POLARIS11:
 784	case CHIP_POLARIS12:
 785		amdgpu_device_program_register_sequence(adev,
 786							golden_settings_polaris11_a11,
 787							ARRAY_SIZE(golden_settings_polaris11_a11));
 788		amdgpu_device_program_register_sequence(adev,
 789							polaris11_golden_common_all,
 790							ARRAY_SIZE(polaris11_golden_common_all));
 791		break;
 792	case CHIP_POLARIS10:
 793		amdgpu_device_program_register_sequence(adev,
 794							golden_settings_polaris10_a11,
 795							ARRAY_SIZE(golden_settings_polaris10_a11));
 796		amdgpu_device_program_register_sequence(adev,
 797							polaris10_golden_common_all,
 798							ARRAY_SIZE(polaris10_golden_common_all));
 799		data = RREG32_SMC(ixCG_ACLK_CNTL);
 800		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
 801		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
 802		WREG32_SMC(ixCG_ACLK_CNTL, data);
 803		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
 804		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 805		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 806		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
 807			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 808			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 809		}
 810		break;
 811	case CHIP_CARRIZO:
 812		amdgpu_device_program_register_sequence(adev,
 813							cz_mgcg_cgcg_init,
 814							ARRAY_SIZE(cz_mgcg_cgcg_init));
 815		amdgpu_device_program_register_sequence(adev,
 816							cz_golden_settings_a11,
 817							ARRAY_SIZE(cz_golden_settings_a11));
 818		amdgpu_device_program_register_sequence(adev,
 819							cz_golden_common_all,
 820							ARRAY_SIZE(cz_golden_common_all));
 821		break;
 822	case CHIP_STONEY:
 823		amdgpu_device_program_register_sequence(adev,
 824							stoney_mgcg_cgcg_init,
 825							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 826		amdgpu_device_program_register_sequence(adev,
 827							stoney_golden_settings_a11,
 828							ARRAY_SIZE(stoney_golden_settings_a11));
 829		amdgpu_device_program_register_sequence(adev,
 830							stoney_golden_common_all,
 831							ARRAY_SIZE(stoney_golden_common_all));
 832		break;
 833	default:
 834		break;
 835	}
 836}
 837
 
 
 
 
 
 
 
 
 
 
 
 
 838static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 839{
 840	struct amdgpu_device *adev = ring->adev;
 
 841	uint32_t tmp = 0;
 842	unsigned i;
 843	int r;
 844
 845	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
 
 
 
 
 
 846	r = amdgpu_ring_alloc(ring, 3);
 847	if (r)
 
 
 
 848		return r;
 849
 850	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 851	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
 852	amdgpu_ring_write(ring, 0xDEADBEEF);
 853	amdgpu_ring_commit(ring);
 854
 855	for (i = 0; i < adev->usec_timeout; i++) {
 856		tmp = RREG32(mmSCRATCH_REG0);
 857		if (tmp == 0xDEADBEEF)
 858			break;
 859		udelay(1);
 860	}
 861
 862	if (i >= adev->usec_timeout)
 863		r = -ETIMEDOUT;
 864
 
 
 
 
 
 865	return r;
 866}
 867
 868static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 869{
 870	struct amdgpu_device *adev = ring->adev;
 871	struct amdgpu_ib ib;
 872	struct dma_fence *f = NULL;
 873
 874	unsigned int index;
 875	uint64_t gpu_addr;
 876	uint32_t tmp;
 877	long r;
 878
 879	r = amdgpu_device_wb_get(adev, &index);
 880	if (r)
 
 881		return r;
 882
 883	gpu_addr = adev->wb.gpu_addr + (index * 4);
 884	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 885	memset(&ib, 0, sizeof(ib));
 886	r = amdgpu_ib_get(adev, NULL, 16,
 887					AMDGPU_IB_POOL_DIRECT, &ib);
 888	if (r)
 889		goto err1;
 
 
 
 
 
 890
 891	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 892	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 893	ib.ptr[2] = lower_32_bits(gpu_addr);
 894	ib.ptr[3] = upper_32_bits(gpu_addr);
 895	ib.ptr[4] = 0xDEADBEEF;
 896	ib.length_dw = 5;
 897
 898	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 899	if (r)
 900		goto err2;
 901
 902	r = dma_fence_wait_timeout(f, false, timeout);
 903	if (r == 0) {
 
 904		r = -ETIMEDOUT;
 905		goto err2;
 906	} else if (r < 0) {
 
 907		goto err2;
 908	}
 909
 910	tmp = adev->wb.wb[index];
 911	if (tmp == 0xDEADBEEF)
 912		r = 0;
 913	else
 
 
 914		r = -EINVAL;
 915
 916err2:
 917	amdgpu_ib_free(adev, &ib, NULL);
 918	dma_fence_put(f);
 919err1:
 920	amdgpu_device_wb_free(adev, index);
 921	return r;
 922}
 923
 924
 925static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 926{
 927	release_firmware(adev->gfx.pfp_fw);
 928	adev->gfx.pfp_fw = NULL;
 929	release_firmware(adev->gfx.me_fw);
 930	adev->gfx.me_fw = NULL;
 931	release_firmware(adev->gfx.ce_fw);
 932	adev->gfx.ce_fw = NULL;
 933	release_firmware(adev->gfx.rlc_fw);
 934	adev->gfx.rlc_fw = NULL;
 935	release_firmware(adev->gfx.mec_fw);
 936	adev->gfx.mec_fw = NULL;
 937	if ((adev->asic_type != CHIP_STONEY) &&
 938	    (adev->asic_type != CHIP_TOPAZ))
 939		release_firmware(adev->gfx.mec2_fw);
 940	adev->gfx.mec2_fw = NULL;
 941
 942	kfree(adev->gfx.rlc.register_list_format);
 943}
 944
 945static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 946{
 947	const char *chip_name;
 948	char fw_name[30];
 949	int err;
 950	struct amdgpu_firmware_info *info = NULL;
 951	const struct common_firmware_header *header = NULL;
 952	const struct gfx_firmware_header_v1_0 *cp_hdr;
 953	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 954	unsigned int *tmp = NULL, i;
 955
 956	DRM_DEBUG("\n");
 957
 958	switch (adev->asic_type) {
 959	case CHIP_TOPAZ:
 960		chip_name = "topaz";
 961		break;
 962	case CHIP_TONGA:
 963		chip_name = "tonga";
 964		break;
 965	case CHIP_CARRIZO:
 966		chip_name = "carrizo";
 967		break;
 968	case CHIP_FIJI:
 969		chip_name = "fiji";
 970		break;
 971	case CHIP_STONEY:
 972		chip_name = "stoney";
 973		break;
 974	case CHIP_POLARIS10:
 975		chip_name = "polaris10";
 976		break;
 977	case CHIP_POLARIS11:
 978		chip_name = "polaris11";
 979		break;
 980	case CHIP_POLARIS12:
 981		chip_name = "polaris12";
 982		break;
 983	case CHIP_VEGAM:
 984		chip_name = "vegam";
 985		break;
 986	default:
 987		BUG();
 988	}
 989
 990	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 991		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
 992		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 993		if (err == -ENOENT) {
 994			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 995			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 996		}
 997	} else {
 998		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 999		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1000	}
1001	if (err)
1002		goto out;
1003	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1004	if (err)
1005		goto out;
1006	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1007	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1008	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1009
1010	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1011		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1012		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1013		if (err == -ENOENT) {
1014			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1015			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1016		}
1017	} else {
1018		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1019		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1020	}
1021	if (err)
1022		goto out;
1023	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1024	if (err)
1025		goto out;
1026	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1027	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1028
1029	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1030
1031	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1032		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1033		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1034		if (err == -ENOENT) {
1035			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1036			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1037		}
1038	} else {
1039		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1040		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1041	}
1042	if (err)
1043		goto out;
1044	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1045	if (err)
1046		goto out;
1047	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1048	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1049	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1050
1051	/*
1052	 * Support for MCBP/Virtualization in combination with chained IBs is
1053	 * formal released on feature version #46
1054	 */
1055	if (adev->gfx.ce_feature_version >= 46 &&
1056	    adev->gfx.pfp_feature_version >= 46) {
1057		adev->virt.chained_ib_support = true;
1058		DRM_INFO("Chained IB support enabled!\n");
1059	} else
1060		adev->virt.chained_ib_support = false;
1061
1062	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1063	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1064	if (err)
1065		goto out;
1066	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1067	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1068	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1069	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1070
1071	adev->gfx.rlc.save_and_restore_offset =
1072			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1073	adev->gfx.rlc.clear_state_descriptor_offset =
1074			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1075	adev->gfx.rlc.avail_scratch_ram_locations =
1076			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1077	adev->gfx.rlc.reg_restore_list_size =
1078			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1079	adev->gfx.rlc.reg_list_format_start =
1080			le32_to_cpu(rlc_hdr->reg_list_format_start);
1081	adev->gfx.rlc.reg_list_format_separate_start =
1082			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1083	adev->gfx.rlc.starting_offsets_start =
1084			le32_to_cpu(rlc_hdr->starting_offsets_start);
1085	adev->gfx.rlc.reg_list_format_size_bytes =
1086			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1087	adev->gfx.rlc.reg_list_size_bytes =
1088			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1089
1090	adev->gfx.rlc.register_list_format =
1091			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1092					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1093
1094	if (!adev->gfx.rlc.register_list_format) {
1095		err = -ENOMEM;
1096		goto out;
1097	}
1098
1099	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1101	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1102		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1103
1104	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1105
1106	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1107			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1108	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1109		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1110
1111	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1112		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1113		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1114		if (err == -ENOENT) {
1115			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1116			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1117		}
1118	} else {
1119		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1120		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1121	}
1122	if (err)
1123		goto out;
1124	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1125	if (err)
1126		goto out;
1127	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1128	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1129	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1130
1131	if ((adev->asic_type != CHIP_STONEY) &&
1132	    (adev->asic_type != CHIP_TOPAZ)) {
1133		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1134			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1135			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1136			if (err == -ENOENT) {
1137				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1138				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1139			}
1140		} else {
1141			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1142			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1143		}
1144		if (!err) {
1145			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1146			if (err)
1147				goto out;
1148			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1149				adev->gfx.mec2_fw->data;
1150			adev->gfx.mec2_fw_version =
1151				le32_to_cpu(cp_hdr->header.ucode_version);
1152			adev->gfx.mec2_feature_version =
1153				le32_to_cpu(cp_hdr->ucode_feature_version);
1154		} else {
1155			err = 0;
1156			adev->gfx.mec2_fw = NULL;
1157		}
1158	}
1159
1160	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1161	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1162	info->fw = adev->gfx.pfp_fw;
1163	header = (const struct common_firmware_header *)info->fw->data;
1164	adev->firmware.fw_size +=
1165		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166
1167	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1168	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1169	info->fw = adev->gfx.me_fw;
1170	header = (const struct common_firmware_header *)info->fw->data;
1171	adev->firmware.fw_size +=
1172		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173
1174	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1175	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1176	info->fw = adev->gfx.ce_fw;
1177	header = (const struct common_firmware_header *)info->fw->data;
1178	adev->firmware.fw_size +=
1179		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1182	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1183	info->fw = adev->gfx.rlc_fw;
1184	header = (const struct common_firmware_header *)info->fw->data;
1185	adev->firmware.fw_size +=
1186		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
1188	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1189	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1190	info->fw = adev->gfx.mec_fw;
1191	header = (const struct common_firmware_header *)info->fw->data;
1192	adev->firmware.fw_size +=
1193		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1194
1195	/* we need account JT in */
1196	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1197	adev->firmware.fw_size +=
1198		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
 
 
1199
1200	if (amdgpu_sriov_vf(adev)) {
1201		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1202		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1203		info->fw = adev->gfx.mec_fw;
1204		adev->firmware.fw_size +=
1205			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1206	}
1207
1208	if (adev->gfx.mec2_fw) {
1209		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1210		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1211		info->fw = adev->gfx.mec2_fw;
 
 
 
 
 
 
1212		header = (const struct common_firmware_header *)info->fw->data;
1213		adev->firmware.fw_size +=
1214			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1215	}
1216
1217out:
1218	if (err) {
1219		dev_err(adev->dev,
1220			"gfx8: Failed to load firmware \"%s\"\n",
1221			fw_name);
1222		release_firmware(adev->gfx.pfp_fw);
1223		adev->gfx.pfp_fw = NULL;
1224		release_firmware(adev->gfx.me_fw);
1225		adev->gfx.me_fw = NULL;
1226		release_firmware(adev->gfx.ce_fw);
1227		adev->gfx.ce_fw = NULL;
1228		release_firmware(adev->gfx.rlc_fw);
1229		adev->gfx.rlc_fw = NULL;
1230		release_firmware(adev->gfx.mec_fw);
1231		adev->gfx.mec_fw = NULL;
1232		release_firmware(adev->gfx.mec2_fw);
1233		adev->gfx.mec2_fw = NULL;
1234	}
1235	return err;
1236}
1237
1238static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1239				    volatile u32 *buffer)
1240{
1241	u32 count = 0, i;
1242	const struct cs_section_def *sect = NULL;
1243	const struct cs_extent_def *ext = NULL;
1244
1245	if (adev->gfx.rlc.cs_data == NULL)
1246		return;
1247	if (buffer == NULL)
1248		return;
1249
1250	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1251	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1252
1253	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1254	buffer[count++] = cpu_to_le32(0x80000000);
1255	buffer[count++] = cpu_to_le32(0x80000000);
1256
1257	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1258		for (ext = sect->section; ext->extent != NULL; ++ext) {
1259			if (sect->id == SECT_CONTEXT) {
1260				buffer[count++] =
1261					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1262				buffer[count++] = cpu_to_le32(ext->reg_index -
1263						PACKET3_SET_CONTEXT_REG_START);
1264				for (i = 0; i < ext->reg_count; i++)
1265					buffer[count++] = cpu_to_le32(ext->extent[i]);
1266			} else {
1267				return;
1268			}
1269		}
1270	}
1271
1272	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1273	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1274			PACKET3_SET_CONTEXT_REG_START);
1275	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1276	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1277
1278	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1279	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1280
1281	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1282	buffer[count++] = cpu_to_le32(0);
1283}
1284
1285static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1286{
 
 
 
 
 
 
1287	if (adev->asic_type == CHIP_CARRIZO)
1288		return 5;
1289	else
1290		return 4;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1291}
1292
1293static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1294{
 
 
1295	const struct cs_section_def *cs_data;
1296	int r;
1297
1298	adev->gfx.rlc.cs_data = vi_cs_data;
1299
1300	cs_data = adev->gfx.rlc.cs_data;
1301
1302	if (cs_data) {
1303		/* init clear state block */
1304		r = amdgpu_gfx_rlc_init_csb(adev);
1305		if (r)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1306			return r;
 
 
 
 
 
 
 
 
 
 
 
 
 
1307	}
1308
1309	if ((adev->asic_type == CHIP_CARRIZO) ||
1310	    (adev->asic_type == CHIP_STONEY)) {
1311		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1312		r = amdgpu_gfx_rlc_init_cpt(adev);
1313		if (r)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1314			return r;
1315	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1316
1317	/* init spm vmid with 0xf */
1318	if (adev->gfx.rlc.funcs->update_spm_vmid)
1319		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1320
1321	return 0;
1322}
1323
1324static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325{
1326	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
 
 
 
 
 
 
 
 
 
 
1327}
1328
 
 
1329static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330{
1331	int r;
1332	u32 *hpd;
1333	size_t mec_hpd_size;
1334
1335	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336
1337	/* take ownership of the relevant compute queues */
1338	amdgpu_gfx_compute_queue_acquire(adev);
1339
1340	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341	if (mec_hpd_size) {
1342		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343					      AMDGPU_GEM_DOMAIN_VRAM,
1344					      &adev->gfx.mec.hpd_eop_obj,
1345					      &adev->gfx.mec.hpd_eop_gpu_addr,
1346					      (void **)&hpd);
 
 
 
 
1347		if (r) {
1348			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1349			return r;
1350		}
 
1351
1352		memset(hpd, 0, mec_hpd_size);
1353
1354		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1355		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 
 
 
 
 
 
1356	}
 
 
 
 
 
 
 
 
 
 
 
1357
1358	return 0;
1359}
1360
1361static const u32 vgpr_init_compute_shader[] =
1362{
1363	0x7e000209, 0x7e020208,
1364	0x7e040207, 0x7e060206,
1365	0x7e080205, 0x7e0a0204,
1366	0x7e0c0203, 0x7e0e0202,
1367	0x7e100201, 0x7e120200,
1368	0x7e140209, 0x7e160208,
1369	0x7e180207, 0x7e1a0206,
1370	0x7e1c0205, 0x7e1e0204,
1371	0x7e200203, 0x7e220202,
1372	0x7e240201, 0x7e260200,
1373	0x7e280209, 0x7e2a0208,
1374	0x7e2c0207, 0x7e2e0206,
1375	0x7e300205, 0x7e320204,
1376	0x7e340203, 0x7e360202,
1377	0x7e380201, 0x7e3a0200,
1378	0x7e3c0209, 0x7e3e0208,
1379	0x7e400207, 0x7e420206,
1380	0x7e440205, 0x7e460204,
1381	0x7e480203, 0x7e4a0202,
1382	0x7e4c0201, 0x7e4e0200,
1383	0x7e500209, 0x7e520208,
1384	0x7e540207, 0x7e560206,
1385	0x7e580205, 0x7e5a0204,
1386	0x7e5c0203, 0x7e5e0202,
1387	0x7e600201, 0x7e620200,
1388	0x7e640209, 0x7e660208,
1389	0x7e680207, 0x7e6a0206,
1390	0x7e6c0205, 0x7e6e0204,
1391	0x7e700203, 0x7e720202,
1392	0x7e740201, 0x7e760200,
1393	0x7e780209, 0x7e7a0208,
1394	0x7e7c0207, 0x7e7e0206,
1395	0xbf8a0000, 0xbf810000,
1396};
1397
1398static const u32 sgpr_init_compute_shader[] =
1399{
1400	0xbe8a0100, 0xbe8c0102,
1401	0xbe8e0104, 0xbe900106,
1402	0xbe920108, 0xbe940100,
1403	0xbe960102, 0xbe980104,
1404	0xbe9a0106, 0xbe9c0108,
1405	0xbe9e0100, 0xbea00102,
1406	0xbea20104, 0xbea40106,
1407	0xbea60108, 0xbea80100,
1408	0xbeaa0102, 0xbeac0104,
1409	0xbeae0106, 0xbeb00108,
1410	0xbeb20100, 0xbeb40102,
1411	0xbeb60104, 0xbeb80106,
1412	0xbeba0108, 0xbebc0100,
1413	0xbebe0102, 0xbec00104,
1414	0xbec20106, 0xbec40108,
1415	0xbec60100, 0xbec80102,
1416	0xbee60004, 0xbee70005,
1417	0xbeea0006, 0xbeeb0007,
1418	0xbee80008, 0xbee90009,
1419	0xbefc0000, 0xbf8a0000,
1420	0xbf810000, 0x00000000,
1421};
1422
1423static const u32 vgpr_init_regs[] =
1424{
1425	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1426	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1427	mmCOMPUTE_NUM_THREAD_X, 256*4,
1428	mmCOMPUTE_NUM_THREAD_Y, 1,
1429	mmCOMPUTE_NUM_THREAD_Z, 1,
1430	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1431	mmCOMPUTE_PGM_RSRC2, 20,
1432	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1433	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1434	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1435	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1436	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1437	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1438	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1439	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1440	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1441	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1442};
1443
1444static const u32 sgpr1_init_regs[] =
1445{
1446	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1447	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1448	mmCOMPUTE_NUM_THREAD_X, 256*5,
1449	mmCOMPUTE_NUM_THREAD_Y, 1,
1450	mmCOMPUTE_NUM_THREAD_Z, 1,
1451	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1452	mmCOMPUTE_PGM_RSRC2, 20,
1453	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1454	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1455	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1456	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1457	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1458	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1459	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1460	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1461	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1462	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1463};
1464
1465static const u32 sgpr2_init_regs[] =
1466{
1467	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1468	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1469	mmCOMPUTE_NUM_THREAD_X, 256*5,
1470	mmCOMPUTE_NUM_THREAD_Y, 1,
1471	mmCOMPUTE_NUM_THREAD_Z, 1,
1472	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1473	mmCOMPUTE_PGM_RSRC2, 20,
1474	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1475	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1476	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1477	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1478	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1479	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1480	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1481	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1482	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1483	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1484};
1485
1486static const u32 sec_ded_counter_registers[] =
1487{
1488	mmCPC_EDC_ATC_CNT,
1489	mmCPC_EDC_SCRATCH_CNT,
1490	mmCPC_EDC_UCODE_CNT,
1491	mmCPF_EDC_ATC_CNT,
1492	mmCPF_EDC_ROQ_CNT,
1493	mmCPF_EDC_TAG_CNT,
1494	mmCPG_EDC_ATC_CNT,
1495	mmCPG_EDC_DMA_CNT,
1496	mmCPG_EDC_TAG_CNT,
1497	mmDC_EDC_CSINVOC_CNT,
1498	mmDC_EDC_RESTORE_CNT,
1499	mmDC_EDC_STATE_CNT,
1500	mmGDS_EDC_CNT,
1501	mmGDS_EDC_GRBM_CNT,
1502	mmGDS_EDC_OA_DED,
1503	mmSPI_EDC_CNT,
1504	mmSQC_ATC_EDC_GATCL1_CNT,
1505	mmSQC_EDC_CNT,
1506	mmSQ_EDC_DED_CNT,
1507	mmSQ_EDC_INFO,
1508	mmSQ_EDC_SEC_CNT,
1509	mmTCC_EDC_CNT,
1510	mmTCP_ATC_EDC_GATCL1_CNT,
1511	mmTCP_EDC_CNT,
1512	mmTD_EDC_CNT
1513};
1514
1515static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1516{
1517	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1518	struct amdgpu_ib ib;
1519	struct dma_fence *f = NULL;
1520	int r, i;
1521	u32 tmp;
1522	unsigned total_size, vgpr_offset, sgpr_offset;
1523	u64 gpu_addr;
1524
1525	/* only supported on CZ */
1526	if (adev->asic_type != CHIP_CARRIZO)
1527		return 0;
1528
1529	/* bail if the compute ring is not ready */
1530	if (!ring->sched.ready)
1531		return 0;
1532
1533	tmp = RREG32(mmGB_EDC_MODE);
1534	WREG32(mmGB_EDC_MODE, 0);
1535
1536	total_size =
1537		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1538	total_size +=
1539		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1540	total_size +=
1541		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1542	total_size = ALIGN(total_size, 256);
1543	vgpr_offset = total_size;
1544	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1545	sgpr_offset = total_size;
1546	total_size += sizeof(sgpr_init_compute_shader);
1547
1548	/* allocate an indirect buffer to put the commands in */
1549	memset(&ib, 0, sizeof(ib));
1550	r = amdgpu_ib_get(adev, NULL, total_size,
1551					AMDGPU_IB_POOL_DIRECT, &ib);
1552	if (r) {
1553		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1554		return r;
1555	}
1556
1557	/* load the compute shaders */
1558	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1559		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1560
1561	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1562		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1563
1564	/* init the ib length to 0 */
1565	ib.length_dw = 0;
1566
1567	/* VGPR */
1568	/* write the register state for the compute dispatch */
1569	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1570		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1571		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1572		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1573	}
1574	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1575	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1576	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1577	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1578	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1579	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1580
1581	/* write dispatch packet */
1582	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1583	ib.ptr[ib.length_dw++] = 8; /* x */
1584	ib.ptr[ib.length_dw++] = 1; /* y */
1585	ib.ptr[ib.length_dw++] = 1; /* z */
1586	ib.ptr[ib.length_dw++] =
1587		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1588
1589	/* write CS partial flush packet */
1590	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1591	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1592
1593	/* SGPR1 */
1594	/* write the register state for the compute dispatch */
1595	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1596		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1597		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1598		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1599	}
1600	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1601	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1602	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1603	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1604	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1605	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1606
1607	/* write dispatch packet */
1608	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1609	ib.ptr[ib.length_dw++] = 8; /* x */
1610	ib.ptr[ib.length_dw++] = 1; /* y */
1611	ib.ptr[ib.length_dw++] = 1; /* z */
1612	ib.ptr[ib.length_dw++] =
1613		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1614
1615	/* write CS partial flush packet */
1616	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1617	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1618
1619	/* SGPR2 */
1620	/* write the register state for the compute dispatch */
1621	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1622		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1623		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1624		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1625	}
1626	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1627	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1628	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1629	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1630	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1631	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1632
1633	/* write dispatch packet */
1634	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1635	ib.ptr[ib.length_dw++] = 8; /* x */
1636	ib.ptr[ib.length_dw++] = 1; /* y */
1637	ib.ptr[ib.length_dw++] = 1; /* z */
1638	ib.ptr[ib.length_dw++] =
1639		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1640
1641	/* write CS partial flush packet */
1642	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1643	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1644
1645	/* shedule the ib on the ring */
1646	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1647	if (r) {
1648		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1649		goto fail;
1650	}
1651
1652	/* wait for the GPU to finish processing the IB */
1653	r = dma_fence_wait(f, false);
1654	if (r) {
1655		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1656		goto fail;
1657	}
1658
1659	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1660	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1661	WREG32(mmGB_EDC_MODE, tmp);
1662
1663	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1664	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1665	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1666
1667
1668	/* read back registers to clear the counters */
1669	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1670		RREG32(sec_ded_counter_registers[i]);
1671
1672fail:
1673	amdgpu_ib_free(adev, &ib, NULL);
1674	dma_fence_put(f);
1675
1676	return r;
1677}
1678
1679static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1680{
1681	u32 gb_addr_config;
1682	u32 mc_arb_ramcfg;
1683	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1684	u32 tmp;
1685	int ret;
1686
1687	switch (adev->asic_type) {
1688	case CHIP_TOPAZ:
1689		adev->gfx.config.max_shader_engines = 1;
1690		adev->gfx.config.max_tile_pipes = 2;
1691		adev->gfx.config.max_cu_per_sh = 6;
1692		adev->gfx.config.max_sh_per_se = 1;
1693		adev->gfx.config.max_backends_per_se = 2;
1694		adev->gfx.config.max_texture_channel_caches = 2;
1695		adev->gfx.config.max_gprs = 256;
1696		adev->gfx.config.max_gs_threads = 32;
1697		adev->gfx.config.max_hw_contexts = 8;
1698
1699		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1700		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1701		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1702		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1703		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1704		break;
1705	case CHIP_FIJI:
1706		adev->gfx.config.max_shader_engines = 4;
1707		adev->gfx.config.max_tile_pipes = 16;
1708		adev->gfx.config.max_cu_per_sh = 16;
1709		adev->gfx.config.max_sh_per_se = 1;
1710		adev->gfx.config.max_backends_per_se = 4;
1711		adev->gfx.config.max_texture_channel_caches = 16;
1712		adev->gfx.config.max_gprs = 256;
1713		adev->gfx.config.max_gs_threads = 32;
1714		adev->gfx.config.max_hw_contexts = 8;
1715
1716		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721		break;
1722	case CHIP_POLARIS11:
1723	case CHIP_POLARIS12:
1724		ret = amdgpu_atombios_get_gfx_info(adev);
1725		if (ret)
1726			return ret;
1727		adev->gfx.config.max_gprs = 256;
1728		adev->gfx.config.max_gs_threads = 32;
1729		adev->gfx.config.max_hw_contexts = 8;
1730
1731		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1736		break;
1737	case CHIP_POLARIS10:
1738	case CHIP_VEGAM:
1739		ret = amdgpu_atombios_get_gfx_info(adev);
1740		if (ret)
1741			return ret;
1742		adev->gfx.config.max_gprs = 256;
1743		adev->gfx.config.max_gs_threads = 32;
1744		adev->gfx.config.max_hw_contexts = 8;
1745
1746		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1747		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1748		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1749		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1750		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1751		break;
1752	case CHIP_TONGA:
1753		adev->gfx.config.max_shader_engines = 4;
1754		adev->gfx.config.max_tile_pipes = 8;
1755		adev->gfx.config.max_cu_per_sh = 8;
1756		adev->gfx.config.max_sh_per_se = 1;
1757		adev->gfx.config.max_backends_per_se = 2;
1758		adev->gfx.config.max_texture_channel_caches = 8;
1759		adev->gfx.config.max_gprs = 256;
1760		adev->gfx.config.max_gs_threads = 32;
1761		adev->gfx.config.max_hw_contexts = 8;
1762
1763		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1768		break;
1769	case CHIP_CARRIZO:
1770		adev->gfx.config.max_shader_engines = 1;
1771		adev->gfx.config.max_tile_pipes = 2;
1772		adev->gfx.config.max_sh_per_se = 1;
1773		adev->gfx.config.max_backends_per_se = 2;
1774		adev->gfx.config.max_cu_per_sh = 8;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1775		adev->gfx.config.max_texture_channel_caches = 2;
1776		adev->gfx.config.max_gprs = 256;
1777		adev->gfx.config.max_gs_threads = 32;
1778		adev->gfx.config.max_hw_contexts = 8;
1779
1780		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1784		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1785		break;
1786	case CHIP_STONEY:
1787		adev->gfx.config.max_shader_engines = 1;
1788		adev->gfx.config.max_tile_pipes = 2;
1789		adev->gfx.config.max_sh_per_se = 1;
1790		adev->gfx.config.max_backends_per_se = 1;
1791		adev->gfx.config.max_cu_per_sh = 3;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1792		adev->gfx.config.max_texture_channel_caches = 2;
1793		adev->gfx.config.max_gprs = 256;
1794		adev->gfx.config.max_gs_threads = 16;
1795		adev->gfx.config.max_hw_contexts = 8;
1796
1797		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1798		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1799		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1800		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1801		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1802		break;
1803	default:
1804		adev->gfx.config.max_shader_engines = 2;
1805		adev->gfx.config.max_tile_pipes = 4;
1806		adev->gfx.config.max_cu_per_sh = 2;
1807		adev->gfx.config.max_sh_per_se = 1;
1808		adev->gfx.config.max_backends_per_se = 2;
1809		adev->gfx.config.max_texture_channel_caches = 4;
1810		adev->gfx.config.max_gprs = 256;
1811		adev->gfx.config.max_gs_threads = 32;
1812		adev->gfx.config.max_hw_contexts = 8;
1813
1814		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1815		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1816		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1817		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1818		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1819		break;
1820	}
1821
 
1822	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1823	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1824
1825	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1826				MC_ARB_RAMCFG, NOOFBANK);
1827	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1828				MC_ARB_RAMCFG, NOOFRANKS);
1829
1830	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1831	adev->gfx.config.mem_max_burst_length_bytes = 256;
1832	if (adev->flags & AMD_IS_APU) {
1833		/* Get memory bank mapping mode. */
1834		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1835		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1836		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1837
1838		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1839		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1840		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1841
1842		/* Validate settings in case only one DIMM installed. */
1843		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1844			dimm00_addr_map = 0;
1845		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1846			dimm01_addr_map = 0;
1847		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1848			dimm10_addr_map = 0;
1849		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1850			dimm11_addr_map = 0;
1851
1852		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1853		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1854		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1855			adev->gfx.config.mem_row_size_in_kb = 2;
1856		else
1857			adev->gfx.config.mem_row_size_in_kb = 1;
1858	} else {
1859		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1860		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1861		if (adev->gfx.config.mem_row_size_in_kb > 4)
1862			adev->gfx.config.mem_row_size_in_kb = 4;
1863	}
1864
1865	adev->gfx.config.shader_engine_tile_size = 32;
1866	adev->gfx.config.num_gpus = 1;
1867	adev->gfx.config.multi_gpu_tile_size = 64;
1868
1869	/* fix up row size */
1870	switch (adev->gfx.config.mem_row_size_in_kb) {
1871	case 1:
1872	default:
1873		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1874		break;
1875	case 2:
1876		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1877		break;
1878	case 4:
1879		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1880		break;
1881	}
1882	adev->gfx.config.gb_addr_config = gb_addr_config;
1883
1884	return 0;
1885}
1886
1887static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1888					int mec, int pipe, int queue)
1889{
1890	int r;
1891	unsigned irq_type;
1892	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1893	unsigned int hw_prio;
1894
1895	ring = &adev->gfx.compute_ring[ring_id];
1896
1897	/* mec0 is me1 */
1898	ring->me = mec + 1;
1899	ring->pipe = pipe;
1900	ring->queue = queue;
1901
1902	ring->ring_obj = NULL;
1903	ring->use_doorbell = true;
1904	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1905	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1906				+ (ring_id * GFX8_MEC_HPD_SIZE);
1907	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1908
1909	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1910		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1911		+ ring->pipe;
1912
1913	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1914			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1915	/* type-2 packets are deprecated on MEC, use type-3 instead */
1916	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1917			     hw_prio, NULL);
1918	if (r)
1919		return r;
1920
1921
1922	return 0;
1923}
1924
1925static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1926
1927static int gfx_v8_0_sw_init(void *handle)
1928{
1929	int i, j, k, r, ring_id;
1930	struct amdgpu_ring *ring;
1931	struct amdgpu_kiq *kiq;
1932	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1933
1934	switch (adev->asic_type) {
1935	case CHIP_TONGA:
1936	case CHIP_CARRIZO:
1937	case CHIP_FIJI:
1938	case CHIP_POLARIS10:
1939	case CHIP_POLARIS11:
1940	case CHIP_POLARIS12:
1941	case CHIP_VEGAM:
1942		adev->gfx.mec.num_mec = 2;
1943		break;
1944	case CHIP_TOPAZ:
1945	case CHIP_STONEY:
1946	default:
1947		adev->gfx.mec.num_mec = 1;
1948		break;
1949	}
1950
1951	adev->gfx.mec.num_pipe_per_mec = 4;
1952	adev->gfx.mec.num_queue_per_pipe = 8;
1953
1954	/* EOP Event */
1955	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1956	if (r)
1957		return r;
1958
1959	/* Privileged reg */
1960	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1961			      &adev->gfx.priv_reg_irq);
1962	if (r)
1963		return r;
1964
1965	/* Privileged inst */
1966	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1967			      &adev->gfx.priv_inst_irq);
1968	if (r)
1969		return r;
1970
1971	/* Add CP EDC/ECC irq  */
1972	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1973			      &adev->gfx.cp_ecc_error_irq);
1974	if (r)
1975		return r;
1976
1977	/* SQ interrupts. */
1978	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1979			      &adev->gfx.sq_irq);
1980	if (r) {
1981		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1982		return r;
1983	}
1984
1985	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1986
1987	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1988
 
 
1989	r = gfx_v8_0_init_microcode(adev);
1990	if (r) {
1991		DRM_ERROR("Failed to load gfx firmware!\n");
1992		return r;
1993	}
1994
1995	r = adev->gfx.rlc.funcs->init(adev);
1996	if (r) {
1997		DRM_ERROR("Failed to init rlc BOs!\n");
1998		return r;
1999	}
2000
2001	r = gfx_v8_0_mec_init(adev);
2002	if (r) {
2003		DRM_ERROR("Failed to init MEC BOs!\n");
2004		return r;
2005	}
2006
2007	/* set up the gfx ring */
2008	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2009		ring = &adev->gfx.gfx_ring[i];
2010		ring->ring_obj = NULL;
2011		sprintf(ring->name, "gfx");
2012		/* no gfx doorbells on iceland */
2013		if (adev->asic_type != CHIP_TOPAZ) {
2014			ring->use_doorbell = true;
2015			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2016		}
2017
2018		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2019				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2020				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2021		if (r)
2022			return r;
2023	}
2024
 
 
 
2025
2026	/* set up the compute queues - allocate horizontally across pipes */
2027	ring_id = 0;
2028	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2029		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2030			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2031				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2032					continue;
2033
2034				r = gfx_v8_0_compute_ring_init(adev,
2035								ring_id,
2036								i, k, j);
2037				if (r)
2038					return r;
2039
2040				ring_id++;
2041			}
2042		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2043	}
2044
2045	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2046	if (r) {
2047		DRM_ERROR("Failed to init KIQ BOs!\n");
 
 
2048		return r;
2049	}
2050
2051	kiq = &adev->gfx.kiq;
2052	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 
2053	if (r)
2054		return r;
2055
2056	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2057	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
 
2058	if (r)
2059		return r;
2060
2061	adev->gfx.ce_ram_size = 0x8000;
2062
2063	r = gfx_v8_0_gpu_early_init(adev);
2064	if (r)
2065		return r;
2066
2067	return 0;
2068}
2069
2070static int gfx_v8_0_sw_fini(void *handle)
2071{
2072	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2073	int i;
 
 
 
 
 
2074
2075	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2076		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2077	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2078		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2079
2080	amdgpu_gfx_mqd_sw_fini(adev);
2081	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2082	amdgpu_gfx_kiq_fini(adev);
2083
2084	gfx_v8_0_mec_fini(adev);
2085	amdgpu_gfx_rlc_fini(adev);
2086	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2087				&adev->gfx.rlc.clear_state_gpu_addr,
2088				(void **)&adev->gfx.rlc.cs_ptr);
2089	if ((adev->asic_type == CHIP_CARRIZO) ||
2090	    (adev->asic_type == CHIP_STONEY)) {
2091		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2092				&adev->gfx.rlc.cp_table_gpu_addr,
2093				(void **)&adev->gfx.rlc.cp_table_ptr);
2094	}
2095	gfx_v8_0_free_microcode(adev);
2096
2097	return 0;
2098}
2099
2100static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2101{
2102	uint32_t *modearray, *mod2array;
2103	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2104	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2105	u32 reg_offset;
2106
2107	modearray = adev->gfx.config.tile_mode_array;
2108	mod2array = adev->gfx.config.macrotile_mode_array;
2109
2110	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2111		modearray[reg_offset] = 0;
2112
2113	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2114		mod2array[reg_offset] = 0;
2115
2116	switch (adev->asic_type) {
2117	case CHIP_TOPAZ:
2118		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119				PIPE_CONFIG(ADDR_SURF_P2) |
2120				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2121				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123				PIPE_CONFIG(ADDR_SURF_P2) |
2124				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2125				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127				PIPE_CONFIG(ADDR_SURF_P2) |
2128				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2129				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131				PIPE_CONFIG(ADDR_SURF_P2) |
2132				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2133				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135				PIPE_CONFIG(ADDR_SURF_P2) |
2136				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139				PIPE_CONFIG(ADDR_SURF_P2) |
2140				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2141				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2142		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143				PIPE_CONFIG(ADDR_SURF_P2) |
2144				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2145				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2146		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2147				PIPE_CONFIG(ADDR_SURF_P2));
2148		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2149				PIPE_CONFIG(ADDR_SURF_P2) |
2150				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2152		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153				 PIPE_CONFIG(ADDR_SURF_P2) |
2154				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2155				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2157				 PIPE_CONFIG(ADDR_SURF_P2) |
2158				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2160		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2161				 PIPE_CONFIG(ADDR_SURF_P2) |
2162				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165				 PIPE_CONFIG(ADDR_SURF_P2) |
2166				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2168		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2169				 PIPE_CONFIG(ADDR_SURF_P2) |
2170				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173				 PIPE_CONFIG(ADDR_SURF_P2) |
2174				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2176		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2177				 PIPE_CONFIG(ADDR_SURF_P2) |
2178				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2179				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2181				 PIPE_CONFIG(ADDR_SURF_P2) |
2182				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2185				 PIPE_CONFIG(ADDR_SURF_P2) |
2186				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2189				 PIPE_CONFIG(ADDR_SURF_P2) |
2190				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2191				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2193				 PIPE_CONFIG(ADDR_SURF_P2) |
2194				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2197				 PIPE_CONFIG(ADDR_SURF_P2) |
2198				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2201				 PIPE_CONFIG(ADDR_SURF_P2) |
2202				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2203				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2204		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2205				 PIPE_CONFIG(ADDR_SURF_P2) |
2206				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2207				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2208		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2209				 PIPE_CONFIG(ADDR_SURF_P2) |
2210				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2212		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213				 PIPE_CONFIG(ADDR_SURF_P2) |
2214				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2215				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2216		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2217				 PIPE_CONFIG(ADDR_SURF_P2) |
2218				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2219				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2220
2221		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2222				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224				NUM_BANKS(ADDR_SURF_8_BANK));
2225		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2226				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2228				NUM_BANKS(ADDR_SURF_8_BANK));
2229		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2230				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2231				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232				NUM_BANKS(ADDR_SURF_8_BANK));
2233		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2235				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236				NUM_BANKS(ADDR_SURF_8_BANK));
2237		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240				NUM_BANKS(ADDR_SURF_8_BANK));
2241		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244				NUM_BANKS(ADDR_SURF_8_BANK));
2245		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2246				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2247				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2248				NUM_BANKS(ADDR_SURF_8_BANK));
2249		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2250				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2251				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252				NUM_BANKS(ADDR_SURF_16_BANK));
2253		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256				NUM_BANKS(ADDR_SURF_16_BANK));
2257		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2258				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260				 NUM_BANKS(ADDR_SURF_16_BANK));
2261		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2263				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264				 NUM_BANKS(ADDR_SURF_16_BANK));
2265		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2267				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268				 NUM_BANKS(ADDR_SURF_16_BANK));
2269		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2271				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2272				 NUM_BANKS(ADDR_SURF_16_BANK));
2273		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276				 NUM_BANKS(ADDR_SURF_8_BANK));
2277
2278		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2279			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2280			    reg_offset != 23)
2281				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2282
2283		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2284			if (reg_offset != 7)
2285				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2286
2287		break;
2288	case CHIP_FIJI:
2289	case CHIP_VEGAM:
2290		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2293				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2297				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2301				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2305				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2315				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2317				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2318		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2320				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2321				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2323				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2324		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2333				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2335				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2336		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2337				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2338				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2339				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2340		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2345				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2348		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2349				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2352		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2356		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2357				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2358				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2359				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2360		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2361				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2363				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2365				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2369				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2373				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2377				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2381				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2382				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2385				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2389				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2391				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2393				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2400		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2405				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2407				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2408		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2411				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2412
2413		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416				NUM_BANKS(ADDR_SURF_8_BANK));
2417		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420				NUM_BANKS(ADDR_SURF_8_BANK));
2421		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2423				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2424				NUM_BANKS(ADDR_SURF_8_BANK));
2425		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2427				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428				NUM_BANKS(ADDR_SURF_8_BANK));
2429		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2431				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432				NUM_BANKS(ADDR_SURF_8_BANK));
2433		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2435				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2436				NUM_BANKS(ADDR_SURF_8_BANK));
2437		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440				NUM_BANKS(ADDR_SURF_8_BANK));
2441		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2443				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444				NUM_BANKS(ADDR_SURF_8_BANK));
2445		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448				NUM_BANKS(ADDR_SURF_8_BANK));
2449		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452				 NUM_BANKS(ADDR_SURF_8_BANK));
2453		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456				 NUM_BANKS(ADDR_SURF_8_BANK));
2457		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460				 NUM_BANKS(ADDR_SURF_8_BANK));
2461		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464				 NUM_BANKS(ADDR_SURF_8_BANK));
2465		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468				 NUM_BANKS(ADDR_SURF_4_BANK));
2469
2470		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2471			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2472
2473		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2474			if (reg_offset != 7)
2475				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2476
2477		break;
2478	case CHIP_TONGA:
2479		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2482				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2486				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2490				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2492				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2494				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2506				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2507		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2510				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2512				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2513		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2514				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2524				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2525		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2526				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2528				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2529		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2537		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2538				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2541		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2545		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2546				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2547				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2548				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2549		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2550				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2552				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2554				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2558				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2562				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2566				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2570				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2571				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2574				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2578				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2580				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2582				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2593		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2594				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2597		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2598				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2601
2602		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605				NUM_BANKS(ADDR_SURF_16_BANK));
2606		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609				NUM_BANKS(ADDR_SURF_16_BANK));
2610		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2612				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2613				NUM_BANKS(ADDR_SURF_16_BANK));
2614		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617				NUM_BANKS(ADDR_SURF_16_BANK));
2618		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2621				NUM_BANKS(ADDR_SURF_16_BANK));
2622		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2624				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2625				NUM_BANKS(ADDR_SURF_16_BANK));
2626		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2629				NUM_BANKS(ADDR_SURF_16_BANK));
2630		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2632				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633				NUM_BANKS(ADDR_SURF_16_BANK));
2634		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637				NUM_BANKS(ADDR_SURF_16_BANK));
2638		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2641				 NUM_BANKS(ADDR_SURF_16_BANK));
2642		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645				 NUM_BANKS(ADDR_SURF_16_BANK));
2646		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649				 NUM_BANKS(ADDR_SURF_8_BANK));
2650		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653				 NUM_BANKS(ADDR_SURF_4_BANK));
2654		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657				 NUM_BANKS(ADDR_SURF_4_BANK));
2658
2659		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2660			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2661
2662		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663			if (reg_offset != 7)
2664				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2665
2666		break;
2667	case CHIP_POLARIS11:
2668	case CHIP_POLARIS12:
2669		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2672				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2676				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2684				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2696				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2697		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2700				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2701		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2712				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2715		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2728				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2735		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2736				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2739		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2740				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2744				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2752				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2756				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2760				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2764				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2768				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2770				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2771		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2772				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2774				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2776				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2784				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2786				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2787		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2788				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2790				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2791
2792		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2795				NUM_BANKS(ADDR_SURF_16_BANK));
2796
2797		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2799				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800				NUM_BANKS(ADDR_SURF_16_BANK));
2801
2802		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805				NUM_BANKS(ADDR_SURF_16_BANK));
2806
2807		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810				NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815				NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820				NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825				NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2828				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2829				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830				NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835				NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840				NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845				NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850				NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855				NUM_BANKS(ADDR_SURF_8_BANK));
2856
2857		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2860				NUM_BANKS(ADDR_SURF_4_BANK));
2861
2862		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2863			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2864
2865		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2866			if (reg_offset != 7)
2867				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2868
2869		break;
2870	case CHIP_POLARIS10:
2871		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2874				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2878				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2886				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2896				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2898				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2900				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2902				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2904				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2905		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2919				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2920				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2930				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2938				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2939				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2941		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2942				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2946				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2954				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2958				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2962				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2963				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2970				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2974				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2978				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2982				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2985		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2989		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2990				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2991				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2992				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2993
2994		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997				NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002				NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007				NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012				NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3016				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017				NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022				NUM_BANKS(ADDR_SURF_16_BANK));
3023
3024		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027				NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3031				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032				NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037				NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3041				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042				NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3046				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047				NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052				NUM_BANKS(ADDR_SURF_8_BANK));
3053
3054		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057				NUM_BANKS(ADDR_SURF_4_BANK));
3058
3059		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062				NUM_BANKS(ADDR_SURF_4_BANK));
3063
3064		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3065			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3066
3067		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3068			if (reg_offset != 7)
3069				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3070
3071		break;
3072	case CHIP_STONEY:
3073		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074				PIPE_CONFIG(ADDR_SURF_P2) |
3075				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3076				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078				PIPE_CONFIG(ADDR_SURF_P2) |
3079				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3080				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082				PIPE_CONFIG(ADDR_SURF_P2) |
3083				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3084				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086				PIPE_CONFIG(ADDR_SURF_P2) |
3087				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3088				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090				PIPE_CONFIG(ADDR_SURF_P2) |
3091				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3094				PIPE_CONFIG(ADDR_SURF_P2) |
3095				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3096				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098				PIPE_CONFIG(ADDR_SURF_P2) |
3099				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3100				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3102				PIPE_CONFIG(ADDR_SURF_P2));
3103		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104				PIPE_CONFIG(ADDR_SURF_P2) |
3105				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108				 PIPE_CONFIG(ADDR_SURF_P2) |
3109				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3110				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112				 PIPE_CONFIG(ADDR_SURF_P2) |
3113				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3114				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3116				 PIPE_CONFIG(ADDR_SURF_P2) |
3117				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120				 PIPE_CONFIG(ADDR_SURF_P2) |
3121				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3124				 PIPE_CONFIG(ADDR_SURF_P2) |
3125				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128				 PIPE_CONFIG(ADDR_SURF_P2) |
3129				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3130				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3132				 PIPE_CONFIG(ADDR_SURF_P2) |
3133				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3136				 PIPE_CONFIG(ADDR_SURF_P2) |
3137				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3140				 PIPE_CONFIG(ADDR_SURF_P2) |
3141				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3144				 PIPE_CONFIG(ADDR_SURF_P2) |
3145				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3148				 PIPE_CONFIG(ADDR_SURF_P2) |
3149				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3152				 PIPE_CONFIG(ADDR_SURF_P2) |
3153				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3156				 PIPE_CONFIG(ADDR_SURF_P2) |
3157				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3160				 PIPE_CONFIG(ADDR_SURF_P2) |
3161				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3164				 PIPE_CONFIG(ADDR_SURF_P2) |
3165				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3167		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3168				 PIPE_CONFIG(ADDR_SURF_P2) |
3169				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3170				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3171		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3172				 PIPE_CONFIG(ADDR_SURF_P2) |
3173				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3174				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3175
3176		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3178				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179				NUM_BANKS(ADDR_SURF_8_BANK));
3180		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183				NUM_BANKS(ADDR_SURF_8_BANK));
3184		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187				NUM_BANKS(ADDR_SURF_8_BANK));
3188		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191				NUM_BANKS(ADDR_SURF_8_BANK));
3192		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195				NUM_BANKS(ADDR_SURF_8_BANK));
3196		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3198				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3199				NUM_BANKS(ADDR_SURF_8_BANK));
3200		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203				NUM_BANKS(ADDR_SURF_8_BANK));
3204		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3205				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3206				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207				NUM_BANKS(ADDR_SURF_16_BANK));
3208		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3209				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211				NUM_BANKS(ADDR_SURF_16_BANK));
3212		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3213				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215				 NUM_BANKS(ADDR_SURF_16_BANK));
3216		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3217				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219				 NUM_BANKS(ADDR_SURF_16_BANK));
3220		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3222				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223				 NUM_BANKS(ADDR_SURF_16_BANK));
3224		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227				 NUM_BANKS(ADDR_SURF_16_BANK));
3228		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231				 NUM_BANKS(ADDR_SURF_8_BANK));
3232
3233		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3234			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3235			    reg_offset != 23)
3236				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3237
3238		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3239			if (reg_offset != 7)
3240				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3241
3242		break;
3243	default:
3244		dev_warn(adev->dev,
3245			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3246			 adev->asic_type);
3247		fallthrough;
3248
3249	case CHIP_CARRIZO:
3250		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251				PIPE_CONFIG(ADDR_SURF_P2) |
3252				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3253				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255				PIPE_CONFIG(ADDR_SURF_P2) |
3256				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3257				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259				PIPE_CONFIG(ADDR_SURF_P2) |
3260				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3261				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3263				PIPE_CONFIG(ADDR_SURF_P2) |
3264				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3265				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267				PIPE_CONFIG(ADDR_SURF_P2) |
3268				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3271				PIPE_CONFIG(ADDR_SURF_P2) |
3272				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3273				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3274		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275				PIPE_CONFIG(ADDR_SURF_P2) |
3276				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3277				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3278		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3279				PIPE_CONFIG(ADDR_SURF_P2));
3280		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3281				PIPE_CONFIG(ADDR_SURF_P2) |
3282				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285				 PIPE_CONFIG(ADDR_SURF_P2) |
3286				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3287				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289				 PIPE_CONFIG(ADDR_SURF_P2) |
3290				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3291				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3292		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3293				 PIPE_CONFIG(ADDR_SURF_P2) |
3294				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3297				 PIPE_CONFIG(ADDR_SURF_P2) |
3298				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3300		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3301				 PIPE_CONFIG(ADDR_SURF_P2) |
3302				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3304		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3305				 PIPE_CONFIG(ADDR_SURF_P2) |
3306				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3307				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3308		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3309				 PIPE_CONFIG(ADDR_SURF_P2) |
3310				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3311				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3313				 PIPE_CONFIG(ADDR_SURF_P2) |
3314				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3317				 PIPE_CONFIG(ADDR_SURF_P2) |
3318				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3321				 PIPE_CONFIG(ADDR_SURF_P2) |
3322				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3323				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3325				 PIPE_CONFIG(ADDR_SURF_P2) |
3326				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3329				 PIPE_CONFIG(ADDR_SURF_P2) |
3330				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3333				 PIPE_CONFIG(ADDR_SURF_P2) |
3334				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3335				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3336		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3337				 PIPE_CONFIG(ADDR_SURF_P2) |
3338				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3339				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3341				 PIPE_CONFIG(ADDR_SURF_P2) |
3342				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3344		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3345				 PIPE_CONFIG(ADDR_SURF_P2) |
3346				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3348		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3349				 PIPE_CONFIG(ADDR_SURF_P2) |
3350				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3352
3353		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3355				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356				NUM_BANKS(ADDR_SURF_8_BANK));
3357		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360				NUM_BANKS(ADDR_SURF_8_BANK));
3361		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364				NUM_BANKS(ADDR_SURF_8_BANK));
3365		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368				NUM_BANKS(ADDR_SURF_8_BANK));
3369		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372				NUM_BANKS(ADDR_SURF_8_BANK));
3373		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3374				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3375				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3376				NUM_BANKS(ADDR_SURF_8_BANK));
3377		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3378				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3379				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3380				NUM_BANKS(ADDR_SURF_8_BANK));
3381		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3382				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3383				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384				NUM_BANKS(ADDR_SURF_16_BANK));
3385		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3386				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3387				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388				NUM_BANKS(ADDR_SURF_16_BANK));
3389		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3390				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3391				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392				 NUM_BANKS(ADDR_SURF_16_BANK));
3393		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3394				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3395				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396				 NUM_BANKS(ADDR_SURF_16_BANK));
3397		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3399				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3400				 NUM_BANKS(ADDR_SURF_16_BANK));
3401		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3404				 NUM_BANKS(ADDR_SURF_16_BANK));
3405		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408				 NUM_BANKS(ADDR_SURF_8_BANK));
3409
3410		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3411			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3412			    reg_offset != 23)
3413				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3414
3415		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3416			if (reg_offset != 7)
3417				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3418
3419		break;
3420	}
3421}
3422
3423static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3424				  u32 se_num, u32 sh_num, u32 instance)
3425{
3426	u32 data;
3427
3428	if (instance == 0xffffffff)
3429		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3430	else
3431		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3432
3433	if (se_num == 0xffffffff)
3434		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3435	else
3436		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3437
3438	if (sh_num == 0xffffffff)
3439		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3440	else
3441		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3442
3443	WREG32(mmGRBM_GFX_INDEX, data);
3444}
3445
3446static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3447				  u32 me, u32 pipe, u32 q, u32 vm)
3448{
3449	vi_srbm_select(adev, me, pipe, q, vm);
3450}
3451
3452static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3453{
3454	u32 data, mask;
3455
3456	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3457		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3458
3459	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3460
3461	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3462					 adev->gfx.config.max_sh_per_se);
3463
3464	return (~data) & mask;
3465}
3466
3467static void
3468gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3469{
3470	switch (adev->asic_type) {
3471	case CHIP_FIJI:
3472	case CHIP_VEGAM:
3473		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3474			  RB_XSEL2(1) | PKR_MAP(2) |
3475			  PKR_XSEL(1) | PKR_YSEL(1) |
3476			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3477		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3478			   SE_PAIR_YSEL(2);
3479		break;
3480	case CHIP_TONGA:
3481	case CHIP_POLARIS10:
3482		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3483			  SE_XSEL(1) | SE_YSEL(1);
3484		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3485			   SE_PAIR_YSEL(2);
3486		break;
3487	case CHIP_TOPAZ:
3488	case CHIP_CARRIZO:
3489		*rconf |= RB_MAP_PKR0(2);
3490		*rconf1 |= 0x0;
3491		break;
3492	case CHIP_POLARIS11:
3493	case CHIP_POLARIS12:
3494		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3495			  SE_XSEL(1) | SE_YSEL(1);
3496		*rconf1 |= 0x0;
3497		break;
3498	case CHIP_STONEY:
3499		*rconf |= 0x0;
3500		*rconf1 |= 0x0;
3501		break;
3502	default:
3503		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3504		break;
3505	}
3506}
3507
3508static void
3509gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3510					u32 raster_config, u32 raster_config_1,
3511					unsigned rb_mask, unsigned num_rb)
3512{
3513	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3514	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3515	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3516	unsigned rb_per_se = num_rb / num_se;
3517	unsigned se_mask[4];
3518	unsigned se;
3519
3520	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3521	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3522	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3523	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3524
3525	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3526	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3527	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3528
3529	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3530			     (!se_mask[2] && !se_mask[3]))) {
3531		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3532
3533		if (!se_mask[0] && !se_mask[1]) {
3534			raster_config_1 |=
3535				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3536		} else {
3537			raster_config_1 |=
3538				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3539		}
3540	}
3541
3542	for (se = 0; se < num_se; se++) {
3543		unsigned raster_config_se = raster_config;
3544		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3545		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3546		int idx = (se / 2) * 2;
3547
3548		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3549			raster_config_se &= ~SE_MAP_MASK;
3550
3551			if (!se_mask[idx]) {
3552				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3553			} else {
3554				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3555			}
3556		}
3557
3558		pkr0_mask &= rb_mask;
3559		pkr1_mask &= rb_mask;
3560		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3561			raster_config_se &= ~PKR_MAP_MASK;
3562
3563			if (!pkr0_mask) {
3564				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3565			} else {
3566				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3567			}
3568		}
3569
3570		if (rb_per_se >= 2) {
3571			unsigned rb0_mask = 1 << (se * rb_per_se);
3572			unsigned rb1_mask = rb0_mask << 1;
3573
3574			rb0_mask &= rb_mask;
3575			rb1_mask &= rb_mask;
3576			if (!rb0_mask || !rb1_mask) {
3577				raster_config_se &= ~RB_MAP_PKR0_MASK;
3578
3579				if (!rb0_mask) {
3580					raster_config_se |=
3581						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3582				} else {
3583					raster_config_se |=
3584						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3585				}
3586			}
3587
3588			if (rb_per_se > 2) {
3589				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3590				rb1_mask = rb0_mask << 1;
3591				rb0_mask &= rb_mask;
3592				rb1_mask &= rb_mask;
3593				if (!rb0_mask || !rb1_mask) {
3594					raster_config_se &= ~RB_MAP_PKR1_MASK;
3595
3596					if (!rb0_mask) {
3597						raster_config_se |=
3598							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3599					} else {
3600						raster_config_se |=
3601							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3602					}
3603				}
3604			}
3605		}
3606
3607		/* GRBM_GFX_INDEX has a different offset on VI */
3608		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3609		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3610		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3611	}
3612
3613	/* GRBM_GFX_INDEX has a different offset on VI */
3614	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3615}
3616
3617static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3618{
3619	int i, j;
3620	u32 data;
3621	u32 raster_config = 0, raster_config_1 = 0;
3622	u32 active_rbs = 0;
3623	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3624					adev->gfx.config.max_sh_per_se;
3625	unsigned num_rb_pipes;
3626
3627	mutex_lock(&adev->grbm_idx_mutex);
3628	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3631			data = gfx_v8_0_get_rb_active_bitmap(adev);
3632			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3633					       rb_bitmap_width_per_sh);
3634		}
3635	}
3636	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3637
3638	adev->gfx.config.backend_enable_mask = active_rbs;
3639	adev->gfx.config.num_rbs = hweight32(active_rbs);
3640
3641	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3642			     adev->gfx.config.max_shader_engines, 16);
3643
3644	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3645
3646	if (!adev->gfx.config.backend_enable_mask ||
3647			adev->gfx.config.num_rbs >= num_rb_pipes) {
3648		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3649		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3650	} else {
3651		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3652							adev->gfx.config.backend_enable_mask,
3653							num_rb_pipes);
3654	}
3655
3656	/* cache the values for userspace */
3657	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3658		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3659			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3660			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3661				RREG32(mmCC_RB_BACKEND_DISABLE);
3662			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3663				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3664			adev->gfx.config.rb_config[i][j].raster_config =
3665				RREG32(mmPA_SC_RASTER_CONFIG);
3666			adev->gfx.config.rb_config[i][j].raster_config_1 =
3667				RREG32(mmPA_SC_RASTER_CONFIG_1);
3668		}
3669	}
3670	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3671	mutex_unlock(&adev->grbm_idx_mutex);
3672}
3673
3674#define DEFAULT_SH_MEM_BASES	(0x6000)
3675/**
3676 * gfx_v8_0_init_compute_vmid - gart enable
3677 *
3678 * @adev: amdgpu_device pointer
3679 *
3680 * Initialize compute vmid sh_mem registers
3681 *
3682 */
 
 
 
3683static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3684{
3685	int i;
3686	uint32_t sh_mem_config;
3687	uint32_t sh_mem_bases;
3688
3689	/*
3690	 * Configure apertures:
3691	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3692	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3693	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3694	 */
3695	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3696
3697	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3698			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3699			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3700			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3701			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3702			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3703
3704	mutex_lock(&adev->srbm_mutex);
3705	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3706		vi_srbm_select(adev, 0, 0, 0, i);
3707		/* CP and shaders */
3708		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3709		WREG32(mmSH_MEM_APE1_BASE, 1);
3710		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3711		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3712	}
3713	vi_srbm_select(adev, 0, 0, 0, 0);
3714	mutex_unlock(&adev->srbm_mutex);
3715
3716	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3717	   access. These should be enabled by FW for target VMIDs. */
3718	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3719		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3720		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3721		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3722		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3723	}
3724}
3725
3726static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3727{
3728	int vmid;
3729
3730	/*
3731	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3732	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3733	 * the driver can enable them for graphics. VMID0 should maintain
3734	 * access so that HWS firmware can save/restore entries.
3735	 */
3736	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3737		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3738		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3739		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3740		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3741	}
3742}
3743
3744static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3745{
3746	switch (adev->asic_type) {
3747	default:
3748		adev->gfx.config.double_offchip_lds_buf = 1;
3749		break;
3750	case CHIP_CARRIZO:
3751	case CHIP_STONEY:
3752		adev->gfx.config.double_offchip_lds_buf = 0;
3753		break;
3754	}
3755}
3756
3757static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3758{
3759	u32 tmp, sh_static_mem_cfg;
3760	int i;
3761
3762	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3763	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3764	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3766
3767	gfx_v8_0_tiling_mode_table_init(adev);
3768	gfx_v8_0_setup_rb(adev);
3769	gfx_v8_0_get_cu_info(adev);
3770	gfx_v8_0_config_init(adev);
3771
3772	/* XXX SH_MEM regs */
3773	/* where to put LDS, scratch, GPUVM in FSA64 space */
3774	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3775				   SWIZZLE_ENABLE, 1);
3776	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3777				   ELEMENT_SIZE, 1);
3778	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779				   INDEX_STRIDE, 3);
3780	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3781
3782	mutex_lock(&adev->srbm_mutex);
3783	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3784		vi_srbm_select(adev, 0, 0, 0, i);
3785		/* CP and shaders */
3786		if (i == 0) {
3787			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3788			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3789			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3790					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3791			WREG32(mmSH_MEM_CONFIG, tmp);
3792			WREG32(mmSH_MEM_BASES, 0);
3793		} else {
3794			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3795			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3796			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3797					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3798			WREG32(mmSH_MEM_CONFIG, tmp);
3799			tmp = adev->gmc.shared_aperture_start >> 48;
3800			WREG32(mmSH_MEM_BASES, tmp);
3801		}
3802
3803		WREG32(mmSH_MEM_APE1_BASE, 1);
3804		WREG32(mmSH_MEM_APE1_LIMIT, 0);
 
3805	}
3806	vi_srbm_select(adev, 0, 0, 0, 0);
3807	mutex_unlock(&adev->srbm_mutex);
3808
3809	gfx_v8_0_init_compute_vmid(adev);
3810	gfx_v8_0_init_gds_vmid(adev);
3811
3812	mutex_lock(&adev->grbm_idx_mutex);
3813	/*
3814	 * making sure that the following register writes will be broadcasted
3815	 * to all the shaders
3816	 */
3817	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3818
3819	WREG32(mmPA_SC_FIFO_SIZE,
3820		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3821			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3822		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3823			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3824		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3825			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3826		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3827			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3828
3829	tmp = RREG32(mmSPI_ARB_PRIORITY);
3830	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3831	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3832	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3833	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3834	WREG32(mmSPI_ARB_PRIORITY, tmp);
3835
3836	mutex_unlock(&adev->grbm_idx_mutex);
3837
3838}
3839
3840static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3841{
3842	u32 i, j, k;
3843	u32 mask;
3844
3845	mutex_lock(&adev->grbm_idx_mutex);
3846	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3847		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3848			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3849			for (k = 0; k < adev->usec_timeout; k++) {
3850				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3851					break;
3852				udelay(1);
3853			}
3854			if (k == adev->usec_timeout) {
3855				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3856						      0xffffffff, 0xffffffff);
3857				mutex_unlock(&adev->grbm_idx_mutex);
3858				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3859					 i, j);
3860				return;
3861			}
3862		}
3863	}
3864	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3865	mutex_unlock(&adev->grbm_idx_mutex);
3866
3867	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3868		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3869		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3870		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3871	for (k = 0; k < adev->usec_timeout; k++) {
3872		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3873			break;
3874		udelay(1);
3875	}
3876}
3877
3878static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3879					       bool enable)
3880{
3881	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3882
3883	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3884	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3885	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3886	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3887
3888	WREG32(mmCP_INT_CNTL_RING0, tmp);
3889}
3890
3891static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3892{
3893	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3894	/* csib */
3895	WREG32(mmRLC_CSIB_ADDR_HI,
3896			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3897	WREG32(mmRLC_CSIB_ADDR_LO,
3898			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3899	WREG32(mmRLC_CSIB_LENGTH,
3900			adev->gfx.rlc.clear_state_size);
3901}
3902
3903static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3904				int ind_offset,
3905				int list_size,
3906				int *unique_indices,
3907				int *indices_count,
3908				int max_indices,
3909				int *ind_start_offsets,
3910				int *offset_count,
3911				int max_offset)
3912{
3913	int indices;
3914	bool new_entry = true;
3915
3916	for (; ind_offset < list_size; ind_offset++) {
3917
3918		if (new_entry) {
3919			new_entry = false;
3920			ind_start_offsets[*offset_count] = ind_offset;
3921			*offset_count = *offset_count + 1;
3922			BUG_ON(*offset_count >= max_offset);
3923		}
3924
3925		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3926			new_entry = true;
3927			continue;
3928		}
3929
3930		ind_offset += 2;
3931
3932		/* look for the matching indice */
3933		for (indices = 0;
3934			indices < *indices_count;
3935			indices++) {
3936			if (unique_indices[indices] ==
3937				register_list_format[ind_offset])
3938				break;
3939		}
3940
3941		if (indices >= *indices_count) {
3942			unique_indices[*indices_count] =
3943				register_list_format[ind_offset];
3944			indices = *indices_count;
3945			*indices_count = *indices_count + 1;
3946			BUG_ON(*indices_count >= max_indices);
3947		}
3948
3949		register_list_format[ind_offset] = indices;
3950	}
3951}
3952
3953static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3954{
3955	int i, temp, data;
3956	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3957	int indices_count = 0;
3958	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3959	int offset_count = 0;
3960
3961	int list_size;
3962	unsigned int *register_list_format =
3963		kmemdup(adev->gfx.rlc.register_list_format,
3964			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3965	if (!register_list_format)
3966		return -ENOMEM;
 
 
3967
3968	gfx_v8_0_parse_ind_reg_list(register_list_format,
3969				RLC_FormatDirectRegListLength,
3970				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3971				unique_indices,
3972				&indices_count,
3973				ARRAY_SIZE(unique_indices),
3974				indirect_start_offsets,
3975				&offset_count,
3976				ARRAY_SIZE(indirect_start_offsets));
3977
3978	/* save and restore list */
3979	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3980
3981	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3982	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3983		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3984
3985	/* indirect list */
3986	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3987	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3988		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3989
3990	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3991	list_size = list_size >> 1;
3992	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3993	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3994
3995	/* starting offsets starts */
3996	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3997		adev->gfx.rlc.starting_offsets_start);
3998	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3999		WREG32(mmRLC_GPM_SCRATCH_DATA,
4000				indirect_start_offsets[i]);
4001
4002	/* unique indices */
4003	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4004	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4005	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4006		if (unique_indices[i] != 0) {
4007			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4008			WREG32(data + i, unique_indices[i] >> 20);
 
 
4009		}
4010	}
4011	kfree(register_list_format);
4012
4013	return 0;
4014}
4015
4016static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4017{
4018	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4019}
4020
4021static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4022{
4023	uint32_t data;
4024
4025	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4026
4027	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4028	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4029	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4030	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4031	WREG32(mmRLC_PG_DELAY, data);
4032
4033	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4034	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4035
4036}
4037
4038static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4039						bool enable)
4040{
4041	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4042}
4043
4044static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4045						  bool enable)
4046{
4047	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4048}
4049
4050static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4051{
4052	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4053}
4054
4055static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4056{
4057	if ((adev->asic_type == CHIP_CARRIZO) ||
4058	    (adev->asic_type == CHIP_STONEY)) {
4059		gfx_v8_0_init_csb(adev);
4060		gfx_v8_0_init_save_restore_list(adev);
4061		gfx_v8_0_enable_save_restore_machine(adev);
4062		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4063		gfx_v8_0_init_power_gating(adev);
4064		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
 
 
 
 
 
 
 
 
 
 
 
4065	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4066		   (adev->asic_type == CHIP_POLARIS12) ||
4067		   (adev->asic_type == CHIP_VEGAM)) {
4068		gfx_v8_0_init_csb(adev);
4069		gfx_v8_0_init_save_restore_list(adev);
4070		gfx_v8_0_enable_save_restore_machine(adev);
4071		gfx_v8_0_init_power_gating(adev);
4072	}
4073
4074}
4075
4076static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4077{
4078	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4079
4080	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4081	gfx_v8_0_wait_for_rlc_serdes(adev);
4082}
4083
4084static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4085{
4086	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4087	udelay(50);
4088
4089	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4090	udelay(50);
4091}
4092
4093static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4094{
4095	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4096
4097	/* carrizo do enable cp interrupt after cp inited */
4098	if (!(adev->flags & AMD_IS_APU))
4099		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4100
4101	udelay(50);
4102}
4103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4104static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4105{
4106	if (amdgpu_sriov_vf(adev)) {
4107		gfx_v8_0_init_csb(adev);
4108		return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
4109	}
4110
4111	adev->gfx.rlc.funcs->stop(adev);
4112	adev->gfx.rlc.funcs->reset(adev);
 
 
4113	gfx_v8_0_init_pg(adev);
4114	adev->gfx.rlc.funcs->start(adev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4115
4116	return 0;
4117}
4118
4119static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4120{
 
4121	u32 tmp = RREG32(mmCP_ME_CNTL);
4122
4123	if (enable) {
4124		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4125		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4126		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4127	} else {
4128		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4129		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4130		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
 
 
4131	}
4132	WREG32(mmCP_ME_CNTL, tmp);
4133	udelay(50);
4134}
4135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4136static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4137{
4138	u32 count = 0;
4139	const struct cs_section_def *sect = NULL;
4140	const struct cs_extent_def *ext = NULL;
4141
4142	/* begin clear state */
4143	count += 2;
4144	/* context control state */
4145	count += 3;
4146
4147	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4148		for (ext = sect->section; ext->extent != NULL; ++ext) {
4149			if (sect->id == SECT_CONTEXT)
4150				count += 2 + ext->reg_count;
4151			else
4152				return 0;
4153		}
4154	}
4155	/* pa_sc_raster_config/pa_sc_raster_config1 */
4156	count += 4;
4157	/* end clear state */
4158	count += 2;
4159	/* clear state */
4160	count += 2;
4161
4162	return count;
4163}
4164
4165static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4166{
4167	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4168	const struct cs_section_def *sect = NULL;
4169	const struct cs_extent_def *ext = NULL;
4170	int r, i;
4171
4172	/* init the CP */
4173	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4174	WREG32(mmCP_ENDIAN_SWAP, 0);
4175	WREG32(mmCP_DEVICE_ID, 1);
4176
4177	gfx_v8_0_cp_gfx_enable(adev, true);
4178
4179	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4180	if (r) {
4181		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4182		return r;
4183	}
4184
4185	/* clear state buffer */
4186	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4187	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4188
4189	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4190	amdgpu_ring_write(ring, 0x80000000);
4191	amdgpu_ring_write(ring, 0x80000000);
4192
4193	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4194		for (ext = sect->section; ext->extent != NULL; ++ext) {
4195			if (sect->id == SECT_CONTEXT) {
4196				amdgpu_ring_write(ring,
4197				       PACKET3(PACKET3_SET_CONTEXT_REG,
4198					       ext->reg_count));
4199				amdgpu_ring_write(ring,
4200				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4201				for (i = 0; i < ext->reg_count; i++)
4202					amdgpu_ring_write(ring, ext->extent[i]);
4203			}
4204		}
4205	}
4206
4207	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4208	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4209	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4210	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4211
4212	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4213	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4214
4215	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4216	amdgpu_ring_write(ring, 0);
4217
4218	/* init the CE partitions */
4219	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4220	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4221	amdgpu_ring_write(ring, 0x8000);
4222	amdgpu_ring_write(ring, 0x8000);
4223
4224	amdgpu_ring_commit(ring);
4225
4226	return 0;
4227}
4228static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4229{
4230	u32 tmp;
4231	/* no gfx doorbells on iceland */
4232	if (adev->asic_type == CHIP_TOPAZ)
4233		return;
4234
4235	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4236
4237	if (ring->use_doorbell) {
4238		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4239				DOORBELL_OFFSET, ring->doorbell_index);
4240		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4241						DOORBELL_HIT, 0);
4242		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4243					    DOORBELL_EN, 1);
4244	} else {
4245		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4246	}
4247
4248	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4249
4250	if (adev->flags & AMD_IS_APU)
4251		return;
4252
4253	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4254					DOORBELL_RANGE_LOWER,
4255					adev->doorbell_index.gfx_ring0);
4256	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4257
4258	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4259		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4260}
4261
4262static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4263{
4264	struct amdgpu_ring *ring;
4265	u32 tmp;
4266	u32 rb_bufsz;
4267	u64 rb_addr, rptr_addr, wptr_gpu_addr;
 
4268
4269	/* Set the write pointer delay */
4270	WREG32(mmCP_RB_WPTR_DELAY, 0);
4271
4272	/* set the RB to use vmid 0 */
4273	WREG32(mmCP_RB_VMID, 0);
4274
4275	/* Set ring buffer size */
4276	ring = &adev->gfx.gfx_ring[0];
4277	rb_bufsz = order_base_2(ring->ring_size / 8);
4278	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4279	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4280	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4281	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4282#ifdef __BIG_ENDIAN
4283	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4284#endif
4285	WREG32(mmCP_RB0_CNTL, tmp);
4286
4287	/* Initialize the ring buffer's read and write pointers */
4288	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4289	ring->wptr = 0;
4290	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4291
4292	/* set the wb address wether it's enabled or not */
4293	rptr_addr = ring->rptr_gpu_addr;
4294	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4295	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4296
4297	wptr_gpu_addr = ring->wptr_gpu_addr;
4298	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4299	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4300	mdelay(1);
4301	WREG32(mmCP_RB0_CNTL, tmp);
4302
4303	rb_addr = ring->gpu_addr >> 8;
4304	WREG32(mmCP_RB0_BASE, rb_addr);
4305	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4306
4307	gfx_v8_0_set_cpg_door_bell(adev, ring);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4308	/* start the ring */
4309	amdgpu_ring_clear_ring(ring);
4310	gfx_v8_0_cp_gfx_start(adev);
4311	ring->sched.ready = true;
 
 
 
4312
4313	return 0;
4314}
4315
4316static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4317{
 
 
4318	if (enable) {
4319		WREG32(mmCP_MEC_CNTL, 0);
4320	} else {
4321		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4322		adev->gfx.kiq.ring.sched.ready = false;
 
4323	}
4324	udelay(50);
4325}
4326
4327/* KIQ functions */
4328static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4329{
4330	uint32_t tmp;
4331	struct amdgpu_device *adev = ring->adev;
 
4332
4333	/* tell RLC which is KIQ queue */
4334	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4335	tmp &= 0xffffff00;
4336	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4337	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4338	tmp |= 0x80;
4339	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4340}
4341
4342static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4343{
4344	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4345	uint64_t queue_mask = 0;
4346	int r, i;
4347
4348	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4349		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4350			continue;
4351
4352		/* This situation may be hit in the future if a new HW
4353		 * generation exposes more than 64 queues. If so, the
4354		 * definition of queue_mask needs updating */
4355		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4356			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4357			break;
4358		}
 
 
 
4359
4360		queue_mask |= (1ull << i);
4361	}
 
4362
4363	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4364	if (r) {
4365		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4366		return r;
4367	}
4368	/* set resources */
4369	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4370	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4371	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4372	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4373	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4374	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4375	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4376	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4377	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4378		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4379		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4380		uint64_t wptr_addr = ring->wptr_gpu_addr;
4381
4382		/* map queues */
4383		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4384		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4385		amdgpu_ring_write(kiq_ring,
4386				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4387		amdgpu_ring_write(kiq_ring,
4388				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4389				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4390				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4391				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4392		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4393		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4394		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4395		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4396	}
4397
4398	amdgpu_ring_commit(kiq_ring);
4399
4400	return 0;
4401}
4402
4403static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4404{
4405	int i, r = 0;
4406
4407	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4408		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4409		for (i = 0; i < adev->usec_timeout; i++) {
4410			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4411				break;
4412			udelay(1);
4413		}
4414		if (i == adev->usec_timeout)
4415			r = -ETIMEDOUT;
4416	}
4417	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4418	WREG32(mmCP_HQD_PQ_RPTR, 0);
4419	WREG32(mmCP_HQD_PQ_WPTR, 0);
4420
4421	return r;
4422}
 
 
4423
4424static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4425{
4426	struct amdgpu_device *adev = ring->adev;
4427
4428	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4429		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4430			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4431			mqd->cp_hqd_queue_priority =
4432				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4433		}
4434	}
4435}
4436
4437static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4438{
4439	struct amdgpu_device *adev = ring->adev;
4440	struct vi_mqd *mqd = ring->mqd_ptr;
4441	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4442	uint32_t tmp;
4443
4444	mqd->header = 0xC0310800;
4445	mqd->compute_pipelinestat_enable = 0x00000001;
4446	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4447	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4448	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4449	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4450	mqd->compute_misc_reserved = 0x00000003;
4451	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4452						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4453	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4454						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4455	eop_base_addr = ring->eop_gpu_addr >> 8;
4456	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4457	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4458
4459	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4460	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4461	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4462			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4463
4464	mqd->cp_hqd_eop_control = tmp;
4465
4466	/* enable doorbell? */
4467	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4468			    CP_HQD_PQ_DOORBELL_CONTROL,
4469			    DOORBELL_EN,
4470			    ring->use_doorbell ? 1 : 0);
4471
4472	mqd->cp_hqd_pq_doorbell_control = tmp;
4473
4474	/* set the pointer to the MQD */
4475	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4476	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4477
4478	/* set MQD vmid to 0 */
4479	tmp = RREG32(mmCP_MQD_CONTROL);
4480	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4481	mqd->cp_mqd_control = tmp;
4482
4483	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4484	hqd_gpu_addr = ring->gpu_addr >> 8;
4485	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4486	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4487
4488	/* set up the HQD, this is similar to CP_RB0_CNTL */
4489	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4490	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4491			    (order_base_2(ring->ring_size / 4) - 1));
4492	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4493			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4494#ifdef __BIG_ENDIAN
4495	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4496#endif
4497	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4498	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4499	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4500	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4501	mqd->cp_hqd_pq_control = tmp;
4502
4503	/* set the wb address whether it's enabled or not */
4504	wb_gpu_addr = ring->rptr_gpu_addr;
4505	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4506	mqd->cp_hqd_pq_rptr_report_addr_hi =
4507		upper_32_bits(wb_gpu_addr) & 0xffff;
4508
4509	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4510	wb_gpu_addr = ring->wptr_gpu_addr;
4511	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4512	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4513
4514	tmp = 0;
4515	/* enable the doorbell if requested */
4516	if (ring->use_doorbell) {
4517		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4518		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4519				DOORBELL_OFFSET, ring->doorbell_index);
4520
4521		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4522					 DOORBELL_EN, 1);
4523		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4524					 DOORBELL_SOURCE, 0);
4525		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4526					 DOORBELL_HIT, 0);
4527	}
4528
4529	mqd->cp_hqd_pq_doorbell_control = tmp;
4530
4531	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4532	ring->wptr = 0;
4533	mqd->cp_hqd_pq_wptr = ring->wptr;
4534	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4535
4536	/* set the vmid for the queue */
4537	mqd->cp_hqd_vmid = 0;
4538
4539	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4540	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4541	mqd->cp_hqd_persistent_state = tmp;
4542
4543	/* set MTYPE */
4544	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4545	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4546	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4547	mqd->cp_hqd_ib_control = tmp;
4548
4549	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4550	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4551	mqd->cp_hqd_iq_timer = tmp;
4552
4553	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4554	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4555	mqd->cp_hqd_ctx_save_control = tmp;
4556
4557	/* defaults */
4558	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4559	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4560	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4561	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4562	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4563	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4564	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4565	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4566	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4567	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4568	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4569	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4570
4571	/* set static priority for a queue/ring */
4572	gfx_v8_0_mqd_set_priority(ring, mqd);
4573	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4574
4575	/* map_queues packet doesn't need activate the queue,
4576	 * so only kiq need set this field.
4577	 */
4578	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4579		mqd->cp_hqd_active = 1;
4580
4581	return 0;
4582}
 
4583
4584static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4585			struct vi_mqd *mqd)
4586{
4587	uint32_t mqd_reg;
4588	uint32_t *mqd_data;
 
 
 
 
 
 
4589
4590	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4591	mqd_data = &mqd->cp_mqd_base_addr_lo;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4592
4593	/* disable wptr polling */
4594	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4595
4596	/* program all HQD registers */
4597	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4598		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4599
4600	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4601	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4602	 * on ASICs that do not support context-save.
4603	 * EOP writes/reads can start anywhere in the ring.
4604	 */
4605	if (adev->asic_type != CHIP_TONGA) {
4606		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4607		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4608		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4609	}
4610
4611	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4612		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4613
4614	/* activate the HQD */
4615	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4616		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
 
4617
4618	return 0;
4619}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4620
4621static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4622{
4623	struct amdgpu_device *adev = ring->adev;
4624	struct vi_mqd *mqd = ring->mqd_ptr;
4625	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
 
 
 
4626
4627	gfx_v8_0_kiq_setting(ring);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4628
4629	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4630		/* reset MQD to a clean status */
4631		if (adev->gfx.mec.mqd_backup[mqd_idx])
4632			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
 
4633
4634		/* reset ring buffer */
4635		ring->wptr = 0;
4636		amdgpu_ring_clear_ring(ring);
4637		mutex_lock(&adev->srbm_mutex);
4638		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4639		gfx_v8_0_mqd_commit(adev, mqd);
4640		vi_srbm_select(adev, 0, 0, 0, 0);
4641		mutex_unlock(&adev->srbm_mutex);
4642	} else {
4643		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4644		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4645		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4646		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4647			amdgpu_ring_clear_ring(ring);
4648		mutex_lock(&adev->srbm_mutex);
4649		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4650		gfx_v8_0_mqd_init(ring);
4651		gfx_v8_0_mqd_commit(adev, mqd);
4652		vi_srbm_select(adev, 0, 0, 0, 0);
4653		mutex_unlock(&adev->srbm_mutex);
4654
4655		if (adev->gfx.mec.mqd_backup[mqd_idx])
4656			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4657	}
4658
4659	return 0;
4660}
4661
4662static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4663{
4664	struct amdgpu_device *adev = ring->adev;
4665	struct vi_mqd *mqd = ring->mqd_ptr;
4666	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4667
4668	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4669		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4670		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4671		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4672		mutex_lock(&adev->srbm_mutex);
4673		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4674		gfx_v8_0_mqd_init(ring);
4675		vi_srbm_select(adev, 0, 0, 0, 0);
4676		mutex_unlock(&adev->srbm_mutex);
4677
4678		if (adev->gfx.mec.mqd_backup[mqd_idx])
4679			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4680	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4681		/* reset MQD to a clean status */
4682		if (adev->gfx.mec.mqd_backup[mqd_idx])
4683			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4684		/* reset ring buffer */
4685		ring->wptr = 0;
4686		amdgpu_ring_clear_ring(ring);
4687	} else {
4688		amdgpu_ring_clear_ring(ring);
4689	}
4690	return 0;
4691}
4692
4693static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4694{
4695	if (adev->asic_type > CHIP_TONGA) {
4696		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4697		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4698	}
4699	/* enable doorbells */
4700	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4701}
4702
4703static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4704{
4705	struct amdgpu_ring *ring;
4706	int r;
4707
4708	ring = &adev->gfx.kiq.ring;
4709
4710	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4711	if (unlikely(r != 0))
4712		return r;
4713
4714	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4715	if (unlikely(r != 0))
4716		return r;
4717
4718	gfx_v8_0_kiq_init_queue(ring);
4719	amdgpu_bo_kunmap(ring->mqd_obj);
4720	ring->mqd_ptr = NULL;
4721	amdgpu_bo_unreserve(ring->mqd_obj);
4722	ring->sched.ready = true;
4723	return 0;
4724}
4725
4726static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4727{
4728	struct amdgpu_ring *ring = NULL;
4729	int r = 0, i;
4730
4731	gfx_v8_0_cp_compute_enable(adev, true);
4732
4733	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4734		ring = &adev->gfx.compute_ring[i];
4735
4736		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4737		if (unlikely(r != 0))
4738			goto done;
4739		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4740		if (!r) {
4741			r = gfx_v8_0_kcq_init_queue(ring);
4742			amdgpu_bo_kunmap(ring->mqd_obj);
4743			ring->mqd_ptr = NULL;
4744		}
4745		amdgpu_bo_unreserve(ring->mqd_obj);
4746		if (r)
4747			goto done;
4748	}
4749
4750	gfx_v8_0_set_mec_doorbell_range(adev);
4751
4752	r = gfx_v8_0_kiq_kcq_enable(adev);
4753	if (r)
4754		goto done;
4755
4756done:
4757	return r;
4758}
4759
4760static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4761{
4762	int r, i;
4763	struct amdgpu_ring *ring;
4764
4765	/* collect all the ring_tests here, gfx, kiq, compute */
4766	ring = &adev->gfx.gfx_ring[0];
4767	r = amdgpu_ring_test_helper(ring);
4768	if (r)
4769		return r;
4770
4771	ring = &adev->gfx.kiq.ring;
4772	r = amdgpu_ring_test_helper(ring);
4773	if (r)
4774		return r;
4775
4776	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4777		ring = &adev->gfx.compute_ring[i];
4778		amdgpu_ring_test_helper(ring);
4779	}
4780
4781	return 0;
4782}
4783
4784static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4785{
4786	int r;
4787
4788	if (!(adev->flags & AMD_IS_APU))
4789		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4790
4791	r = gfx_v8_0_kiq_resume(adev);
4792	if (r)
4793		return r;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4794
4795	r = gfx_v8_0_cp_gfx_resume(adev);
4796	if (r)
4797		return r;
 
 
 
 
 
 
 
 
 
4798
4799	r = gfx_v8_0_kcq_resume(adev);
4800	if (r)
4801		return r;
4802
4803	r = gfx_v8_0_cp_test_all_rings(adev);
4804	if (r)
4805		return r;
4806
4807	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4808
4809	return 0;
4810}
4811
4812static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4813{
4814	gfx_v8_0_cp_gfx_enable(adev, enable);
4815	gfx_v8_0_cp_compute_enable(adev, enable);
4816}
4817
4818static int gfx_v8_0_hw_init(void *handle)
4819{
4820	int r;
4821	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4822
4823	gfx_v8_0_init_golden_registers(adev);
4824	gfx_v8_0_constants_init(adev);
4825
4826	r = adev->gfx.rlc.funcs->resume(adev);
4827	if (r)
4828		return r;
4829
4830	r = gfx_v8_0_cp_resume(adev);
4831
4832	return r;
4833}
4834
4835static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4836{
4837	int r, i;
4838	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4839
4840	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4841	if (r)
4842		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4843
4844	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4845		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4846
4847		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4848		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4849						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4850						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4851						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4852						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4853		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4854		amdgpu_ring_write(kiq_ring, 0);
4855		amdgpu_ring_write(kiq_ring, 0);
4856		amdgpu_ring_write(kiq_ring, 0);
4857	}
4858	r = amdgpu_ring_test_helper(kiq_ring);
4859	if (r)
4860		DRM_ERROR("KCQ disable failed\n");
4861
4862	return r;
 
 
 
4863}
4864
4865static bool gfx_v8_0_is_idle(void *handle)
4866{
4867	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4868
4869	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4870		|| RREG32(mmGRBM_STATUS2) != 0x8)
4871		return false;
4872	else
4873		return true;
4874}
4875
4876static bool gfx_v8_0_rlc_is_idle(void *handle)
4877{
4878	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4879
4880	if (RREG32(mmGRBM_STATUS2) != 0x8)
4881		return false;
4882	else
4883		return true;
4884}
4885
4886static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4887{
4888	unsigned int i;
4889	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4890
4891	for (i = 0; i < adev->usec_timeout; i++) {
4892		if (gfx_v8_0_rlc_is_idle(handle))
4893			return 0;
4894
4895		udelay(1);
4896	}
4897	return -ETIMEDOUT;
4898}
4899
4900static int gfx_v8_0_wait_for_idle(void *handle)
4901{
4902	unsigned int i;
4903	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4904
4905	for (i = 0; i < adev->usec_timeout; i++) {
4906		if (gfx_v8_0_is_idle(handle))
4907			return 0;
4908
4909		udelay(1);
4910	}
4911	return -ETIMEDOUT;
4912}
4913
4914static int gfx_v8_0_hw_fini(void *handle)
4915{
4916	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4917
4918	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4919	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4920
4921	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4922
4923	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4924
4925	/* disable KCQ to avoid CPC touch memory not valid anymore */
4926	gfx_v8_0_kcq_disable(adev);
4927
4928	if (amdgpu_sriov_vf(adev)) {
4929		pr_debug("For SRIOV client, shouldn't do anything.\n");
4930		return 0;
4931	}
4932	amdgpu_gfx_rlc_enter_safe_mode(adev);
4933	if (!gfx_v8_0_wait_for_idle(adev))
4934		gfx_v8_0_cp_enable(adev, false);
4935	else
4936		pr_err("cp is busy, skip halt cp\n");
4937	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4938		adev->gfx.rlc.funcs->stop(adev);
4939	else
4940		pr_err("rlc is busy, skip halt rlc\n");
4941	amdgpu_gfx_rlc_exit_safe_mode(adev);
4942
4943	return 0;
4944}
4945
4946static int gfx_v8_0_suspend(void *handle)
4947{
4948	return gfx_v8_0_hw_fini(handle);
4949}
4950
4951static int gfx_v8_0_resume(void *handle)
4952{
4953	return gfx_v8_0_hw_init(handle);
4954}
4955
4956static bool gfx_v8_0_check_soft_reset(void *handle)
4957{
4958	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4959	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4960	u32 tmp;
4961
4962	/* GRBM_STATUS */
4963	tmp = RREG32(mmGRBM_STATUS);
4964	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4965		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4966		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4967		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4968		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4969		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4970		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4971		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4972						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4973		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4974						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4975		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4976						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4977	}
4978
4979	/* GRBM_STATUS2 */
4980	tmp = RREG32(mmGRBM_STATUS2);
4981	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4982		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4983						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4984
4985	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4986	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4987	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4988		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4989						SOFT_RESET_CPF, 1);
4990		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4991						SOFT_RESET_CPC, 1);
4992		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4993						SOFT_RESET_CPG, 1);
4994		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4995						SOFT_RESET_GRBM, 1);
4996	}
4997
4998	/* SRBM_STATUS */
4999	tmp = RREG32(mmSRBM_STATUS);
5000	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5001		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5002						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5003	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5004		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5005						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5006
5007	if (grbm_soft_reset || srbm_soft_reset) {
5008		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5009		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5010		return true;
5011	} else {
5012		adev->gfx.grbm_soft_reset = 0;
5013		adev->gfx.srbm_soft_reset = 0;
5014		return false;
5015	}
5016}
5017
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5018static int gfx_v8_0_pre_soft_reset(void *handle)
5019{
5020	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5021	u32 grbm_soft_reset = 0;
5022
5023	if ((!adev->gfx.grbm_soft_reset) &&
5024	    (!adev->gfx.srbm_soft_reset))
5025		return 0;
5026
5027	grbm_soft_reset = adev->gfx.grbm_soft_reset;
 
5028
5029	/* stop the rlc */
5030	adev->gfx.rlc.funcs->stop(adev);
5031
5032	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5033	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5034		/* Disable GFX parsing/prefetching */
5035		gfx_v8_0_cp_gfx_enable(adev, false);
5036
5037	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5038	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5039	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5040	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5041		int i;
5042
5043		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5044			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5045
5046			mutex_lock(&adev->srbm_mutex);
5047			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5048			gfx_v8_0_deactivate_hqd(adev, 2);
5049			vi_srbm_select(adev, 0, 0, 0, 0);
5050			mutex_unlock(&adev->srbm_mutex);
5051		}
5052		/* Disable MEC parsing/prefetching */
5053		gfx_v8_0_cp_compute_enable(adev, false);
5054	}
5055
5056	return 0;
5057}
5058
5059static int gfx_v8_0_soft_reset(void *handle)
5060{
5061	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5062	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5063	u32 tmp;
5064
5065	if ((!adev->gfx.grbm_soft_reset) &&
5066	    (!adev->gfx.srbm_soft_reset))
5067		return 0;
5068
5069	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5070	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5071
5072	if (grbm_soft_reset || srbm_soft_reset) {
5073		tmp = RREG32(mmGMCON_DEBUG);
5074		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5075		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5076		WREG32(mmGMCON_DEBUG, tmp);
5077		udelay(50);
5078	}
5079
5080	if (grbm_soft_reset) {
5081		tmp = RREG32(mmGRBM_SOFT_RESET);
5082		tmp |= grbm_soft_reset;
5083		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5084		WREG32(mmGRBM_SOFT_RESET, tmp);
5085		tmp = RREG32(mmGRBM_SOFT_RESET);
5086
5087		udelay(50);
5088
5089		tmp &= ~grbm_soft_reset;
5090		WREG32(mmGRBM_SOFT_RESET, tmp);
5091		tmp = RREG32(mmGRBM_SOFT_RESET);
5092	}
5093
5094	if (srbm_soft_reset) {
5095		tmp = RREG32(mmSRBM_SOFT_RESET);
5096		tmp |= srbm_soft_reset;
5097		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5098		WREG32(mmSRBM_SOFT_RESET, tmp);
5099		tmp = RREG32(mmSRBM_SOFT_RESET);
5100
5101		udelay(50);
5102
5103		tmp &= ~srbm_soft_reset;
5104		WREG32(mmSRBM_SOFT_RESET, tmp);
5105		tmp = RREG32(mmSRBM_SOFT_RESET);
5106	}
5107
5108	if (grbm_soft_reset || srbm_soft_reset) {
5109		tmp = RREG32(mmGMCON_DEBUG);
5110		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5111		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5112		WREG32(mmGMCON_DEBUG, tmp);
5113	}
5114
5115	/* Wait a little for things to settle down */
5116	udelay(50);
5117
5118	return 0;
5119}
5120
 
 
 
 
 
 
 
 
 
 
5121static int gfx_v8_0_post_soft_reset(void *handle)
5122{
5123	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5124	u32 grbm_soft_reset = 0;
5125
5126	if ((!adev->gfx.grbm_soft_reset) &&
5127	    (!adev->gfx.srbm_soft_reset))
5128		return 0;
5129
5130	grbm_soft_reset = adev->gfx.grbm_soft_reset;
 
 
 
 
 
5131
5132	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5133	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5134	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5135	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5136		int i;
5137
5138		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5139			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5140
5141			mutex_lock(&adev->srbm_mutex);
5142			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5143			gfx_v8_0_deactivate_hqd(adev, 2);
5144			vi_srbm_select(adev, 0, 0, 0, 0);
5145			mutex_unlock(&adev->srbm_mutex);
5146		}
5147		gfx_v8_0_kiq_resume(adev);
5148		gfx_v8_0_kcq_resume(adev);
5149	}
5150
5151	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5152	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5153		gfx_v8_0_cp_gfx_resume(adev);
5154
5155	gfx_v8_0_cp_test_all_rings(adev);
5156
5157	adev->gfx.rlc.funcs->start(adev);
5158
5159	return 0;
5160}
5161
5162/**
5163 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5164 *
5165 * @adev: amdgpu_device pointer
5166 *
5167 * Fetches a GPU clock counter snapshot.
5168 * Returns the 64 bit clock counter snapshot.
5169 */
5170static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5171{
5172	uint64_t clock;
5173
5174	mutex_lock(&adev->gfx.gpu_clock_mutex);
5175	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5176	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5177		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5178	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5179	return clock;
5180}
5181
5182static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5183					  uint32_t vmid,
5184					  uint32_t gds_base, uint32_t gds_size,
5185					  uint32_t gws_base, uint32_t gws_size,
5186					  uint32_t oa_base, uint32_t oa_size)
5187{
 
 
 
 
 
 
 
 
 
5188	/* GDS Base */
5189	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191				WRITE_DATA_DST_SEL(0)));
5192	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5193	amdgpu_ring_write(ring, 0);
5194	amdgpu_ring_write(ring, gds_base);
5195
5196	/* GDS Size */
5197	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5198	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5199				WRITE_DATA_DST_SEL(0)));
5200	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5201	amdgpu_ring_write(ring, 0);
5202	amdgpu_ring_write(ring, gds_size);
5203
5204	/* GWS */
5205	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5206	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5207				WRITE_DATA_DST_SEL(0)));
5208	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5209	amdgpu_ring_write(ring, 0);
5210	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5211
5212	/* OA */
5213	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5214	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5215				WRITE_DATA_DST_SEL(0)));
5216	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5217	amdgpu_ring_write(ring, 0);
5218	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5219}
5220
5221static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5222{
5223	WREG32(mmSQ_IND_INDEX,
5224		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5225		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5226		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5227		(SQ_IND_INDEX__FORCE_READ_MASK));
5228	return RREG32(mmSQ_IND_DATA);
5229}
5230
5231static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5232			   uint32_t wave, uint32_t thread,
5233			   uint32_t regno, uint32_t num, uint32_t *out)
5234{
5235	WREG32(mmSQ_IND_INDEX,
5236		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5237		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5238		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5239		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5240		(SQ_IND_INDEX__FORCE_READ_MASK) |
5241		(SQ_IND_INDEX__AUTO_INCR_MASK));
5242	while (num--)
5243		*(out++) = RREG32(mmSQ_IND_DATA);
5244}
5245
5246static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5247{
5248	/* type 0 wave data */
5249	dst[(*no_fields)++] = 0;
5250	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5251	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5252	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5253	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5254	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5255	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5256	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5257	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5258	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5259	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5260	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5261	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5262	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5263	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5264	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5265	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5266	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5267	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5268	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5269}
5270
5271static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5272				     uint32_t wave, uint32_t start,
5273				     uint32_t size, uint32_t *dst)
5274{
5275	wave_read_regs(
5276		adev, simd, wave, 0,
5277		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5278}
5279
5280
5281static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5282	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5283	.select_se_sh = &gfx_v8_0_select_se_sh,
5284	.read_wave_data = &gfx_v8_0_read_wave_data,
5285	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5286	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5287};
5288
5289static int gfx_v8_0_early_init(void *handle)
5290{
5291	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5292
5293	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5294	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5295					  AMDGPU_MAX_COMPUTE_RINGS);
5296	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5297	gfx_v8_0_set_ring_funcs(adev);
5298	gfx_v8_0_set_irq_funcs(adev);
5299	gfx_v8_0_set_gds_init(adev);
5300	gfx_v8_0_set_rlc_funcs(adev);
5301
5302	return 0;
5303}
5304
5305static int gfx_v8_0_late_init(void *handle)
5306{
5307	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5308	int r;
5309
5310	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5311	if (r)
5312		return r;
5313
5314	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5315	if (r)
5316		return r;
5317
5318	/* requires IBs so do in late init after IB pool is initialized */
5319	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5320	if (r)
5321		return r;
5322
5323	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5324	if (r) {
5325		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5326		return r;
5327	}
5328
5329	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5330	if (r) {
5331		DRM_ERROR(
5332			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5333			r);
5334		return r;
5335	}
5336
5337	return 0;
5338}
5339
5340static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5341						       bool enable)
5342{
5343	if ((adev->asic_type == CHIP_POLARIS11) ||
5344	    (adev->asic_type == CHIP_POLARIS12) ||
5345	    (adev->asic_type == CHIP_VEGAM))
5346		/* Send msg to SMU via Powerplay */
5347		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
 
 
 
5348
5349	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5350}
5351
5352static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5353							bool enable)
5354{
5355	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5356}
5357
5358static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5359		bool enable)
5360{
5361	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5362}
5363
5364static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5365					  bool enable)
5366{
5367	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5368}
5369
5370static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5371						bool enable)
5372{
5373	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5374
5375	/* Read any GFX register to wake up GFX. */
5376	if (!enable)
5377		RREG32(mmDB_RENDER_CONTROL);
5378}
5379
5380static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5381					  bool enable)
5382{
5383	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5384		cz_enable_gfx_cg_power_gating(adev, true);
5385		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5386			cz_enable_gfx_pipeline_power_gating(adev, true);
5387	} else {
5388		cz_enable_gfx_cg_power_gating(adev, false);
5389		cz_enable_gfx_pipeline_power_gating(adev, false);
5390	}
5391}
5392
5393static int gfx_v8_0_set_powergating_state(void *handle,
5394					  enum amd_powergating_state state)
5395{
5396	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5397	bool enable = (state == AMD_PG_STATE_GATE);
5398
5399	if (amdgpu_sriov_vf(adev))
5400		return 0;
5401
5402	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5403				AMD_PG_SUPPORT_RLC_SMU_HS |
5404				AMD_PG_SUPPORT_CP |
5405				AMD_PG_SUPPORT_GFX_DMG))
5406		amdgpu_gfx_rlc_enter_safe_mode(adev);
5407	switch (adev->asic_type) {
5408	case CHIP_CARRIZO:
5409	case CHIP_STONEY:
5410
5411		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5412			cz_enable_sck_slow_down_on_power_up(adev, true);
5413			cz_enable_sck_slow_down_on_power_down(adev, true);
5414		} else {
5415			cz_enable_sck_slow_down_on_power_up(adev, false);
5416			cz_enable_sck_slow_down_on_power_down(adev, false);
5417		}
5418		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5419			cz_enable_cp_power_gating(adev, true);
5420		else
5421			cz_enable_cp_power_gating(adev, false);
5422
5423		cz_update_gfx_cg_power_gating(adev, enable);
5424
5425		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5426			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5427		else
5428			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5429
5430		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5431			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5432		else
5433			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5434		break;
5435	case CHIP_POLARIS11:
5436	case CHIP_POLARIS12:
5437	case CHIP_VEGAM:
5438		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5439			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5440		else
5441			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5442
5443		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5444			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5445		else
5446			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5447
5448		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5449			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5450		else
5451			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5452		break;
5453	default:
5454		break;
5455	}
5456	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5457				AMD_PG_SUPPORT_RLC_SMU_HS |
5458				AMD_PG_SUPPORT_CP |
5459				AMD_PG_SUPPORT_GFX_DMG))
5460		amdgpu_gfx_rlc_exit_safe_mode(adev);
5461	return 0;
5462}
5463
5464static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5465{
5466	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5467	int data;
5468
5469	if (amdgpu_sriov_vf(adev))
5470		*flags = 0;
5471
5472	/* AMD_CG_SUPPORT_GFX_MGCG */
5473	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5474	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5475		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5476
5477	/* AMD_CG_SUPPORT_GFX_CGLG */
5478	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5479	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5480		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5481
5482	/* AMD_CG_SUPPORT_GFX_CGLS */
5483	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5484		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5485
5486	/* AMD_CG_SUPPORT_GFX_CGTS */
5487	data = RREG32(mmCGTS_SM_CTRL_REG);
5488	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5489		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5490
5491	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5492	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5493		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5494
5495	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5496	data = RREG32(mmRLC_MEM_SLP_CNTL);
5497	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5498		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5499
5500	/* AMD_CG_SUPPORT_GFX_CP_LS */
5501	data = RREG32(mmCP_MEM_SLP_CNTL);
5502	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5503		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5504}
5505
5506static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5507				     uint32_t reg_addr, uint32_t cmd)
5508{
5509	uint32_t data;
5510
5511	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5512
5513	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5514	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5515
5516	data = RREG32(mmRLC_SERDES_WR_CTRL);
5517	if (adev->asic_type == CHIP_STONEY)
5518		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5519			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5520			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5521			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5522			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5523			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5524			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5525			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5526			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5527	else
5528		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5529			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5530			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5531			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5532			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5533			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5534			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5535			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5536			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5537			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5538			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5539	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5540		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5541		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5542		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5543
5544	WREG32(mmRLC_SERDES_WR_CTRL, data);
5545}
5546
5547#define MSG_ENTER_RLC_SAFE_MODE     1
5548#define MSG_EXIT_RLC_SAFE_MODE      0
5549#define RLC_GPR_REG2__REQ_MASK 0x00000001
5550#define RLC_GPR_REG2__REQ__SHIFT 0
5551#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5552#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5553
5554static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5555{
5556	uint32_t rlc_setting;
 
5557
5558	rlc_setting = RREG32(mmRLC_CNTL);
5559	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5560		return false;
5561
5562	return true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5563}
5564
5565static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5566{
5567	uint32_t data;
5568	unsigned i;
 
5569	data = RREG32(mmRLC_CNTL);
5570	data |= RLC_SAFE_MODE__CMD_MASK;
5571	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5572	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5573	WREG32(mmRLC_SAFE_MODE, data);
5574
5575	/* wait for RLC_SAFE_MODE */
5576	for (i = 0; i < adev->usec_timeout; i++) {
5577		if ((RREG32(mmRLC_GPM_STAT) &
5578		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5579		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5580		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5581		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5582			break;
5583		udelay(1);
5584	}
 
5585	for (i = 0; i < adev->usec_timeout; i++) {
5586		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5587			break;
5588		udelay(1);
5589	}
5590}
5591
5592static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5593{
5594	uint32_t data;
5595	unsigned i;
5596
5597	data = RREG32(mmRLC_CNTL);
5598	data |= RLC_SAFE_MODE__CMD_MASK;
5599	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5600	WREG32(mmRLC_SAFE_MODE, data);
5601
5602	for (i = 0; i < adev->usec_timeout; i++) {
5603		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5604			break;
5605		udelay(1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5606	}
5607}
5608
5609static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5610{
5611	u32 data;
 
5612
5613	amdgpu_gfx_off_ctrl(adev, false);
 
 
5614
5615	if (amdgpu_sriov_is_pp_one_vf(adev))
5616		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5617	else
5618		data = RREG32(mmRLC_SPM_VMID);
 
 
 
 
5619
5620	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5621	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
 
 
 
 
5622
5623	if (amdgpu_sriov_is_pp_one_vf(adev))
5624		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5625	else
5626		WREG32(mmRLC_SPM_VMID, data);
5627
5628	amdgpu_gfx_off_ctrl(adev, true);
 
 
5629}
5630
 
 
 
 
 
5631static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5632	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5633	.set_safe_mode = gfx_v8_0_set_safe_mode,
5634	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5635	.init = gfx_v8_0_rlc_init,
5636	.get_csb_size = gfx_v8_0_get_csb_size,
5637	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5638	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5639	.resume = gfx_v8_0_rlc_resume,
5640	.stop = gfx_v8_0_rlc_stop,
5641	.reset = gfx_v8_0_rlc_reset,
5642	.start = gfx_v8_0_rlc_start,
5643	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5644};
5645
5646static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5647						      bool enable)
5648{
5649	uint32_t temp, data;
5650
5651	amdgpu_gfx_rlc_enter_safe_mode(adev);
5652
5653	/* It is disabled by HW by default */
5654	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5655		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5656			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5657				/* 1 - RLC memory Light sleep */
5658				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5659
5660			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5661				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5662		}
5663
5664		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5665		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5666		if (adev->flags & AMD_IS_APU)
5667			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5668				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5669				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5670		else
5671			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5672				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5673				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5674				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5675
5676		if (temp != data)
5677			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5678
5679		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5680		gfx_v8_0_wait_for_rlc_serdes(adev);
5681
5682		/* 5 - clear mgcg override */
5683		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5684
5685		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5686			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5687			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5688			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5689			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5690			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5691			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5692			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5693			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5694				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5695			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5696			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5697			if (temp != data)
5698				WREG32(mmCGTS_SM_CTRL_REG, data);
5699		}
5700		udelay(50);
5701
5702		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5703		gfx_v8_0_wait_for_rlc_serdes(adev);
5704	} else {
5705		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5706		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5707		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5708				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5709				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5710				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5711		if (temp != data)
5712			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5713
5714		/* 2 - disable MGLS in RLC */
5715		data = RREG32(mmRLC_MEM_SLP_CNTL);
5716		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5717			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5718			WREG32(mmRLC_MEM_SLP_CNTL, data);
5719		}
5720
5721		/* 3 - disable MGLS in CP */
5722		data = RREG32(mmCP_MEM_SLP_CNTL);
5723		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5724			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5725			WREG32(mmCP_MEM_SLP_CNTL, data);
5726		}
5727
5728		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5729		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5730		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5731				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5732		if (temp != data)
5733			WREG32(mmCGTS_SM_CTRL_REG, data);
5734
5735		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5736		gfx_v8_0_wait_for_rlc_serdes(adev);
5737
5738		/* 6 - set mgcg override */
5739		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5740
5741		udelay(50);
5742
5743		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5744		gfx_v8_0_wait_for_rlc_serdes(adev);
5745	}
5746
5747	amdgpu_gfx_rlc_exit_safe_mode(adev);
5748}
5749
5750static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5751						      bool enable)
5752{
5753	uint32_t temp, temp1, data, data1;
5754
5755	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5756
5757	amdgpu_gfx_rlc_enter_safe_mode(adev);
5758
5759	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5760		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5761		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5762		if (temp1 != data1)
5763			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5764
5765		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5766		gfx_v8_0_wait_for_rlc_serdes(adev);
5767
5768		/* 2 - clear cgcg override */
5769		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5770
5771		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5772		gfx_v8_0_wait_for_rlc_serdes(adev);
5773
5774		/* 3 - write cmd to set CGLS */
5775		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5776
5777		/* 4 - enable cgcg */
5778		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5779
5780		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5781			/* enable cgls*/
5782			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5783
5784			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5785			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5786
5787			if (temp1 != data1)
5788				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5789		} else {
5790			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5791		}
5792
5793		if (temp != data)
5794			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5795
5796		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5797		 * Cmp_busy/GFX_Idle interrupts
5798		 */
5799		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5800	} else {
5801		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5802		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5803
5804		/* TEST CGCG */
5805		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5806		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5807				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5808		if (temp1 != data1)
5809			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5810
5811		/* read gfx register to wake up cgcg */
5812		RREG32(mmCB_CGTT_SCLK_CTRL);
5813		RREG32(mmCB_CGTT_SCLK_CTRL);
5814		RREG32(mmCB_CGTT_SCLK_CTRL);
5815		RREG32(mmCB_CGTT_SCLK_CTRL);
5816
5817		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5818		gfx_v8_0_wait_for_rlc_serdes(adev);
5819
5820		/* write cmd to Set CGCG Override */
5821		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5822
5823		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5824		gfx_v8_0_wait_for_rlc_serdes(adev);
5825
5826		/* write cmd to Clear CGLS */
5827		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5828
5829		/* disable cgcg, cgls should be disabled too. */
5830		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5831			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5832		if (temp != data)
5833			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5834		/* enable interrupts again for PG */
5835		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5836	}
5837
5838	gfx_v8_0_wait_for_rlc_serdes(adev);
5839
5840	amdgpu_gfx_rlc_exit_safe_mode(adev);
5841}
5842static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5843					    bool enable)
5844{
5845	if (enable) {
5846		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5847		 * ===  MGCG + MGLS + TS(CG/LS) ===
5848		 */
5849		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5850		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5851	} else {
5852		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5853		 * ===  CGCG + CGLS ===
5854		 */
5855		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5856		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5857	}
5858	return 0;
5859}
5860
5861static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5862					  enum amd_clockgating_state state)
5863{
5864	uint32_t msg_id, pp_state = 0;
5865	uint32_t pp_support_state = 0;
 
5866
5867	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5868		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5869			pp_support_state = PP_STATE_SUPPORT_LS;
5870			pp_state = PP_STATE_LS;
5871		}
5872		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5873			pp_support_state |= PP_STATE_SUPPORT_CG;
5874			pp_state |= PP_STATE_CG;
5875		}
5876		if (state == AMD_CG_STATE_UNGATE)
5877			pp_state = 0;
5878
5879		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5880				PP_BLOCK_GFX_CG,
5881				pp_support_state,
5882				pp_state);
5883		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884	}
5885
5886	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5887		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5888			pp_support_state = PP_STATE_SUPPORT_LS;
5889			pp_state = PP_STATE_LS;
5890		}
5891
5892		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5893			pp_support_state |= PP_STATE_SUPPORT_CG;
5894			pp_state |= PP_STATE_CG;
5895		}
5896
5897		if (state == AMD_CG_STATE_UNGATE)
5898			pp_state = 0;
5899
5900		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5901				PP_BLOCK_GFX_MG,
5902				pp_support_state,
5903				pp_state);
5904		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5905	}
5906
5907	return 0;
5908}
5909
5910static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5911					  enum amd_clockgating_state state)
5912{
5913
5914	uint32_t msg_id, pp_state = 0;
5915	uint32_t pp_support_state = 0;
 
5916
5917	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5918		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5919			pp_support_state = PP_STATE_SUPPORT_LS;
5920			pp_state = PP_STATE_LS;
5921		}
5922		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5923			pp_support_state |= PP_STATE_SUPPORT_CG;
5924			pp_state |= PP_STATE_CG;
5925		}
5926		if (state == AMD_CG_STATE_UNGATE)
5927			pp_state = 0;
5928
5929		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5930				PP_BLOCK_GFX_CG,
5931				pp_support_state,
5932				pp_state);
5933		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5934	}
5935
5936	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5937		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5938			pp_support_state = PP_STATE_SUPPORT_LS;
5939			pp_state = PP_STATE_LS;
5940		}
5941		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5942			pp_support_state |= PP_STATE_SUPPORT_CG;
5943			pp_state |= PP_STATE_CG;
5944		}
5945		if (state == AMD_CG_STATE_UNGATE)
5946			pp_state = 0;
5947
5948		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5949				PP_BLOCK_GFX_3D,
5950				pp_support_state,
5951				pp_state);
5952		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5953	}
5954
5955	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5956		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5957			pp_support_state = PP_STATE_SUPPORT_LS;
5958			pp_state = PP_STATE_LS;
5959		}
5960
5961		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5962			pp_support_state |= PP_STATE_SUPPORT_CG;
5963			pp_state |= PP_STATE_CG;
5964		}
5965
5966		if (state == AMD_CG_STATE_UNGATE)
5967			pp_state = 0;
5968
5969		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5970				PP_BLOCK_GFX_MG,
5971				pp_support_state,
5972				pp_state);
5973		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5974	}
5975
5976	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5977		pp_support_state = PP_STATE_SUPPORT_LS;
5978
5979		if (state == AMD_CG_STATE_UNGATE)
5980			pp_state = 0;
5981		else
5982			pp_state = PP_STATE_LS;
5983
5984		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5985				PP_BLOCK_GFX_RLC,
5986				pp_support_state,
5987				pp_state);
5988		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5989	}
5990
5991	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5992		pp_support_state = PP_STATE_SUPPORT_LS;
5993
5994		if (state == AMD_CG_STATE_UNGATE)
5995			pp_state = 0;
5996		else
5997			pp_state = PP_STATE_LS;
5998		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5999			PP_BLOCK_GFX_CP,
6000			pp_support_state,
6001			pp_state);
6002		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6003	}
6004
6005	return 0;
6006}
6007
6008static int gfx_v8_0_set_clockgating_state(void *handle,
6009					  enum amd_clockgating_state state)
6010{
6011	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6012
6013	if (amdgpu_sriov_vf(adev))
6014		return 0;
6015
6016	switch (adev->asic_type) {
6017	case CHIP_FIJI:
6018	case CHIP_CARRIZO:
6019	case CHIP_STONEY:
6020		gfx_v8_0_update_gfx_clock_gating(adev,
6021						 state == AMD_CG_STATE_GATE);
6022		break;
6023	case CHIP_TONGA:
6024		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6025		break;
6026	case CHIP_POLARIS10:
6027	case CHIP_POLARIS11:
6028	case CHIP_POLARIS12:
6029	case CHIP_VEGAM:
6030		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6031		break;
6032	default:
6033		break;
6034	}
6035	return 0;
6036}
6037
6038static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6039{
6040	return *ring->rptr_cpu_addr;
6041}
6042
6043static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6044{
6045	struct amdgpu_device *adev = ring->adev;
6046
6047	if (ring->use_doorbell)
6048		/* XXX check if swapping is necessary on BE */
6049		return *ring->wptr_cpu_addr;
6050	else
6051		return RREG32(mmCP_RB0_WPTR);
6052}
6053
6054static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6055{
6056	struct amdgpu_device *adev = ring->adev;
6057
6058	if (ring->use_doorbell) {
6059		/* XXX check if swapping is necessary on BE */
6060		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6061		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6062	} else {
6063		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6064		(void)RREG32(mmCP_RB0_WPTR);
6065	}
6066}
6067
6068static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6069{
6070	u32 ref_and_mask, reg_mem_engine;
6071
6072	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6073	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6074		switch (ring->me) {
6075		case 1:
6076			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6077			break;
6078		case 2:
6079			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6080			break;
6081		default:
6082			return;
6083		}
6084		reg_mem_engine = 0;
6085	} else {
6086		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6087		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6088	}
6089
6090	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6091	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6092				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6093				 reg_mem_engine));
6094	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6095	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6096	amdgpu_ring_write(ring, ref_and_mask);
6097	amdgpu_ring_write(ring, ref_and_mask);
6098	amdgpu_ring_write(ring, 0x20); /* poll interval */
6099}
6100
6101static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6102{
6103	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6104	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6105		EVENT_INDEX(4));
6106
6107	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6108	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6109		EVENT_INDEX(0));
6110}
6111
 
 
 
 
 
 
 
 
 
 
 
 
 
6112static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6113					struct amdgpu_job *job,
6114					struct amdgpu_ib *ib,
6115					uint32_t flags)
6116{
6117	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6118	u32 header, control = 0;
6119
6120	if (ib->flags & AMDGPU_IB_FLAG_CE)
6121		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6122	else
6123		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6124
6125	control |= ib->length_dw | (vmid << 24);
6126
6127	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6128		control |= INDIRECT_BUFFER_PRE_ENB(1);
6129
6130		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6131			gfx_v8_0_ring_emit_de_meta(ring);
6132	}
6133
6134	amdgpu_ring_write(ring, header);
6135	amdgpu_ring_write(ring,
6136#ifdef __BIG_ENDIAN
6137			  (2 << 0) |
6138#endif
6139			  (ib->gpu_addr & 0xFFFFFFFC));
6140	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6141	amdgpu_ring_write(ring, control);
6142}
6143
6144static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6145					  struct amdgpu_job *job,
6146					  struct amdgpu_ib *ib,
6147					  uint32_t flags)
6148{
6149	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6150	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6151
6152	/* Currently, there is a high possibility to get wave ID mismatch
6153	 * between ME and GDS, leading to a hw deadlock, because ME generates
6154	 * different wave IDs than the GDS expects. This situation happens
6155	 * randomly when at least 5 compute pipes use GDS ordered append.
6156	 * The wave IDs generated by ME are also wrong after suspend/resume.
6157	 * Those are probably bugs somewhere else in the kernel driver.
6158	 *
6159	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6160	 * GDS to 0 for this ring (me/pipe).
6161	 */
6162	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6163		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6164		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6165		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6166	}
6167
6168	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6169	amdgpu_ring_write(ring,
6170#ifdef __BIG_ENDIAN
6171				(2 << 0) |
6172#endif
6173				(ib->gpu_addr & 0xFFFFFFFC));
6174	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6175	amdgpu_ring_write(ring, control);
6176}
6177
6178static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6179					 u64 seq, unsigned flags)
6180{
6181	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6182	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6183
6184	/* Workaround for cache flush problems. First send a dummy EOP
6185	 * event down the pipe with seq one below.
6186	 */
6187	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6188	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6189				 EOP_TC_ACTION_EN |
6190				 EOP_TC_WB_ACTION_EN |
6191				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6192				 EVENT_INDEX(5)));
6193	amdgpu_ring_write(ring, addr & 0xfffffffc);
6194	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6195				DATA_SEL(1) | INT_SEL(0));
6196	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6197	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6198
6199	/* Then send the real EOP event down the pipe:
6200	 * EVENT_WRITE_EOP - flush caches, send int */
6201	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6202	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6203				 EOP_TC_ACTION_EN |
6204				 EOP_TC_WB_ACTION_EN |
6205				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6206				 EVENT_INDEX(5)));
6207	amdgpu_ring_write(ring, addr & 0xfffffffc);
6208	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6209			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6210	amdgpu_ring_write(ring, lower_32_bits(seq));
6211	amdgpu_ring_write(ring, upper_32_bits(seq));
6212
6213}
6214
6215static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6216{
6217	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6218	uint32_t seq = ring->fence_drv.sync_seq;
6219	uint64_t addr = ring->fence_drv.gpu_addr;
6220
6221	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6222	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6223				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6224				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6225	amdgpu_ring_write(ring, addr & 0xfffffffc);
6226	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6227	amdgpu_ring_write(ring, seq);
6228	amdgpu_ring_write(ring, 0xffffffff);
6229	amdgpu_ring_write(ring, 4); /* poll interval */
6230}
6231
6232static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6233					unsigned vmid, uint64_t pd_addr)
6234{
6235	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6236
6237	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6238
6239	/* wait for the invalidate to complete */
6240	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6241	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6242				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6243				 WAIT_REG_MEM_ENGINE(0))); /* me */
6244	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6245	amdgpu_ring_write(ring, 0);
6246	amdgpu_ring_write(ring, 0); /* ref */
6247	amdgpu_ring_write(ring, 0); /* mask */
6248	amdgpu_ring_write(ring, 0x20); /* poll interval */
6249
6250	/* compute doesn't have PFP */
6251	if (usepfp) {
6252		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6253		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6254		amdgpu_ring_write(ring, 0x0);
 
 
6255	}
6256}
6257
6258static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6259{
6260	return *ring->wptr_cpu_addr;
6261}
6262
6263static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6264{
6265	struct amdgpu_device *adev = ring->adev;
6266
6267	/* XXX check if swapping is necessary on BE */
6268	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6269	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6270}
6271
6272static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6273					     u64 addr, u64 seq,
6274					     unsigned flags)
6275{
6276	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6277	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6278
6279	/* RELEASE_MEM - flush caches, send int */
6280	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6281	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6282				 EOP_TC_ACTION_EN |
6283				 EOP_TC_WB_ACTION_EN |
6284				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6285				 EVENT_INDEX(5)));
6286	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6287	amdgpu_ring_write(ring, addr & 0xfffffffc);
6288	amdgpu_ring_write(ring, upper_32_bits(addr));
6289	amdgpu_ring_write(ring, lower_32_bits(seq));
6290	amdgpu_ring_write(ring, upper_32_bits(seq));
6291}
6292
6293static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6294					 u64 seq, unsigned int flags)
6295{
6296	/* we only allocate 32bit for each seq wb address */
6297	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6298
6299	/* write fence seq to the "addr" */
6300	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6301	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6302				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6303	amdgpu_ring_write(ring, lower_32_bits(addr));
6304	amdgpu_ring_write(ring, upper_32_bits(addr));
6305	amdgpu_ring_write(ring, lower_32_bits(seq));
6306
6307	if (flags & AMDGPU_FENCE_FLAG_INT) {
6308		/* set register to trigger INT */
6309		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6310		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6311					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6312		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6313		amdgpu_ring_write(ring, 0);
6314		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6315	}
6316}
6317
6318static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6319{
6320	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6321	amdgpu_ring_write(ring, 0);
6322}
6323
6324static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6325{
6326	uint32_t dw2 = 0;
6327
6328	if (amdgpu_sriov_vf(ring->adev))
6329		gfx_v8_0_ring_emit_ce_meta(ring);
6330
6331	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6332	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6333		gfx_v8_0_ring_emit_vgt_flush(ring);
6334		/* set load_global_config & load_global_uconfig */
6335		dw2 |= 0x8001;
6336		/* set load_cs_sh_regs */
6337		dw2 |= 0x01000000;
6338		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6339		dw2 |= 0x10002;
6340
6341		/* set load_ce_ram if preamble presented */
6342		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6343			dw2 |= 0x10000000;
6344	} else {
6345		/* still load_ce_ram if this is the first time preamble presented
6346		 * although there is no context switch happens.
6347		 */
6348		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6349			dw2 |= 0x10000000;
6350	}
6351
6352	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6353	amdgpu_ring_write(ring, dw2);
6354	amdgpu_ring_write(ring, 0);
6355}
6356
6357static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6358{
6359	unsigned ret;
6360
6361	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6362	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6363	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6364	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6365	ret = ring->wptr & ring->buf_mask;
6366	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6367	return ret;
6368}
6369
6370static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6371{
6372	unsigned cur;
6373
6374	BUG_ON(offset > ring->buf_mask);
6375	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6376
6377	cur = (ring->wptr & ring->buf_mask) - 1;
6378	if (likely(cur > offset))
6379		ring->ring[offset] = cur - offset;
6380	else
6381		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6382}
6383
6384static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6385				    uint32_t reg_val_offs)
6386{
6387	struct amdgpu_device *adev = ring->adev;
6388
6389	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6390	amdgpu_ring_write(ring, 0 |	/* src: register*/
6391				(5 << 8) |	/* dst: memory */
6392				(1 << 20));	/* write confirm */
6393	amdgpu_ring_write(ring, reg);
6394	amdgpu_ring_write(ring, 0);
6395	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6396				reg_val_offs * 4));
6397	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6398				reg_val_offs * 4));
6399}
6400
6401static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6402				  uint32_t val)
6403{
6404	uint32_t cmd;
6405
6406	switch (ring->funcs->type) {
6407	case AMDGPU_RING_TYPE_GFX:
6408		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6409		break;
6410	case AMDGPU_RING_TYPE_KIQ:
6411		cmd = 1 << 16; /* no inc addr */
6412		break;
6413	default:
6414		cmd = WR_CONFIRM;
6415		break;
6416	}
6417
6418	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6419	amdgpu_ring_write(ring, cmd);
6420	amdgpu_ring_write(ring, reg);
6421	amdgpu_ring_write(ring, 0);
6422	amdgpu_ring_write(ring, val);
6423}
6424
6425static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6426{
6427	struct amdgpu_device *adev = ring->adev;
6428	uint32_t value = 0;
6429
6430	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6431	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6432	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6433	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6434	WREG32(mmSQ_CMD, value);
6435}
6436
6437static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6438						 enum amdgpu_interrupt_state state)
6439{
6440	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6441		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6442}
6443
6444static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6445						     int me, int pipe,
6446						     enum amdgpu_interrupt_state state)
6447{
6448	u32 mec_int_cntl, mec_int_cntl_reg;
6449
6450	/*
6451	 * amdgpu controls only the first MEC. That's why this function only
6452	 * handles the setting of interrupts for this specific MEC. All other
6453	 * pipes' interrupts are set by amdkfd.
6454	 */
6455
6456	if (me == 1) {
6457		switch (pipe) {
6458		case 0:
6459			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6460			break;
6461		case 1:
6462			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6463			break;
6464		case 2:
6465			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6466			break;
6467		case 3:
6468			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6469			break;
6470		default:
6471			DRM_DEBUG("invalid pipe %d\n", pipe);
6472			return;
6473		}
6474	} else {
6475		DRM_DEBUG("invalid me %d\n", me);
6476		return;
6477	}
6478
6479	switch (state) {
6480	case AMDGPU_IRQ_STATE_DISABLE:
6481		mec_int_cntl = RREG32(mec_int_cntl_reg);
6482		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6483		WREG32(mec_int_cntl_reg, mec_int_cntl);
6484		break;
6485	case AMDGPU_IRQ_STATE_ENABLE:
6486		mec_int_cntl = RREG32(mec_int_cntl_reg);
6487		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6488		WREG32(mec_int_cntl_reg, mec_int_cntl);
6489		break;
6490	default:
6491		break;
6492	}
6493}
6494
6495static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6496					     struct amdgpu_irq_src *source,
6497					     unsigned type,
6498					     enum amdgpu_interrupt_state state)
6499{
6500	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6501		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6502
6503	return 0;
6504}
6505
6506static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6507					      struct amdgpu_irq_src *source,
6508					      unsigned type,
6509					      enum amdgpu_interrupt_state state)
6510{
6511	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6512		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6513
6514	return 0;
6515}
6516
6517static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6518					    struct amdgpu_irq_src *src,
6519					    unsigned type,
6520					    enum amdgpu_interrupt_state state)
6521{
6522	switch (type) {
6523	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6524		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6525		break;
6526	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6527		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6528		break;
6529	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6530		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6531		break;
6532	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6533		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6534		break;
6535	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6536		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6537		break;
6538	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6539		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6540		break;
6541	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6542		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6543		break;
6544	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6545		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6546		break;
6547	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6548		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6549		break;
6550	default:
6551		break;
6552	}
6553	return 0;
6554}
6555
6556static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6557					 struct amdgpu_irq_src *source,
6558					 unsigned int type,
6559					 enum amdgpu_interrupt_state state)
6560{
6561	int enable_flag;
6562
6563	switch (state) {
6564	case AMDGPU_IRQ_STATE_DISABLE:
6565		enable_flag = 0;
6566		break;
6567
6568	case AMDGPU_IRQ_STATE_ENABLE:
6569		enable_flag = 1;
6570		break;
6571
6572	default:
6573		return -EINVAL;
6574	}
6575
6576	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6580	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6581	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582		     enable_flag);
6583	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584		     enable_flag);
6585	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586		     enable_flag);
6587	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588		     enable_flag);
6589	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590		     enable_flag);
6591	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592		     enable_flag);
6593	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6594		     enable_flag);
6595	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6596		     enable_flag);
6597
6598	return 0;
6599}
6600
6601static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6602				     struct amdgpu_irq_src *source,
6603				     unsigned int type,
6604				     enum amdgpu_interrupt_state state)
6605{
6606	int enable_flag;
6607
6608	switch (state) {
6609	case AMDGPU_IRQ_STATE_DISABLE:
6610		enable_flag = 1;
6611		break;
6612
6613	case AMDGPU_IRQ_STATE_ENABLE:
6614		enable_flag = 0;
6615		break;
6616
6617	default:
6618		return -EINVAL;
6619	}
6620
6621	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6622		     enable_flag);
6623
6624	return 0;
6625}
6626
6627static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6628			    struct amdgpu_irq_src *source,
6629			    struct amdgpu_iv_entry *entry)
6630{
6631	int i;
6632	u8 me_id, pipe_id, queue_id;
6633	struct amdgpu_ring *ring;
6634
6635	DRM_DEBUG("IH: CP EOP\n");
6636	me_id = (entry->ring_id & 0x0c) >> 2;
6637	pipe_id = (entry->ring_id & 0x03) >> 0;
6638	queue_id = (entry->ring_id & 0x70) >> 4;
6639
6640	switch (me_id) {
6641	case 0:
6642		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6643		break;
6644	case 1:
6645	case 2:
6646		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6647			ring = &adev->gfx.compute_ring[i];
6648			/* Per-queue interrupt is supported for MEC starting from VI.
6649			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6650			  */
6651			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6652				amdgpu_fence_process(ring);
6653		}
6654		break;
6655	}
6656	return 0;
6657}
6658
6659static void gfx_v8_0_fault(struct amdgpu_device *adev,
6660			   struct amdgpu_iv_entry *entry)
6661{
6662	u8 me_id, pipe_id, queue_id;
6663	struct amdgpu_ring *ring;
6664	int i;
6665
6666	me_id = (entry->ring_id & 0x0c) >> 2;
6667	pipe_id = (entry->ring_id & 0x03) >> 0;
6668	queue_id = (entry->ring_id & 0x70) >> 4;
6669
6670	switch (me_id) {
6671	case 0:
6672		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6673		break;
6674	case 1:
6675	case 2:
6676		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6677			ring = &adev->gfx.compute_ring[i];
6678			if (ring->me == me_id && ring->pipe == pipe_id &&
6679			    ring->queue == queue_id)
6680				drm_sched_fault(&ring->sched);
6681		}
6682		break;
6683	}
6684}
6685
6686static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6687				 struct amdgpu_irq_src *source,
6688				 struct amdgpu_iv_entry *entry)
6689{
6690	DRM_ERROR("Illegal register access in command stream\n");
6691	gfx_v8_0_fault(adev, entry);
6692	return 0;
6693}
6694
6695static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6696				  struct amdgpu_irq_src *source,
6697				  struct amdgpu_iv_entry *entry)
6698{
6699	DRM_ERROR("Illegal instruction in command stream\n");
6700	gfx_v8_0_fault(adev, entry);
6701	return 0;
6702}
6703
6704static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6705				     struct amdgpu_irq_src *source,
6706				     struct amdgpu_iv_entry *entry)
6707{
6708	DRM_ERROR("CP EDC/ECC error detected.");
6709	return 0;
6710}
6711
6712static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6713				  bool from_wq)
6714{
6715	u32 enc, se_id, sh_id, cu_id;
6716	char type[20];
6717	int sq_edc_source = -1;
6718
6719	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6720	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6721
6722	switch (enc) {
6723		case 0:
6724			DRM_INFO("SQ general purpose intr detected:"
6725					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6726					"host_cmd_overflow %d, cmd_timestamp %d,"
6727					"reg_timestamp %d, thread_trace_buff_full %d,"
6728					"wlt %d, thread_trace %d.\n",
6729					se_id,
6730					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6731					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6732					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6733					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6734					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6735					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6736					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6737					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6738					);
6739			break;
6740		case 1:
6741		case 2:
6742
6743			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6744			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6745
6746			/*
6747			 * This function can be called either directly from ISR
6748			 * or from BH in which case we can access SQ_EDC_INFO
6749			 * instance
6750			 */
6751			if (from_wq) {
6752				mutex_lock(&adev->grbm_idx_mutex);
6753				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6754
6755				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6756
6757				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6758				mutex_unlock(&adev->grbm_idx_mutex);
6759			}
6760
6761			if (enc == 1)
6762				sprintf(type, "instruction intr");
6763			else
6764				sprintf(type, "EDC/ECC error");
6765
6766			DRM_INFO(
6767				"SQ %s detected: "
6768					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6769					"trap %s, sq_ed_info.source %s.\n",
6770					type, se_id, sh_id, cu_id,
6771					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6772					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6773					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6774					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6775					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6776				);
6777			break;
6778		default:
6779			DRM_ERROR("SQ invalid encoding type\n.");
6780	}
6781}
6782
6783static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6784{
6785
6786	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6787	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6788
6789	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6790}
6791
6792static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6793			   struct amdgpu_irq_src *source,
6794			   struct amdgpu_iv_entry *entry)
6795{
6796	unsigned ih_data = entry->src_data[0];
6797
6798	/*
6799	 * Try to submit work so SQ_EDC_INFO can be accessed from
6800	 * BH. If previous work submission hasn't finished yet
6801	 * just print whatever info is possible directly from the ISR.
6802	 */
6803	if (work_pending(&adev->gfx.sq_work.work)) {
6804		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6805	} else {
6806		adev->gfx.sq_work.ih_data = ih_data;
6807		schedule_work(&adev->gfx.sq_work.work);
6808	}
6809
6810	return 0;
6811}
6812
6813static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6814{
6815	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6816	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6817			  PACKET3_TC_ACTION_ENA |
6818			  PACKET3_SH_KCACHE_ACTION_ENA |
6819			  PACKET3_SH_ICACHE_ACTION_ENA |
6820			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6821	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6822	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6823	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6824}
6825
6826static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6827{
6828	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6829	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6830			  PACKET3_TC_ACTION_ENA |
6831			  PACKET3_SH_KCACHE_ACTION_ENA |
6832			  PACKET3_SH_ICACHE_ACTION_ENA |
6833			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6834	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6835	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6836	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6837	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6838	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6839}
6840
6841
6842/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6843#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
6844static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6845					uint32_t pipe, bool enable)
6846{
6847	uint32_t val;
6848	uint32_t wcl_cs_reg;
6849
6850	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6851
6852	switch (pipe) {
6853	case 0:
6854		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6855		break;
6856	case 1:
6857		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6858		break;
6859	case 2:
6860		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6861		break;
6862	case 3:
6863		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6864		break;
6865	default:
6866		DRM_DEBUG("invalid pipe %d\n", pipe);
6867		return;
6868	}
6869
6870	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6871
6872}
6873
6874#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
6875static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6876{
6877	struct amdgpu_device *adev = ring->adev;
6878	uint32_t val;
6879	int i;
6880
6881	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6882	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6883	 * around 25% of gpu resources.
6884	 */
6885	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6886	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6887
6888	/* Restrict waves for normal/low priority compute queues as well
6889	 * to get best QoS for high priority compute jobs.
6890	 *
6891	 * amdgpu controls only 1st ME(0-3 CS pipes).
6892	 */
6893	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6894		if (i != ring->pipe)
6895			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6896
6897	}
6898
6899}
6900
6901static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6902	.name = "gfx_v8_0",
6903	.early_init = gfx_v8_0_early_init,
6904	.late_init = gfx_v8_0_late_init,
6905	.sw_init = gfx_v8_0_sw_init,
6906	.sw_fini = gfx_v8_0_sw_fini,
6907	.hw_init = gfx_v8_0_hw_init,
6908	.hw_fini = gfx_v8_0_hw_fini,
6909	.suspend = gfx_v8_0_suspend,
6910	.resume = gfx_v8_0_resume,
6911	.is_idle = gfx_v8_0_is_idle,
6912	.wait_for_idle = gfx_v8_0_wait_for_idle,
6913	.check_soft_reset = gfx_v8_0_check_soft_reset,
6914	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6915	.soft_reset = gfx_v8_0_soft_reset,
6916	.post_soft_reset = gfx_v8_0_post_soft_reset,
6917	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6918	.set_powergating_state = gfx_v8_0_set_powergating_state,
6919	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6920};
6921
6922static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6923	.type = AMDGPU_RING_TYPE_GFX,
6924	.align_mask = 0xff,
6925	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6926	.support_64bit_ptrs = false,
6927	.get_rptr = gfx_v8_0_ring_get_rptr,
6928	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6929	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6930	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6931		5 +  /* COND_EXEC */
6932		7 +  /* PIPELINE_SYNC */
6933		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6934		12 +  /* FENCE for VM_FLUSH */
6935		20 + /* GDS switch */
6936		4 + /* double SWITCH_BUFFER,
6937		       the first COND_EXEC jump to the place just
6938			   prior to this double SWITCH_BUFFER  */
6939		5 + /* COND_EXEC */
6940		7 +	 /*	HDP_flush */
6941		4 +	 /*	VGT_flush */
6942		14 + /*	CE_META */
6943		31 + /*	DE_META */
6944		3 + /* CNTX_CTRL */
6945		5 + /* HDP_INVL */
6946		12 + 12 + /* FENCE x2 */
6947		2 + /* SWITCH_BUFFER */
6948		5, /* SURFACE_SYNC */
6949	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6950	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6951	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6952	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6953	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6954	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6955	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 
6956	.test_ring = gfx_v8_0_ring_test_ring,
6957	.test_ib = gfx_v8_0_ring_test_ib,
6958	.insert_nop = amdgpu_ring_insert_nop,
6959	.pad_ib = amdgpu_ring_generic_pad_ib,
6960	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6961	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6962	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6963	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6964	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6965	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6966	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
6967};
6968
6969static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6970	.type = AMDGPU_RING_TYPE_COMPUTE,
6971	.align_mask = 0xff,
6972	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6973	.support_64bit_ptrs = false,
6974	.get_rptr = gfx_v8_0_ring_get_rptr,
6975	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6976	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6977	.emit_frame_size =
6978		20 + /* gfx_v8_0_ring_emit_gds_switch */
6979		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6980		5 + /* hdp_invalidate */
6981		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6982		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6983		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6984		7 + /* gfx_v8_0_emit_mem_sync_compute */
6985		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6986		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6987	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6988	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6989	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6990	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6991	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6992	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6993	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 
6994	.test_ring = gfx_v8_0_ring_test_ring,
6995	.test_ib = gfx_v8_0_ring_test_ib,
6996	.insert_nop = amdgpu_ring_insert_nop,
6997	.pad_ib = amdgpu_ring_generic_pad_ib,
6998	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6999	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7000	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
7001};
7002
7003static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7004	.type = AMDGPU_RING_TYPE_KIQ,
7005	.align_mask = 0xff,
7006	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7007	.support_64bit_ptrs = false,
7008	.get_rptr = gfx_v8_0_ring_get_rptr,
7009	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7010	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7011	.emit_frame_size =
7012		20 + /* gfx_v8_0_ring_emit_gds_switch */
7013		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7014		5 + /* hdp_invalidate */
7015		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7016		17 + /* gfx_v8_0_ring_emit_vm_flush */
7017		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7018	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
7019	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7020	.test_ring = gfx_v8_0_ring_test_ring,
7021	.insert_nop = amdgpu_ring_insert_nop,
7022	.pad_ib = amdgpu_ring_generic_pad_ib,
7023	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7024	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7025};
7026
7027static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7028{
7029	int i;
7030
7031	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7032
7033	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7034		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7035
7036	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7037		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7038}
7039
7040static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7041	.set = gfx_v8_0_set_eop_interrupt_state,
7042	.process = gfx_v8_0_eop_irq,
7043};
7044
7045static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7046	.set = gfx_v8_0_set_priv_reg_fault_state,
7047	.process = gfx_v8_0_priv_reg_irq,
7048};
7049
7050static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7051	.set = gfx_v8_0_set_priv_inst_fault_state,
7052	.process = gfx_v8_0_priv_inst_irq,
7053};
7054
7055static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7056	.set = gfx_v8_0_set_cp_ecc_int_state,
7057	.process = gfx_v8_0_cp_ecc_error_irq,
7058};
7059
7060static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7061	.set = gfx_v8_0_set_sq_int_state,
7062	.process = gfx_v8_0_sq_irq,
7063};
7064
7065static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7066{
7067	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7068	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7069
7070	adev->gfx.priv_reg_irq.num_types = 1;
7071	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7072
7073	adev->gfx.priv_inst_irq.num_types = 1;
7074	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7075
7076	adev->gfx.cp_ecc_error_irq.num_types = 1;
7077	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7078
7079	adev->gfx.sq_irq.num_types = 1;
7080	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7081}
7082
7083static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7084{
7085	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
 
 
 
 
 
 
 
 
 
 
 
7086}
7087
7088static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7089{
7090	/* init asci gds info */
7091	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7092	adev->gds.gws_size = 64;
7093	adev->gds.oa_size = 16;
7094	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7095}
7096
7097static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7098						 u32 bitmap)
7099{
7100	u32 data;
7101
7102	if (!bitmap)
7103		return;
7104
7105	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7106	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7107
7108	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7109}
7110
7111static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7112{
7113	u32 data, mask;
7114
7115	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7116		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7117
7118	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7119
7120	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7121}
7122
7123static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7124{
7125	int i, j, k, counter, active_cu_number = 0;
7126	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7127	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7128	unsigned disable_masks[4 * 2];
7129	u32 ao_cu_num;
7130
7131	memset(cu_info, 0, sizeof(*cu_info));
7132
7133	if (adev->flags & AMD_IS_APU)
7134		ao_cu_num = 2;
7135	else
7136		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7137
7138	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7139
7140	mutex_lock(&adev->grbm_idx_mutex);
7141	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7142		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7143			mask = 1;
7144			ao_bitmap = 0;
7145			counter = 0;
7146			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7147			if (i < 4 && j < 2)
7148				gfx_v8_0_set_user_cu_inactive_bitmap(
7149					adev, disable_masks[i * 2 + j]);
7150			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7151			cu_info->bitmap[i][j] = bitmap;
7152
7153			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7154				if (bitmap & mask) {
7155					if (counter < ao_cu_num)
7156						ao_bitmap |= mask;
7157					counter ++;
7158				}
7159				mask <<= 1;
7160			}
7161			active_cu_number += counter;
7162			if (i < 2 && j < 2)
7163				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7164			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7165		}
7166	}
7167	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7168	mutex_unlock(&adev->grbm_idx_mutex);
7169
7170	cu_info->number = active_cu_number;
7171	cu_info->ao_cu_mask = ao_cu_mask;
7172	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7173	cu_info->max_waves_per_simd = 10;
7174	cu_info->max_scratch_slots_per_cu = 32;
7175	cu_info->wave_front_size = 64;
7176	cu_info->lds_size = 64;
7177}
7178
7179const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7180{
7181	.type = AMD_IP_BLOCK_TYPE_GFX,
7182	.major = 8,
7183	.minor = 0,
7184	.rev = 0,
7185	.funcs = &gfx_v8_0_ip_funcs,
7186};
7187
7188const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7189{
7190	.type = AMD_IP_BLOCK_TYPE_GFX,
7191	.major = 8,
7192	.minor = 1,
7193	.rev = 0,
7194	.funcs = &gfx_v8_0_ip_funcs,
7195};
7196
7197static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7198{
7199	uint64_t ce_payload_addr;
7200	int cnt_ce;
7201	union {
7202		struct vi_ce_ib_state regular;
7203		struct vi_ce_ib_state_chained_ib chained;
7204	} ce_payload = {};
7205
7206	if (ring->adev->virt.chained_ib_support) {
7207		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7208			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7209		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7210	} else {
7211		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7212			offsetof(struct vi_gfx_meta_data, ce_payload);
7213		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7214	}
7215
7216	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7217	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7218				WRITE_DATA_DST_SEL(8) |
7219				WR_CONFIRM) |
7220				WRITE_DATA_CACHE_POLICY(0));
7221	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7222	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7223	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7224}
7225
7226static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7227{
7228	uint64_t de_payload_addr, gds_addr, csa_addr;
7229	int cnt_de;
7230	union {
7231		struct vi_de_ib_state regular;
7232		struct vi_de_ib_state_chained_ib chained;
7233	} de_payload = {};
7234
7235	csa_addr = amdgpu_csa_vaddr(ring->adev);
7236	gds_addr = csa_addr + 4096;
7237	if (ring->adev->virt.chained_ib_support) {
7238		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7239		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7240		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7241		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7242	} else {
7243		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7244		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7245		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7246		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7247	}
7248
7249	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7250	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7251				WRITE_DATA_DST_SEL(8) |
7252				WR_CONFIRM) |
7253				WRITE_DATA_CACHE_POLICY(0));
7254	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7255	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7256	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7257}
v4.10.11
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
 
 
 
  23#include <linux/firmware.h>
  24#include "drmP.h"
 
 
  25#include "amdgpu.h"
  26#include "amdgpu_gfx.h"
 
  27#include "vi.h"
  28#include "vi_structs.h"
  29#include "vid.h"
  30#include "amdgpu_ucode.h"
  31#include "amdgpu_atombios.h"
  32#include "atombios_i2c.h"
  33#include "clearstate_vi.h"
  34
  35#include "gmc/gmc_8_2_d.h"
  36#include "gmc/gmc_8_2_sh_mask.h"
  37
  38#include "oss/oss_3_0_d.h"
  39#include "oss/oss_3_0_sh_mask.h"
  40
  41#include "bif/bif_5_0_d.h"
  42#include "bif/bif_5_0_sh_mask.h"
  43
  44#include "gca/gfx_8_0_d.h"
  45#include "gca/gfx_8_0_enum.h"
  46#include "gca/gfx_8_0_sh_mask.h"
  47#include "gca/gfx_8_0_enum.h"
  48
  49#include "dce/dce_10_0_d.h"
  50#include "dce/dce_10_0_sh_mask.h"
  51
  52#include "smu/smu_7_1_3_d.h"
  53
 
 
  54#define GFX8_NUM_GFX_RINGS     1
  55#define GFX8_NUM_COMPUTE_RINGS 8
  56
  57#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  58#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  59#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  60#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  61
  62#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  63#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  64#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  65#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  66#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  67#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  68#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  69#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  70#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  71
  72#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  73#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  74#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  75#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  76#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  77#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  78
  79/* BPM SERDES CMD */
  80#define SET_BPM_SERDES_CMD    1
  81#define CLE_BPM_SERDES_CMD    0
  82
  83/* BPM Register Address*/
  84enum {
  85	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  86	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  87	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  88	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  89	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  90	BPM_REG_FGCG_MAX
  91};
  92
  93#define RLC_FormatDirectRegListLength        14
  94
  95MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
  96MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
  97MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
  98MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
  99MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 100MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 101
 102MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 103MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 104MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 105MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 106MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 109MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 111MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 112MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 113MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 114
 115MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 116MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 117MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 118MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 119MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 122MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 124MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 125MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 126MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 127
 
 
 
 
 
 
 
 
 
 
 
 
 128MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 
 129MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 
 130MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 
 131MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 
 132MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 
 133MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 134
 135MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 141
 142MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 
 143MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 
 144MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 
 145MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 
 146MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 
 147MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 148
 
 
 
 
 
 
 
 149static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 150{
 151	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 152	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 153	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 154	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 155	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 156	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 157	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 158	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 159	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 160	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 161	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 162	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 163	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 164	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 165	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 166	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 167};
 168
 169static const u32 golden_settings_tonga_a11[] =
 170{
 171	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 172	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 173	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 174	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 175	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 176	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 177	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 178	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 179	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 180	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 181	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 182	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 183	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 184	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 185	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 186	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 187};
 188
 189static const u32 tonga_golden_common_all[] =
 190{
 191	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 192	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 193	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 194	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 195	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 196	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 197	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 198	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 199};
 200
 201static const u32 tonga_mgcg_cgcg_init[] =
 202{
 203	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 204	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 205	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 206	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 207	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 208	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 209	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 210	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 211	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 212	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 213	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 214	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 215	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 216	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 217	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 218	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 219	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 220	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 221	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 222	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 223	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 224	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 225	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 226	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 227	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 228	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 229	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 230	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 231	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 232	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 233	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 234	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 235	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 236	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 237	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 238	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 239	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 240	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 241	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 242	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 243	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 244	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 245	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 246	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 247	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 248	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 249	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 250	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 251	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 252	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 253	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 254	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 255	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 256	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 257	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 258	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 259	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 260	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 261	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 262	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 263	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 264	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 265	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 266	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 267	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 268	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 269	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 270	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 271	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 272	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 273	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 274	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 275	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 276	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 277	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 278};
 279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 280static const u32 golden_settings_polaris11_a11[] =
 281{
 282	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 283	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 284	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 285	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 286	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 287	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 288	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 289	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 290	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 291	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 292	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 293	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 294	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 295	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 296	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 297	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 298	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 299};
 300
 301static const u32 polaris11_golden_common_all[] =
 302{
 303	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 304	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 305	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 306	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 307	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 308	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 309};
 310
 311static const u32 golden_settings_polaris10_a11[] =
 312{
 313	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 314	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 315	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 316	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 317	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 318	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 319	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 320	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 321	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 322	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 323	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 324	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 325	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 326	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 327	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 328	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 329	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 330};
 331
 332static const u32 polaris10_golden_common_all[] =
 333{
 334	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 335	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 336	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 337	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 338	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 339	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 340	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 341	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 342};
 343
 344static const u32 fiji_golden_common_all[] =
 345{
 346	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 347	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 348	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 349	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 350	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 351	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 352	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 353	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 354	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 355	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 356};
 357
 358static const u32 golden_settings_fiji_a10[] =
 359{
 360	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 361	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 362	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 363	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 364	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 365	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 366	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 367	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 368	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 369	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 370	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 371};
 372
 373static const u32 fiji_mgcg_cgcg_init[] =
 374{
 375	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 376	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 377	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 378	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 379	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 380	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 381	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 382	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 383	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 384	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 385	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 386	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 387	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 388	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 389	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 390	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 391	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 392	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 393	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 394	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 395	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 396	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 397	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 398	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 399	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 400	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 401	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 402	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 403	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 404	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 406	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 407	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 408	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 409	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 410};
 411
 412static const u32 golden_settings_iceland_a11[] =
 413{
 414	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 415	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 416	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 417	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 418	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 419	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 420	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 421	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 422	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 423	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 424	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 425	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 426	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 427	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 428	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 429	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 430};
 431
 432static const u32 iceland_golden_common_all[] =
 433{
 434	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 435	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 436	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 437	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 438	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 439	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 440	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 441	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 442};
 443
 444static const u32 iceland_mgcg_cgcg_init[] =
 445{
 446	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 447	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 448	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 451	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 452	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 453	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 454	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 455	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 456	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 457	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 459	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 460	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 461	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 462	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 463	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 464	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 465	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 466	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 467	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 468	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 469	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 470	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 471	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 472	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 473	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 474	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 475	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 476	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 477	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 478	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 479	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 480	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 481	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 482	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 483	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 484	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 485	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 486	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 487	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 488	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 489	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 490	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 491	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 492	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 493	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 494	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 495	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 496	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 497	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 498	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 499	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 500	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 501	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 502	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 503	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 504	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 505	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 506	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 507	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 508	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 509	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 510};
 511
 512static const u32 cz_golden_settings_a11[] =
 513{
 514	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 515	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 516	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 517	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 518	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 519	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 520	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 521	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 522	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 523	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 524	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 525	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 526};
 527
 528static const u32 cz_golden_common_all[] =
 529{
 530	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 531	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 532	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 533	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 534	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 535	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 536	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 537	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 538};
 539
 540static const u32 cz_mgcg_cgcg_init[] =
 541{
 542	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 543	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 544	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 545	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 546	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 547	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 548	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 549	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 550	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 551	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 552	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 553	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 554	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 555	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 556	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 557	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 558	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 559	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 560	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 561	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 562	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 563	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 564	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 565	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 566	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 567	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 568	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 569	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 570	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 571	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 572	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 573	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 574	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 575	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 576	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 577	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 578	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 579	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 580	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 581	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 582	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 583	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 584	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 585	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 586	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 587	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 588	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 589	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 590	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 591	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 592	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 593	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 594	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 595	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 596	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 597	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 598	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 599	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 600	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 601	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 602	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 603	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 604	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 605	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 606	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 607	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 608	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 609	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 610	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 611	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 612	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 613	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 614	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 615	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 616	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 617};
 618
 619static const u32 stoney_golden_settings_a11[] =
 620{
 621	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 622	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 623	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 624	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 625	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 626	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 627	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 628	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 629	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 630	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 631};
 632
 633static const u32 stoney_golden_common_all[] =
 634{
 635	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 636	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 637	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 638	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 639	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 640	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 641	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 642	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 643};
 644
 645static const u32 stoney_mgcg_cgcg_init[] =
 646{
 647	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 648	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 649	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 650	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 651	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 652};
 653
 
 
 
 
 
 
 
 
 
 
 
 654static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 655static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 656static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 657static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 658static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 659static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 
 
 
 
 
 660
 661static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 662{
 
 
 663	switch (adev->asic_type) {
 664	case CHIP_TOPAZ:
 665		amdgpu_program_register_sequence(adev,
 666						 iceland_mgcg_cgcg_init,
 667						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
 668		amdgpu_program_register_sequence(adev,
 669						 golden_settings_iceland_a11,
 670						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
 671		amdgpu_program_register_sequence(adev,
 672						 iceland_golden_common_all,
 673						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
 674		break;
 675	case CHIP_FIJI:
 676		amdgpu_program_register_sequence(adev,
 677						 fiji_mgcg_cgcg_init,
 678						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
 679		amdgpu_program_register_sequence(adev,
 680						 golden_settings_fiji_a10,
 681						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
 682		amdgpu_program_register_sequence(adev,
 683						 fiji_golden_common_all,
 684						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
 685		break;
 686
 687	case CHIP_TONGA:
 688		amdgpu_program_register_sequence(adev,
 689						 tonga_mgcg_cgcg_init,
 690						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
 691		amdgpu_program_register_sequence(adev,
 692						 golden_settings_tonga_a11,
 693						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 694		amdgpu_program_register_sequence(adev,
 695						 tonga_golden_common_all,
 696						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
 
 
 
 
 
 
 
 
 697		break;
 698	case CHIP_POLARIS11:
 699	case CHIP_POLARIS12:
 700		amdgpu_program_register_sequence(adev,
 701						 golden_settings_polaris11_a11,
 702						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
 703		amdgpu_program_register_sequence(adev,
 704						 polaris11_golden_common_all,
 705						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
 706		break;
 707	case CHIP_POLARIS10:
 708		amdgpu_program_register_sequence(adev,
 709						 golden_settings_polaris10_a11,
 710						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
 711		amdgpu_program_register_sequence(adev,
 712						 polaris10_golden_common_all,
 713						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
 714		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 715		if (adev->pdev->revision == 0xc7 &&
 
 
 
 716		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 717		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 718		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
 719			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 720			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 721		}
 722		break;
 723	case CHIP_CARRIZO:
 724		amdgpu_program_register_sequence(adev,
 725						 cz_mgcg_cgcg_init,
 726						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
 727		amdgpu_program_register_sequence(adev,
 728						 cz_golden_settings_a11,
 729						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
 730		amdgpu_program_register_sequence(adev,
 731						 cz_golden_common_all,
 732						 (const u32)ARRAY_SIZE(cz_golden_common_all));
 733		break;
 734	case CHIP_STONEY:
 735		amdgpu_program_register_sequence(adev,
 736						 stoney_mgcg_cgcg_init,
 737						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
 738		amdgpu_program_register_sequence(adev,
 739						 stoney_golden_settings_a11,
 740						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
 741		amdgpu_program_register_sequence(adev,
 742						 stoney_golden_common_all,
 743						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
 744		break;
 745	default:
 746		break;
 747	}
 748}
 749
 750static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 751{
 752	int i;
 753
 754	adev->gfx.scratch.num_reg = 7;
 755	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 756	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
 757		adev->gfx.scratch.free[i] = true;
 758		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
 759	}
 760}
 761
 762static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 763{
 764	struct amdgpu_device *adev = ring->adev;
 765	uint32_t scratch;
 766	uint32_t tmp = 0;
 767	unsigned i;
 768	int r;
 769
 770	r = amdgpu_gfx_scratch_get(adev, &scratch);
 771	if (r) {
 772		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 773		return r;
 774	}
 775	WREG32(scratch, 0xCAFEDEAD);
 776	r = amdgpu_ring_alloc(ring, 3);
 777	if (r) {
 778		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 779			  ring->idx, r);
 780		amdgpu_gfx_scratch_free(adev, scratch);
 781		return r;
 782	}
 783	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 784	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 785	amdgpu_ring_write(ring, 0xDEADBEEF);
 786	amdgpu_ring_commit(ring);
 787
 788	for (i = 0; i < adev->usec_timeout; i++) {
 789		tmp = RREG32(scratch);
 790		if (tmp == 0xDEADBEEF)
 791			break;
 792		DRM_UDELAY(1);
 793	}
 794	if (i < adev->usec_timeout) {
 795		DRM_INFO("ring test on %d succeeded in %d usecs\n",
 796			 ring->idx, i);
 797	} else {
 798		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 799			  ring->idx, scratch, tmp);
 800		r = -EINVAL;
 801	}
 802	amdgpu_gfx_scratch_free(adev, scratch);
 803	return r;
 804}
 805
 806static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 807{
 808	struct amdgpu_device *adev = ring->adev;
 809	struct amdgpu_ib ib;
 810	struct dma_fence *f = NULL;
 811	uint32_t scratch;
 812	uint32_t tmp = 0;
 
 
 813	long r;
 814
 815	r = amdgpu_gfx_scratch_get(adev, &scratch);
 816	if (r) {
 817		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
 818		return r;
 819	}
 820	WREG32(scratch, 0xCAFEDEAD);
 
 821	memset(&ib, 0, sizeof(ib));
 822	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 823	if (r) {
 824		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 825		goto err1;
 826	}
 827	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
 828	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
 829	ib.ptr[2] = 0xDEADBEEF;
 830	ib.length_dw = 3;
 831
 832	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 
 
 
 
 
 
 
 833	if (r)
 834		goto err2;
 835
 836	r = dma_fence_wait_timeout(f, false, timeout);
 837	if (r == 0) {
 838		DRM_ERROR("amdgpu: IB test timed out.\n");
 839		r = -ETIMEDOUT;
 840		goto err2;
 841	} else if (r < 0) {
 842		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 843		goto err2;
 844	}
 845	tmp = RREG32(scratch);
 846	if (tmp == 0xDEADBEEF) {
 847		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
 848		r = 0;
 849	} else {
 850		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 851			  scratch, tmp);
 852		r = -EINVAL;
 853	}
 854err2:
 855	amdgpu_ib_free(adev, &ib, NULL);
 856	dma_fence_put(f);
 857err1:
 858	amdgpu_gfx_scratch_free(adev, scratch);
 859	return r;
 860}
 861
 862
 863static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
 
 864	release_firmware(adev->gfx.pfp_fw);
 865	adev->gfx.pfp_fw = NULL;
 866	release_firmware(adev->gfx.me_fw);
 867	adev->gfx.me_fw = NULL;
 868	release_firmware(adev->gfx.ce_fw);
 869	adev->gfx.ce_fw = NULL;
 870	release_firmware(adev->gfx.rlc_fw);
 871	adev->gfx.rlc_fw = NULL;
 872	release_firmware(adev->gfx.mec_fw);
 873	adev->gfx.mec_fw = NULL;
 874	if ((adev->asic_type != CHIP_STONEY) &&
 875	    (adev->asic_type != CHIP_TOPAZ))
 876		release_firmware(adev->gfx.mec2_fw);
 877	adev->gfx.mec2_fw = NULL;
 878
 879	kfree(adev->gfx.rlc.register_list_format);
 880}
 881
 882static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 883{
 884	const char *chip_name;
 885	char fw_name[30];
 886	int err;
 887	struct amdgpu_firmware_info *info = NULL;
 888	const struct common_firmware_header *header = NULL;
 889	const struct gfx_firmware_header_v1_0 *cp_hdr;
 890	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 891	unsigned int *tmp = NULL, i;
 892
 893	DRM_DEBUG("\n");
 894
 895	switch (adev->asic_type) {
 896	case CHIP_TOPAZ:
 897		chip_name = "topaz";
 898		break;
 899	case CHIP_TONGA:
 900		chip_name = "tonga";
 901		break;
 902	case CHIP_CARRIZO:
 903		chip_name = "carrizo";
 904		break;
 905	case CHIP_FIJI:
 906		chip_name = "fiji";
 907		break;
 908	case CHIP_POLARIS11:
 909		chip_name = "polaris11";
 910		break;
 911	case CHIP_POLARIS10:
 912		chip_name = "polaris10";
 913		break;
 
 
 
 914	case CHIP_POLARIS12:
 915		chip_name = "polaris12";
 916		break;
 917	case CHIP_STONEY:
 918		chip_name = "stoney";
 919		break;
 920	default:
 921		BUG();
 922	}
 923
 924	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 925	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 
 
 
 
 
 
 
 
 
 926	if (err)
 927		goto out;
 928	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 929	if (err)
 930		goto out;
 931	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 932	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 933	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 934
 935	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 936	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 
 
 
 
 
 
 
 
 
 937	if (err)
 938		goto out;
 939	err = amdgpu_ucode_validate(adev->gfx.me_fw);
 940	if (err)
 941		goto out;
 942	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 943	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 
 944	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 945
 946	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 947	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 
 
 
 
 
 
 
 
 
 948	if (err)
 949		goto out;
 950	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 951	if (err)
 952		goto out;
 953	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 954	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 955	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 956
 
 
 
 
 
 
 
 
 
 
 
 957	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 958	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 959	if (err)
 960		goto out;
 961	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 962	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 963	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 964	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 965
 966	adev->gfx.rlc.save_and_restore_offset =
 967			le32_to_cpu(rlc_hdr->save_and_restore_offset);
 968	adev->gfx.rlc.clear_state_descriptor_offset =
 969			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
 970	adev->gfx.rlc.avail_scratch_ram_locations =
 971			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
 972	adev->gfx.rlc.reg_restore_list_size =
 973			le32_to_cpu(rlc_hdr->reg_restore_list_size);
 974	adev->gfx.rlc.reg_list_format_start =
 975			le32_to_cpu(rlc_hdr->reg_list_format_start);
 976	adev->gfx.rlc.reg_list_format_separate_start =
 977			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
 978	adev->gfx.rlc.starting_offsets_start =
 979			le32_to_cpu(rlc_hdr->starting_offsets_start);
 980	adev->gfx.rlc.reg_list_format_size_bytes =
 981			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
 982	adev->gfx.rlc.reg_list_size_bytes =
 983			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
 984
 985	adev->gfx.rlc.register_list_format =
 986			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
 987					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
 988
 989	if (!adev->gfx.rlc.register_list_format) {
 990		err = -ENOMEM;
 991		goto out;
 992	}
 993
 994	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 995			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
 996	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
 997		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
 998
 999	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1000
1001	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1002			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1003	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1004		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1005
1006	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1007	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 
 
 
 
 
 
 
 
 
1008	if (err)
1009		goto out;
1010	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1011	if (err)
1012		goto out;
1013	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1014	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1016
1017	if ((adev->asic_type != CHIP_STONEY) &&
1018	    (adev->asic_type != CHIP_TOPAZ)) {
1019		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1020		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 
 
 
 
 
 
 
 
 
1021		if (!err) {
1022			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1023			if (err)
1024				goto out;
1025			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1026				adev->gfx.mec2_fw->data;
1027			adev->gfx.mec2_fw_version =
1028				le32_to_cpu(cp_hdr->header.ucode_version);
1029			adev->gfx.mec2_feature_version =
1030				le32_to_cpu(cp_hdr->ucode_feature_version);
1031		} else {
1032			err = 0;
1033			adev->gfx.mec2_fw = NULL;
1034		}
1035	}
1036
1037	if (adev->firmware.smu_load) {
1038		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1039		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1040		info->fw = adev->gfx.pfp_fw;
1041		header = (const struct common_firmware_header *)info->fw->data;
1042		adev->firmware.fw_size +=
1043			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1044
1045		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1046		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1047		info->fw = adev->gfx.me_fw;
1048		header = (const struct common_firmware_header *)info->fw->data;
1049		adev->firmware.fw_size +=
1050			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1051
1052		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1053		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1054		info->fw = adev->gfx.ce_fw;
1055		header = (const struct common_firmware_header *)info->fw->data;
1056		adev->firmware.fw_size +=
1057			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
1058
1059		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1060		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1061		info->fw = adev->gfx.rlc_fw;
1062		header = (const struct common_firmware_header *)info->fw->data;
1063		adev->firmware.fw_size +=
1064			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065
1066		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1067		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1068		info->fw = adev->gfx.mec_fw;
1069		header = (const struct common_firmware_header *)info->fw->data;
1070		adev->firmware.fw_size +=
1071			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1072
1073		/* we need account JT in */
1074		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075		adev->firmware.fw_size +=
1076			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1077
1078		if (amdgpu_sriov_vf(adev)) {
1079			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1080			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1081			info->fw = adev->gfx.mec_fw;
1082			adev->firmware.fw_size +=
1083				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1084		}
1085
1086		if (adev->gfx.mec2_fw) {
1087			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1088			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1089			info->fw = adev->gfx.mec2_fw;
1090			header = (const struct common_firmware_header *)info->fw->data;
1091			adev->firmware.fw_size +=
1092				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1093		}
1094
1095	}
1096
1097out:
1098	if (err) {
1099		dev_err(adev->dev,
1100			"gfx8: Failed to load firmware \"%s\"\n",
1101			fw_name);
1102		release_firmware(adev->gfx.pfp_fw);
1103		adev->gfx.pfp_fw = NULL;
1104		release_firmware(adev->gfx.me_fw);
1105		adev->gfx.me_fw = NULL;
1106		release_firmware(adev->gfx.ce_fw);
1107		adev->gfx.ce_fw = NULL;
1108		release_firmware(adev->gfx.rlc_fw);
1109		adev->gfx.rlc_fw = NULL;
1110		release_firmware(adev->gfx.mec_fw);
1111		adev->gfx.mec_fw = NULL;
1112		release_firmware(adev->gfx.mec2_fw);
1113		adev->gfx.mec2_fw = NULL;
1114	}
1115	return err;
1116}
1117
1118static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1119				    volatile u32 *buffer)
1120{
1121	u32 count = 0, i;
1122	const struct cs_section_def *sect = NULL;
1123	const struct cs_extent_def *ext = NULL;
1124
1125	if (adev->gfx.rlc.cs_data == NULL)
1126		return;
1127	if (buffer == NULL)
1128		return;
1129
1130	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1131	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1132
1133	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1134	buffer[count++] = cpu_to_le32(0x80000000);
1135	buffer[count++] = cpu_to_le32(0x80000000);
1136
1137	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1138		for (ext = sect->section; ext->extent != NULL; ++ext) {
1139			if (sect->id == SECT_CONTEXT) {
1140				buffer[count++] =
1141					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1142				buffer[count++] = cpu_to_le32(ext->reg_index -
1143						PACKET3_SET_CONTEXT_REG_START);
1144				for (i = 0; i < ext->reg_count; i++)
1145					buffer[count++] = cpu_to_le32(ext->extent[i]);
1146			} else {
1147				return;
1148			}
1149		}
1150	}
1151
1152	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1153	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1154			PACKET3_SET_CONTEXT_REG_START);
1155	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1156	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1157
1158	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1159	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1160
1161	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1162	buffer[count++] = cpu_to_le32(0);
1163}
1164
1165static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1166{
1167	const __le32 *fw_data;
1168	volatile u32 *dst_ptr;
1169	int me, i, max_me = 4;
1170	u32 bo_offset = 0;
1171	u32 table_offset, table_size;
1172
1173	if (adev->asic_type == CHIP_CARRIZO)
1174		max_me = 5;
1175
1176	/* write the cp table buffer */
1177	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1178	for (me = 0; me < max_me; me++) {
1179		if (me == 0) {
1180			const struct gfx_firmware_header_v1_0 *hdr =
1181				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1182			fw_data = (const __le32 *)
1183				(adev->gfx.ce_fw->data +
1184				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1185			table_offset = le32_to_cpu(hdr->jt_offset);
1186			table_size = le32_to_cpu(hdr->jt_size);
1187		} else if (me == 1) {
1188			const struct gfx_firmware_header_v1_0 *hdr =
1189				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1190			fw_data = (const __le32 *)
1191				(adev->gfx.pfp_fw->data +
1192				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1193			table_offset = le32_to_cpu(hdr->jt_offset);
1194			table_size = le32_to_cpu(hdr->jt_size);
1195		} else if (me == 2) {
1196			const struct gfx_firmware_header_v1_0 *hdr =
1197				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1198			fw_data = (const __le32 *)
1199				(adev->gfx.me_fw->data +
1200				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1201			table_offset = le32_to_cpu(hdr->jt_offset);
1202			table_size = le32_to_cpu(hdr->jt_size);
1203		} else if (me == 3) {
1204			const struct gfx_firmware_header_v1_0 *hdr =
1205				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1206			fw_data = (const __le32 *)
1207				(adev->gfx.mec_fw->data +
1208				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1209			table_offset = le32_to_cpu(hdr->jt_offset);
1210			table_size = le32_to_cpu(hdr->jt_size);
1211		} else  if (me == 4) {
1212			const struct gfx_firmware_header_v1_0 *hdr =
1213				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1214			fw_data = (const __le32 *)
1215				(adev->gfx.mec2_fw->data +
1216				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1217			table_offset = le32_to_cpu(hdr->jt_offset);
1218			table_size = le32_to_cpu(hdr->jt_size);
1219		}
1220
1221		for (i = 0; i < table_size; i ++) {
1222			dst_ptr[bo_offset + i] =
1223				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1224		}
1225
1226		bo_offset += table_size;
1227	}
1228}
1229
1230static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1231{
1232	int r;
1233
1234	/* clear state block */
1235	if (adev->gfx.rlc.clear_state_obj) {
1236		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1237		if (unlikely(r != 0))
1238			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1239		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1240		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1241		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1242		adev->gfx.rlc.clear_state_obj = NULL;
1243	}
1244
1245	/* jump table block */
1246	if (adev->gfx.rlc.cp_table_obj) {
1247		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1248		if (unlikely(r != 0))
1249			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1250		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1251		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1252		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253		adev->gfx.rlc.cp_table_obj = NULL;
1254	}
1255}
1256
1257static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1258{
1259	volatile u32 *dst_ptr;
1260	u32 dws;
1261	const struct cs_section_def *cs_data;
1262	int r;
1263
1264	adev->gfx.rlc.cs_data = vi_cs_data;
1265
1266	cs_data = adev->gfx.rlc.cs_data;
1267
1268	if (cs_data) {
1269		/* clear state block */
1270		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1271
1272		if (adev->gfx.rlc.clear_state_obj == NULL) {
1273			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274					     AMDGPU_GEM_DOMAIN_VRAM,
1275					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1276					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1277					     NULL, NULL,
1278					     &adev->gfx.rlc.clear_state_obj);
1279			if (r) {
1280				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1281				gfx_v8_0_rlc_fini(adev);
1282				return r;
1283			}
1284		}
1285		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286		if (unlikely(r != 0)) {
1287			gfx_v8_0_rlc_fini(adev);
1288			return r;
1289		}
1290		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291				  &adev->gfx.rlc.clear_state_gpu_addr);
1292		if (r) {
1293			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295			gfx_v8_0_rlc_fini(adev);
1296			return r;
1297		}
1298
1299		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300		if (r) {
1301			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302			gfx_v8_0_rlc_fini(adev);
1303			return r;
1304		}
1305		/* set up the cs buffer */
1306		dst_ptr = adev->gfx.rlc.cs_ptr;
1307		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310	}
1311
1312	if ((adev->asic_type == CHIP_CARRIZO) ||
1313	    (adev->asic_type == CHIP_STONEY)) {
1314		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315		if (adev->gfx.rlc.cp_table_obj == NULL) {
1316			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317					     AMDGPU_GEM_DOMAIN_VRAM,
1318					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1319					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1320					     NULL, NULL,
1321					     &adev->gfx.rlc.cp_table_obj);
1322			if (r) {
1323				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1324				return r;
1325			}
1326		}
1327
1328		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1329		if (unlikely(r != 0)) {
1330			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1331			return r;
1332		}
1333		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1334				  &adev->gfx.rlc.cp_table_gpu_addr);
1335		if (r) {
1336			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1337			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1338			return r;
1339		}
1340		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1341		if (r) {
1342			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1343			return r;
1344		}
1345
1346		cz_init_cp_jump_table(adev);
1347
1348		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1349		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1350	}
1351
1352	return 0;
1353}
1354
1355static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1356{
1357	int r;
1358
1359	if (adev->gfx.mec.hpd_eop_obj) {
1360		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1361		if (unlikely(r != 0))
1362			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1363		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1364		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1365		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366		adev->gfx.mec.hpd_eop_obj = NULL;
1367	}
1368}
1369
1370#define MEC_HPD_SIZE 2048
1371
1372static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1373{
1374	int r;
1375	u32 *hpd;
 
 
 
1376
1377	/*
1378	 * we assign only 1 pipe because all other pipes will
1379	 * be handled by KFD
1380	 */
1381	adev->gfx.mec.num_mec = 1;
1382	adev->gfx.mec.num_pipe = 1;
1383	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1384
1385	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1386		r = amdgpu_bo_create(adev,
1387				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1388				     PAGE_SIZE, true,
1389				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1390				     &adev->gfx.mec.hpd_eop_obj);
1391		if (r) {
1392			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1393			return r;
1394		}
1395	}
1396
1397	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1398	if (unlikely(r != 0)) {
1399		gfx_v8_0_mec_fini(adev);
1400		return r;
1401	}
1402	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1403			  &adev->gfx.mec.hpd_eop_gpu_addr);
1404	if (r) {
1405		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1406		gfx_v8_0_mec_fini(adev);
1407		return r;
1408	}
1409	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1410	if (r) {
1411		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1412		gfx_v8_0_mec_fini(adev);
1413		return r;
1414	}
1415
1416	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1417
1418	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1419	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1420
1421	return 0;
1422}
1423
1424static const u32 vgpr_init_compute_shader[] =
1425{
1426	0x7e000209, 0x7e020208,
1427	0x7e040207, 0x7e060206,
1428	0x7e080205, 0x7e0a0204,
1429	0x7e0c0203, 0x7e0e0202,
1430	0x7e100201, 0x7e120200,
1431	0x7e140209, 0x7e160208,
1432	0x7e180207, 0x7e1a0206,
1433	0x7e1c0205, 0x7e1e0204,
1434	0x7e200203, 0x7e220202,
1435	0x7e240201, 0x7e260200,
1436	0x7e280209, 0x7e2a0208,
1437	0x7e2c0207, 0x7e2e0206,
1438	0x7e300205, 0x7e320204,
1439	0x7e340203, 0x7e360202,
1440	0x7e380201, 0x7e3a0200,
1441	0x7e3c0209, 0x7e3e0208,
1442	0x7e400207, 0x7e420206,
1443	0x7e440205, 0x7e460204,
1444	0x7e480203, 0x7e4a0202,
1445	0x7e4c0201, 0x7e4e0200,
1446	0x7e500209, 0x7e520208,
1447	0x7e540207, 0x7e560206,
1448	0x7e580205, 0x7e5a0204,
1449	0x7e5c0203, 0x7e5e0202,
1450	0x7e600201, 0x7e620200,
1451	0x7e640209, 0x7e660208,
1452	0x7e680207, 0x7e6a0206,
1453	0x7e6c0205, 0x7e6e0204,
1454	0x7e700203, 0x7e720202,
1455	0x7e740201, 0x7e760200,
1456	0x7e780209, 0x7e7a0208,
1457	0x7e7c0207, 0x7e7e0206,
1458	0xbf8a0000, 0xbf810000,
1459};
1460
1461static const u32 sgpr_init_compute_shader[] =
1462{
1463	0xbe8a0100, 0xbe8c0102,
1464	0xbe8e0104, 0xbe900106,
1465	0xbe920108, 0xbe940100,
1466	0xbe960102, 0xbe980104,
1467	0xbe9a0106, 0xbe9c0108,
1468	0xbe9e0100, 0xbea00102,
1469	0xbea20104, 0xbea40106,
1470	0xbea60108, 0xbea80100,
1471	0xbeaa0102, 0xbeac0104,
1472	0xbeae0106, 0xbeb00108,
1473	0xbeb20100, 0xbeb40102,
1474	0xbeb60104, 0xbeb80106,
1475	0xbeba0108, 0xbebc0100,
1476	0xbebe0102, 0xbec00104,
1477	0xbec20106, 0xbec40108,
1478	0xbec60100, 0xbec80102,
1479	0xbee60004, 0xbee70005,
1480	0xbeea0006, 0xbeeb0007,
1481	0xbee80008, 0xbee90009,
1482	0xbefc0000, 0xbf8a0000,
1483	0xbf810000, 0x00000000,
1484};
1485
1486static const u32 vgpr_init_regs[] =
1487{
1488	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1489	mmCOMPUTE_RESOURCE_LIMITS, 0,
1490	mmCOMPUTE_NUM_THREAD_X, 256*4,
1491	mmCOMPUTE_NUM_THREAD_Y, 1,
1492	mmCOMPUTE_NUM_THREAD_Z, 1,
 
1493	mmCOMPUTE_PGM_RSRC2, 20,
1494	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1495	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1496	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1497	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1498	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1499	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1500	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1501	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1502	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1503	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1504};
1505
1506static const u32 sgpr1_init_regs[] =
1507{
1508	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1509	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1510	mmCOMPUTE_NUM_THREAD_X, 256*5,
1511	mmCOMPUTE_NUM_THREAD_Y, 1,
1512	mmCOMPUTE_NUM_THREAD_Z, 1,
 
1513	mmCOMPUTE_PGM_RSRC2, 20,
1514	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1515	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1516	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1517	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1518	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1519	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1520	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1521	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1522	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1523	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1524};
1525
1526static const u32 sgpr2_init_regs[] =
1527{
1528	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1529	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1530	mmCOMPUTE_NUM_THREAD_X, 256*5,
1531	mmCOMPUTE_NUM_THREAD_Y, 1,
1532	mmCOMPUTE_NUM_THREAD_Z, 1,
 
1533	mmCOMPUTE_PGM_RSRC2, 20,
1534	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1535	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1536	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1537	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1538	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1539	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1540	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1541	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1542	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1543	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1544};
1545
1546static const u32 sec_ded_counter_registers[] =
1547{
1548	mmCPC_EDC_ATC_CNT,
1549	mmCPC_EDC_SCRATCH_CNT,
1550	mmCPC_EDC_UCODE_CNT,
1551	mmCPF_EDC_ATC_CNT,
1552	mmCPF_EDC_ROQ_CNT,
1553	mmCPF_EDC_TAG_CNT,
1554	mmCPG_EDC_ATC_CNT,
1555	mmCPG_EDC_DMA_CNT,
1556	mmCPG_EDC_TAG_CNT,
1557	mmDC_EDC_CSINVOC_CNT,
1558	mmDC_EDC_RESTORE_CNT,
1559	mmDC_EDC_STATE_CNT,
1560	mmGDS_EDC_CNT,
1561	mmGDS_EDC_GRBM_CNT,
1562	mmGDS_EDC_OA_DED,
1563	mmSPI_EDC_CNT,
1564	mmSQC_ATC_EDC_GATCL1_CNT,
1565	mmSQC_EDC_CNT,
1566	mmSQ_EDC_DED_CNT,
1567	mmSQ_EDC_INFO,
1568	mmSQ_EDC_SEC_CNT,
1569	mmTCC_EDC_CNT,
1570	mmTCP_ATC_EDC_GATCL1_CNT,
1571	mmTCP_EDC_CNT,
1572	mmTD_EDC_CNT
1573};
1574
1575static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1576{
1577	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1578	struct amdgpu_ib ib;
1579	struct dma_fence *f = NULL;
1580	int r, i;
1581	u32 tmp;
1582	unsigned total_size, vgpr_offset, sgpr_offset;
1583	u64 gpu_addr;
1584
1585	/* only supported on CZ */
1586	if (adev->asic_type != CHIP_CARRIZO)
1587		return 0;
1588
1589	/* bail if the compute ring is not ready */
1590	if (!ring->ready)
1591		return 0;
1592
1593	tmp = RREG32(mmGB_EDC_MODE);
1594	WREG32(mmGB_EDC_MODE, 0);
1595
1596	total_size =
1597		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598	total_size +=
1599		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1600	total_size +=
1601		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1602	total_size = ALIGN(total_size, 256);
1603	vgpr_offset = total_size;
1604	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1605	sgpr_offset = total_size;
1606	total_size += sizeof(sgpr_init_compute_shader);
1607
1608	/* allocate an indirect buffer to put the commands in */
1609	memset(&ib, 0, sizeof(ib));
1610	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
 
1611	if (r) {
1612		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1613		return r;
1614	}
1615
1616	/* load the compute shaders */
1617	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1618		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1619
1620	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1621		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1622
1623	/* init the ib length to 0 */
1624	ib.length_dw = 0;
1625
1626	/* VGPR */
1627	/* write the register state for the compute dispatch */
1628	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1629		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1630		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1631		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1632	}
1633	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1634	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1635	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1636	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1637	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1638	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1639
1640	/* write dispatch packet */
1641	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1642	ib.ptr[ib.length_dw++] = 8; /* x */
1643	ib.ptr[ib.length_dw++] = 1; /* y */
1644	ib.ptr[ib.length_dw++] = 1; /* z */
1645	ib.ptr[ib.length_dw++] =
1646		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1647
1648	/* write CS partial flush packet */
1649	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1650	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1651
1652	/* SGPR1 */
1653	/* write the register state for the compute dispatch */
1654	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1655		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1656		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1657		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1658	}
1659	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1660	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1661	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1662	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1663	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1664	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1665
1666	/* write dispatch packet */
1667	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1668	ib.ptr[ib.length_dw++] = 8; /* x */
1669	ib.ptr[ib.length_dw++] = 1; /* y */
1670	ib.ptr[ib.length_dw++] = 1; /* z */
1671	ib.ptr[ib.length_dw++] =
1672		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1673
1674	/* write CS partial flush packet */
1675	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1676	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1677
1678	/* SGPR2 */
1679	/* write the register state for the compute dispatch */
1680	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1681		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1682		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1683		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1684	}
1685	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1686	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1687	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1688	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1689	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1690	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1691
1692	/* write dispatch packet */
1693	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1694	ib.ptr[ib.length_dw++] = 8; /* x */
1695	ib.ptr[ib.length_dw++] = 1; /* y */
1696	ib.ptr[ib.length_dw++] = 1; /* z */
1697	ib.ptr[ib.length_dw++] =
1698		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1699
1700	/* write CS partial flush packet */
1701	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1702	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1703
1704	/* shedule the ib on the ring */
1705	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1706	if (r) {
1707		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1708		goto fail;
1709	}
1710
1711	/* wait for the GPU to finish processing the IB */
1712	r = dma_fence_wait(f, false);
1713	if (r) {
1714		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1715		goto fail;
1716	}
1717
1718	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1719	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1720	WREG32(mmGB_EDC_MODE, tmp);
1721
1722	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1723	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1724	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1725
1726
1727	/* read back registers to clear the counters */
1728	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1729		RREG32(sec_ded_counter_registers[i]);
1730
1731fail:
1732	amdgpu_ib_free(adev, &ib, NULL);
1733	dma_fence_put(f);
1734
1735	return r;
1736}
1737
1738static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1739{
1740	u32 gb_addr_config;
1741	u32 mc_shared_chmap, mc_arb_ramcfg;
1742	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1743	u32 tmp;
1744	int ret;
1745
1746	switch (adev->asic_type) {
1747	case CHIP_TOPAZ:
1748		adev->gfx.config.max_shader_engines = 1;
1749		adev->gfx.config.max_tile_pipes = 2;
1750		adev->gfx.config.max_cu_per_sh = 6;
1751		adev->gfx.config.max_sh_per_se = 1;
1752		adev->gfx.config.max_backends_per_se = 2;
1753		adev->gfx.config.max_texture_channel_caches = 2;
1754		adev->gfx.config.max_gprs = 256;
1755		adev->gfx.config.max_gs_threads = 32;
1756		adev->gfx.config.max_hw_contexts = 8;
1757
1758		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1759		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1760		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1761		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1762		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1763		break;
1764	case CHIP_FIJI:
1765		adev->gfx.config.max_shader_engines = 4;
1766		adev->gfx.config.max_tile_pipes = 16;
1767		adev->gfx.config.max_cu_per_sh = 16;
1768		adev->gfx.config.max_sh_per_se = 1;
1769		adev->gfx.config.max_backends_per_se = 4;
1770		adev->gfx.config.max_texture_channel_caches = 16;
1771		adev->gfx.config.max_gprs = 256;
1772		adev->gfx.config.max_gs_threads = 32;
1773		adev->gfx.config.max_hw_contexts = 8;
1774
1775		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1776		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1777		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1778		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1779		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1780		break;
1781	case CHIP_POLARIS11:
1782	case CHIP_POLARIS12:
1783		ret = amdgpu_atombios_get_gfx_info(adev);
1784		if (ret)
1785			return ret;
1786		adev->gfx.config.max_gprs = 256;
1787		adev->gfx.config.max_gs_threads = 32;
1788		adev->gfx.config.max_hw_contexts = 8;
1789
1790		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1791		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1792		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1793		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1794		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1795		break;
1796	case CHIP_POLARIS10:
 
1797		ret = amdgpu_atombios_get_gfx_info(adev);
1798		if (ret)
1799			return ret;
1800		adev->gfx.config.max_gprs = 256;
1801		adev->gfx.config.max_gs_threads = 32;
1802		adev->gfx.config.max_hw_contexts = 8;
1803
1804		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1805		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1806		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1807		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1808		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1809		break;
1810	case CHIP_TONGA:
1811		adev->gfx.config.max_shader_engines = 4;
1812		adev->gfx.config.max_tile_pipes = 8;
1813		adev->gfx.config.max_cu_per_sh = 8;
1814		adev->gfx.config.max_sh_per_se = 1;
1815		adev->gfx.config.max_backends_per_se = 2;
1816		adev->gfx.config.max_texture_channel_caches = 8;
1817		adev->gfx.config.max_gprs = 256;
1818		adev->gfx.config.max_gs_threads = 32;
1819		adev->gfx.config.max_hw_contexts = 8;
1820
1821		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1822		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1823		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1824		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1825		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1826		break;
1827	case CHIP_CARRIZO:
1828		adev->gfx.config.max_shader_engines = 1;
1829		adev->gfx.config.max_tile_pipes = 2;
1830		adev->gfx.config.max_sh_per_se = 1;
1831		adev->gfx.config.max_backends_per_se = 2;
1832
1833		switch (adev->pdev->revision) {
1834		case 0xc4:
1835		case 0x84:
1836		case 0xc8:
1837		case 0xcc:
1838		case 0xe1:
1839		case 0xe3:
1840			/* B10 */
1841			adev->gfx.config.max_cu_per_sh = 8;
1842			break;
1843		case 0xc5:
1844		case 0x81:
1845		case 0x85:
1846		case 0xc9:
1847		case 0xcd:
1848		case 0xe2:
1849		case 0xe4:
1850			/* B8 */
1851			adev->gfx.config.max_cu_per_sh = 6;
1852			break;
1853		case 0xc6:
1854		case 0xca:
1855		case 0xce:
1856		case 0x88:
1857			/* B6 */
1858			adev->gfx.config.max_cu_per_sh = 6;
1859			break;
1860		case 0xc7:
1861		case 0x87:
1862		case 0xcb:
1863		case 0xe5:
1864		case 0x89:
1865		default:
1866			/* B4 */
1867			adev->gfx.config.max_cu_per_sh = 4;
1868			break;
1869		}
1870
1871		adev->gfx.config.max_texture_channel_caches = 2;
1872		adev->gfx.config.max_gprs = 256;
1873		adev->gfx.config.max_gs_threads = 32;
1874		adev->gfx.config.max_hw_contexts = 8;
1875
1876		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1877		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1878		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1879		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1880		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1881		break;
1882	case CHIP_STONEY:
1883		adev->gfx.config.max_shader_engines = 1;
1884		adev->gfx.config.max_tile_pipes = 2;
1885		adev->gfx.config.max_sh_per_se = 1;
1886		adev->gfx.config.max_backends_per_se = 1;
1887
1888		switch (adev->pdev->revision) {
1889		case 0xc0:
1890		case 0xc1:
1891		case 0xc2:
1892		case 0xc4:
1893		case 0xc8:
1894		case 0xc9:
1895			adev->gfx.config.max_cu_per_sh = 3;
1896			break;
1897		case 0xd0:
1898		case 0xd1:
1899		case 0xd2:
1900		default:
1901			adev->gfx.config.max_cu_per_sh = 2;
1902			break;
1903		}
1904
1905		adev->gfx.config.max_texture_channel_caches = 2;
1906		adev->gfx.config.max_gprs = 256;
1907		adev->gfx.config.max_gs_threads = 16;
1908		adev->gfx.config.max_hw_contexts = 8;
1909
1910		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1911		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1912		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1913		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1914		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1915		break;
1916	default:
1917		adev->gfx.config.max_shader_engines = 2;
1918		adev->gfx.config.max_tile_pipes = 4;
1919		adev->gfx.config.max_cu_per_sh = 2;
1920		adev->gfx.config.max_sh_per_se = 1;
1921		adev->gfx.config.max_backends_per_se = 2;
1922		adev->gfx.config.max_texture_channel_caches = 4;
1923		adev->gfx.config.max_gprs = 256;
1924		adev->gfx.config.max_gs_threads = 32;
1925		adev->gfx.config.max_hw_contexts = 8;
1926
1927		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1928		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1929		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1930		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1931		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1932		break;
1933	}
1934
1935	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1936	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1937	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1938
 
 
 
 
 
1939	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1940	adev->gfx.config.mem_max_burst_length_bytes = 256;
1941	if (adev->flags & AMD_IS_APU) {
1942		/* Get memory bank mapping mode. */
1943		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1944		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1945		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1946
1947		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1948		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1949		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1950
1951		/* Validate settings in case only one DIMM installed. */
1952		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1953			dimm00_addr_map = 0;
1954		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1955			dimm01_addr_map = 0;
1956		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1957			dimm10_addr_map = 0;
1958		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1959			dimm11_addr_map = 0;
1960
1961		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1962		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1963		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1964			adev->gfx.config.mem_row_size_in_kb = 2;
1965		else
1966			adev->gfx.config.mem_row_size_in_kb = 1;
1967	} else {
1968		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1969		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1970		if (adev->gfx.config.mem_row_size_in_kb > 4)
1971			adev->gfx.config.mem_row_size_in_kb = 4;
1972	}
1973
1974	adev->gfx.config.shader_engine_tile_size = 32;
1975	adev->gfx.config.num_gpus = 1;
1976	adev->gfx.config.multi_gpu_tile_size = 64;
1977
1978	/* fix up row size */
1979	switch (adev->gfx.config.mem_row_size_in_kb) {
1980	case 1:
1981	default:
1982		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1983		break;
1984	case 2:
1985		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1986		break;
1987	case 4:
1988		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1989		break;
1990	}
1991	adev->gfx.config.gb_addr_config = gb_addr_config;
1992
1993	return 0;
1994}
1995
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1996static int gfx_v8_0_sw_init(void *handle)
1997{
1998	int i, r;
1999	struct amdgpu_ring *ring;
 
2000	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2002	/* EOP Event */
2003	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2004	if (r)
2005		return r;
2006
2007	/* Privileged reg */
2008	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
 
2009	if (r)
2010		return r;
2011
2012	/* Privileged inst */
2013	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
 
 
 
 
 
 
 
2014	if (r)
2015		return r;
2016
 
 
 
 
 
 
 
 
 
 
2017	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2018
2019	gfx_v8_0_scratch_init(adev);
2020
2021	r = gfx_v8_0_init_microcode(adev);
2022	if (r) {
2023		DRM_ERROR("Failed to load gfx firmware!\n");
2024		return r;
2025	}
2026
2027	r = gfx_v8_0_rlc_init(adev);
2028	if (r) {
2029		DRM_ERROR("Failed to init rlc BOs!\n");
2030		return r;
2031	}
2032
2033	r = gfx_v8_0_mec_init(adev);
2034	if (r) {
2035		DRM_ERROR("Failed to init MEC BOs!\n");
2036		return r;
2037	}
2038
2039	/* set up the gfx ring */
2040	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2041		ring = &adev->gfx.gfx_ring[i];
2042		ring->ring_obj = NULL;
2043		sprintf(ring->name, "gfx");
2044		/* no gfx doorbells on iceland */
2045		if (adev->asic_type != CHIP_TOPAZ) {
2046			ring->use_doorbell = true;
2047			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2048		}
2049
2050		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2051				     AMDGPU_CP_IRQ_GFX_EOP);
 
2052		if (r)
2053			return r;
2054	}
2055
2056	/* set up the compute queues */
2057	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2058		unsigned irq_type;
2059
2060		/* max 32 queues per MEC */
2061		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2062			DRM_ERROR("Too many (%d) compute rings!\n", i);
2063			break;
 
 
 
 
 
 
 
 
 
 
 
 
2064		}
2065		ring = &adev->gfx.compute_ring[i];
2066		ring->ring_obj = NULL;
2067		ring->use_doorbell = true;
2068		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2069		ring->me = 1; /* first MEC */
2070		ring->pipe = i / 8;
2071		ring->queue = i % 8;
2072		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2073		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2074		/* type-2 packets are deprecated on MEC, use type-3 instead */
2075		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2076				     irq_type);
2077		if (r)
2078			return r;
2079	}
2080
2081	/* reserve GDS, GWS and OA resource for gfx */
2082	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2083				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2084				    &adev->gds.gds_gfx_bo, NULL, NULL);
2085	if (r)
2086		return r;
 
2087
2088	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2089				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2090				    &adev->gds.gws_gfx_bo, NULL, NULL);
2091	if (r)
2092		return r;
2093
2094	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2095				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2096				    &adev->gds.oa_gfx_bo, NULL, NULL);
2097	if (r)
2098		return r;
2099
2100	adev->gfx.ce_ram_size = 0x8000;
2101
2102	r = gfx_v8_0_gpu_early_init(adev);
2103	if (r)
2104		return r;
2105
2106	return 0;
2107}
2108
2109static int gfx_v8_0_sw_fini(void *handle)
2110{
 
2111	int i;
2112	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2113
2114	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2115	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2116	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2117
2118	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2119		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2120	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2121		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2122
 
 
 
 
2123	gfx_v8_0_mec_fini(adev);
2124	gfx_v8_0_rlc_fini(adev);
 
 
 
 
 
 
 
 
 
2125	gfx_v8_0_free_microcode(adev);
2126
2127	return 0;
2128}
2129
2130static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2131{
2132	uint32_t *modearray, *mod2array;
2133	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2134	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2135	u32 reg_offset;
2136
2137	modearray = adev->gfx.config.tile_mode_array;
2138	mod2array = adev->gfx.config.macrotile_mode_array;
2139
2140	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2141		modearray[reg_offset] = 0;
2142
2143	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2144		mod2array[reg_offset] = 0;
2145
2146	switch (adev->asic_type) {
2147	case CHIP_TOPAZ:
2148		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149				PIPE_CONFIG(ADDR_SURF_P2) |
2150				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2151				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153				PIPE_CONFIG(ADDR_SURF_P2) |
2154				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2155				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157				PIPE_CONFIG(ADDR_SURF_P2) |
2158				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2159				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161				PIPE_CONFIG(ADDR_SURF_P2) |
2162				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2163				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165				PIPE_CONFIG(ADDR_SURF_P2) |
2166				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2167				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2169				PIPE_CONFIG(ADDR_SURF_P2) |
2170				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173				PIPE_CONFIG(ADDR_SURF_P2) |
2174				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2177				PIPE_CONFIG(ADDR_SURF_P2));
2178		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179				PIPE_CONFIG(ADDR_SURF_P2) |
2180				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2181				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183				 PIPE_CONFIG(ADDR_SURF_P2) |
2184				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187				 PIPE_CONFIG(ADDR_SURF_P2) |
2188				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2190		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2191				 PIPE_CONFIG(ADDR_SURF_P2) |
2192				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2195				 PIPE_CONFIG(ADDR_SURF_P2) |
2196				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2199				 PIPE_CONFIG(ADDR_SURF_P2) |
2200				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203				 PIPE_CONFIG(ADDR_SURF_P2) |
2204				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2206		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2207				 PIPE_CONFIG(ADDR_SURF_P2) |
2208				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211				 PIPE_CONFIG(ADDR_SURF_P2) |
2212				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215				 PIPE_CONFIG(ADDR_SURF_P2) |
2216				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2219				 PIPE_CONFIG(ADDR_SURF_P2) |
2220				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2223				 PIPE_CONFIG(ADDR_SURF_P2) |
2224				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2227				 PIPE_CONFIG(ADDR_SURF_P2) |
2228				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2229				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2231				 PIPE_CONFIG(ADDR_SURF_P2) |
2232				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2235				 PIPE_CONFIG(ADDR_SURF_P2) |
2236				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239				 PIPE_CONFIG(ADDR_SURF_P2) |
2240				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2241				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243				 PIPE_CONFIG(ADDR_SURF_P2) |
2244				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247				 PIPE_CONFIG(ADDR_SURF_P2) |
2248				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250
2251		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2252				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254				NUM_BANKS(ADDR_SURF_8_BANK));
2255		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258				NUM_BANKS(ADDR_SURF_8_BANK));
2259		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2260				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262				NUM_BANKS(ADDR_SURF_8_BANK));
2263		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2266				NUM_BANKS(ADDR_SURF_8_BANK));
2267		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2269				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270				NUM_BANKS(ADDR_SURF_8_BANK));
2271		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274				NUM_BANKS(ADDR_SURF_8_BANK));
2275		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278				NUM_BANKS(ADDR_SURF_8_BANK));
2279		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2280				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2281				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282				NUM_BANKS(ADDR_SURF_16_BANK));
2283		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2285				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286				NUM_BANKS(ADDR_SURF_16_BANK));
2287		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2288				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290				 NUM_BANKS(ADDR_SURF_16_BANK));
2291		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294				 NUM_BANKS(ADDR_SURF_16_BANK));
2295		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298				 NUM_BANKS(ADDR_SURF_16_BANK));
2299		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2301				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302				 NUM_BANKS(ADDR_SURF_16_BANK));
2303		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306				 NUM_BANKS(ADDR_SURF_8_BANK));
2307
2308		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2309			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2310			    reg_offset != 23)
2311				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2312
2313		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2314			if (reg_offset != 7)
2315				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2316
2317		break;
2318	case CHIP_FIJI:
 
2319		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2322				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2326				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2330				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2334				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2349				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2352				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2353		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2367				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2378				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2387				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2398				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2402				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2411				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2414				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2418				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2422				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2439				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441
2442		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445				NUM_BANKS(ADDR_SURF_8_BANK));
2446		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449				NUM_BANKS(ADDR_SURF_8_BANK));
2450		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453				NUM_BANKS(ADDR_SURF_8_BANK));
2454		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457				NUM_BANKS(ADDR_SURF_8_BANK));
2458		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2460				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461				NUM_BANKS(ADDR_SURF_8_BANK));
2462		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465				NUM_BANKS(ADDR_SURF_8_BANK));
2466		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469				NUM_BANKS(ADDR_SURF_8_BANK));
2470		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2472				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473				NUM_BANKS(ADDR_SURF_8_BANK));
2474		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2476				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477				NUM_BANKS(ADDR_SURF_8_BANK));
2478		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481				 NUM_BANKS(ADDR_SURF_8_BANK));
2482		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485				 NUM_BANKS(ADDR_SURF_8_BANK));
2486		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2488				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489				 NUM_BANKS(ADDR_SURF_8_BANK));
2490		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493				 NUM_BANKS(ADDR_SURF_8_BANK));
2494		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497				 NUM_BANKS(ADDR_SURF_4_BANK));
2498
2499		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2500			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2501
2502		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2503			if (reg_offset != 7)
2504				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2505
2506		break;
2507	case CHIP_TONGA:
2508		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2511				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2519				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2523				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2529				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2547				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2551				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2554		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2555				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2558		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2559				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2567				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2575				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2578		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2579				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2583				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2587				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2591				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2595				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2599				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2603				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2605				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2607				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2611				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2613				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2614		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2626		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2630
2631		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634				NUM_BANKS(ADDR_SURF_16_BANK));
2635		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638				NUM_BANKS(ADDR_SURF_16_BANK));
2639		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642				NUM_BANKS(ADDR_SURF_16_BANK));
2643		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2645				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646				NUM_BANKS(ADDR_SURF_16_BANK));
2647		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2649				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650				NUM_BANKS(ADDR_SURF_16_BANK));
2651		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654				NUM_BANKS(ADDR_SURF_16_BANK));
2655		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658				NUM_BANKS(ADDR_SURF_16_BANK));
2659		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2661				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2662				NUM_BANKS(ADDR_SURF_16_BANK));
2663		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2665				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2666				NUM_BANKS(ADDR_SURF_16_BANK));
2667		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2669				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2670				 NUM_BANKS(ADDR_SURF_16_BANK));
2671		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2674				 NUM_BANKS(ADDR_SURF_16_BANK));
2675		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2678				 NUM_BANKS(ADDR_SURF_8_BANK));
2679		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682				 NUM_BANKS(ADDR_SURF_4_BANK));
2683		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686				 NUM_BANKS(ADDR_SURF_4_BANK));
2687
2688		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2689			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2690
2691		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2692			if (reg_offset != 7)
2693				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2694
2695		break;
2696	case CHIP_POLARIS11:
2697	case CHIP_POLARIS12:
2698		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2701				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2705				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2709				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2713				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2729				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2730		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2731				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2732		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2733				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2745				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2747				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2748		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2749				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2757				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2768		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2771				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2773				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2777				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2781				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2789				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2791				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2793				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2795				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2797				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2801				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2803				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2804		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2812		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2816		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2819				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2820
2821		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824				NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829				NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834				NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839				NUM_BANKS(ADDR_SURF_16_BANK));
2840
2841		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2844				NUM_BANKS(ADDR_SURF_16_BANK));
2845
2846		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849				NUM_BANKS(ADDR_SURF_16_BANK));
2850
2851		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2853				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2854				NUM_BANKS(ADDR_SURF_16_BANK));
2855
2856		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859				NUM_BANKS(ADDR_SURF_16_BANK));
2860
2861		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2862				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864				NUM_BANKS(ADDR_SURF_16_BANK));
2865
2866		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869				NUM_BANKS(ADDR_SURF_16_BANK));
2870
2871		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2873				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2874				NUM_BANKS(ADDR_SURF_16_BANK));
2875
2876		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879				NUM_BANKS(ADDR_SURF_16_BANK));
2880
2881		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2884				NUM_BANKS(ADDR_SURF_8_BANK));
2885
2886		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2889				NUM_BANKS(ADDR_SURF_4_BANK));
2890
2891		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2892			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2893
2894		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2895			if (reg_offset != 7)
2896				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2897
2898		break;
2899	case CHIP_POLARIS10:
2900		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2903				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2907				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2911				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2915				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2921				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2930				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2931				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2932		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2933				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2934		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2935				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2948				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2950		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2951				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2959				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2968				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2970		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2975				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2979				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2983				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2991				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2993				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2995				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2997				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2999				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3003				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3005				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3006		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3011				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3014		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3019				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3020				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3021				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3022
3023		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026				NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031				NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036				NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041				NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3045				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046				NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051				NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056				NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3060				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061				NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066				NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3070				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071				NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076				NUM_BANKS(ADDR_SURF_16_BANK));
3077
3078		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081				NUM_BANKS(ADDR_SURF_8_BANK));
3082
3083		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086				NUM_BANKS(ADDR_SURF_4_BANK));
3087
3088		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091				NUM_BANKS(ADDR_SURF_4_BANK));
3092
3093		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3094			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3095
3096		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3097			if (reg_offset != 7)
3098				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3099
3100		break;
3101	case CHIP_STONEY:
3102		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103				PIPE_CONFIG(ADDR_SURF_P2) |
3104				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3105				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107				PIPE_CONFIG(ADDR_SURF_P2) |
3108				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3109				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111				PIPE_CONFIG(ADDR_SURF_P2) |
3112				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3113				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115				PIPE_CONFIG(ADDR_SURF_P2) |
3116				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3117				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119				PIPE_CONFIG(ADDR_SURF_P2) |
3120				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3123				PIPE_CONFIG(ADDR_SURF_P2) |
3124				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127				PIPE_CONFIG(ADDR_SURF_P2) |
3128				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3129				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3130		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3131				PIPE_CONFIG(ADDR_SURF_P2));
3132		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3133				PIPE_CONFIG(ADDR_SURF_P2) |
3134				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137				 PIPE_CONFIG(ADDR_SURF_P2) |
3138				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3141				 PIPE_CONFIG(ADDR_SURF_P2) |
3142				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3144		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3145				 PIPE_CONFIG(ADDR_SURF_P2) |
3146				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3149				 PIPE_CONFIG(ADDR_SURF_P2) |
3150				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3153				 PIPE_CONFIG(ADDR_SURF_P2) |
3154				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3157				 PIPE_CONFIG(ADDR_SURF_P2) |
3158				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3160		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161				 PIPE_CONFIG(ADDR_SURF_P2) |
3162				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3163				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3165				 PIPE_CONFIG(ADDR_SURF_P2) |
3166				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3169				 PIPE_CONFIG(ADDR_SURF_P2) |
3170				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3173				 PIPE_CONFIG(ADDR_SURF_P2) |
3174				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3177				 PIPE_CONFIG(ADDR_SURF_P2) |
3178				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3179				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3181				 PIPE_CONFIG(ADDR_SURF_P2) |
3182				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3185				 PIPE_CONFIG(ADDR_SURF_P2) |
3186				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3189				 PIPE_CONFIG(ADDR_SURF_P2) |
3190				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3191				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193				 PIPE_CONFIG(ADDR_SURF_P2) |
3194				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197				 PIPE_CONFIG(ADDR_SURF_P2) |
3198				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201				 PIPE_CONFIG(ADDR_SURF_P2) |
3202				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3203				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204
3205		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3207				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208				NUM_BANKS(ADDR_SURF_8_BANK));
3209		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3211				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212				NUM_BANKS(ADDR_SURF_8_BANK));
3213		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216				NUM_BANKS(ADDR_SURF_8_BANK));
3217		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220				NUM_BANKS(ADDR_SURF_8_BANK));
3221		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224				NUM_BANKS(ADDR_SURF_8_BANK));
3225		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228				NUM_BANKS(ADDR_SURF_8_BANK));
3229		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232				NUM_BANKS(ADDR_SURF_8_BANK));
3233		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3235				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236				NUM_BANKS(ADDR_SURF_16_BANK));
3237		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3238				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240				NUM_BANKS(ADDR_SURF_16_BANK));
3241		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244				 NUM_BANKS(ADDR_SURF_16_BANK));
3245		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248				 NUM_BANKS(ADDR_SURF_16_BANK));
3249		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3251				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252				 NUM_BANKS(ADDR_SURF_16_BANK));
3253		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3256				 NUM_BANKS(ADDR_SURF_16_BANK));
3257		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260				 NUM_BANKS(ADDR_SURF_8_BANK));
3261
3262		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3263			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3264			    reg_offset != 23)
3265				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3266
3267		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3268			if (reg_offset != 7)
3269				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3270
3271		break;
3272	default:
3273		dev_warn(adev->dev,
3274			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3275			 adev->asic_type);
 
3276
3277	case CHIP_CARRIZO:
3278		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279				PIPE_CONFIG(ADDR_SURF_P2) |
3280				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3281				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283				PIPE_CONFIG(ADDR_SURF_P2) |
3284				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3285				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287				PIPE_CONFIG(ADDR_SURF_P2) |
3288				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3289				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291				PIPE_CONFIG(ADDR_SURF_P2) |
3292				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3293				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3295				PIPE_CONFIG(ADDR_SURF_P2) |
3296				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299				PIPE_CONFIG(ADDR_SURF_P2) |
3300				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3303				PIPE_CONFIG(ADDR_SURF_P2) |
3304				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3305				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3306		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3307				PIPE_CONFIG(ADDR_SURF_P2));
3308		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309				PIPE_CONFIG(ADDR_SURF_P2) |
3310				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313				 PIPE_CONFIG(ADDR_SURF_P2) |
3314				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3317				 PIPE_CONFIG(ADDR_SURF_P2) |
3318				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3319				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3320		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3321				 PIPE_CONFIG(ADDR_SURF_P2) |
3322				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3325				 PIPE_CONFIG(ADDR_SURF_P2) |
3326				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3329				 PIPE_CONFIG(ADDR_SURF_P2) |
3330				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333				 PIPE_CONFIG(ADDR_SURF_P2) |
3334				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337				 PIPE_CONFIG(ADDR_SURF_P2) |
3338				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3339				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3341				 PIPE_CONFIG(ADDR_SURF_P2) |
3342				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345				 PIPE_CONFIG(ADDR_SURF_P2) |
3346				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3349				 PIPE_CONFIG(ADDR_SURF_P2) |
3350				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3353				 PIPE_CONFIG(ADDR_SURF_P2) |
3354				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3357				 PIPE_CONFIG(ADDR_SURF_P2) |
3358				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3359				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3361				 PIPE_CONFIG(ADDR_SURF_P2) |
3362				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3365				 PIPE_CONFIG(ADDR_SURF_P2) |
3366				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3367				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369				 PIPE_CONFIG(ADDR_SURF_P2) |
3370				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3373				 PIPE_CONFIG(ADDR_SURF_P2) |
3374				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3376		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3377				 PIPE_CONFIG(ADDR_SURF_P2) |
3378				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3379				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3380
3381		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384				NUM_BANKS(ADDR_SURF_8_BANK));
3385		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388				NUM_BANKS(ADDR_SURF_8_BANK));
3389		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392				NUM_BANKS(ADDR_SURF_8_BANK));
3393		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396				NUM_BANKS(ADDR_SURF_8_BANK));
3397		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400				NUM_BANKS(ADDR_SURF_8_BANK));
3401		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404				NUM_BANKS(ADDR_SURF_8_BANK));
3405		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408				NUM_BANKS(ADDR_SURF_8_BANK));
3409		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3411				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412				NUM_BANKS(ADDR_SURF_16_BANK));
3413		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3414				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416				NUM_BANKS(ADDR_SURF_16_BANK));
3417		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3419				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420				 NUM_BANKS(ADDR_SURF_16_BANK));
3421		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3422				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424				 NUM_BANKS(ADDR_SURF_16_BANK));
3425		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3427				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428				 NUM_BANKS(ADDR_SURF_16_BANK));
3429		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3432				 NUM_BANKS(ADDR_SURF_16_BANK));
3433		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3436				 NUM_BANKS(ADDR_SURF_8_BANK));
3437
3438		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3439			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3440			    reg_offset != 23)
3441				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3442
3443		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3444			if (reg_offset != 7)
3445				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3446
3447		break;
3448	}
3449}
3450
3451static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3452				  u32 se_num, u32 sh_num, u32 instance)
3453{
3454	u32 data;
3455
3456	if (instance == 0xffffffff)
3457		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3458	else
3459		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3460
3461	if (se_num == 0xffffffff)
3462		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3463	else
3464		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3465
3466	if (sh_num == 0xffffffff)
3467		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3468	else
3469		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3470
3471	WREG32(mmGRBM_GFX_INDEX, data);
3472}
3473
3474static u32 gfx_v8_0_create_bitmask(u32 bit_width)
 
3475{
3476	return (u32)((1ULL << bit_width) - 1);
3477}
3478
3479static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480{
3481	u32 data, mask;
3482
3483	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3484		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485
3486	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3487
3488	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3489				       adev->gfx.config.max_sh_per_se);
3490
3491	return (~data) & mask;
3492}
3493
3494static void
3495gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496{
3497	switch (adev->asic_type) {
3498	case CHIP_FIJI:
 
3499		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500			  RB_XSEL2(1) | PKR_MAP(2) |
3501			  PKR_XSEL(1) | PKR_YSEL(1) |
3502			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504			   SE_PAIR_YSEL(2);
3505		break;
3506	case CHIP_TONGA:
3507	case CHIP_POLARIS10:
3508		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509			  SE_XSEL(1) | SE_YSEL(1);
3510		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511			   SE_PAIR_YSEL(2);
3512		break;
3513	case CHIP_TOPAZ:
3514	case CHIP_CARRIZO:
3515		*rconf |= RB_MAP_PKR0(2);
3516		*rconf1 |= 0x0;
3517		break;
3518	case CHIP_POLARIS11:
3519	case CHIP_POLARIS12:
3520		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521			  SE_XSEL(1) | SE_YSEL(1);
3522		*rconf1 |= 0x0;
3523		break;
3524	case CHIP_STONEY:
3525		*rconf |= 0x0;
3526		*rconf1 |= 0x0;
3527		break;
3528	default:
3529		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530		break;
3531	}
3532}
3533
3534static void
3535gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3536					u32 raster_config, u32 raster_config_1,
3537					unsigned rb_mask, unsigned num_rb)
3538{
3539	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3540	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3541	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3542	unsigned rb_per_se = num_rb / num_se;
3543	unsigned se_mask[4];
3544	unsigned se;
3545
3546	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3547	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3548	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3549	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3550
3551	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3552	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3553	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3554
3555	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3556			     (!se_mask[2] && !se_mask[3]))) {
3557		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3558
3559		if (!se_mask[0] && !se_mask[1]) {
3560			raster_config_1 |=
3561				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3562		} else {
3563			raster_config_1 |=
3564				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3565		}
3566	}
3567
3568	for (se = 0; se < num_se; se++) {
3569		unsigned raster_config_se = raster_config;
3570		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3571		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3572		int idx = (se / 2) * 2;
3573
3574		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3575			raster_config_se &= ~SE_MAP_MASK;
3576
3577			if (!se_mask[idx]) {
3578				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3579			} else {
3580				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3581			}
3582		}
3583
3584		pkr0_mask &= rb_mask;
3585		pkr1_mask &= rb_mask;
3586		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3587			raster_config_se &= ~PKR_MAP_MASK;
3588
3589			if (!pkr0_mask) {
3590				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3591			} else {
3592				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3593			}
3594		}
3595
3596		if (rb_per_se >= 2) {
3597			unsigned rb0_mask = 1 << (se * rb_per_se);
3598			unsigned rb1_mask = rb0_mask << 1;
3599
3600			rb0_mask &= rb_mask;
3601			rb1_mask &= rb_mask;
3602			if (!rb0_mask || !rb1_mask) {
3603				raster_config_se &= ~RB_MAP_PKR0_MASK;
3604
3605				if (!rb0_mask) {
3606					raster_config_se |=
3607						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3608				} else {
3609					raster_config_se |=
3610						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3611				}
3612			}
3613
3614			if (rb_per_se > 2) {
3615				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3616				rb1_mask = rb0_mask << 1;
3617				rb0_mask &= rb_mask;
3618				rb1_mask &= rb_mask;
3619				if (!rb0_mask || !rb1_mask) {
3620					raster_config_se &= ~RB_MAP_PKR1_MASK;
3621
3622					if (!rb0_mask) {
3623						raster_config_se |=
3624							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3625					} else {
3626						raster_config_se |=
3627							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3628					}
3629				}
3630			}
3631		}
3632
3633		/* GRBM_GFX_INDEX has a different offset on VI */
3634		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3635		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3636		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3637	}
3638
3639	/* GRBM_GFX_INDEX has a different offset on VI */
3640	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3641}
3642
3643static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3644{
3645	int i, j;
3646	u32 data;
3647	u32 raster_config = 0, raster_config_1 = 0;
3648	u32 active_rbs = 0;
3649	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650					adev->gfx.config.max_sh_per_se;
3651	unsigned num_rb_pipes;
3652
3653	mutex_lock(&adev->grbm_idx_mutex);
3654	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657			data = gfx_v8_0_get_rb_active_bitmap(adev);
3658			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659					       rb_bitmap_width_per_sh);
3660		}
3661	}
3662	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663
3664	adev->gfx.config.backend_enable_mask = active_rbs;
3665	adev->gfx.config.num_rbs = hweight32(active_rbs);
3666
3667	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668			     adev->gfx.config.max_shader_engines, 16);
3669
3670	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3671
3672	if (!adev->gfx.config.backend_enable_mask ||
3673			adev->gfx.config.num_rbs >= num_rb_pipes) {
3674		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3676	} else {
3677		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678							adev->gfx.config.backend_enable_mask,
3679							num_rb_pipes);
3680	}
3681
3682	/* cache the values for userspace */
3683	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3684		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3685			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3686			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3687				RREG32(mmCC_RB_BACKEND_DISABLE);
3688			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3689				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3690			adev->gfx.config.rb_config[i][j].raster_config =
3691				RREG32(mmPA_SC_RASTER_CONFIG);
3692			adev->gfx.config.rb_config[i][j].raster_config_1 =
3693				RREG32(mmPA_SC_RASTER_CONFIG_1);
3694		}
3695	}
3696	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3697	mutex_unlock(&adev->grbm_idx_mutex);
3698}
3699
 
3700/**
3701 * gfx_v8_0_init_compute_vmid - gart enable
3702 *
3703 * @rdev: amdgpu_device pointer
3704 *
3705 * Initialize compute vmid sh_mem registers
3706 *
3707 */
3708#define DEFAULT_SH_MEM_BASES	(0x6000)
3709#define FIRST_COMPUTE_VMID	(8)
3710#define LAST_COMPUTE_VMID	(16)
3711static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3712{
3713	int i;
3714	uint32_t sh_mem_config;
3715	uint32_t sh_mem_bases;
3716
3717	/*
3718	 * Configure apertures:
3719	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3720	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3721	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3722	 */
3723	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3724
3725	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3726			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3727			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3728			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3729			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3730			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3731
3732	mutex_lock(&adev->srbm_mutex);
3733	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3734		vi_srbm_select(adev, 0, 0, 0, i);
3735		/* CP and shaders */
3736		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3737		WREG32(mmSH_MEM_APE1_BASE, 1);
3738		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3739		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3740	}
3741	vi_srbm_select(adev, 0, 0, 0, 0);
3742	mutex_unlock(&adev->srbm_mutex);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3743}
3744
3745static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3746{
3747	u32 tmp;
3748	int i;
3749
3750	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3751	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3752	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3753	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3754
3755	gfx_v8_0_tiling_mode_table_init(adev);
3756	gfx_v8_0_setup_rb(adev);
3757	gfx_v8_0_get_cu_info(adev);
 
3758
3759	/* XXX SH_MEM regs */
3760	/* where to put LDS, scratch, GPUVM in FSA64 space */
 
 
 
 
 
 
 
 
3761	mutex_lock(&adev->srbm_mutex);
3762	for (i = 0; i < 16; i++) {
3763		vi_srbm_select(adev, 0, 0, 0, i);
3764		/* CP and shaders */
3765		if (i == 0) {
3766			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3767			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3768			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3769					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3770			WREG32(mmSH_MEM_CONFIG, tmp);
 
3771		} else {
3772			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3773			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3774			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3775					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3776			WREG32(mmSH_MEM_CONFIG, tmp);
 
 
3777		}
3778
3779		WREG32(mmSH_MEM_APE1_BASE, 1);
3780		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3781		WREG32(mmSH_MEM_BASES, 0);
3782	}
3783	vi_srbm_select(adev, 0, 0, 0, 0);
3784	mutex_unlock(&adev->srbm_mutex);
3785
3786	gfx_v8_0_init_compute_vmid(adev);
 
3787
3788	mutex_lock(&adev->grbm_idx_mutex);
3789	/*
3790	 * making sure that the following register writes will be broadcasted
3791	 * to all the shaders
3792	 */
3793	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3794
3795	WREG32(mmPA_SC_FIFO_SIZE,
3796		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3797			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3798		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3799			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3800		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3801			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3802		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3803			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
 
 
 
 
 
 
 
 
3804	mutex_unlock(&adev->grbm_idx_mutex);
3805
3806}
3807
3808static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3809{
3810	u32 i, j, k;
3811	u32 mask;
3812
3813	mutex_lock(&adev->grbm_idx_mutex);
3814	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3815		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3816			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3817			for (k = 0; k < adev->usec_timeout; k++) {
3818				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3819					break;
3820				udelay(1);
3821			}
 
 
 
 
 
 
 
 
3822		}
3823	}
3824	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3825	mutex_unlock(&adev->grbm_idx_mutex);
3826
3827	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3828		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3829		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3830		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3831	for (k = 0; k < adev->usec_timeout; k++) {
3832		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3833			break;
3834		udelay(1);
3835	}
3836}
3837
3838static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3839					       bool enable)
3840{
3841	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3842
3843	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3844	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3845	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3846	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3847
3848	WREG32(mmCP_INT_CNTL_RING0, tmp);
3849}
3850
3851static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3852{
 
3853	/* csib */
3854	WREG32(mmRLC_CSIB_ADDR_HI,
3855			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3856	WREG32(mmRLC_CSIB_ADDR_LO,
3857			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3858	WREG32(mmRLC_CSIB_LENGTH,
3859			adev->gfx.rlc.clear_state_size);
3860}
3861
3862static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3863				int ind_offset,
3864				int list_size,
3865				int *unique_indices,
3866				int *indices_count,
3867				int max_indices,
3868				int *ind_start_offsets,
3869				int *offset_count,
3870				int max_offset)
3871{
3872	int indices;
3873	bool new_entry = true;
3874
3875	for (; ind_offset < list_size; ind_offset++) {
3876
3877		if (new_entry) {
3878			new_entry = false;
3879			ind_start_offsets[*offset_count] = ind_offset;
3880			*offset_count = *offset_count + 1;
3881			BUG_ON(*offset_count >= max_offset);
3882		}
3883
3884		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3885			new_entry = true;
3886			continue;
3887		}
3888
3889		ind_offset += 2;
3890
3891		/* look for the matching indice */
3892		for (indices = 0;
3893			indices < *indices_count;
3894			indices++) {
3895			if (unique_indices[indices] ==
3896				register_list_format[ind_offset])
3897				break;
3898		}
3899
3900		if (indices >= *indices_count) {
3901			unique_indices[*indices_count] =
3902				register_list_format[ind_offset];
3903			indices = *indices_count;
3904			*indices_count = *indices_count + 1;
3905			BUG_ON(*indices_count >= max_indices);
3906		}
3907
3908		register_list_format[ind_offset] = indices;
3909	}
3910}
3911
3912static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3913{
3914	int i, temp, data;
3915	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3916	int indices_count = 0;
3917	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3918	int offset_count = 0;
3919
3920	int list_size;
3921	unsigned int *register_list_format =
3922		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
 
3923	if (!register_list_format)
3924		return -ENOMEM;
3925	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3926			adev->gfx.rlc.reg_list_format_size_bytes);
3927
3928	gfx_v8_0_parse_ind_reg_list(register_list_format,
3929				RLC_FormatDirectRegListLength,
3930				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3931				unique_indices,
3932				&indices_count,
3933				sizeof(unique_indices) / sizeof(int),
3934				indirect_start_offsets,
3935				&offset_count,
3936				sizeof(indirect_start_offsets)/sizeof(int));
3937
3938	/* save and restore list */
3939	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3940
3941	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3942	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3943		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3944
3945	/* indirect list */
3946	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3947	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3948		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3949
3950	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3951	list_size = list_size >> 1;
3952	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3953	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3954
3955	/* starting offsets starts */
3956	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3957		adev->gfx.rlc.starting_offsets_start);
3958	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3959		WREG32(mmRLC_GPM_SCRATCH_DATA,
3960				indirect_start_offsets[i]);
3961
3962	/* unique indices */
3963	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3964	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3965	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3966		if (unique_indices[i] != 0) {
3967			amdgpu_mm_wreg(adev, temp + i,
3968					unique_indices[i] & 0x3FFFF, false);
3969			amdgpu_mm_wreg(adev, data + i,
3970					unique_indices[i] >> 20, false);
3971		}
3972	}
3973	kfree(register_list_format);
3974
3975	return 0;
3976}
3977
3978static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3979{
3980	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3981}
3982
3983static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3984{
3985	uint32_t data;
3986
3987	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3988
3989	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3990	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3991	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3992	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3993	WREG32(mmRLC_PG_DELAY, data);
3994
3995	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3996	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3997
3998}
3999
4000static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4001						bool enable)
4002{
4003	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4004}
4005
4006static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4007						  bool enable)
4008{
4009	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4010}
4011
4012static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4013{
4014	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4015}
4016
4017static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4018{
4019	if ((adev->asic_type == CHIP_CARRIZO) ||
4020	    (adev->asic_type == CHIP_STONEY)) {
4021		gfx_v8_0_init_csb(adev);
4022		gfx_v8_0_init_save_restore_list(adev);
4023		gfx_v8_0_enable_save_restore_machine(adev);
4024		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4025		gfx_v8_0_init_power_gating(adev);
4026		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4027		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4028			cz_enable_sck_slow_down_on_power_up(adev, true);
4029			cz_enable_sck_slow_down_on_power_down(adev, true);
4030		} else {
4031			cz_enable_sck_slow_down_on_power_up(adev, false);
4032			cz_enable_sck_slow_down_on_power_down(adev, false);
4033		}
4034		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4035			cz_enable_cp_power_gating(adev, true);
4036		else
4037			cz_enable_cp_power_gating(adev, false);
4038	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4039		   (adev->asic_type == CHIP_POLARIS12)) {
 
4040		gfx_v8_0_init_csb(adev);
4041		gfx_v8_0_init_save_restore_list(adev);
4042		gfx_v8_0_enable_save_restore_machine(adev);
4043		gfx_v8_0_init_power_gating(adev);
4044	}
4045
4046}
4047
4048static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4049{
4050	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4051
4052	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4053	gfx_v8_0_wait_for_rlc_serdes(adev);
4054}
4055
4056static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4057{
4058	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4059	udelay(50);
4060
4061	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4062	udelay(50);
4063}
4064
4065static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4066{
4067	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4068
4069	/* carrizo do enable cp interrupt after cp inited */
4070	if (!(adev->flags & AMD_IS_APU))
4071		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4072
4073	udelay(50);
4074}
4075
4076static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4077{
4078	const struct rlc_firmware_header_v2_0 *hdr;
4079	const __le32 *fw_data;
4080	unsigned i, fw_size;
4081
4082	if (!adev->gfx.rlc_fw)
4083		return -EINVAL;
4084
4085	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4086	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4087
4088	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4089			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4090	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4091
4092	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4093	for (i = 0; i < fw_size; i++)
4094		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4095	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4096
4097	return 0;
4098}
4099
4100static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4101{
4102	int r;
4103	u32 tmp;
4104
4105	gfx_v8_0_rlc_stop(adev);
4106
4107	/* disable CG */
4108	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4109	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4110		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4111	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4112	if (adev->asic_type == CHIP_POLARIS11 ||
4113	    adev->asic_type == CHIP_POLARIS10 ||
4114	    adev->asic_type == CHIP_POLARIS12) {
4115		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4116		tmp &= ~0x3;
4117		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4118	}
4119
4120	/* disable PG */
4121	WREG32(mmRLC_PG_CNTL, 0);
4122
4123	gfx_v8_0_rlc_reset(adev);
4124	gfx_v8_0_init_pg(adev);
4125
4126	if (!adev->pp_enabled) {
4127		if (!adev->firmware.smu_load) {
4128			/* legacy rlc firmware loading */
4129			r = gfx_v8_0_rlc_load_microcode(adev);
4130			if (r)
4131				return r;
4132		} else {
4133			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4134							AMDGPU_UCODE_ID_RLC_G);
4135			if (r)
4136				return -EINVAL;
4137		}
4138	}
4139
4140	gfx_v8_0_rlc_start(adev);
4141
4142	return 0;
4143}
4144
4145static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4146{
4147	int i;
4148	u32 tmp = RREG32(mmCP_ME_CNTL);
4149
4150	if (enable) {
4151		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4152		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4153		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4154	} else {
4155		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4156		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4157		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4158		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4159			adev->gfx.gfx_ring[i].ready = false;
4160	}
4161	WREG32(mmCP_ME_CNTL, tmp);
4162	udelay(50);
4163}
4164
4165static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4166{
4167	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4168	const struct gfx_firmware_header_v1_0 *ce_hdr;
4169	const struct gfx_firmware_header_v1_0 *me_hdr;
4170	const __le32 *fw_data;
4171	unsigned i, fw_size;
4172
4173	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4174		return -EINVAL;
4175
4176	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4177		adev->gfx.pfp_fw->data;
4178	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4179		adev->gfx.ce_fw->data;
4180	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4181		adev->gfx.me_fw->data;
4182
4183	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4184	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4185	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4186
4187	gfx_v8_0_cp_gfx_enable(adev, false);
4188
4189	/* PFP */
4190	fw_data = (const __le32 *)
4191		(adev->gfx.pfp_fw->data +
4192		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4193	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4194	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4195	for (i = 0; i < fw_size; i++)
4196		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4197	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4198
4199	/* CE */
4200	fw_data = (const __le32 *)
4201		(adev->gfx.ce_fw->data +
4202		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4203	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4204	WREG32(mmCP_CE_UCODE_ADDR, 0);
4205	for (i = 0; i < fw_size; i++)
4206		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4207	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4208
4209	/* ME */
4210	fw_data = (const __le32 *)
4211		(adev->gfx.me_fw->data +
4212		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4213	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4214	WREG32(mmCP_ME_RAM_WADDR, 0);
4215	for (i = 0; i < fw_size; i++)
4216		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4217	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4218
4219	return 0;
4220}
4221
4222static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4223{
4224	u32 count = 0;
4225	const struct cs_section_def *sect = NULL;
4226	const struct cs_extent_def *ext = NULL;
4227
4228	/* begin clear state */
4229	count += 2;
4230	/* context control state */
4231	count += 3;
4232
4233	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4234		for (ext = sect->section; ext->extent != NULL; ++ext) {
4235			if (sect->id == SECT_CONTEXT)
4236				count += 2 + ext->reg_count;
4237			else
4238				return 0;
4239		}
4240	}
4241	/* pa_sc_raster_config/pa_sc_raster_config1 */
4242	count += 4;
4243	/* end clear state */
4244	count += 2;
4245	/* clear state */
4246	count += 2;
4247
4248	return count;
4249}
4250
4251static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4252{
4253	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4254	const struct cs_section_def *sect = NULL;
4255	const struct cs_extent_def *ext = NULL;
4256	int r, i;
4257
4258	/* init the CP */
4259	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4260	WREG32(mmCP_ENDIAN_SWAP, 0);
4261	WREG32(mmCP_DEVICE_ID, 1);
4262
4263	gfx_v8_0_cp_gfx_enable(adev, true);
4264
4265	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4266	if (r) {
4267		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4268		return r;
4269	}
4270
4271	/* clear state buffer */
4272	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4273	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4274
4275	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4276	amdgpu_ring_write(ring, 0x80000000);
4277	amdgpu_ring_write(ring, 0x80000000);
4278
4279	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4280		for (ext = sect->section; ext->extent != NULL; ++ext) {
4281			if (sect->id == SECT_CONTEXT) {
4282				amdgpu_ring_write(ring,
4283				       PACKET3(PACKET3_SET_CONTEXT_REG,
4284					       ext->reg_count));
4285				amdgpu_ring_write(ring,
4286				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4287				for (i = 0; i < ext->reg_count; i++)
4288					amdgpu_ring_write(ring, ext->extent[i]);
4289			}
4290		}
4291	}
4292
4293	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4294	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4295	switch (adev->asic_type) {
4296	case CHIP_TONGA:
4297	case CHIP_POLARIS10:
4298		amdgpu_ring_write(ring, 0x16000012);
4299		amdgpu_ring_write(ring, 0x0000002A);
4300		break;
4301	case CHIP_POLARIS11:
4302	case CHIP_POLARIS12:
4303		amdgpu_ring_write(ring, 0x16000012);
4304		amdgpu_ring_write(ring, 0x00000000);
4305		break;
4306	case CHIP_FIJI:
4307		amdgpu_ring_write(ring, 0x3a00161a);
4308		amdgpu_ring_write(ring, 0x0000002e);
4309		break;
4310	case CHIP_CARRIZO:
4311		amdgpu_ring_write(ring, 0x00000002);
4312		amdgpu_ring_write(ring, 0x00000000);
4313		break;
4314	case CHIP_TOPAZ:
4315		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4316				0x00000000 : 0x00000002);
4317		amdgpu_ring_write(ring, 0x00000000);
4318		break;
4319	case CHIP_STONEY:
4320		amdgpu_ring_write(ring, 0x00000000);
4321		amdgpu_ring_write(ring, 0x00000000);
4322		break;
4323	default:
4324		BUG();
4325	}
4326
4327	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4328	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4329
4330	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4331	amdgpu_ring_write(ring, 0);
4332
4333	/* init the CE partitions */
4334	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336	amdgpu_ring_write(ring, 0x8000);
4337	amdgpu_ring_write(ring, 0x8000);
4338
4339	amdgpu_ring_commit(ring);
4340
4341	return 0;
4342}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4343
4344static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4345{
4346	struct amdgpu_ring *ring;
4347	u32 tmp;
4348	u32 rb_bufsz;
4349	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4350	int r;
4351
4352	/* Set the write pointer delay */
4353	WREG32(mmCP_RB_WPTR_DELAY, 0);
4354
4355	/* set the RB to use vmid 0 */
4356	WREG32(mmCP_RB_VMID, 0);
4357
4358	/* Set ring buffer size */
4359	ring = &adev->gfx.gfx_ring[0];
4360	rb_bufsz = order_base_2(ring->ring_size / 8);
4361	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4362	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4363	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4364	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4365#ifdef __BIG_ENDIAN
4366	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4367#endif
4368	WREG32(mmCP_RB0_CNTL, tmp);
4369
4370	/* Initialize the ring buffer's read and write pointers */
4371	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4372	ring->wptr = 0;
4373	WREG32(mmCP_RB0_WPTR, ring->wptr);
4374
4375	/* set the wb address wether it's enabled or not */
4376	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4377	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4378	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4379
4380	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4381	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4382	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4383	mdelay(1);
4384	WREG32(mmCP_RB0_CNTL, tmp);
4385
4386	rb_addr = ring->gpu_addr >> 8;
4387	WREG32(mmCP_RB0_BASE, rb_addr);
4388	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4389
4390	/* no gfx doorbells on iceland */
4391	if (adev->asic_type != CHIP_TOPAZ) {
4392		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4393		if (ring->use_doorbell) {
4394			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4395					    DOORBELL_OFFSET, ring->doorbell_index);
4396			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4397					    DOORBELL_HIT, 0);
4398			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4399					    DOORBELL_EN, 1);
4400		} else {
4401			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4402					    DOORBELL_EN, 0);
4403		}
4404		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4405
4406		if (adev->asic_type == CHIP_TONGA) {
4407			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4408					    DOORBELL_RANGE_LOWER,
4409					    AMDGPU_DOORBELL_GFX_RING0);
4410			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4411
4412			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4413			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4414		}
4415
4416	}
4417
4418	/* start the ring */
 
4419	gfx_v8_0_cp_gfx_start(adev);
4420	ring->ready = true;
4421	r = amdgpu_ring_test_ring(ring);
4422	if (r)
4423		ring->ready = false;
4424
4425	return r;
4426}
4427
4428static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4429{
4430	int i;
4431
4432	if (enable) {
4433		WREG32(mmCP_MEC_CNTL, 0);
4434	} else {
4435		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4436		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4437			adev->gfx.compute_ring[i].ready = false;
4438	}
4439	udelay(50);
4440}
4441
4442static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 
4443{
4444	const struct gfx_firmware_header_v1_0 *mec_hdr;
4445	const __le32 *fw_data;
4446	unsigned i, fw_size;
4447
4448	if (!adev->gfx.mec_fw)
4449		return -EINVAL;
 
 
 
 
 
 
4450
4451	gfx_v8_0_cp_compute_enable(adev, false);
 
 
 
 
4452
4453	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4454	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
 
4455
4456	fw_data = (const __le32 *)
4457		(adev->gfx.mec_fw->data +
4458		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4459	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4460
4461	/* MEC1 */
4462	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4463	for (i = 0; i < fw_size; i++)
4464		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4465	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4466
4467	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4468	if (adev->gfx.mec2_fw) {
4469		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4470
4471		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4472		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4473
4474		fw_data = (const __le32 *)
4475			(adev->gfx.mec2_fw->data +
4476			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4477		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4478
4479		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4480		for (i = 0; i < fw_size; i++)
4481			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4482		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
 
 
 
 
 
4483	}
4484
 
 
4485	return 0;
4486}
4487
4488static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4489{
4490	int i, r;
4491
4492	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4493		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
 
 
 
 
 
 
 
 
 
 
4494
4495		if (ring->mqd_obj) {
4496			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4497			if (unlikely(r != 0))
4498				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4499
4500			amdgpu_bo_unpin(ring->mqd_obj);
4501			amdgpu_bo_unreserve(ring->mqd_obj);
 
4502
4503			amdgpu_bo_unref(&ring->mqd_obj);
4504			ring->mqd_obj = NULL;
 
 
 
4505		}
4506	}
4507}
4508
4509static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4510{
4511	int r, i, j;
4512	u32 tmp;
4513	bool use_doorbell = true;
4514	u64 hqd_gpu_addr;
4515	u64 mqd_gpu_addr;
4516	u64 eop_gpu_addr;
4517	u64 wb_gpu_addr;
4518	u32 *buf;
4519	struct vi_mqd *mqd;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4520
4521	/* init the queues.  */
4522	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4523		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4524
4525		if (ring->mqd_obj == NULL) {
4526			r = amdgpu_bo_create(adev,
4527					     sizeof(struct vi_mqd),
4528					     PAGE_SIZE, true,
4529					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4530					     NULL, &ring->mqd_obj);
4531			if (r) {
4532				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4533				return r;
4534			}
4535		}
4536
4537		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4538		if (unlikely(r != 0)) {
4539			gfx_v8_0_cp_compute_fini(adev);
4540			return r;
4541		}
4542		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4543				  &mqd_gpu_addr);
4544		if (r) {
4545			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4546			gfx_v8_0_cp_compute_fini(adev);
4547			return r;
4548		}
4549		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4550		if (r) {
4551			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4552			gfx_v8_0_cp_compute_fini(adev);
4553			return r;
4554		}
4555
4556		/* init the mqd struct */
4557		memset(buf, 0, sizeof(struct vi_mqd));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4558
4559		mqd = (struct vi_mqd *)buf;
4560		mqd->header = 0xC0310800;
4561		mqd->compute_pipelinestat_enable = 0x00000001;
4562		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4563		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4564		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4565		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4566		mqd->compute_misc_reserved = 0x00000003;
4567
4568		mutex_lock(&adev->srbm_mutex);
4569		vi_srbm_select(adev, ring->me,
4570			       ring->pipe,
4571			       ring->queue, 0);
4572
4573		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4574		eop_gpu_addr >>= 8;
4575
4576		/* write the EOP addr */
4577		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4578		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4579
4580		/* set the VMID assigned */
4581		WREG32(mmCP_HQD_VMID, 0);
4582
4583		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4584		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4585		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4586				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4587		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4588
4589		/* disable wptr polling */
4590		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4591		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4592		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4593
4594		mqd->cp_hqd_eop_base_addr_lo =
4595			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4596		mqd->cp_hqd_eop_base_addr_hi =
4597			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4598
4599		/* enable doorbell? */
4600		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4601		if (use_doorbell) {
4602			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4603		} else {
4604			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4605		}
4606		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4607		mqd->cp_hqd_pq_doorbell_control = tmp;
4608
4609		/* disable the queue if it's active */
4610		mqd->cp_hqd_dequeue_request = 0;
4611		mqd->cp_hqd_pq_rptr = 0;
4612		mqd->cp_hqd_pq_wptr= 0;
4613		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4614			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4615			for (j = 0; j < adev->usec_timeout; j++) {
4616				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4617					break;
4618				udelay(1);
4619			}
4620			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4621			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4622			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4623		}
4624
4625		/* set the pointer to the MQD */
4626		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4627		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4628		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4629		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4630
4631		/* set MQD vmid to 0 */
4632		tmp = RREG32(mmCP_MQD_CONTROL);
4633		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4634		WREG32(mmCP_MQD_CONTROL, tmp);
4635		mqd->cp_mqd_control = tmp;
4636
4637		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4638		hqd_gpu_addr = ring->gpu_addr >> 8;
4639		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4640		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4641		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4642		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4643
4644		/* set up the HQD, this is similar to CP_RB0_CNTL */
4645		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4646		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4647				    (order_base_2(ring->ring_size / 4) - 1));
4648		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4649			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4650#ifdef __BIG_ENDIAN
4651		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4652#endif
4653		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4654		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4655		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4656		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4657		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4658		mqd->cp_hqd_pq_control = tmp;
4659
4660		/* set the wb address wether it's enabled or not */
4661		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4662		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4663		mqd->cp_hqd_pq_rptr_report_addr_hi =
4664			upper_32_bits(wb_gpu_addr) & 0xffff;
4665		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4666		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4667		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4668		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4669
4670		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4671		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4672		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4673		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4674		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4675		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4676		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4677
4678		/* enable the doorbell if requested */
4679		if (use_doorbell) {
4680			if ((adev->asic_type == CHIP_CARRIZO) ||
4681			    (adev->asic_type == CHIP_FIJI) ||
4682			    (adev->asic_type == CHIP_STONEY) ||
4683			    (adev->asic_type == CHIP_POLARIS11) ||
4684			    (adev->asic_type == CHIP_POLARIS10) ||
4685			    (adev->asic_type == CHIP_POLARIS12)) {
4686				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4687				       AMDGPU_DOORBELL_KIQ << 2);
4688				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4689				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4690			}
4691			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4692			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4693					    DOORBELL_OFFSET, ring->doorbell_index);
4694			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4695			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4696			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4697			mqd->cp_hqd_pq_doorbell_control = tmp;
4698
4699		} else {
4700			mqd->cp_hqd_pq_doorbell_control = 0;
4701		}
4702		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4703		       mqd->cp_hqd_pq_doorbell_control);
4704
4705		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4706		ring->wptr = 0;
4707		mqd->cp_hqd_pq_wptr = ring->wptr;
4708		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4709		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4710
4711		/* set the vmid for the queue */
4712		mqd->cp_hqd_vmid = 0;
4713		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4714
4715		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4716		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4717		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4718		mqd->cp_hqd_persistent_state = tmp;
4719		if (adev->asic_type == CHIP_STONEY ||
4720			adev->asic_type == CHIP_POLARIS11 ||
4721			adev->asic_type == CHIP_POLARIS10 ||
4722			adev->asic_type == CHIP_POLARIS12) {
4723			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4724			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4725			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4726		}
 
 
 
 
 
4727
4728		/* activate the queue */
4729		mqd->cp_hqd_active = 1;
4730		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 
4731
 
 
 
 
 
 
 
4732		vi_srbm_select(adev, 0, 0, 0, 0);
4733		mutex_unlock(&adev->srbm_mutex);
4734
4735		amdgpu_bo_kunmap(ring->mqd_obj);
4736		amdgpu_bo_unreserve(ring->mqd_obj);
 
 
 
 
 
 
 
 
 
4737	}
 
 
4738
4739	if (use_doorbell) {
4740		tmp = RREG32(mmCP_PQ_STATUS);
4741		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4742		WREG32(mmCP_PQ_STATUS, tmp);
 
4743	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4744
4745	gfx_v8_0_cp_compute_enable(adev, true);
4746
4747	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4749
4750		ring->ready = true;
4751		r = amdgpu_ring_test_ring(ring);
 
 
 
 
 
 
 
 
4752		if (r)
4753			ring->ready = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4754	}
4755
4756	return 0;
4757}
4758
4759static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4760{
4761	int r;
4762
4763	if (!(adev->flags & AMD_IS_APU))
4764		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4765
4766	if (!adev->pp_enabled) {
4767		if (!adev->firmware.smu_load) {
4768			/* legacy firmware loading */
4769			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4770			if (r)
4771				return r;
4772
4773			r = gfx_v8_0_cp_compute_load_microcode(adev);
4774			if (r)
4775				return r;
4776		} else {
4777			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4778							AMDGPU_UCODE_ID_CP_CE);
4779			if (r)
4780				return -EINVAL;
4781
4782			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4783							AMDGPU_UCODE_ID_CP_PFP);
4784			if (r)
4785				return -EINVAL;
4786
4787			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4788							AMDGPU_UCODE_ID_CP_ME);
4789			if (r)
4790				return -EINVAL;
4791
4792			if (adev->asic_type == CHIP_TOPAZ) {
4793				r = gfx_v8_0_cp_compute_load_microcode(adev);
4794				if (r)
4795					return r;
4796			} else {
4797				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4798										 AMDGPU_UCODE_ID_CP_MEC1);
4799				if (r)
4800					return -EINVAL;
4801			}
4802		}
4803	}
4804
4805	r = gfx_v8_0_cp_gfx_resume(adev);
4806	if (r)
4807		return r;
4808
4809	r = gfx_v8_0_cp_compute_resume(adev);
4810	if (r)
4811		return r;
4812
4813	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4814
4815	return 0;
4816}
4817
4818static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4819{
4820	gfx_v8_0_cp_gfx_enable(adev, enable);
4821	gfx_v8_0_cp_compute_enable(adev, enable);
4822}
4823
4824static int gfx_v8_0_hw_init(void *handle)
4825{
4826	int r;
4827	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4828
4829	gfx_v8_0_init_golden_registers(adev);
4830	gfx_v8_0_gpu_init(adev);
4831
4832	r = gfx_v8_0_rlc_resume(adev);
4833	if (r)
4834		return r;
4835
4836	r = gfx_v8_0_cp_resume(adev);
4837
4838	return r;
4839}
4840
4841static int gfx_v8_0_hw_fini(void *handle)
4842{
4843	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
 
 
 
 
 
 
4844
4845	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4846	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4847	if (amdgpu_sriov_vf(adev)) {
4848		pr_debug("For SRIOV client, shouldn't do anything.\n");
4849		return 0;
 
 
 
 
 
4850	}
4851	gfx_v8_0_cp_enable(adev, false);
4852	gfx_v8_0_rlc_stop(adev);
4853	gfx_v8_0_cp_compute_fini(adev);
4854
4855	amdgpu_set_powergating_state(adev,
4856			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4857
4858	return 0;
4859}
4860
4861static int gfx_v8_0_suspend(void *handle)
4862{
4863	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4864
4865	return gfx_v8_0_hw_fini(adev);
 
 
 
 
4866}
4867
4868static int gfx_v8_0_resume(void *handle)
4869{
4870	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4871
4872	return gfx_v8_0_hw_init(adev);
 
 
 
4873}
4874
4875static bool gfx_v8_0_is_idle(void *handle)
4876{
 
4877	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4878
4879	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4880		return false;
4881	else
4882		return true;
 
 
 
4883}
4884
4885static int gfx_v8_0_wait_for_idle(void *handle)
4886{
4887	unsigned i;
4888	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4889
4890	for (i = 0; i < adev->usec_timeout; i++) {
4891		if (gfx_v8_0_is_idle(handle))
4892			return 0;
4893
4894		udelay(1);
4895	}
4896	return -ETIMEDOUT;
4897}
4898
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4899static bool gfx_v8_0_check_soft_reset(void *handle)
4900{
4901	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4902	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4903	u32 tmp;
4904
4905	/* GRBM_STATUS */
4906	tmp = RREG32(mmGRBM_STATUS);
4907	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4908		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4909		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4910		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4911		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4912		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4913		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4914		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4915						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4916		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4917						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4918		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4919						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4920	}
4921
4922	/* GRBM_STATUS2 */
4923	tmp = RREG32(mmGRBM_STATUS2);
4924	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4925		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4926						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4927
4928	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4929	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4930	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4931		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4932						SOFT_RESET_CPF, 1);
4933		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4934						SOFT_RESET_CPC, 1);
4935		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4936						SOFT_RESET_CPG, 1);
4937		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4938						SOFT_RESET_GRBM, 1);
4939	}
4940
4941	/* SRBM_STATUS */
4942	tmp = RREG32(mmSRBM_STATUS);
4943	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4944		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4945						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4946	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4947		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4948						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4949
4950	if (grbm_soft_reset || srbm_soft_reset) {
4951		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4952		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4953		return true;
4954	} else {
4955		adev->gfx.grbm_soft_reset = 0;
4956		adev->gfx.srbm_soft_reset = 0;
4957		return false;
4958	}
4959}
4960
4961static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
4962				  struct amdgpu_ring *ring)
4963{
4964	int i;
4965
4966	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4967	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4968		u32 tmp;
4969		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
4970		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
4971				    DEQUEUE_REQ, 2);
4972		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
4973		for (i = 0; i < adev->usec_timeout; i++) {
4974			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4975				break;
4976			udelay(1);
4977		}
4978	}
4979}
4980
4981static int gfx_v8_0_pre_soft_reset(void *handle)
4982{
4983	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4984	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4985
4986	if ((!adev->gfx.grbm_soft_reset) &&
4987	    (!adev->gfx.srbm_soft_reset))
4988		return 0;
4989
4990	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4991	srbm_soft_reset = adev->gfx.srbm_soft_reset;
4992
4993	/* stop the rlc */
4994	gfx_v8_0_rlc_stop(adev);
4995
4996	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4997	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4998		/* Disable GFX parsing/prefetching */
4999		gfx_v8_0_cp_gfx_enable(adev, false);
5000
5001	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5002	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5003	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5004	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5005		int i;
5006
5007		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5008			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5009
5010			gfx_v8_0_inactive_hqd(adev, ring);
 
 
 
 
5011		}
5012		/* Disable MEC parsing/prefetching */
5013		gfx_v8_0_cp_compute_enable(adev, false);
5014	}
5015
5016       return 0;
5017}
5018
5019static int gfx_v8_0_soft_reset(void *handle)
5020{
5021	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5022	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5023	u32 tmp;
5024
5025	if ((!adev->gfx.grbm_soft_reset) &&
5026	    (!adev->gfx.srbm_soft_reset))
5027		return 0;
5028
5029	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5030	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5031
5032	if (grbm_soft_reset || srbm_soft_reset) {
5033		tmp = RREG32(mmGMCON_DEBUG);
5034		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5035		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5036		WREG32(mmGMCON_DEBUG, tmp);
5037		udelay(50);
5038	}
5039
5040	if (grbm_soft_reset) {
5041		tmp = RREG32(mmGRBM_SOFT_RESET);
5042		tmp |= grbm_soft_reset;
5043		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5044		WREG32(mmGRBM_SOFT_RESET, tmp);
5045		tmp = RREG32(mmGRBM_SOFT_RESET);
5046
5047		udelay(50);
5048
5049		tmp &= ~grbm_soft_reset;
5050		WREG32(mmGRBM_SOFT_RESET, tmp);
5051		tmp = RREG32(mmGRBM_SOFT_RESET);
5052	}
5053
5054	if (srbm_soft_reset) {
5055		tmp = RREG32(mmSRBM_SOFT_RESET);
5056		tmp |= srbm_soft_reset;
5057		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5058		WREG32(mmSRBM_SOFT_RESET, tmp);
5059		tmp = RREG32(mmSRBM_SOFT_RESET);
5060
5061		udelay(50);
5062
5063		tmp &= ~srbm_soft_reset;
5064		WREG32(mmSRBM_SOFT_RESET, tmp);
5065		tmp = RREG32(mmSRBM_SOFT_RESET);
5066	}
5067
5068	if (grbm_soft_reset || srbm_soft_reset) {
5069		tmp = RREG32(mmGMCON_DEBUG);
5070		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5071		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5072		WREG32(mmGMCON_DEBUG, tmp);
5073	}
5074
5075	/* Wait a little for things to settle down */
5076	udelay(50);
5077
5078	return 0;
5079}
5080
5081static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5082			      struct amdgpu_ring *ring)
5083{
5084	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5085	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5086	WREG32(mmCP_HQD_PQ_RPTR, 0);
5087	WREG32(mmCP_HQD_PQ_WPTR, 0);
5088	vi_srbm_select(adev, 0, 0, 0, 0);
5089}
5090
5091static int gfx_v8_0_post_soft_reset(void *handle)
5092{
5093	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5094	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5095
5096	if ((!adev->gfx.grbm_soft_reset) &&
5097	    (!adev->gfx.srbm_soft_reset))
5098		return 0;
5099
5100	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5101	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5102
5103	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5104	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5105		gfx_v8_0_cp_gfx_resume(adev);
5106
5107	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5108	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5109	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5110	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5111		int i;
5112
5113		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5114			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5115
5116			gfx_v8_0_init_hqd(adev, ring);
 
 
 
 
5117		}
5118		gfx_v8_0_cp_compute_resume(adev);
 
5119	}
5120	gfx_v8_0_rlc_start(adev);
 
 
 
 
 
 
 
5121
5122	return 0;
5123}
5124
5125/**
5126 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5127 *
5128 * @adev: amdgpu_device pointer
5129 *
5130 * Fetches a GPU clock counter snapshot.
5131 * Returns the 64 bit clock counter snapshot.
5132 */
5133static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5134{
5135	uint64_t clock;
5136
5137	mutex_lock(&adev->gfx.gpu_clock_mutex);
5138	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5139	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5140		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5141	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5142	return clock;
5143}
5144
5145static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5146					  uint32_t vmid,
5147					  uint32_t gds_base, uint32_t gds_size,
5148					  uint32_t gws_base, uint32_t gws_size,
5149					  uint32_t oa_base, uint32_t oa_size)
5150{
5151	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5152	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5153
5154	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5155	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5156
5157	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5158	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5159
5160	/* GDS Base */
5161	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5162	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5163				WRITE_DATA_DST_SEL(0)));
5164	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5165	amdgpu_ring_write(ring, 0);
5166	amdgpu_ring_write(ring, gds_base);
5167
5168	/* GDS Size */
5169	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5170	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5171				WRITE_DATA_DST_SEL(0)));
5172	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5173	amdgpu_ring_write(ring, 0);
5174	amdgpu_ring_write(ring, gds_size);
5175
5176	/* GWS */
5177	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5178	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5179				WRITE_DATA_DST_SEL(0)));
5180	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5181	amdgpu_ring_write(ring, 0);
5182	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5183
5184	/* OA */
5185	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5186	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5187				WRITE_DATA_DST_SEL(0)));
5188	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5189	amdgpu_ring_write(ring, 0);
5190	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5191}
5192
5193static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5194{
5195	WREG32(mmSQ_IND_INDEX,
5196		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5197		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5198		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5199		(SQ_IND_INDEX__FORCE_READ_MASK));
5200	return RREG32(mmSQ_IND_DATA);
5201}
5202
5203static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5204			   uint32_t wave, uint32_t thread,
5205			   uint32_t regno, uint32_t num, uint32_t *out)
5206{
5207	WREG32(mmSQ_IND_INDEX,
5208		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5209		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5210		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5211		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5212		(SQ_IND_INDEX__FORCE_READ_MASK) |
5213		(SQ_IND_INDEX__AUTO_INCR_MASK));
5214	while (num--)
5215		*(out++) = RREG32(mmSQ_IND_DATA);
5216}
5217
5218static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5219{
5220	/* type 0 wave data */
5221	dst[(*no_fields)++] = 0;
5222	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5223	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5224	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5225	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5226	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5227	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5228	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5229	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5230	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5231	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5232	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5233	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5234	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5235	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5236	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5237	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5238	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5239	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
 
5240}
5241
5242static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5243				     uint32_t wave, uint32_t start,
5244				     uint32_t size, uint32_t *dst)
5245{
5246	wave_read_regs(
5247		adev, simd, wave, 0,
5248		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5249}
5250
5251
5252static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5253	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5254	.select_se_sh = &gfx_v8_0_select_se_sh,
5255	.read_wave_data = &gfx_v8_0_read_wave_data,
5256	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
 
5257};
5258
5259static int gfx_v8_0_early_init(void *handle)
5260{
5261	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5262
5263	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5264	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
 
5265	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5266	gfx_v8_0_set_ring_funcs(adev);
5267	gfx_v8_0_set_irq_funcs(adev);
5268	gfx_v8_0_set_gds_init(adev);
5269	gfx_v8_0_set_rlc_funcs(adev);
5270
5271	return 0;
5272}
5273
5274static int gfx_v8_0_late_init(void *handle)
5275{
5276	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5277	int r;
5278
5279	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5280	if (r)
5281		return r;
5282
5283	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5284	if (r)
5285		return r;
5286
5287	/* requires IBs so do in late init after IB pool is initialized */
5288	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5289	if (r)
5290		return r;
5291
5292	amdgpu_set_powergating_state(adev,
5293			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
 
 
 
 
 
 
 
 
 
 
 
5294
5295	return 0;
5296}
5297
5298static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5299						       bool enable)
5300{
5301	if ((adev->asic_type == CHIP_POLARIS11) ||
5302	    (adev->asic_type == CHIP_POLARIS12))
 
5303		/* Send msg to SMU via Powerplay */
5304		amdgpu_set_powergating_state(adev,
5305					     AMD_IP_BLOCK_TYPE_SMC,
5306					     enable ?
5307					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5308
5309	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5310}
5311
5312static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5313							bool enable)
5314{
5315	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5316}
5317
5318static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5319		bool enable)
5320{
5321	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5322}
5323
5324static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5325					  bool enable)
5326{
5327	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5328}
5329
5330static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5331						bool enable)
5332{
5333	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5334
5335	/* Read any GFX register to wake up GFX. */
5336	if (!enable)
5337		RREG32(mmDB_RENDER_CONTROL);
5338}
5339
5340static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5341					  bool enable)
5342{
5343	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5344		cz_enable_gfx_cg_power_gating(adev, true);
5345		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5346			cz_enable_gfx_pipeline_power_gating(adev, true);
5347	} else {
5348		cz_enable_gfx_cg_power_gating(adev, false);
5349		cz_enable_gfx_pipeline_power_gating(adev, false);
5350	}
5351}
5352
5353static int gfx_v8_0_set_powergating_state(void *handle,
5354					  enum amd_powergating_state state)
5355{
5356	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5357	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5358
 
 
 
 
 
 
 
 
5359	switch (adev->asic_type) {
5360	case CHIP_CARRIZO:
5361	case CHIP_STONEY:
5362
 
 
 
 
 
 
 
 
 
 
 
 
5363		cz_update_gfx_cg_power_gating(adev, enable);
5364
5365		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5366			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5367		else
5368			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5369
5370		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5371			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5372		else
5373			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5374		break;
5375	case CHIP_POLARIS11:
5376	case CHIP_POLARIS12:
 
5377		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5378			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5379		else
5380			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5381
5382		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5383			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5384		else
5385			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5386
5387		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5388			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5389		else
5390			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5391		break;
5392	default:
5393		break;
5394	}
 
 
 
 
 
 
 
5395
5396	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5397}
5398
5399static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5400				     uint32_t reg_addr, uint32_t cmd)
5401{
5402	uint32_t data;
5403
5404	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5405
5406	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5407	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5408
5409	data = RREG32(mmRLC_SERDES_WR_CTRL);
5410	if (adev->asic_type == CHIP_STONEY)
5411		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5412			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5413			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5414			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5415			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5416			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5417			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5418			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5419			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5420	else
5421		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5422			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5423			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5424			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5425			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5426			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5427			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5428			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5429			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5430			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5431			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5432	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5433		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5434		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5435		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5436
5437	WREG32(mmRLC_SERDES_WR_CTRL, data);
5438}
5439
5440#define MSG_ENTER_RLC_SAFE_MODE     1
5441#define MSG_EXIT_RLC_SAFE_MODE      0
5442#define RLC_GPR_REG2__REQ_MASK 0x00000001
5443#define RLC_GPR_REG2__REQ__SHIFT 0
5444#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5445#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5446
5447static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5448{
5449	u32 data = 0;
5450	unsigned i;
5451
5452	data = RREG32(mmRLC_CNTL);
5453	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5454		return;
5455
5456	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5457	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5458			       AMD_PG_SUPPORT_GFX_DMG))) {
5459		data |= RLC_GPR_REG2__REQ_MASK;
5460		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5461		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5462		WREG32(mmRLC_GPR_REG2, data);
5463
5464		for (i = 0; i < adev->usec_timeout; i++) {
5465			if ((RREG32(mmRLC_GPM_STAT) &
5466			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5467			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5468			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5469			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5470				break;
5471			udelay(1);
5472		}
5473
5474		for (i = 0; i < adev->usec_timeout; i++) {
5475			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5476				break;
5477			udelay(1);
5478		}
5479		adev->gfx.rlc.in_safe_mode = true;
5480	}
5481}
5482
5483static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5484{
5485	u32 data;
5486	unsigned i;
5487
5488	data = RREG32(mmRLC_CNTL);
5489	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5490		return;
 
 
5491
5492	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5493	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5494			       AMD_PG_SUPPORT_GFX_DMG))) {
5495		data |= RLC_GPR_REG2__REQ_MASK;
5496		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5497		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5498		WREG32(mmRLC_GPR_REG2, data);
5499		adev->gfx.rlc.in_safe_mode = false;
 
5500	}
5501
5502	for (i = 0; i < adev->usec_timeout; i++) {
5503		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5504			break;
5505		udelay(1);
5506	}
5507}
5508
5509static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5510{
5511	u32 data;
5512	unsigned i;
5513
5514	data = RREG32(mmRLC_CNTL);
5515	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5516		return;
 
5517
5518	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5519		data |= RLC_SAFE_MODE__CMD_MASK;
5520		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5521		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5522		WREG32(mmRLC_SAFE_MODE, data);
5523
5524		for (i = 0; i < adev->usec_timeout; i++) {
5525			if ((RREG32(mmRLC_GPM_STAT) &
5526			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5527			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5528			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5529			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5530				break;
5531			udelay(1);
5532		}
5533
5534		for (i = 0; i < adev->usec_timeout; i++) {
5535			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5536				break;
5537			udelay(1);
5538		}
5539		adev->gfx.rlc.in_safe_mode = true;
5540	}
5541}
5542
5543static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5544{
5545	u32 data = 0;
5546	unsigned i;
5547
5548	data = RREG32(mmRLC_CNTL);
5549	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5550		return;
5551
5552	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5553		if (adev->gfx.rlc.in_safe_mode) {
5554			data |= RLC_SAFE_MODE__CMD_MASK;
5555			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5556			WREG32(mmRLC_SAFE_MODE, data);
5557			adev->gfx.rlc.in_safe_mode = false;
5558		}
5559	}
5560
5561	for (i = 0; i < adev->usec_timeout; i++) {
5562		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5563			break;
5564		udelay(1);
5565	}
5566}
5567
5568static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5569{
5570	adev->gfx.rlc.in_safe_mode = true;
5571}
5572
5573static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5574{
5575	adev->gfx.rlc.in_safe_mode = false;
5576}
5577
5578static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5579	.enter_safe_mode = cz_enter_rlc_safe_mode,
5580	.exit_safe_mode = cz_exit_rlc_safe_mode
5581};
5582
5583static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5584	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5585	.exit_safe_mode = iceland_exit_rlc_safe_mode
5586};
5587
5588static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5589	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5590	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
 
 
 
 
 
5591};
5592
5593static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5594						      bool enable)
5595{
5596	uint32_t temp, data;
5597
5598	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5599
5600	/* It is disabled by HW by default */
5601	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5602		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5603			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5604				/* 1 - RLC memory Light sleep */
5605				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5606
5607			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5608				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5609		}
5610
5611		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5612		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5613		if (adev->flags & AMD_IS_APU)
5614			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5615				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5616				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5617		else
5618			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5619				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5620				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5621				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5622
5623		if (temp != data)
5624			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5625
5626		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5627		gfx_v8_0_wait_for_rlc_serdes(adev);
5628
5629		/* 5 - clear mgcg override */
5630		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5631
5632		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5633			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5634			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5635			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5636			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5637			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5638			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5639			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5640			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5641				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5642			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5643			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5644			if (temp != data)
5645				WREG32(mmCGTS_SM_CTRL_REG, data);
5646		}
5647		udelay(50);
5648
5649		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5650		gfx_v8_0_wait_for_rlc_serdes(adev);
5651	} else {
5652		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5653		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5654		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5655				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5656				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5657				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5658		if (temp != data)
5659			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5660
5661		/* 2 - disable MGLS in RLC */
5662		data = RREG32(mmRLC_MEM_SLP_CNTL);
5663		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5664			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5665			WREG32(mmRLC_MEM_SLP_CNTL, data);
5666		}
5667
5668		/* 3 - disable MGLS in CP */
5669		data = RREG32(mmCP_MEM_SLP_CNTL);
5670		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5671			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5672			WREG32(mmCP_MEM_SLP_CNTL, data);
5673		}
5674
5675		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5676		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5677		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5678				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5679		if (temp != data)
5680			WREG32(mmCGTS_SM_CTRL_REG, data);
5681
5682		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5683		gfx_v8_0_wait_for_rlc_serdes(adev);
5684
5685		/* 6 - set mgcg override */
5686		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5687
5688		udelay(50);
5689
5690		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5691		gfx_v8_0_wait_for_rlc_serdes(adev);
5692	}
5693
5694	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5695}
5696
5697static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5698						      bool enable)
5699{
5700	uint32_t temp, temp1, data, data1;
5701
5702	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5703
5704	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5705
5706	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5707		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5708		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5709		if (temp1 != data1)
5710			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5711
5712		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5713		gfx_v8_0_wait_for_rlc_serdes(adev);
5714
5715		/* 2 - clear cgcg override */
5716		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5717
5718		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5719		gfx_v8_0_wait_for_rlc_serdes(adev);
5720
5721		/* 3 - write cmd to set CGLS */
5722		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5723
5724		/* 4 - enable cgcg */
5725		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5726
5727		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5728			/* enable cgls*/
5729			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5730
5731			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5732			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5733
5734			if (temp1 != data1)
5735				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5736		} else {
5737			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5738		}
5739
5740		if (temp != data)
5741			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5742
5743		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5744		 * Cmp_busy/GFX_Idle interrupts
5745		 */
5746		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5747	} else {
5748		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5749		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5750
5751		/* TEST CGCG */
5752		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5753		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5754				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5755		if (temp1 != data1)
5756			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5757
5758		/* read gfx register to wake up cgcg */
5759		RREG32(mmCB_CGTT_SCLK_CTRL);
5760		RREG32(mmCB_CGTT_SCLK_CTRL);
5761		RREG32(mmCB_CGTT_SCLK_CTRL);
5762		RREG32(mmCB_CGTT_SCLK_CTRL);
5763
5764		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5765		gfx_v8_0_wait_for_rlc_serdes(adev);
5766
5767		/* write cmd to Set CGCG Overrride */
5768		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5769
5770		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5771		gfx_v8_0_wait_for_rlc_serdes(adev);
5772
5773		/* write cmd to Clear CGLS */
5774		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5775
5776		/* disable cgcg, cgls should be disabled too. */
5777		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5778			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5779		if (temp != data)
5780			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 
 
5781	}
5782
5783	gfx_v8_0_wait_for_rlc_serdes(adev);
5784
5785	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5786}
5787static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5788					    bool enable)
5789{
5790	if (enable) {
5791		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5792		 * ===  MGCG + MGLS + TS(CG/LS) ===
5793		 */
5794		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5795		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5796	} else {
5797		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5798		 * ===  CGCG + CGLS ===
5799		 */
5800		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5801		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5802	}
5803	return 0;
5804}
5805
5806static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5807					  enum amd_clockgating_state state)
5808{
5809	uint32_t msg_id, pp_state = 0;
5810	uint32_t pp_support_state = 0;
5811	void *pp_handle = adev->powerplay.pp_handle;
5812
5813	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5814		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5815			pp_support_state = PP_STATE_SUPPORT_LS;
5816			pp_state = PP_STATE_LS;
5817		}
5818		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5819			pp_support_state |= PP_STATE_SUPPORT_CG;
5820			pp_state |= PP_STATE_CG;
5821		}
5822		if (state == AMD_CG_STATE_UNGATE)
5823			pp_state = 0;
5824
5825		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5826				PP_BLOCK_GFX_CG,
5827				pp_support_state,
5828				pp_state);
5829		amd_set_clockgating_by_smu(pp_handle, msg_id);
5830	}
5831
5832	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5833		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5834			pp_support_state = PP_STATE_SUPPORT_LS;
5835			pp_state = PP_STATE_LS;
5836		}
5837
5838		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5839			pp_support_state |= PP_STATE_SUPPORT_CG;
5840			pp_state |= PP_STATE_CG;
5841		}
5842
5843		if (state == AMD_CG_STATE_UNGATE)
5844			pp_state = 0;
5845
5846		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5847				PP_BLOCK_GFX_MG,
5848				pp_support_state,
5849				pp_state);
5850		amd_set_clockgating_by_smu(pp_handle, msg_id);
5851	}
5852
5853	return 0;
5854}
5855
5856static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5857					  enum amd_clockgating_state state)
5858{
5859
5860	uint32_t msg_id, pp_state = 0;
5861	uint32_t pp_support_state = 0;
5862	void *pp_handle = adev->powerplay.pp_handle;
5863
5864	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5865		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5866			pp_support_state = PP_STATE_SUPPORT_LS;
5867			pp_state = PP_STATE_LS;
5868		}
5869		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5870			pp_support_state |= PP_STATE_SUPPORT_CG;
5871			pp_state |= PP_STATE_CG;
5872		}
5873		if (state == AMD_CG_STATE_UNGATE)
5874			pp_state = 0;
5875
5876		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5877				PP_BLOCK_GFX_CG,
5878				pp_support_state,
5879				pp_state);
5880		amd_set_clockgating_by_smu(pp_handle, msg_id);
5881	}
5882
5883	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5884		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5885			pp_support_state = PP_STATE_SUPPORT_LS;
5886			pp_state = PP_STATE_LS;
5887		}
5888		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5889			pp_support_state |= PP_STATE_SUPPORT_CG;
5890			pp_state |= PP_STATE_CG;
5891		}
5892		if (state == AMD_CG_STATE_UNGATE)
5893			pp_state = 0;
5894
5895		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5896				PP_BLOCK_GFX_3D,
5897				pp_support_state,
5898				pp_state);
5899		amd_set_clockgating_by_smu(pp_handle, msg_id);
5900	}
5901
5902	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5903		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5904			pp_support_state = PP_STATE_SUPPORT_LS;
5905			pp_state = PP_STATE_LS;
5906		}
5907
5908		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5909			pp_support_state |= PP_STATE_SUPPORT_CG;
5910			pp_state |= PP_STATE_CG;
5911		}
5912
5913		if (state == AMD_CG_STATE_UNGATE)
5914			pp_state = 0;
5915
5916		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5917				PP_BLOCK_GFX_MG,
5918				pp_support_state,
5919				pp_state);
5920		amd_set_clockgating_by_smu(pp_handle, msg_id);
5921	}
5922
5923	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5924		pp_support_state = PP_STATE_SUPPORT_LS;
5925
5926		if (state == AMD_CG_STATE_UNGATE)
5927			pp_state = 0;
5928		else
5929			pp_state = PP_STATE_LS;
5930
5931		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5932				PP_BLOCK_GFX_RLC,
5933				pp_support_state,
5934				pp_state);
5935		amd_set_clockgating_by_smu(pp_handle, msg_id);
5936	}
5937
5938	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5939		pp_support_state = PP_STATE_SUPPORT_LS;
5940
5941		if (state == AMD_CG_STATE_UNGATE)
5942			pp_state = 0;
5943		else
5944			pp_state = PP_STATE_LS;
5945		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5946			PP_BLOCK_GFX_CP,
5947			pp_support_state,
5948			pp_state);
5949		amd_set_clockgating_by_smu(pp_handle, msg_id);
5950	}
5951
5952	return 0;
5953}
5954
5955static int gfx_v8_0_set_clockgating_state(void *handle,
5956					  enum amd_clockgating_state state)
5957{
5958	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5959
 
 
 
5960	switch (adev->asic_type) {
5961	case CHIP_FIJI:
5962	case CHIP_CARRIZO:
5963	case CHIP_STONEY:
5964		gfx_v8_0_update_gfx_clock_gating(adev,
5965						 state == AMD_CG_STATE_GATE ? true : false);
5966		break;
5967	case CHIP_TONGA:
5968		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5969		break;
5970	case CHIP_POLARIS10:
5971	case CHIP_POLARIS11:
 
 
5972		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5973		break;
5974	default:
5975		break;
5976	}
5977	return 0;
5978}
5979
5980static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5981{
5982	return ring->adev->wb.wb[ring->rptr_offs];
5983}
5984
5985static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5986{
5987	struct amdgpu_device *adev = ring->adev;
5988
5989	if (ring->use_doorbell)
5990		/* XXX check if swapping is necessary on BE */
5991		return ring->adev->wb.wb[ring->wptr_offs];
5992	else
5993		return RREG32(mmCP_RB0_WPTR);
5994}
5995
5996static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5997{
5998	struct amdgpu_device *adev = ring->adev;
5999
6000	if (ring->use_doorbell) {
6001		/* XXX check if swapping is necessary on BE */
6002		adev->wb.wb[ring->wptr_offs] = ring->wptr;
6003		WDOORBELL32(ring->doorbell_index, ring->wptr);
6004	} else {
6005		WREG32(mmCP_RB0_WPTR, ring->wptr);
6006		(void)RREG32(mmCP_RB0_WPTR);
6007	}
6008}
6009
6010static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6011{
6012	u32 ref_and_mask, reg_mem_engine;
6013
6014	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
 
6015		switch (ring->me) {
6016		case 1:
6017			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6018			break;
6019		case 2:
6020			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6021			break;
6022		default:
6023			return;
6024		}
6025		reg_mem_engine = 0;
6026	} else {
6027		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6028		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6029	}
6030
6031	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6032	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6033				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6034				 reg_mem_engine));
6035	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6036	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6037	amdgpu_ring_write(ring, ref_and_mask);
6038	amdgpu_ring_write(ring, ref_and_mask);
6039	amdgpu_ring_write(ring, 0x20); /* poll interval */
6040}
6041
6042static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6043{
6044	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6045	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6046		EVENT_INDEX(4));
6047
6048	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6049	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6050		EVENT_INDEX(0));
6051}
6052
6053
6054static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6055{
6056	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6057	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6058				 WRITE_DATA_DST_SEL(0) |
6059				 WR_CONFIRM));
6060	amdgpu_ring_write(ring, mmHDP_DEBUG0);
6061	amdgpu_ring_write(ring, 0);
6062	amdgpu_ring_write(ring, 1);
6063
6064}
6065
6066static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6067				      struct amdgpu_ib *ib,
6068				      unsigned vm_id, bool ctx_switch)
 
6069{
 
6070	u32 header, control = 0;
6071
6072	if (ib->flags & AMDGPU_IB_FLAG_CE)
6073		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6074	else
6075		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6076
6077	control |= ib->length_dw | (vm_id << 24);
 
 
 
 
 
 
 
6078
6079	amdgpu_ring_write(ring, header);
6080	amdgpu_ring_write(ring,
6081#ifdef __BIG_ENDIAN
6082			  (2 << 0) |
6083#endif
6084			  (ib->gpu_addr & 0xFFFFFFFC));
6085	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6086	amdgpu_ring_write(ring, control);
6087}
6088
6089static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 
6090					  struct amdgpu_ib *ib,
6091					  unsigned vm_id, bool ctx_switch)
6092{
6093	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6094
6095	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6096	amdgpu_ring_write(ring,
6097#ifdef __BIG_ENDIAN
6098				(2 << 0) |
6099#endif
6100				(ib->gpu_addr & 0xFFFFFFFC));
6101	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6102	amdgpu_ring_write(ring, control);
6103}
6104
6105static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6106					 u64 seq, unsigned flags)
6107{
6108	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6109	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6110
6111	/* EVENT_WRITE_EOP - flush caches, send int */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6112	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6113	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6114				 EOP_TC_ACTION_EN |
6115				 EOP_TC_WB_ACTION_EN |
6116				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6117				 EVENT_INDEX(5)));
6118	amdgpu_ring_write(ring, addr & 0xfffffffc);
6119	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6120			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6121	amdgpu_ring_write(ring, lower_32_bits(seq));
6122	amdgpu_ring_write(ring, upper_32_bits(seq));
6123
6124}
6125
6126static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6127{
6128	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6129	uint32_t seq = ring->fence_drv.sync_seq;
6130	uint64_t addr = ring->fence_drv.gpu_addr;
6131
6132	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6133	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6134				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6135				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6136	amdgpu_ring_write(ring, addr & 0xfffffffc);
6137	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6138	amdgpu_ring_write(ring, seq);
6139	amdgpu_ring_write(ring, 0xffffffff);
6140	amdgpu_ring_write(ring, 4); /* poll interval */
6141}
6142
6143static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6144					unsigned vm_id, uint64_t pd_addr)
6145{
6146	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6147
6148	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6149	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6150				 WRITE_DATA_DST_SEL(0)) |
6151				 WR_CONFIRM);
6152	if (vm_id < 8) {
6153		amdgpu_ring_write(ring,
6154				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6155	} else {
6156		amdgpu_ring_write(ring,
6157				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6158	}
6159	amdgpu_ring_write(ring, 0);
6160	amdgpu_ring_write(ring, pd_addr >> 12);
6161
6162	/* bits 0-15 are the VM contexts0-15 */
6163	/* invalidate the cache */
6164	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6165	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6166				 WRITE_DATA_DST_SEL(0)));
6167	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6168	amdgpu_ring_write(ring, 0);
6169	amdgpu_ring_write(ring, 1 << vm_id);
6170
6171	/* wait for the invalidate to complete */
6172	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6173	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6174				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6175				 WAIT_REG_MEM_ENGINE(0))); /* me */
6176	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6177	amdgpu_ring_write(ring, 0);
6178	amdgpu_ring_write(ring, 0); /* ref */
6179	amdgpu_ring_write(ring, 0); /* mask */
6180	amdgpu_ring_write(ring, 0x20); /* poll interval */
6181
6182	/* compute doesn't have PFP */
6183	if (usepfp) {
6184		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6185		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6186		amdgpu_ring_write(ring, 0x0);
6187		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6188		amdgpu_ring_insert_nop(ring, 128);
6189	}
6190}
6191
6192static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6193{
6194	return ring->adev->wb.wb[ring->wptr_offs];
6195}
6196
6197static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6198{
6199	struct amdgpu_device *adev = ring->adev;
6200
6201	/* XXX check if swapping is necessary on BE */
6202	adev->wb.wb[ring->wptr_offs] = ring->wptr;
6203	WDOORBELL32(ring->doorbell_index, ring->wptr);
6204}
6205
6206static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6207					     u64 addr, u64 seq,
6208					     unsigned flags)
6209{
6210	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6211	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6212
6213	/* RELEASE_MEM - flush caches, send int */
6214	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6215	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6216				 EOP_TC_ACTION_EN |
6217				 EOP_TC_WB_ACTION_EN |
6218				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6219				 EVENT_INDEX(5)));
6220	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6221	amdgpu_ring_write(ring, addr & 0xfffffffc);
6222	amdgpu_ring_write(ring, upper_32_bits(addr));
6223	amdgpu_ring_write(ring, lower_32_bits(seq));
6224	amdgpu_ring_write(ring, upper_32_bits(seq));
6225}
6226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6227static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6228{
6229	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6230	amdgpu_ring_write(ring, 0);
6231}
6232
6233static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6234{
6235	uint32_t dw2 = 0;
6236
 
 
 
6237	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6238	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6239		gfx_v8_0_ring_emit_vgt_flush(ring);
6240		/* set load_global_config & load_global_uconfig */
6241		dw2 |= 0x8001;
6242		/* set load_cs_sh_regs */
6243		dw2 |= 0x01000000;
6244		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6245		dw2 |= 0x10002;
6246
6247		/* set load_ce_ram if preamble presented */
6248		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6249			dw2 |= 0x10000000;
6250	} else {
6251		/* still load_ce_ram if this is the first time preamble presented
6252		 * although there is no context switch happens.
6253		 */
6254		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6255			dw2 |= 0x10000000;
6256	}
6257
6258	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6259	amdgpu_ring_write(ring, dw2);
6260	amdgpu_ring_write(ring, 0);
6261}
6262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6263static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6264						 enum amdgpu_interrupt_state state)
6265{
6266	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6267		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6268}
6269
6270static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6271						     int me, int pipe,
6272						     enum amdgpu_interrupt_state state)
6273{
 
 
6274	/*
6275	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6276	 * handles the setting of interrupts for this specific pipe. All other
6277	 * pipes' interrupts are set by amdkfd.
6278	 */
6279
6280	if (me == 1) {
6281		switch (pipe) {
6282		case 0:
 
 
 
 
 
 
 
 
 
 
6283			break;
6284		default:
6285			DRM_DEBUG("invalid pipe %d\n", pipe);
6286			return;
6287		}
6288	} else {
6289		DRM_DEBUG("invalid me %d\n", me);
6290		return;
6291	}
6292
6293	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6294		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
6295}
6296
6297static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6298					     struct amdgpu_irq_src *source,
6299					     unsigned type,
6300					     enum amdgpu_interrupt_state state)
6301{
6302	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6303		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6304
6305	return 0;
6306}
6307
6308static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6309					      struct amdgpu_irq_src *source,
6310					      unsigned type,
6311					      enum amdgpu_interrupt_state state)
6312{
6313	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6314		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6315
6316	return 0;
6317}
6318
6319static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6320					    struct amdgpu_irq_src *src,
6321					    unsigned type,
6322					    enum amdgpu_interrupt_state state)
6323{
6324	switch (type) {
6325	case AMDGPU_CP_IRQ_GFX_EOP:
6326		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6327		break;
6328	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6329		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6330		break;
6331	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6332		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6333		break;
6334	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6335		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6336		break;
6337	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6338		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6339		break;
6340	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6341		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6342		break;
6343	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6344		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6345		break;
6346	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6347		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6348		break;
6349	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6350		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6351		break;
6352	default:
6353		break;
6354	}
6355	return 0;
6356}
6357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6358static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6359			    struct amdgpu_irq_src *source,
6360			    struct amdgpu_iv_entry *entry)
6361{
6362	int i;
6363	u8 me_id, pipe_id, queue_id;
6364	struct amdgpu_ring *ring;
6365
6366	DRM_DEBUG("IH: CP EOP\n");
6367	me_id = (entry->ring_id & 0x0c) >> 2;
6368	pipe_id = (entry->ring_id & 0x03) >> 0;
6369	queue_id = (entry->ring_id & 0x70) >> 4;
6370
6371	switch (me_id) {
6372	case 0:
6373		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6374		break;
6375	case 1:
6376	case 2:
6377		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6378			ring = &adev->gfx.compute_ring[i];
6379			/* Per-queue interrupt is supported for MEC starting from VI.
6380			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6381			  */
6382			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6383				amdgpu_fence_process(ring);
6384		}
6385		break;
6386	}
6387	return 0;
6388}
6389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6390static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6391				 struct amdgpu_irq_src *source,
6392				 struct amdgpu_iv_entry *entry)
6393{
6394	DRM_ERROR("Illegal register access in command stream\n");
6395	schedule_work(&adev->reset_work);
6396	return 0;
6397}
6398
6399static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6400				  struct amdgpu_irq_src *source,
6401				  struct amdgpu_iv_entry *entry)
6402{
6403	DRM_ERROR("Illegal instruction in command stream\n");
6404	schedule_work(&adev->reset_work);
 
 
 
 
 
 
 
 
6405	return 0;
6406}
6407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6408static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6409	.name = "gfx_v8_0",
6410	.early_init = gfx_v8_0_early_init,
6411	.late_init = gfx_v8_0_late_init,
6412	.sw_init = gfx_v8_0_sw_init,
6413	.sw_fini = gfx_v8_0_sw_fini,
6414	.hw_init = gfx_v8_0_hw_init,
6415	.hw_fini = gfx_v8_0_hw_fini,
6416	.suspend = gfx_v8_0_suspend,
6417	.resume = gfx_v8_0_resume,
6418	.is_idle = gfx_v8_0_is_idle,
6419	.wait_for_idle = gfx_v8_0_wait_for_idle,
6420	.check_soft_reset = gfx_v8_0_check_soft_reset,
6421	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6422	.soft_reset = gfx_v8_0_soft_reset,
6423	.post_soft_reset = gfx_v8_0_post_soft_reset,
6424	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6425	.set_powergating_state = gfx_v8_0_set_powergating_state,
 
6426};
6427
6428static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6429	.type = AMDGPU_RING_TYPE_GFX,
6430	.align_mask = 0xff,
6431	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 
6432	.get_rptr = gfx_v8_0_ring_get_rptr,
6433	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6434	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6435	.emit_frame_size =
6436		20 + /* gfx_v8_0_ring_emit_gds_switch */
6437		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6438		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6439		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6440		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6441		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6442		2 + /* gfx_v8_ring_emit_sb */
6443		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
 
 
 
 
 
 
 
 
 
 
6444	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6445	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6446	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6447	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6448	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6449	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6450	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6451	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6452	.test_ring = gfx_v8_0_ring_test_ring,
6453	.test_ib = gfx_v8_0_ring_test_ib,
6454	.insert_nop = amdgpu_ring_insert_nop,
6455	.pad_ib = amdgpu_ring_generic_pad_ib,
6456	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6457	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 
 
 
 
 
6458};
6459
6460static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6461	.type = AMDGPU_RING_TYPE_COMPUTE,
6462	.align_mask = 0xff,
6463	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 
6464	.get_rptr = gfx_v8_0_ring_get_rptr,
6465	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6466	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6467	.emit_frame_size =
6468		20 + /* gfx_v8_0_ring_emit_gds_switch */
6469		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6470		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6471		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6472		17 + /* gfx_v8_0_ring_emit_vm_flush */
6473		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6474	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
 
 
 
6475	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6476	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6477	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6478	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6479	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6480	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6481	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6482	.test_ring = gfx_v8_0_ring_test_ring,
6483	.test_ib = gfx_v8_0_ring_test_ib,
6484	.insert_nop = amdgpu_ring_insert_nop,
6485	.pad_ib = amdgpu_ring_generic_pad_ib,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6486};
6487
6488static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6489{
6490	int i;
6491
 
 
6492	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6493		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6494
6495	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6496		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6497}
6498
6499static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6500	.set = gfx_v8_0_set_eop_interrupt_state,
6501	.process = gfx_v8_0_eop_irq,
6502};
6503
6504static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6505	.set = gfx_v8_0_set_priv_reg_fault_state,
6506	.process = gfx_v8_0_priv_reg_irq,
6507};
6508
6509static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6510	.set = gfx_v8_0_set_priv_inst_fault_state,
6511	.process = gfx_v8_0_priv_inst_irq,
6512};
6513
 
 
 
 
 
 
 
 
 
 
6514static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6515{
6516	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6517	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6518
6519	adev->gfx.priv_reg_irq.num_types = 1;
6520	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6521
6522	adev->gfx.priv_inst_irq.num_types = 1;
6523	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
 
 
 
 
 
 
6524}
6525
6526static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6527{
6528	switch (adev->asic_type) {
6529	case CHIP_TOPAZ:
6530		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6531		break;
6532	case CHIP_STONEY:
6533	case CHIP_CARRIZO:
6534		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6535		break;
6536	default:
6537		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6538		break;
6539	}
6540}
6541
6542static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6543{
6544	/* init asci gds info */
6545	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6546	adev->gds.gws.total_size = 64;
6547	adev->gds.oa.total_size = 16;
6548
6549	if (adev->gds.mem.total_size == 64 * 1024) {
6550		adev->gds.mem.gfx_partition_size = 4096;
6551		adev->gds.mem.cs_partition_size = 4096;
6552
6553		adev->gds.gws.gfx_partition_size = 4;
6554		adev->gds.gws.cs_partition_size = 4;
6555
6556		adev->gds.oa.gfx_partition_size = 4;
6557		adev->gds.oa.cs_partition_size = 1;
6558	} else {
6559		adev->gds.mem.gfx_partition_size = 1024;
6560		adev->gds.mem.cs_partition_size = 1024;
6561
6562		adev->gds.gws.gfx_partition_size = 16;
6563		adev->gds.gws.cs_partition_size = 16;
6564
6565		adev->gds.oa.gfx_partition_size = 4;
6566		adev->gds.oa.cs_partition_size = 4;
6567	}
6568}
6569
6570static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6571						 u32 bitmap)
6572{
6573	u32 data;
6574
6575	if (!bitmap)
6576		return;
6577
6578	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6579	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6580
6581	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6582}
6583
6584static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6585{
6586	u32 data, mask;
6587
6588	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6589		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6590
6591	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6592
6593	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6594}
6595
6596static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6597{
6598	int i, j, k, counter, active_cu_number = 0;
6599	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6600	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6601	unsigned disable_masks[4 * 2];
 
6602
6603	memset(cu_info, 0, sizeof(*cu_info));
6604
 
 
 
 
 
6605	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6606
6607	mutex_lock(&adev->grbm_idx_mutex);
6608	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6609		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6610			mask = 1;
6611			ao_bitmap = 0;
6612			counter = 0;
6613			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6614			if (i < 4 && j < 2)
6615				gfx_v8_0_set_user_cu_inactive_bitmap(
6616					adev, disable_masks[i * 2 + j]);
6617			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6618			cu_info->bitmap[i][j] = bitmap;
6619
6620			for (k = 0; k < 16; k ++) {
6621				if (bitmap & mask) {
6622					if (counter < 2)
6623						ao_bitmap |= mask;
6624					counter ++;
6625				}
6626				mask <<= 1;
6627			}
6628			active_cu_number += counter;
6629			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 
 
6630		}
6631	}
6632	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6633	mutex_unlock(&adev->grbm_idx_mutex);
6634
6635	cu_info->number = active_cu_number;
6636	cu_info->ao_cu_mask = ao_cu_mask;
 
 
 
 
 
6637}
6638
6639const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6640{
6641	.type = AMD_IP_BLOCK_TYPE_GFX,
6642	.major = 8,
6643	.minor = 0,
6644	.rev = 0,
6645	.funcs = &gfx_v8_0_ip_funcs,
6646};
6647
6648const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6649{
6650	.type = AMD_IP_BLOCK_TYPE_GFX,
6651	.major = 8,
6652	.minor = 1,
6653	.rev = 0,
6654	.funcs = &gfx_v8_0_ip_funcs,
6655};