Linux Audio

Check our new training course

Loading...
v4.10.11
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23#include <linux/firmware.h>
  24#include "drmP.h"
  25#include "amdgpu.h"
  26#include "amdgpu_gfx.h"
  27#include "vi.h"
  28#include "vi_structs.h"
  29#include "vid.h"
  30#include "amdgpu_ucode.h"
  31#include "amdgpu_atombios.h"
  32#include "atombios_i2c.h"
  33#include "clearstate_vi.h"
  34
  35#include "gmc/gmc_8_2_d.h"
  36#include "gmc/gmc_8_2_sh_mask.h"
  37
  38#include "oss/oss_3_0_d.h"
  39#include "oss/oss_3_0_sh_mask.h"
  40
  41#include "bif/bif_5_0_d.h"
  42#include "bif/bif_5_0_sh_mask.h"
  43
  44#include "gca/gfx_8_0_d.h"
  45#include "gca/gfx_8_0_enum.h"
  46#include "gca/gfx_8_0_sh_mask.h"
  47#include "gca/gfx_8_0_enum.h"
  48
  49#include "dce/dce_10_0_d.h"
  50#include "dce/dce_10_0_sh_mask.h"
  51
  52#include "smu/smu_7_1_3_d.h"
  53
  /*
   * Ring counts (per the macro names) and per-ASIC GB_ADDR_CONFIG values.
   * The TOPAZ/CARRIZO, POLARIS11 and TONGA values equal the mmGB_ADDR_CONFIG
   * entries of the iceland/cz, polaris11 and tonga *_golden_common_all
   * tables in this file.
   */
  54#define GFX8_NUM_GFX_RINGS     1
  55#define GFX8_NUM_COMPUTE_RINGS 8
  56
  57#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  58#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  59#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  60#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  61
  /*
   * Field placement helpers: each macro shifts (x) left to the bit position
   * of the named GB_TILE_MODE0 / GB_MACROTILE_MODE0 field.  No masking is
   * applied, so (x) must already fit within the field.
   */
  62#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  63#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  64#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  65#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  66#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  67#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  68#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  69#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  70#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  71
  /*
   * Single-bit masks (bits 0 through 5) named after RLC_CGTT_MGCG_OVERRIDE.
   * NOTE(review): defined locally, presumably because the included register
   * headers do not provide them - confirm.
   */
  72#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  73#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  74#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  75#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  76#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  77#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  78
  79/* BPM SERDES command values: 1 to set, 0 to clear (per the macro names) */
  80#define SET_BPM_SERDES_CMD    1
  81#define CLE_BPM_SERDES_CMD    0
  82
  83/* BPM register addresses; enumerators take consecutive values starting at 0 */
  84enum {
  85	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  86	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  87	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  88	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  89	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  90	BPM_REG_FGCG_MAX            /* one past the last address above (value 5) */
  91};
  92
  /*
   * NOTE(review): no use is visible in this part of the file; presumably a
   * length belonging to an RLC register-list format - confirm at the use site.
   */
  93#define RLC_FormatDirectRegListLength        14
  94
  /*
   * Firmware images declared through MODULE_FIRMWARE(), grouped per ASIC
   * (ce, pfp, me, mec, optional mec2, rlc).  The Stoney and Topaz groups
   * list no mec2 image.
   */
  95MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
  96MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
  97MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
  98MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
  99MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 100MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 101
 102MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 103MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 104MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 105MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 106MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 109MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 111MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 112MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 113MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 114
 115MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 116MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 117MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 118MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 119MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 122MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 124MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 125MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 126MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 127
 128MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 129MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 130MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 131MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 132MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 133MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 134
 135MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 141
 142MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 143MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 144MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 145MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 146MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 147MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 148
 /*
  * GDS register offsets, one row per VMID (0 through 15).  Each row lists the
  * VMID's BASE, SIZE, GWS and OA registers in that order; the matching field
  * names of struct amdgpu_gds_reg_offset are declared outside this file.
  */
 149static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 150{
 151	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 152	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 153	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 154	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 155	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 156	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 157	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 158	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 159	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 160	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 161	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 162	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 163	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 164	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 165	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 166	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 167};
 168
 /*
  * Tonga golden settings.  gfx_v8_0_init_golden_registers() passes this table
  * to amdgpu_program_register_sequence() for CHIP_TONGA, second of three.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm
  * against amdgpu_program_register_sequence().
  */
 169static const u32 golden_settings_tonga_a11[] =
 170{
 171	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 172	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 173	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 174	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 175	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 176	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 177	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 178	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 179	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 180	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 181	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 182	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 183	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 184	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 185	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 186	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 187};
 188
 /*
  * Tonga common settings.  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_TONGA, last of three tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 189static const u32 tonga_golden_common_all[] =
 190{
 191	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 192	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 193	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 194	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 195	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 196	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 197	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 198	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 199};
 200
 /*
  * Tonga MGCG/CGCG init table (clock-gating related registers, per the
  * register names).  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_TONGA, first of three tables.
  * Carries per-CU CGTS entries for CU0 through CU7.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 201static const u32 tonga_mgcg_cgcg_init[] =
 202{
 203	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 204	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 205	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 206	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 207	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 208	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 209	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 210	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 211	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 212	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 213	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 214	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 215	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 216	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 217	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 218	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 219	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 220	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 221	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 222	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 223	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 224	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 225	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 226	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 227	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 228	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 229	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 230	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 231	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 232	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 233	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 234	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 235	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 236	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 237	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 238	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 239	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 240	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 241	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 242	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 243	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 244	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 245	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 246	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 247	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 248	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 249	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 250	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 251	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 252	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 253	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 254	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 255	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 256	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 257	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 258	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 259	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 260	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 261	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 262	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 263	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 264	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 265	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 266	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 267	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 268	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 269	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 270	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 271	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 272	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 273	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 274	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 275	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 276	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 277	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 278};
 279
 /*
  * Polaris11 golden settings.  gfx_v8_0_init_golden_registers() passes this
  * table to amdgpu_program_register_sequence() for both CHIP_POLARIS11 and
  * CHIP_POLARIS12, first of two tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 280static const u32 golden_settings_polaris11_a11[] =
 281{
 282	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 283	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 284	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 285	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 286	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 287	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 288	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 289	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 290	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 291	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 292	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 293	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 294	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 295	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 296	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 297	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 298	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 299};
 300
 /*
  * Polaris11 common settings.  Passed to amdgpu_program_register_sequence()
  * by gfx_v8_0_init_golden_registers() for CHIP_POLARIS11 and
  * CHIP_POLARIS12, second of two tables.  Unlike the other *_golden_common_all
  * tables in this file it has no PA_SC_RASTER_CONFIG entries.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 301static const u32 polaris11_golden_common_all[] =
 302{
 303	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 304	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 305	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 306	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 307	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 308	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 309};
 310
 /*
  * Polaris10 golden settings.  gfx_v8_0_init_golden_registers() passes this
  * table to amdgpu_program_register_sequence() for CHIP_POLARIS10, first of
  * two tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 311static const u32 golden_settings_polaris10_a11[] =
 312{
 313	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 314	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 315	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 316	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 317	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 318	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 319	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 320	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 321	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 322	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 323	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 324	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 325	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 326	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 327	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 328	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 329	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 330};
 331
 /*
  * Polaris10 common settings.  Passed to amdgpu_program_register_sequence()
  * by gfx_v8_0_init_golden_registers() for CHIP_POLARIS10, second of two
  * tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 332static const u32 polaris10_golden_common_all[] =
 333{
 334	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 335	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 336	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 337	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 338	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 339	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 340	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 341	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 342};
 343
 /*
  * Fiji common settings.  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_FIJI, last of three tables.
  * mmGRBM_GFX_INDEX appears twice, the second time just before the
  * mmSPI_CONFIG_CNTL_1 entry.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 344static const u32 fiji_golden_common_all[] =
 345{
 346	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 347	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 348	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 349	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 350	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 351	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 352	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 353	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 354	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 355	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 356};
 357
 /*
  * Fiji golden settings.  gfx_v8_0_init_golden_registers() passes this table
  * to amdgpu_program_register_sequence() for CHIP_FIJI, second of three.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 358static const u32 golden_settings_fiji_a10[] =
 359{
 360	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 361	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 362	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 363	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 364	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 365	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 366	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 367	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 368	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 369	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 370	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 371};
 372
 /*
  * Fiji MGCG/CGCG init table (clock-gating related registers, per the
  * register names).  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_FIJI, first of three tables.
  * Unlike the Tonga, Iceland and Carrizo variants it has no per-CU CGTS
  * entries.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 373static const u32 fiji_mgcg_cgcg_init[] =
 374{
 375	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 376	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 377	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 378	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 379	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 380	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 381	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 382	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 383	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 384	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 385	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 386	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 387	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 388	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 389	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 390	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 391	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 392	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 393	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 394	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 395	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 396	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 397	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 398	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 399	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 400	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 401	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 402	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 403	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 404	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 406	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 407	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 408	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 409	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 410};
 411
 /*
  * Iceland golden settings.  gfx_v8_0_init_golden_registers() passes this
  * table to amdgpu_program_register_sequence() for CHIP_TOPAZ, second of
  * three tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 412static const u32 golden_settings_iceland_a11[] =
 413{
 414	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 415	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 416	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 417	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 418	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 419	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 420	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 421	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 422	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 423	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 424	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 425	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 426	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 427	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 428	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 429	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 430};
 431
 /*
  * Iceland common settings.  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ, last of three tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 432static const u32 iceland_golden_common_all[] =
 433{
 434	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 435	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 436	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 437	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 438	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 439	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 440	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 441	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 442};
 443
 /*
  * Iceland MGCG/CGCG init table (clock-gating related registers, per the
  * register names).  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ, first of three tables.
  * Carries per-CU CGTS entries for CU0 through CU5 only and, unlike the
  * Tonga, Fiji and Carrizo variants, has no mmCP_MEM_SLP_CNTL entry.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 444static const u32 iceland_mgcg_cgcg_init[] =
 445{
 446	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 447	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 448	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 451	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 452	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 453	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 454	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 455	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 456	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 457	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 459	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 460	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 461	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 462	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 463	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 464	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 465	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 466	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 467	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 468	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 469	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 470	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 471	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 472	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 473	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 474	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 475	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 476	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 477	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 478	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 479	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 480	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 481	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 482	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 483	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 484	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 485	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 486	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 487	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 488	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 489	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 490	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 491	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 492	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 493	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 494	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 495	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 496	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 497	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 498	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 499	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 500	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 501	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 502	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 503	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 504	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 505	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 506	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 507	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 508	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 509	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 510};
 511
 /*
  * Carrizo golden settings.  gfx_v8_0_init_golden_registers() passes this
  * table to amdgpu_program_register_sequence() for CHIP_CARRIZO, second of
  * three tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 512static const u32 cz_golden_settings_a11[] =
 513{
 514	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 515	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 516	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 517	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 518	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 519	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 520	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 521	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 522	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 523	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 524	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 525	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 526};
 527
 /*
  * Carrizo common settings.  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_CARRIZO, last of three tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 528static const u32 cz_golden_common_all[] =
 529{
 530	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 531	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 532	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 533	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 534	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 535	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 536	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 537	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 538};
 539
 /*
  * Carrizo MGCG/CGCG init table (clock-gating related registers, per the
  * register names).  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_CARRIZO, first of three tables.
  * Carries per-CU CGTS entries for CU0 through CU7.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 540static const u32 cz_mgcg_cgcg_init[] =
 541{
 542	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 543	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 544	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 545	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 546	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 547	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 548	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 549	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 550	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 551	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 552	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 553	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 554	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 555	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 556	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 557	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 558	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 559	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 560	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 561	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 562	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 563	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 564	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 565	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 566	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 567	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 568	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 569	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 570	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 571	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 572	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 573	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 574	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 575	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 576	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 577	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 578	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 579	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 580	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 581	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 582	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 583	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 584	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 585	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 586	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 587	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 588	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 589	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 590	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 591	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 592	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 593	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 594	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 595	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 596	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 597	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 598	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 599	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 600	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 601	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 602	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 603	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 604	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 605	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 606	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 607	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 608	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 609	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 610	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 611	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 612	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 613	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 614	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 615	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 616	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 617};
 618
 /*
  * Stoney golden settings.  gfx_v8_0_init_golden_registers() passes this
  * table to amdgpu_program_register_sequence() for CHIP_STONEY, second of
  * three tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 619static const u32 stoney_golden_settings_a11[] =
 620{
 621	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 622	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 623	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 624	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 625	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 626	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 627	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 628	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 629	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 630	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 631};
 632
 /*
  * Stoney common settings.  Passed to amdgpu_program_register_sequence() by
  * gfx_v8_0_init_golden_registers() for CHIP_STONEY, last of three tables.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 633static const u32 stoney_golden_common_all[] =
 634{
 635	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 636	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 637	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 638	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 639	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 640	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 641	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 642	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 643};
 644
 /*
  * Stoney MGCG/CGCG init table.  Passed to amdgpu_program_register_sequence()
  * by gfx_v8_0_init_golden_registers() for CHIP_STONEY, first of three
  * tables.  Much shorter than the other *_mgcg_cgcg_init tables in this
  * file: five rows, no CGTT or per-CU entries.
  * Rows hold three u32 values; presumably (register, mask, value) - confirm.
  */
 645static const u32 stoney_mgcg_cgcg_init[] =
 646{
 647	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 648	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 649	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 650	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 651	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,

 652};
 653
 /*
  * Forward declarations of file-local (static) helpers; their definitions
  * are not in this part of the file.
  */
 654static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 655static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 656static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 657static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 658static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 659static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 660
 661static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 662{
 663	switch (adev->asic_type) {
 664	case CHIP_TOPAZ:
 665		amdgpu_program_register_sequence(adev,
 666						 iceland_mgcg_cgcg_init,
 667						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
 668		amdgpu_program_register_sequence(adev,
 669						 golden_settings_iceland_a11,
 670						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
 671		amdgpu_program_register_sequence(adev,
 672						 iceland_golden_common_all,
 673						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
 674		break;
 675	case CHIP_FIJI:
 676		amdgpu_program_register_sequence(adev,
 677						 fiji_mgcg_cgcg_init,
 678						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
 679		amdgpu_program_register_sequence(adev,
 680						 golden_settings_fiji_a10,
 681						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
 682		amdgpu_program_register_sequence(adev,
 683						 fiji_golden_common_all,
 684						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
 685		break;
 686
 687	case CHIP_TONGA:
 688		amdgpu_program_register_sequence(adev,
 689						 tonga_mgcg_cgcg_init,
 690						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
 691		amdgpu_program_register_sequence(adev,
 692						 golden_settings_tonga_a11,
 693						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 694		amdgpu_program_register_sequence(adev,
 695						 tonga_golden_common_all,
 696						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
 697		break;
 698	case CHIP_POLARIS11:
 699	case CHIP_POLARIS12:
 700		amdgpu_program_register_sequence(adev,
 701						 golden_settings_polaris11_a11,
 702						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
 703		amdgpu_program_register_sequence(adev,
 704						 polaris11_golden_common_all,
 705						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
 706		break;
 707	case CHIP_POLARIS10:
 708		amdgpu_program_register_sequence(adev,
 709						 golden_settings_polaris10_a11,
 710						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
 711		amdgpu_program_register_sequence(adev,
 712						 polaris10_golden_common_all,
 713						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
 714		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 715		if (adev->pdev->revision == 0xc7 &&
 716		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 717		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 718		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
 719			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 720			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 721		}
 722		break;
 723	case CHIP_CARRIZO:
 724		amdgpu_program_register_sequence(adev,
 725						 cz_mgcg_cgcg_init,
 726						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
 727		amdgpu_program_register_sequence(adev,
 728						 cz_golden_settings_a11,
 729						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
 730		amdgpu_program_register_sequence(adev,
 731						 cz_golden_common_all,
 732						 (const u32)ARRAY_SIZE(cz_golden_common_all));
 733		break;
 734	case CHIP_STONEY:
 735		amdgpu_program_register_sequence(adev,
 736						 stoney_mgcg_cgcg_init,
 737						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
 738		amdgpu_program_register_sequence(adev,
 739						 stoney_golden_settings_a11,
 740						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
 741		amdgpu_program_register_sequence(adev,
 742						 stoney_golden_common_all,
 743						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
 744		break;
 745	default:
 746		break;
 747	}
 748}
 749
 750static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 751{
 752	int i;
 753
 754	adev->gfx.scratch.num_reg = 7;
 755	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 756	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
 757		adev->gfx.scratch.free[i] = true;
 758		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
 759	}
 760}
 761
 762static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 763{
 764	struct amdgpu_device *adev = ring->adev;
 765	uint32_t scratch;
 766	uint32_t tmp = 0;
 767	unsigned i;
 768	int r;
 769
 770	r = amdgpu_gfx_scratch_get(adev, &scratch);
 771	if (r) {
 772		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 773		return r;
 774	}
 775	WREG32(scratch, 0xCAFEDEAD);
 776	r = amdgpu_ring_alloc(ring, 3);
 777	if (r) {
 778		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 779			  ring->idx, r);
 780		amdgpu_gfx_scratch_free(adev, scratch);
 781		return r;
 782	}
 783	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 784	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 785	amdgpu_ring_write(ring, 0xDEADBEEF);
 786	amdgpu_ring_commit(ring);
 787
 788	for (i = 0; i < adev->usec_timeout; i++) {
 789		tmp = RREG32(scratch);
 790		if (tmp == 0xDEADBEEF)
 791			break;
 792		DRM_UDELAY(1);
 793	}
 794	if (i < adev->usec_timeout) {
 795		DRM_INFO("ring test on %d succeeded in %d usecs\n",
 796			 ring->idx, i);
 797	} else {
 798		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 799			  ring->idx, scratch, tmp);
 800		r = -EINVAL;
 801	}
 802	amdgpu_gfx_scratch_free(adev, scratch);
 803	return r;
 804}
 805
 806static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 807{
 808	struct amdgpu_device *adev = ring->adev;
 809	struct amdgpu_ib ib;
 810	struct dma_fence *f = NULL;
 811	uint32_t scratch;
 812	uint32_t tmp = 0;
 813	long r;
 
 814
 815	r = amdgpu_gfx_scratch_get(adev, &scratch);
 816	if (r) {
 817		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
 818		return r;
 819	}
 820	WREG32(scratch, 0xCAFEDEAD);
 821	memset(&ib, 0, sizeof(ib));
 822	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 823	if (r) {
 824		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 825		goto err1;
 826	}
 827	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
 828	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
 829	ib.ptr[2] = 0xDEADBEEF;
 830	ib.length_dw = 3;
 831
 832	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 833	if (r)
 834		goto err2;
 835
 836	r = dma_fence_wait_timeout(f, false, timeout);
 837	if (r == 0) {
 838		DRM_ERROR("amdgpu: IB test timed out.\n");
 839		r = -ETIMEDOUT;
 840		goto err2;
 841	} else if (r < 0) {
 842		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 843		goto err2;
 844	}
 845	tmp = RREG32(scratch);
 846	if (tmp == 0xDEADBEEF) {
 847		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
 848		r = 0;
 
 
 
 
 
 
 849	} else {
 850		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 851			  scratch, tmp);
 852		r = -EINVAL;
 853	}
 854err2:
 
 855	amdgpu_ib_free(adev, &ib, NULL);
 856	dma_fence_put(f);
 857err1:
 858	amdgpu_gfx_scratch_free(adev, scratch);
 859	return r;
 860}
 861
 862
 863static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
 864	release_firmware(adev->gfx.pfp_fw);
 865	adev->gfx.pfp_fw = NULL;
 866	release_firmware(adev->gfx.me_fw);
 867	adev->gfx.me_fw = NULL;
 868	release_firmware(adev->gfx.ce_fw);
 869	adev->gfx.ce_fw = NULL;
 870	release_firmware(adev->gfx.rlc_fw);
 871	adev->gfx.rlc_fw = NULL;
 872	release_firmware(adev->gfx.mec_fw);
 873	adev->gfx.mec_fw = NULL;
 874	if ((adev->asic_type != CHIP_STONEY) &&
 875	    (adev->asic_type != CHIP_TOPAZ))
 876		release_firmware(adev->gfx.mec2_fw);
 877	adev->gfx.mec2_fw = NULL;
 878
 879	kfree(adev->gfx.rlc.register_list_format);
 880}
 881
 882static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 883{
 884	const char *chip_name;
 885	char fw_name[30];
 886	int err;
 887	struct amdgpu_firmware_info *info = NULL;
 888	const struct common_firmware_header *header = NULL;
 889	const struct gfx_firmware_header_v1_0 *cp_hdr;
 890	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 891	unsigned int *tmp = NULL, i;
 892
 893	DRM_DEBUG("\n");
 894
 895	switch (adev->asic_type) {
 896	case CHIP_TOPAZ:
 897		chip_name = "topaz";
 898		break;
 899	case CHIP_TONGA:
 900		chip_name = "tonga";
 901		break;
 902	case CHIP_CARRIZO:
 903		chip_name = "carrizo";
 904		break;
 905	case CHIP_FIJI:
 906		chip_name = "fiji";
 907		break;
 908	case CHIP_POLARIS11:
 909		chip_name = "polaris11";
 910		break;
 911	case CHIP_POLARIS10:
 912		chip_name = "polaris10";
 913		break;
 914	case CHIP_POLARIS12:
 915		chip_name = "polaris12";
 916		break;
 917	case CHIP_STONEY:
 918		chip_name = "stoney";
 919		break;
 920	default:
 921		BUG();
 922	}
 923
 924	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 925	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 926	if (err)
 927		goto out;
 928	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 929	if (err)
 930		goto out;
 931	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 932	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 933	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 934
 935	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 936	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 937	if (err)
 938		goto out;
 939	err = amdgpu_ucode_validate(adev->gfx.me_fw);
 940	if (err)
 941		goto out;
 942	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 943	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 944	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 945
 946	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 947	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 948	if (err)
 949		goto out;
 950	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 951	if (err)
 952		goto out;
 953	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 954	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 955	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 956
 957	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 958	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 959	if (err)
 960		goto out;
 961	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 962	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 963	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 964	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 965
 966	adev->gfx.rlc.save_and_restore_offset =
 967			le32_to_cpu(rlc_hdr->save_and_restore_offset);
 968	adev->gfx.rlc.clear_state_descriptor_offset =
 969			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
 970	adev->gfx.rlc.avail_scratch_ram_locations =
 971			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
 972	adev->gfx.rlc.reg_restore_list_size =
 973			le32_to_cpu(rlc_hdr->reg_restore_list_size);
 974	adev->gfx.rlc.reg_list_format_start =
 975			le32_to_cpu(rlc_hdr->reg_list_format_start);
 976	adev->gfx.rlc.reg_list_format_separate_start =
 977			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
 978	adev->gfx.rlc.starting_offsets_start =
 979			le32_to_cpu(rlc_hdr->starting_offsets_start);
 980	adev->gfx.rlc.reg_list_format_size_bytes =
 981			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
 982	adev->gfx.rlc.reg_list_size_bytes =
 983			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
 984
 985	adev->gfx.rlc.register_list_format =
 986			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
 987					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
 988
 989	if (!adev->gfx.rlc.register_list_format) {
 990		err = -ENOMEM;
 991		goto out;
 992	}
 993
 994	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 995			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
 996	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
 997		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
 998
 999	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1000
1001	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1002			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1003	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1004		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1005
1006	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1007	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1008	if (err)
1009		goto out;
1010	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1011	if (err)
1012		goto out;
1013	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1014	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1016
1017	if ((adev->asic_type != CHIP_STONEY) &&
1018	    (adev->asic_type != CHIP_TOPAZ)) {
1019		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1020		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1021		if (!err) {
1022			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1023			if (err)
1024				goto out;
1025			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1026				adev->gfx.mec2_fw->data;
1027			adev->gfx.mec2_fw_version =
1028				le32_to_cpu(cp_hdr->header.ucode_version);
1029			adev->gfx.mec2_feature_version =
1030				le32_to_cpu(cp_hdr->ucode_feature_version);
1031		} else {
1032			err = 0;
1033			adev->gfx.mec2_fw = NULL;
1034		}
1035	}
1036
1037	if (adev->firmware.smu_load) {
1038		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1039		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1040		info->fw = adev->gfx.pfp_fw;
1041		header = (const struct common_firmware_header *)info->fw->data;
1042		adev->firmware.fw_size +=
1043			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1044
1045		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1046		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1047		info->fw = adev->gfx.me_fw;
1048		header = (const struct common_firmware_header *)info->fw->data;
1049		adev->firmware.fw_size +=
1050			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1051
1052		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1053		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1054		info->fw = adev->gfx.ce_fw;
1055		header = (const struct common_firmware_header *)info->fw->data;
1056		adev->firmware.fw_size +=
1057			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1058
1059		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1060		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1061		info->fw = adev->gfx.rlc_fw;
1062		header = (const struct common_firmware_header *)info->fw->data;
1063		adev->firmware.fw_size +=
1064			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065
1066		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1067		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1068		info->fw = adev->gfx.mec_fw;
1069		header = (const struct common_firmware_header *)info->fw->data;
1070		adev->firmware.fw_size +=
1071			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1072
1073		/* we need account JT in */
1074		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075		adev->firmware.fw_size +=
1076			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1077
1078		if (amdgpu_sriov_vf(adev)) {
1079			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1080			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1081			info->fw = adev->gfx.mec_fw;
1082			adev->firmware.fw_size +=
1083				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1084		}
1085
1086		if (adev->gfx.mec2_fw) {
1087			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1088			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1089			info->fw = adev->gfx.mec2_fw;
1090			header = (const struct common_firmware_header *)info->fw->data;
1091			adev->firmware.fw_size +=
1092				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1093		}
1094
1095	}
1096
1097out:
1098	if (err) {
1099		dev_err(adev->dev,
1100			"gfx8: Failed to load firmware \"%s\"\n",
1101			fw_name);
1102		release_firmware(adev->gfx.pfp_fw);
1103		adev->gfx.pfp_fw = NULL;
1104		release_firmware(adev->gfx.me_fw);
1105		adev->gfx.me_fw = NULL;
1106		release_firmware(adev->gfx.ce_fw);
1107		adev->gfx.ce_fw = NULL;
1108		release_firmware(adev->gfx.rlc_fw);
1109		adev->gfx.rlc_fw = NULL;
1110		release_firmware(adev->gfx.mec_fw);
1111		adev->gfx.mec_fw = NULL;
1112		release_firmware(adev->gfx.mec2_fw);
1113		adev->gfx.mec2_fw = NULL;
1114	}
1115	return err;
1116}
1117
1118static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1119				    volatile u32 *buffer)
1120{
1121	u32 count = 0, i;
1122	const struct cs_section_def *sect = NULL;
1123	const struct cs_extent_def *ext = NULL;
1124
1125	if (adev->gfx.rlc.cs_data == NULL)
1126		return;
1127	if (buffer == NULL)
1128		return;
1129
1130	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1131	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1132
1133	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1134	buffer[count++] = cpu_to_le32(0x80000000);
1135	buffer[count++] = cpu_to_le32(0x80000000);
1136
1137	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1138		for (ext = sect->section; ext->extent != NULL; ++ext) {
1139			if (sect->id == SECT_CONTEXT) {
1140				buffer[count++] =
1141					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1142				buffer[count++] = cpu_to_le32(ext->reg_index -
1143						PACKET3_SET_CONTEXT_REG_START);
1144				for (i = 0; i < ext->reg_count; i++)
1145					buffer[count++] = cpu_to_le32(ext->extent[i]);
1146			} else {
1147				return;
1148			}
1149		}
1150	}
1151
1152	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1153	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1154			PACKET3_SET_CONTEXT_REG_START);
1155	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1156	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1157
1158	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1159	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1160
1161	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1162	buffer[count++] = cpu_to_le32(0);
1163}
1164
1165static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1166{
1167	const __le32 *fw_data;
1168	volatile u32 *dst_ptr;
1169	int me, i, max_me = 4;
1170	u32 bo_offset = 0;
1171	u32 table_offset, table_size;
1172
1173	if (adev->asic_type == CHIP_CARRIZO)
1174		max_me = 5;
1175
1176	/* write the cp table buffer */
1177	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1178	for (me = 0; me < max_me; me++) {
1179		if (me == 0) {
1180			const struct gfx_firmware_header_v1_0 *hdr =
1181				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1182			fw_data = (const __le32 *)
1183				(adev->gfx.ce_fw->data +
1184				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1185			table_offset = le32_to_cpu(hdr->jt_offset);
1186			table_size = le32_to_cpu(hdr->jt_size);
1187		} else if (me == 1) {
1188			const struct gfx_firmware_header_v1_0 *hdr =
1189				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1190			fw_data = (const __le32 *)
1191				(adev->gfx.pfp_fw->data +
1192				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1193			table_offset = le32_to_cpu(hdr->jt_offset);
1194			table_size = le32_to_cpu(hdr->jt_size);
1195		} else if (me == 2) {
1196			const struct gfx_firmware_header_v1_0 *hdr =
1197				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1198			fw_data = (const __le32 *)
1199				(adev->gfx.me_fw->data +
1200				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1201			table_offset = le32_to_cpu(hdr->jt_offset);
1202			table_size = le32_to_cpu(hdr->jt_size);
1203		} else if (me == 3) {
1204			const struct gfx_firmware_header_v1_0 *hdr =
1205				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1206			fw_data = (const __le32 *)
1207				(adev->gfx.mec_fw->data +
1208				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1209			table_offset = le32_to_cpu(hdr->jt_offset);
1210			table_size = le32_to_cpu(hdr->jt_size);
1211		} else  if (me == 4) {
1212			const struct gfx_firmware_header_v1_0 *hdr =
1213				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1214			fw_data = (const __le32 *)
1215				(adev->gfx.mec2_fw->data +
1216				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1217			table_offset = le32_to_cpu(hdr->jt_offset);
1218			table_size = le32_to_cpu(hdr->jt_size);
1219		}
1220
1221		for (i = 0; i < table_size; i ++) {
1222			dst_ptr[bo_offset + i] =
1223				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1224		}
1225
1226		bo_offset += table_size;
1227	}
1228}
1229
1230static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1231{
1232	int r;
1233
1234	/* clear state block */
1235	if (adev->gfx.rlc.clear_state_obj) {
1236		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1237		if (unlikely(r != 0))
1238			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1239		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1240		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1241		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1242		adev->gfx.rlc.clear_state_obj = NULL;
1243	}
1244
1245	/* jump table block */
1246	if (adev->gfx.rlc.cp_table_obj) {
1247		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1248		if (unlikely(r != 0))
1249			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1250		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1251		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1252		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253		adev->gfx.rlc.cp_table_obj = NULL;
1254	}
1255}
1256
1257static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1258{
1259	volatile u32 *dst_ptr;
1260	u32 dws;
1261	const struct cs_section_def *cs_data;
1262	int r;
1263
1264	adev->gfx.rlc.cs_data = vi_cs_data;
1265
1266	cs_data = adev->gfx.rlc.cs_data;
1267
1268	if (cs_data) {
1269		/* clear state block */
1270		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1271
1272		if (adev->gfx.rlc.clear_state_obj == NULL) {
1273			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274					     AMDGPU_GEM_DOMAIN_VRAM,
1275					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1276					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1277					     NULL, NULL,
1278					     &adev->gfx.rlc.clear_state_obj);
1279			if (r) {
1280				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1281				gfx_v8_0_rlc_fini(adev);
1282				return r;
1283			}
1284		}
1285		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286		if (unlikely(r != 0)) {
1287			gfx_v8_0_rlc_fini(adev);
1288			return r;
1289		}
1290		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291				  &adev->gfx.rlc.clear_state_gpu_addr);
1292		if (r) {
1293			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295			gfx_v8_0_rlc_fini(adev);
1296			return r;
1297		}
1298
1299		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300		if (r) {
1301			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302			gfx_v8_0_rlc_fini(adev);
1303			return r;
1304		}
1305		/* set up the cs buffer */
1306		dst_ptr = adev->gfx.rlc.cs_ptr;
1307		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310	}
1311
1312	if ((adev->asic_type == CHIP_CARRIZO) ||
1313	    (adev->asic_type == CHIP_STONEY)) {
1314		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315		if (adev->gfx.rlc.cp_table_obj == NULL) {
1316			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317					     AMDGPU_GEM_DOMAIN_VRAM,
1318					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1319					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1320					     NULL, NULL,
1321					     &adev->gfx.rlc.cp_table_obj);
1322			if (r) {
1323				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1324				return r;
1325			}
1326		}
1327
1328		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1329		if (unlikely(r != 0)) {
1330			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1331			return r;
1332		}
1333		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1334				  &adev->gfx.rlc.cp_table_gpu_addr);
1335		if (r) {
1336			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1337			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1338			return r;
1339		}
1340		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1341		if (r) {
1342			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1343			return r;
1344		}
1345
1346		cz_init_cp_jump_table(adev);
1347
1348		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1349		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1350	}
1351
1352	return 0;
1353}
1354
1355static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1356{
1357	int r;
1358
1359	if (adev->gfx.mec.hpd_eop_obj) {
1360		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1361		if (unlikely(r != 0))
1362			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1363		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1364		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
1365		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366		adev->gfx.mec.hpd_eop_obj = NULL;
1367	}
1368}
1369
1370#define MEC_HPD_SIZE 2048
1371
1372static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1373{
1374	int r;
1375	u32 *hpd;
1376
1377	/*
1378	 * we assign only 1 pipe because all other pipes will
1379	 * be handled by KFD
1380	 */
1381	adev->gfx.mec.num_mec = 1;
1382	adev->gfx.mec.num_pipe = 1;
1383	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1384
1385	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1386		r = amdgpu_bo_create(adev,
1387				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1388				     PAGE_SIZE, true,
1389				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1390				     &adev->gfx.mec.hpd_eop_obj);
1391		if (r) {
1392			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1393			return r;
1394		}
1395	}
1396
1397	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1398	if (unlikely(r != 0)) {
1399		gfx_v8_0_mec_fini(adev);
1400		return r;
1401	}
1402	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1403			  &adev->gfx.mec.hpd_eop_gpu_addr);
1404	if (r) {
1405		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1406		gfx_v8_0_mec_fini(adev);
1407		return r;
1408	}
1409	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1410	if (r) {
1411		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1412		gfx_v8_0_mec_fini(adev);
1413		return r;
1414	}
1415
1416	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1417
1418	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1419	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1420
1421	return 0;
1422}
1423
/*
 * Compute shader image, as raw dwords, used by
 * gfx_v8_0_do_edc_gpr_workarounds(): it is copied into the workaround IB at
 * vgpr_offset and its address is programmed through mmCOMPUTE_PGM_LO for the
 * VGPR dispatch.  Going by the name it initializes the VGPRs; the encoded
 * instructions are not decoded here.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1460
/*
 * Compute shader image, as raw dwords, used by
 * gfx_v8_0_do_edc_gpr_workarounds(): it is copied into the workaround IB at
 * sgpr_offset and its address is programmed through mmCOMPUTE_PGM_LO for the
 * SGPR dispatch.  Going by the name it initializes the SGPRs; the encoded
 * instructions are not decoded here.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1485
/*
 * Flat list of (register, value) pairs for the VGPR dispatch.
 * gfx_v8_0_do_edc_gpr_workarounds() walks it two entries at a time and emits
 * one PACKET3_SET_SH_REG packet per pair.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1505
/*
 * Flat list of (register, value) pairs for the first SGPR dispatch.
 * gfx_v8_0_do_edc_gpr_workarounds() walks it two entries at a time and emits
 * one PACKET3_SET_SH_REG packet per pair.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1525
/*
 * Flat list of (register, value) pairs for the second SGPR dispatch, walked
 * two entries at a time by gfx_v8_0_do_edc_gpr_workarounds().  Identical to
 * sgpr1_init_regs except for the mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value
 * (0xf0 here, 0x0f there).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1545
/*
 * List of EDC-related register offsets.
 * NOTE(review): the consumer of this table is outside the visible part of
 * the file; going by the name these are presumably the SEC/DED error
 * counters -- confirm against the code that iterates over it.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1574
1575static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1576{
1577	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1578	struct amdgpu_ib ib;
1579	struct dma_fence *f = NULL;
1580	int r, i;
1581	u32 tmp;
1582	unsigned total_size, vgpr_offset, sgpr_offset;
1583	u64 gpu_addr;
1584
1585	/* only supported on CZ */
1586	if (adev->asic_type != CHIP_CARRIZO)
1587		return 0;
1588
1589	/* bail if the compute ring is not ready */
1590	if (!ring->ready)
1591		return 0;
1592
1593	tmp = RREG32(mmGB_EDC_MODE);
1594	WREG32(mmGB_EDC_MODE, 0);
1595
1596	total_size =
1597		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598	total_size +=
1599		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1600	total_size +=
1601		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1602	total_size = ALIGN(total_size, 256);
1603	vgpr_offset = total_size;
1604	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1605	sgpr_offset = total_size;
1606	total_size += sizeof(sgpr_init_compute_shader);
1607
1608	/* allocate an indirect buffer to put the commands in */
1609	memset(&ib, 0, sizeof(ib));
1610	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1611	if (r) {
1612		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1613		return r;
1614	}
1615
1616	/* load the compute shaders */
1617	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1618		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1619
1620	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1621		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1622
1623	/* init the ib length to 0 */
1624	ib.length_dw = 0;
1625
1626	/* VGPR */
1627	/* write the register state for the compute dispatch */
1628	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1629		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1630		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1631		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1632	}
1633	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1634	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1635	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1636	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1637	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1638	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1639
1640	/* write dispatch packet */
1641	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1642	ib.ptr[ib.length_dw++] = 8; /* x */
1643	ib.ptr[ib.length_dw++] = 1; /* y */
1644	ib.ptr[ib.length_dw++] = 1; /* z */
1645	ib.ptr[ib.length_dw++] =
1646		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1647
1648	/* write CS partial flush packet */
1649	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1650	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1651
1652	/* SGPR1 */
1653	/* write the register state for the compute dispatch */
1654	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1655		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1656		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1657		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1658	}
1659	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1660	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1661	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1662	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1663	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1664	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1665
1666	/* write dispatch packet */
1667	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1668	ib.ptr[ib.length_dw++] = 8; /* x */
1669	ib.ptr[ib.length_dw++] = 1; /* y */
1670	ib.ptr[ib.length_dw++] = 1; /* z */
1671	ib.ptr[ib.length_dw++] =
1672		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1673
1674	/* write CS partial flush packet */
1675	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1676	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1677
1678	/* SGPR2 */
1679	/* write the register state for the compute dispatch */
1680	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1681		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1682		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1683		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1684	}
1685	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1686	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1687	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1688	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1689	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1690	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1691
1692	/* write dispatch packet */
1693	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1694	ib.ptr[ib.length_dw++] = 8; /* x */
1695	ib.ptr[ib.length_dw++] = 1; /* y */
1696	ib.ptr[ib.length_dw++] = 1; /* z */
1697	ib.ptr[ib.length_dw++] =
1698		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1699
1700	/* write CS partial flush packet */
1701	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1702	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1703
1704	/* schedule the ib on the ring */
1705	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1706	if (r) {
1707		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1708		goto fail;
1709	}
1710
1711	/* wait for the GPU to finish processing the IB */
1712	r = dma_fence_wait(f, false);
1713	if (r) {
1714		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1715		goto fail;
1716	}
1717
1718	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1719	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1720	WREG32(mmGB_EDC_MODE, tmp);
1721
1722	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1723	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1724	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1725
1726
1727	/* read back registers to clear the counters */
1728	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1729		RREG32(sec_ded_counter_registers[i]);
1730
1731fail:
 
1732	amdgpu_ib_free(adev, &ib, NULL);
1733	dma_fence_put(f);
1734
1735	return r;
1736}
1737
1738static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1739{
1740	u32 gb_addr_config;
1741	u32 mc_shared_chmap, mc_arb_ramcfg;
1742	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1743	u32 tmp;
1744	int ret;
1745
1746	switch (adev->asic_type) {
1747	case CHIP_TOPAZ:
1748		adev->gfx.config.max_shader_engines = 1;
1749		adev->gfx.config.max_tile_pipes = 2;
1750		adev->gfx.config.max_cu_per_sh = 6;
1751		adev->gfx.config.max_sh_per_se = 1;
1752		adev->gfx.config.max_backends_per_se = 2;
1753		adev->gfx.config.max_texture_channel_caches = 2;
1754		adev->gfx.config.max_gprs = 256;
1755		adev->gfx.config.max_gs_threads = 32;
1756		adev->gfx.config.max_hw_contexts = 8;
1757
1758		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1759		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1760		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1761		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1762		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1763		break;
1764	case CHIP_FIJI:
1765		adev->gfx.config.max_shader_engines = 4;
1766		adev->gfx.config.max_tile_pipes = 16;
1767		adev->gfx.config.max_cu_per_sh = 16;
1768		adev->gfx.config.max_sh_per_se = 1;
1769		adev->gfx.config.max_backends_per_se = 4;
1770		adev->gfx.config.max_texture_channel_caches = 16;
1771		adev->gfx.config.max_gprs = 256;
1772		adev->gfx.config.max_gs_threads = 32;
1773		adev->gfx.config.max_hw_contexts = 8;
1774
1775		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1776		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1777		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1778		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1779		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1780		break;
1781	case CHIP_POLARIS11:
1782	case CHIP_POLARIS12:
1783		ret = amdgpu_atombios_get_gfx_info(adev);
1784		if (ret)
1785			return ret;
1786		adev->gfx.config.max_gprs = 256;
1787		adev->gfx.config.max_gs_threads = 32;
1788		adev->gfx.config.max_hw_contexts = 8;
1789
1790		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1791		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1792		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1793		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1794		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1795		break;
1796	case CHIP_POLARIS10:
1797		ret = amdgpu_atombios_get_gfx_info(adev);
1798		if (ret)
1799			return ret;
1800		adev->gfx.config.max_gprs = 256;
1801		adev->gfx.config.max_gs_threads = 32;
1802		adev->gfx.config.max_hw_contexts = 8;
1803
1804		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1805		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1806		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1807		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1808		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1809		break;
1810	case CHIP_TONGA:
1811		adev->gfx.config.max_shader_engines = 4;
1812		adev->gfx.config.max_tile_pipes = 8;
1813		adev->gfx.config.max_cu_per_sh = 8;
1814		adev->gfx.config.max_sh_per_se = 1;
1815		adev->gfx.config.max_backends_per_se = 2;
1816		adev->gfx.config.max_texture_channel_caches = 8;
1817		adev->gfx.config.max_gprs = 256;
1818		adev->gfx.config.max_gs_threads = 32;
1819		adev->gfx.config.max_hw_contexts = 8;
1820
1821		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1822		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1823		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1824		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1825		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1826		break;
1827	case CHIP_CARRIZO:
1828		adev->gfx.config.max_shader_engines = 1;
1829		adev->gfx.config.max_tile_pipes = 2;
1830		adev->gfx.config.max_sh_per_se = 1;
1831		adev->gfx.config.max_backends_per_se = 2;
1832
1833		switch (adev->pdev->revision) {
1834		case 0xc4:
1835		case 0x84:
1836		case 0xc8:
1837		case 0xcc:
1838		case 0xe1:
1839		case 0xe3:
1840			/* B10 */
1841			adev->gfx.config.max_cu_per_sh = 8;
1842			break;
1843		case 0xc5:
1844		case 0x81:
1845		case 0x85:
1846		case 0xc9:
1847		case 0xcd:
1848		case 0xe2:
1849		case 0xe4:
1850			/* B8 */
1851			adev->gfx.config.max_cu_per_sh = 6;
1852			break;
1853		case 0xc6:
1854		case 0xca:
1855		case 0xce:
1856		case 0x88:
1857			/* B6 */
1858			adev->gfx.config.max_cu_per_sh = 6;
1859			break;
1860		case 0xc7:
1861		case 0x87:
1862		case 0xcb:
1863		case 0xe5:
1864		case 0x89:
1865		default:
1866			/* B4 */
1867			adev->gfx.config.max_cu_per_sh = 4;
1868			break;
1869		}
1870
1871		adev->gfx.config.max_texture_channel_caches = 2;
1872		adev->gfx.config.max_gprs = 256;
1873		adev->gfx.config.max_gs_threads = 32;
1874		adev->gfx.config.max_hw_contexts = 8;
1875
1876		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1877		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1878		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1879		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1880		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1881		break;
1882	case CHIP_STONEY:
1883		adev->gfx.config.max_shader_engines = 1;
1884		adev->gfx.config.max_tile_pipes = 2;
1885		adev->gfx.config.max_sh_per_se = 1;
1886		adev->gfx.config.max_backends_per_se = 1;
1887
1888		switch (adev->pdev->revision) {
1889		case 0xc0:
1890		case 0xc1:
1891		case 0xc2:
1892		case 0xc4:
1893		case 0xc8:
1894		case 0xc9:
1895			adev->gfx.config.max_cu_per_sh = 3;
1896			break;
1897		case 0xd0:
1898		case 0xd1:
1899		case 0xd2:
1900		default:
1901			adev->gfx.config.max_cu_per_sh = 2;
1902			break;
1903		}
1904
1905		adev->gfx.config.max_texture_channel_caches = 2;
1906		adev->gfx.config.max_gprs = 256;
1907		adev->gfx.config.max_gs_threads = 16;
1908		adev->gfx.config.max_hw_contexts = 8;
1909
1910		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1911		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1912		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1913		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1914		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1915		break;
1916	default:
1917		adev->gfx.config.max_shader_engines = 2;
1918		adev->gfx.config.max_tile_pipes = 4;
1919		adev->gfx.config.max_cu_per_sh = 2;
1920		adev->gfx.config.max_sh_per_se = 1;
1921		adev->gfx.config.max_backends_per_se = 2;
1922		adev->gfx.config.max_texture_channel_caches = 4;
1923		adev->gfx.config.max_gprs = 256;
1924		adev->gfx.config.max_gs_threads = 32;
1925		adev->gfx.config.max_hw_contexts = 8;
1926
1927		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1928		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1929		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1930		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1931		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1932		break;
1933	}
1934
1935	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1936	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1937	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1938
1939	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1940	adev->gfx.config.mem_max_burst_length_bytes = 256;
1941	if (adev->flags & AMD_IS_APU) {
1942		/* Get memory bank mapping mode. */
1943		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1944		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1945		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1946
1947		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1948		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1949		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1950
1951		/* Validate settings in case only one DIMM installed. */
1952		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1953			dimm00_addr_map = 0;
1954		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1955			dimm01_addr_map = 0;
1956		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1957			dimm10_addr_map = 0;
1958		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1959			dimm11_addr_map = 0;
1960
1961		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1962		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1963		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1964			adev->gfx.config.mem_row_size_in_kb = 2;
1965		else
1966			adev->gfx.config.mem_row_size_in_kb = 1;
1967	} else {
1968		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1969		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1970		if (adev->gfx.config.mem_row_size_in_kb > 4)
1971			adev->gfx.config.mem_row_size_in_kb = 4;
1972	}
1973
1974	adev->gfx.config.shader_engine_tile_size = 32;
1975	adev->gfx.config.num_gpus = 1;
1976	adev->gfx.config.multi_gpu_tile_size = 64;
1977
1978	/* fix up row size */
1979	switch (adev->gfx.config.mem_row_size_in_kb) {
1980	case 1:
1981	default:
1982		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1983		break;
1984	case 2:
1985		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1986		break;
1987	case 4:
1988		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1989		break;
1990	}
1991	adev->gfx.config.gb_addr_config = gb_addr_config;
1992
1993	return 0;
1994}
1995
1996static int gfx_v8_0_sw_init(void *handle)
1997{
1998	int i, r;
1999	struct amdgpu_ring *ring;
2000	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2001
2002	/* EOP Event */
2003	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2004	if (r)
2005		return r;
2006
2007	/* Privileged reg */
2008	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2009	if (r)
2010		return r;
2011
2012	/* Privileged inst */
2013	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2014	if (r)
2015		return r;
2016
2017	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2018
2019	gfx_v8_0_scratch_init(adev);
2020
2021	r = gfx_v8_0_init_microcode(adev);
2022	if (r) {
2023		DRM_ERROR("Failed to load gfx firmware!\n");
2024		return r;
2025	}
2026
2027	r = gfx_v8_0_rlc_init(adev);
2028	if (r) {
2029		DRM_ERROR("Failed to init rlc BOs!\n");
2030		return r;
2031	}
2032
2033	r = gfx_v8_0_mec_init(adev);
2034	if (r) {
2035		DRM_ERROR("Failed to init MEC BOs!\n");
2036		return r;
2037	}
2038
2039	/* set up the gfx ring */
2040	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2041		ring = &adev->gfx.gfx_ring[i];
2042		ring->ring_obj = NULL;
2043		sprintf(ring->name, "gfx");
2044		/* no gfx doorbells on iceland */
2045		if (adev->asic_type != CHIP_TOPAZ) {
2046			ring->use_doorbell = true;
2047			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2048		}
2049
2050		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2051				     AMDGPU_CP_IRQ_GFX_EOP);
 
 
2052		if (r)
2053			return r;
2054	}
2055
2056	/* set up the compute queues */
2057	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2058		unsigned irq_type;
2059
2060		/* max 32 queues per MEC */
2061		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2062			DRM_ERROR("Too many (%d) compute rings!\n", i);
2063			break;
2064		}
2065		ring = &adev->gfx.compute_ring[i];
2066		ring->ring_obj = NULL;
2067		ring->use_doorbell = true;
2068		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2069		ring->me = 1; /* first MEC */
2070		ring->pipe = i / 8;
2071		ring->queue = i % 8;
2072		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2073		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2074		/* type-2 packets are deprecated on MEC, use type-3 instead */
2075		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2076				     irq_type);
 
 
2077		if (r)
2078			return r;
2079	}
2080
2081	/* reserve GDS, GWS and OA resource for gfx */
2082	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2083				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2084				    &adev->gds.gds_gfx_bo, NULL, NULL);
 
2085	if (r)
2086		return r;
2087
2088	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2089				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2090				    &adev->gds.gws_gfx_bo, NULL, NULL);
 
2091	if (r)
2092		return r;
2093
2094	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2095				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2096				    &adev->gds.oa_gfx_bo, NULL, NULL);
 
2097	if (r)
2098		return r;
2099
2100	adev->gfx.ce_ram_size = 0x8000;
2101
2102	r = gfx_v8_0_gpu_early_init(adev);
2103	if (r)
2104		return r;
2105
2106	return 0;
2107}
2108
2109static int gfx_v8_0_sw_fini(void *handle)
2110{
2111	int i;
2112	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2113
2114	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2115	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2116	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2117
2118	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2119		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2120	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2121		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2122
2123	gfx_v8_0_mec_fini(adev);
2124	gfx_v8_0_rlc_fini(adev);
2125	gfx_v8_0_free_microcode(adev);
2126
2127	return 0;
2128}
2129
2130static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2131{
2132	uint32_t *modearray, *mod2array;
2133	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2134	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2135	u32 reg_offset;
2136
2137	modearray = adev->gfx.config.tile_mode_array;
2138	mod2array = adev->gfx.config.macrotile_mode_array;
2139
2140	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2141		modearray[reg_offset] = 0;
2142
2143	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2144		mod2array[reg_offset] = 0;
2145
2146	switch (adev->asic_type) {
2147	case CHIP_TOPAZ:
2148		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149				PIPE_CONFIG(ADDR_SURF_P2) |
2150				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2151				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153				PIPE_CONFIG(ADDR_SURF_P2) |
2154				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2155				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157				PIPE_CONFIG(ADDR_SURF_P2) |
2158				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2159				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161				PIPE_CONFIG(ADDR_SURF_P2) |
2162				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2163				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165				PIPE_CONFIG(ADDR_SURF_P2) |
2166				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2167				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2169				PIPE_CONFIG(ADDR_SURF_P2) |
2170				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173				PIPE_CONFIG(ADDR_SURF_P2) |
2174				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2177				PIPE_CONFIG(ADDR_SURF_P2));
2178		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179				PIPE_CONFIG(ADDR_SURF_P2) |
2180				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2181				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183				 PIPE_CONFIG(ADDR_SURF_P2) |
2184				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187				 PIPE_CONFIG(ADDR_SURF_P2) |
2188				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2190		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2191				 PIPE_CONFIG(ADDR_SURF_P2) |
2192				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2195				 PIPE_CONFIG(ADDR_SURF_P2) |
2196				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2199				 PIPE_CONFIG(ADDR_SURF_P2) |
2200				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2203				 PIPE_CONFIG(ADDR_SURF_P2) |
2204				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2206		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2207				 PIPE_CONFIG(ADDR_SURF_P2) |
2208				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211				 PIPE_CONFIG(ADDR_SURF_P2) |
2212				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215				 PIPE_CONFIG(ADDR_SURF_P2) |
2216				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2219				 PIPE_CONFIG(ADDR_SURF_P2) |
2220				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2223				 PIPE_CONFIG(ADDR_SURF_P2) |
2224				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2227				 PIPE_CONFIG(ADDR_SURF_P2) |
2228				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2229				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2231				 PIPE_CONFIG(ADDR_SURF_P2) |
2232				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2235				 PIPE_CONFIG(ADDR_SURF_P2) |
2236				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239				 PIPE_CONFIG(ADDR_SURF_P2) |
2240				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2241				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243				 PIPE_CONFIG(ADDR_SURF_P2) |
2244				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247				 PIPE_CONFIG(ADDR_SURF_P2) |
2248				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250
2251		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2252				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2254				NUM_BANKS(ADDR_SURF_8_BANK));
2255		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258				NUM_BANKS(ADDR_SURF_8_BANK));
2259		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2260				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262				NUM_BANKS(ADDR_SURF_8_BANK));
2263		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2266				NUM_BANKS(ADDR_SURF_8_BANK));
2267		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2269				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270				NUM_BANKS(ADDR_SURF_8_BANK));
2271		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274				NUM_BANKS(ADDR_SURF_8_BANK));
2275		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278				NUM_BANKS(ADDR_SURF_8_BANK));
2279		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2280				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2281				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282				NUM_BANKS(ADDR_SURF_16_BANK));
2283		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2285				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286				NUM_BANKS(ADDR_SURF_16_BANK));
2287		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2288				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290				 NUM_BANKS(ADDR_SURF_16_BANK));
2291		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294				 NUM_BANKS(ADDR_SURF_16_BANK));
2295		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298				 NUM_BANKS(ADDR_SURF_16_BANK));
2299		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2301				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302				 NUM_BANKS(ADDR_SURF_16_BANK));
2303		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306				 NUM_BANKS(ADDR_SURF_8_BANK));
2307
2308		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2309			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2310			    reg_offset != 23)
2311				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2312
2313		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2314			if (reg_offset != 7)
2315				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2316
2317		break;
2318	case CHIP_FIJI:
2319		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2322				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2326				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2330				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2334				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2340				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2349				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2352				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2353		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2354				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2357		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2365		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2367				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2370				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2378				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2382				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2385		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2387				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2390				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2398				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2402				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2406				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2411				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2414				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2416				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2418				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2422				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2434				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2437		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2439				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441
2442		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445				NUM_BANKS(ADDR_SURF_8_BANK));
2446		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449				NUM_BANKS(ADDR_SURF_8_BANK));
2450		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453				NUM_BANKS(ADDR_SURF_8_BANK));
2454		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457				NUM_BANKS(ADDR_SURF_8_BANK));
2458		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2460				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461				NUM_BANKS(ADDR_SURF_8_BANK));
2462		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465				NUM_BANKS(ADDR_SURF_8_BANK));
2466		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469				NUM_BANKS(ADDR_SURF_8_BANK));
2470		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2472				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473				NUM_BANKS(ADDR_SURF_8_BANK));
2474		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2476				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477				NUM_BANKS(ADDR_SURF_8_BANK));
2478		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2480				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481				 NUM_BANKS(ADDR_SURF_8_BANK));
2482		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485				 NUM_BANKS(ADDR_SURF_8_BANK));
2486		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2488				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489				 NUM_BANKS(ADDR_SURF_8_BANK));
2490		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493				 NUM_BANKS(ADDR_SURF_8_BANK));
2494		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497				 NUM_BANKS(ADDR_SURF_4_BANK));
2498
2499		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2500			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2501
2502		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2503			if (reg_offset != 7)
2504				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2505
2506		break;
2507	case CHIP_TONGA:
2508		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2511				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2515				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2519				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2523				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2527				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2529				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2533				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2545				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2547				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2551				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2554		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2555				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2558		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2559				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2562		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2567				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2574		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2575				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2578		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2579				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2583				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2587				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2591				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2595				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2599				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2603				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2605				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2607				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2611				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2613				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2614		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2626		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2630
2631		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634				NUM_BANKS(ADDR_SURF_16_BANK));
2635		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638				NUM_BANKS(ADDR_SURF_16_BANK));
2639		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642				NUM_BANKS(ADDR_SURF_16_BANK));
2643		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2645				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646				NUM_BANKS(ADDR_SURF_16_BANK));
2647		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2649				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650				NUM_BANKS(ADDR_SURF_16_BANK));
2651		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654				NUM_BANKS(ADDR_SURF_16_BANK));
2655		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658				NUM_BANKS(ADDR_SURF_16_BANK));
2659		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2661				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2662				NUM_BANKS(ADDR_SURF_16_BANK));
2663		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2665				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2666				NUM_BANKS(ADDR_SURF_16_BANK));
2667		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2669				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2670				 NUM_BANKS(ADDR_SURF_16_BANK));
2671		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2673				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2674				 NUM_BANKS(ADDR_SURF_16_BANK));
2675		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2678				 NUM_BANKS(ADDR_SURF_8_BANK));
2679		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682				 NUM_BANKS(ADDR_SURF_4_BANK));
2683		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686				 NUM_BANKS(ADDR_SURF_4_BANK));
2687
2688		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2689			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2690
2691		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2692			if (reg_offset != 7)
2693				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2694
2695		break;
2696	case CHIP_POLARIS11:
2697	case CHIP_POLARIS12:
2698		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2701				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2705				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2709				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2713				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2729				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2730		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2731				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2732		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2733				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2745				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2747				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2748		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2749				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2757				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2768		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2771				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2773				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2777				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2781				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2789				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2791				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2793				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2795				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2797				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2801				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2803				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2804		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2812		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2816		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2819				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2820
2821		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824				NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829				NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834				NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839				NUM_BANKS(ADDR_SURF_16_BANK));
2840
2841		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2844				NUM_BANKS(ADDR_SURF_16_BANK));
2845
2846		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849				NUM_BANKS(ADDR_SURF_16_BANK));
2850
2851		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2853				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2854				NUM_BANKS(ADDR_SURF_16_BANK));
2855
2856		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859				NUM_BANKS(ADDR_SURF_16_BANK));
2860
2861		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2862				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864				NUM_BANKS(ADDR_SURF_16_BANK));
2865
2866		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869				NUM_BANKS(ADDR_SURF_16_BANK));
2870
2871		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2873				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2874				NUM_BANKS(ADDR_SURF_16_BANK));
2875
2876		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879				NUM_BANKS(ADDR_SURF_16_BANK));
2880
2881		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2884				NUM_BANKS(ADDR_SURF_8_BANK));
2885
2886		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2889				NUM_BANKS(ADDR_SURF_4_BANK));
2890
2891		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2892			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2893
2894		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2895			if (reg_offset != 7)
2896				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2897
2898		break;
2899	case CHIP_POLARIS10:
2900		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2903				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2907				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2911				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2915				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2921				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2930				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2931				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2932		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2933				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2934		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2935				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2948				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2950		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2951				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2959				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2968				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2970		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2975				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2979				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2983				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2991				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2993				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2995				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2997				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2999				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3003				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3005				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3006		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3011				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3014		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3019				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3020				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3021				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3022
3023		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026				NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031				NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036				NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041				NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3045				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046				NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051				NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056				NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3060				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061				NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066				NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3070				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071				NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076				NUM_BANKS(ADDR_SURF_16_BANK));
3077
3078		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081				NUM_BANKS(ADDR_SURF_8_BANK));
3082
3083		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086				NUM_BANKS(ADDR_SURF_4_BANK));
3087
3088		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091				NUM_BANKS(ADDR_SURF_4_BANK));
3092
3093		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3094			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3095
3096		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3097			if (reg_offset != 7)
3098				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3099
3100		break;
3101	case CHIP_STONEY:
3102		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103				PIPE_CONFIG(ADDR_SURF_P2) |
3104				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3105				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107				PIPE_CONFIG(ADDR_SURF_P2) |
3108				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3109				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111				PIPE_CONFIG(ADDR_SURF_P2) |
3112				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3113				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115				PIPE_CONFIG(ADDR_SURF_P2) |
3116				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3117				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119				PIPE_CONFIG(ADDR_SURF_P2) |
3120				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3123				PIPE_CONFIG(ADDR_SURF_P2) |
3124				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127				PIPE_CONFIG(ADDR_SURF_P2) |
3128				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3129				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3130		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3131				PIPE_CONFIG(ADDR_SURF_P2));
3132		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3133				PIPE_CONFIG(ADDR_SURF_P2) |
3134				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137				 PIPE_CONFIG(ADDR_SURF_P2) |
3138				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3141				 PIPE_CONFIG(ADDR_SURF_P2) |
3142				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3144		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3145				 PIPE_CONFIG(ADDR_SURF_P2) |
3146				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3149				 PIPE_CONFIG(ADDR_SURF_P2) |
3150				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3153				 PIPE_CONFIG(ADDR_SURF_P2) |
3154				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3157				 PIPE_CONFIG(ADDR_SURF_P2) |
3158				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3160		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161				 PIPE_CONFIG(ADDR_SURF_P2) |
3162				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3163				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3165				 PIPE_CONFIG(ADDR_SURF_P2) |
3166				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3169				 PIPE_CONFIG(ADDR_SURF_P2) |
3170				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3173				 PIPE_CONFIG(ADDR_SURF_P2) |
3174				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3177				 PIPE_CONFIG(ADDR_SURF_P2) |
3178				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3179				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3181				 PIPE_CONFIG(ADDR_SURF_P2) |
3182				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3185				 PIPE_CONFIG(ADDR_SURF_P2) |
3186				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3189				 PIPE_CONFIG(ADDR_SURF_P2) |
3190				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3191				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193				 PIPE_CONFIG(ADDR_SURF_P2) |
3194				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197				 PIPE_CONFIG(ADDR_SURF_P2) |
3198				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201				 PIPE_CONFIG(ADDR_SURF_P2) |
3202				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3203				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204
3205		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3207				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208				NUM_BANKS(ADDR_SURF_8_BANK));
3209		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3211				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212				NUM_BANKS(ADDR_SURF_8_BANK));
3213		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216				NUM_BANKS(ADDR_SURF_8_BANK));
3217		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220				NUM_BANKS(ADDR_SURF_8_BANK));
3221		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224				NUM_BANKS(ADDR_SURF_8_BANK));
3225		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228				NUM_BANKS(ADDR_SURF_8_BANK));
3229		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232				NUM_BANKS(ADDR_SURF_8_BANK));
3233		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3235				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236				NUM_BANKS(ADDR_SURF_16_BANK));
3237		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3238				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240				NUM_BANKS(ADDR_SURF_16_BANK));
3241		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244				 NUM_BANKS(ADDR_SURF_16_BANK));
3245		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248				 NUM_BANKS(ADDR_SURF_16_BANK));
3249		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3251				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252				 NUM_BANKS(ADDR_SURF_16_BANK));
3253		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3256				 NUM_BANKS(ADDR_SURF_16_BANK));
3257		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260				 NUM_BANKS(ADDR_SURF_8_BANK));
3261
3262		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3263			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3264			    reg_offset != 23)
3265				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3266
3267		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3268			if (reg_offset != 7)
3269				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3270
3271		break;
3272	default:
3273		dev_warn(adev->dev,
3274			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3275			 adev->asic_type);
3276
3277	case CHIP_CARRIZO:
3278		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279				PIPE_CONFIG(ADDR_SURF_P2) |
3280				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3281				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283				PIPE_CONFIG(ADDR_SURF_P2) |
3284				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3285				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287				PIPE_CONFIG(ADDR_SURF_P2) |
3288				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3289				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291				PIPE_CONFIG(ADDR_SURF_P2) |
3292				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3293				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3295				PIPE_CONFIG(ADDR_SURF_P2) |
3296				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299				PIPE_CONFIG(ADDR_SURF_P2) |
3300				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3303				PIPE_CONFIG(ADDR_SURF_P2) |
3304				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3305				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3306		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3307				PIPE_CONFIG(ADDR_SURF_P2));
3308		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309				PIPE_CONFIG(ADDR_SURF_P2) |
3310				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313				 PIPE_CONFIG(ADDR_SURF_P2) |
3314				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3317				 PIPE_CONFIG(ADDR_SURF_P2) |
3318				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3319				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3320		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3321				 PIPE_CONFIG(ADDR_SURF_P2) |
3322				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3325				 PIPE_CONFIG(ADDR_SURF_P2) |
3326				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3329				 PIPE_CONFIG(ADDR_SURF_P2) |
3330				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333				 PIPE_CONFIG(ADDR_SURF_P2) |
3334				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337				 PIPE_CONFIG(ADDR_SURF_P2) |
3338				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3339				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3341				 PIPE_CONFIG(ADDR_SURF_P2) |
3342				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345				 PIPE_CONFIG(ADDR_SURF_P2) |
3346				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3349				 PIPE_CONFIG(ADDR_SURF_P2) |
3350				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3353				 PIPE_CONFIG(ADDR_SURF_P2) |
3354				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3357				 PIPE_CONFIG(ADDR_SURF_P2) |
3358				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3359				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3361				 PIPE_CONFIG(ADDR_SURF_P2) |
3362				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3365				 PIPE_CONFIG(ADDR_SURF_P2) |
3366				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3367				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369				 PIPE_CONFIG(ADDR_SURF_P2) |
3370				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3373				 PIPE_CONFIG(ADDR_SURF_P2) |
3374				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3376		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3377				 PIPE_CONFIG(ADDR_SURF_P2) |
3378				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3379				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3380
3381		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384				NUM_BANKS(ADDR_SURF_8_BANK));
3385		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388				NUM_BANKS(ADDR_SURF_8_BANK));
3389		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392				NUM_BANKS(ADDR_SURF_8_BANK));
3393		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396				NUM_BANKS(ADDR_SURF_8_BANK));
3397		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400				NUM_BANKS(ADDR_SURF_8_BANK));
3401		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404				NUM_BANKS(ADDR_SURF_8_BANK));
3405		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408				NUM_BANKS(ADDR_SURF_8_BANK));
3409		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3411				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412				NUM_BANKS(ADDR_SURF_16_BANK));
3413		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3414				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416				NUM_BANKS(ADDR_SURF_16_BANK));
3417		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3419				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420				 NUM_BANKS(ADDR_SURF_16_BANK));
3421		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3422				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424				 NUM_BANKS(ADDR_SURF_16_BANK));
3425		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3427				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428				 NUM_BANKS(ADDR_SURF_16_BANK));
3429		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3432				 NUM_BANKS(ADDR_SURF_16_BANK));
3433		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3436				 NUM_BANKS(ADDR_SURF_8_BANK));
3437
3438		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3439			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3440			    reg_offset != 23)
3441				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3442
3443		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3444			if (reg_offset != 7)
3445				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3446
3447		break;
3448	}
3449}
3450
3451static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3452				  u32 se_num, u32 sh_num, u32 instance)
3453{
3454	u32 data;
3455
3456	if (instance == 0xffffffff)
3457		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3458	else
3459		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3460
3461	if (se_num == 0xffffffff)
3462		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3463	else
3464		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3465
3466	if (sh_num == 0xffffffff)
3467		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3468	else
 
3469		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3470
 
3471	WREG32(mmGRBM_GFX_INDEX, data);
3472}
3473
/*
 * Return a mask with the low @bit_width bits set.
 *
 * The shift is done on an unsigned long long so that a @bit_width of 32
 * still yields 0xffffffff after truncation to u32.  @bit_width must stay
 * below the width of unsigned long long; a larger shift count is undefined.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	unsigned long long one_past_top = 1ULL << bit_width;

	return (u32)(one_past_top - 1);
}
3478
3479static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480{
3481	u32 data, mask;
3482
3483	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3484		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485
3486	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
 
3487
3488	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3489				       adev->gfx.config.max_sh_per_se);
3490
3491	return (~data) & mask;
3492}
3493
3494static void
3495gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496{
3497	switch (adev->asic_type) {
3498	case CHIP_FIJI:
3499		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500			  RB_XSEL2(1) | PKR_MAP(2) |
3501			  PKR_XSEL(1) | PKR_YSEL(1) |
3502			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504			   SE_PAIR_YSEL(2);
3505		break;
3506	case CHIP_TONGA:
3507	case CHIP_POLARIS10:
3508		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509			  SE_XSEL(1) | SE_YSEL(1);
3510		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511			   SE_PAIR_YSEL(2);
3512		break;
3513	case CHIP_TOPAZ:
3514	case CHIP_CARRIZO:
3515		*rconf |= RB_MAP_PKR0(2);
3516		*rconf1 |= 0x0;
3517		break;
3518	case CHIP_POLARIS11:
3519	case CHIP_POLARIS12:
3520		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521			  SE_XSEL(1) | SE_YSEL(1);
3522		*rconf1 |= 0x0;
3523		break;
3524	case CHIP_STONEY:
3525		*rconf |= 0x0;
3526		*rconf1 |= 0x0;
3527		break;
3528	default:
3529		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530		break;
3531	}
3532}
3533
3534static void
3535gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3536					u32 raster_config, u32 raster_config_1,
3537					unsigned rb_mask, unsigned num_rb)
3538{
3539	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3540	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3541	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3542	unsigned rb_per_se = num_rb / num_se;
3543	unsigned se_mask[4];
3544	unsigned se;
3545
3546	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3547	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3548	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3549	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3550
3551	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3552	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3553	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3554
3555	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3556			     (!se_mask[2] && !se_mask[3]))) {
3557		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3558
3559		if (!se_mask[0] && !se_mask[1]) {
3560			raster_config_1 |=
3561				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3562		} else {
3563			raster_config_1 |=
3564				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3565		}
3566	}
3567
3568	for (se = 0; se < num_se; se++) {
3569		unsigned raster_config_se = raster_config;
3570		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3571		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3572		int idx = (se / 2) * 2;
3573
3574		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3575			raster_config_se &= ~SE_MAP_MASK;
3576
3577			if (!se_mask[idx]) {
3578				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3579			} else {
3580				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3581			}
3582		}
3583
3584		pkr0_mask &= rb_mask;
3585		pkr1_mask &= rb_mask;
3586		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3587			raster_config_se &= ~PKR_MAP_MASK;
3588
3589			if (!pkr0_mask) {
3590				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3591			} else {
3592				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3593			}
3594		}
3595
3596		if (rb_per_se >= 2) {
3597			unsigned rb0_mask = 1 << (se * rb_per_se);
3598			unsigned rb1_mask = rb0_mask << 1;
3599
3600			rb0_mask &= rb_mask;
3601			rb1_mask &= rb_mask;
3602			if (!rb0_mask || !rb1_mask) {
3603				raster_config_se &= ~RB_MAP_PKR0_MASK;
3604
3605				if (!rb0_mask) {
3606					raster_config_se |=
3607						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3608				} else {
3609					raster_config_se |=
3610						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3611				}
3612			}
3613
3614			if (rb_per_se > 2) {
3615				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3616				rb1_mask = rb0_mask << 1;
3617				rb0_mask &= rb_mask;
3618				rb1_mask &= rb_mask;
3619				if (!rb0_mask || !rb1_mask) {
3620					raster_config_se &= ~RB_MAP_PKR1_MASK;
3621
3622					if (!rb0_mask) {
3623						raster_config_se |=
3624							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3625					} else {
3626						raster_config_se |=
3627							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3628					}
3629				}
3630			}
3631		}
3632
3633		/* GRBM_GFX_INDEX has a different offset on VI */
3634		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3635		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3636		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3637	}
3638
3639	/* GRBM_GFX_INDEX has a different offset on VI */
3640	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3641}
3642
3643static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3644{
3645	int i, j;
3646	u32 data;
3647	u32 raster_config = 0, raster_config_1 = 0;
3648	u32 active_rbs = 0;
3649	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650					adev->gfx.config.max_sh_per_se;
3651	unsigned num_rb_pipes;
3652
3653	mutex_lock(&adev->grbm_idx_mutex);
3654	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657			data = gfx_v8_0_get_rb_active_bitmap(adev);
3658			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659					       rb_bitmap_width_per_sh);
3660		}
3661	}
3662	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
3663
3664	adev->gfx.config.backend_enable_mask = active_rbs;
3665	adev->gfx.config.num_rbs = hweight32(active_rbs);
3666
3667	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668			     adev->gfx.config.max_shader_engines, 16);
3669
3670	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3671
3672	if (!adev->gfx.config.backend_enable_mask ||
3673			adev->gfx.config.num_rbs >= num_rb_pipes) {
3674		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3676	} else {
3677		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678							adev->gfx.config.backend_enable_mask,
3679							num_rb_pipes);
3680	}
3681
3682	/* cache the values for userspace */
3683	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3684		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3685			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3686			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3687				RREG32(mmCC_RB_BACKEND_DISABLE);
3688			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3689				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3690			adev->gfx.config.rb_config[i][j].raster_config =
3691				RREG32(mmPA_SC_RASTER_CONFIG);
3692			adev->gfx.config.rb_config[i][j].raster_config_1 =
3693				RREG32(mmPA_SC_RASTER_CONFIG_1);
3694		}
3695	}
3696	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3697	mutex_unlock(&adev->grbm_idx_mutex);
3698}
3699
3700/**
3701 * gfx_v8_0_init_compute_vmid - gart enable
3702 *
3703 * @rdev: amdgpu_device pointer
3704 *
3705 * Initialize compute vmid sh_mem registers
3706 *
3707 */
3708#define DEFAULT_SH_MEM_BASES	(0x6000)
3709#define FIRST_COMPUTE_VMID	(8)
3710#define LAST_COMPUTE_VMID	(16)
3711static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3712{
3713	int i;
3714	uint32_t sh_mem_config;
3715	uint32_t sh_mem_bases;
3716
3717	/*
3718	 * Configure apertures:
3719	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3720	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3721	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3722	 */
3723	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3724
3725	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3726			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3727			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3728			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3729			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3730			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3731
3732	mutex_lock(&adev->srbm_mutex);
3733	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3734		vi_srbm_select(adev, 0, 0, 0, i);
3735		/* CP and shaders */
3736		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3737		WREG32(mmSH_MEM_APE1_BASE, 1);
3738		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3739		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3740	}
3741	vi_srbm_select(adev, 0, 0, 0, 0);
3742	mutex_unlock(&adev->srbm_mutex);
3743}
3744
3745static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3746{
3747	u32 tmp;
3748	int i;
3749
3750	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
 
 
 
3751	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3752	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3753	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3754
3755	gfx_v8_0_tiling_mode_table_init(adev);
 
3756	gfx_v8_0_setup_rb(adev);
3757	gfx_v8_0_get_cu_info(adev);
3758
3759	/* XXX SH_MEM regs */
3760	/* where to put LDS, scratch, GPUVM in FSA64 space */
3761	mutex_lock(&adev->srbm_mutex);
3762	for (i = 0; i < 16; i++) {
3763		vi_srbm_select(adev, 0, 0, 0, i);
3764		/* CP and shaders */
3765		if (i == 0) {
3766			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3767			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3768			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3769					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3770			WREG32(mmSH_MEM_CONFIG, tmp);
3771		} else {
3772			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3773			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3774			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3775					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3776			WREG32(mmSH_MEM_CONFIG, tmp);
3777		}
3778
3779		WREG32(mmSH_MEM_APE1_BASE, 1);
3780		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3781		WREG32(mmSH_MEM_BASES, 0);
3782	}
3783	vi_srbm_select(adev, 0, 0, 0, 0);
3784	mutex_unlock(&adev->srbm_mutex);
3785
3786	gfx_v8_0_init_compute_vmid(adev);
3787
3788	mutex_lock(&adev->grbm_idx_mutex);
3789	/*
3790	 * making sure that the following register writes will be broadcasted
3791	 * to all the shaders
3792	 */
3793	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3794
3795	WREG32(mmPA_SC_FIFO_SIZE,
3796		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3797			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3798		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3799			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3800		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3801			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3802		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3803			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3804	mutex_unlock(&adev->grbm_idx_mutex);
3805
3806}
3807
3808static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3809{
3810	u32 i, j, k;
3811	u32 mask;
3812
3813	mutex_lock(&adev->grbm_idx_mutex);
3814	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3815		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3816			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3817			for (k = 0; k < adev->usec_timeout; k++) {
3818				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3819					break;
3820				udelay(1);
3821			}
3822		}
3823	}
3824	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3825	mutex_unlock(&adev->grbm_idx_mutex);
3826
3827	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3828		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3829		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3830		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3831	for (k = 0; k < adev->usec_timeout; k++) {
3832		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3833			break;
3834		udelay(1);
3835	}
3836}
3837
3838static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3839					       bool enable)
3840{
3841	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3842
3843	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3844	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3845	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3846	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3847
3848	WREG32(mmCP_INT_CNTL_RING0, tmp);
3849}
3850
3851static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3852{
3853	/* csib */
3854	WREG32(mmRLC_CSIB_ADDR_HI,
3855			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3856	WREG32(mmRLC_CSIB_ADDR_LO,
3857			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3858	WREG32(mmRLC_CSIB_LENGTH,
3859			adev->gfx.rlc.clear_state_size);
3860}
3861
/*
 * Parse the indirect part of a register list format buffer in place.
 *
 * The buffer is walked from @ind_offset up to @list_size (both in ints).
 * A word equal to 0xFFFFFFFF terminates an entry; the position following a
 * terminator (and the initial position) is recorded in @ind_start_offsets.
 * For every other visited position the word two slots further on is looked
 * up in @unique_indices, appended there if it is not present yet, and then
 * overwritten in the buffer by its position within @unique_indices.
 *
 * @register_list_format: buffer to parse; modified in place
 * @ind_offset:           first position to look at
 * @list_size:            number of ints in @register_list_format
 * @unique_indices:       output table of distinct values found
 * @indices_count:        in/out number of used slots in @unique_indices
 * @max_indices:          capacity of @unique_indices
 * @ind_start_offsets:    output table of entry start positions
 * @offset_count:         in/out number of used slots in @ind_start_offsets
 * @max_offset:           capacity of @ind_start_offsets
 *
 * Filling either output table up to its capacity triggers BUG_ON().  A list
 * that ends in the middle of an entry triggers a WARN_ON() and stops the
 * parse instead of accessing memory beyond the buffer.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* terminator: the next word starts a new entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/*
		 * The loop condition only covered the first word of this
		 * group; a truncated list must not make us read or write
		 * past the end of the buffer.
		 */
		if (WARN_ON(ind_offset >= list_size))
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the value by its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3911
3912static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3913{
3914	int i, temp, data;
3915	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3916	int indices_count = 0;
3917	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3918	int offset_count = 0;
3919
3920	int list_size;
3921	unsigned int *register_list_format =
3922		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3923	if (!register_list_format)
3924		return -ENOMEM;
3925	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3926			adev->gfx.rlc.reg_list_format_size_bytes);
3927
3928	gfx_v8_0_parse_ind_reg_list(register_list_format,
3929				RLC_FormatDirectRegListLength,
3930				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3931				unique_indices,
3932				&indices_count,
3933				sizeof(unique_indices) / sizeof(int),
3934				indirect_start_offsets,
3935				&offset_count,
3936				sizeof(indirect_start_offsets)/sizeof(int));
3937
3938	/* save and restore list */
3939	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3940
3941	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3942	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3943		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3944
3945	/* indirect list */
3946	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3947	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3948		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3949
3950	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3951	list_size = list_size >> 1;
3952	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3953	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3954
3955	/* starting offsets starts */
3956	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3957		adev->gfx.rlc.starting_offsets_start);
3958	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3959		WREG32(mmRLC_GPM_SCRATCH_DATA,
3960				indirect_start_offsets[i]);
3961
3962	/* unique indices */
3963	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3964	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3965	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3966		if (unique_indices[i] != 0) {
3967			amdgpu_mm_wreg(adev, temp + i,
3968					unique_indices[i] & 0x3FFFF, false);
3969			amdgpu_mm_wreg(adev, data + i,
3970					unique_indices[i] >> 20, false);
3971		}
3972	}
3973	kfree(register_list_format);
3974
3975	return 0;
3976}
3977
3978static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3979{
3980	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3981}
3982
3983static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3984{
3985	uint32_t data;
3986
3987	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3988
3989	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3990	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3991	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3992	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3993	WREG32(mmRLC_PG_DELAY, data);
3994
3995	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3996	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3997
3998}
3999
4000static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4001						bool enable)
4002{
4003	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4004}
4005
4006static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4007						  bool enable)
4008{
4009	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4010}
4011
4012static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4013{
4014	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4015}
4016
4017static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4018{
4019	if ((adev->asic_type == CHIP_CARRIZO) ||
4020	    (adev->asic_type == CHIP_STONEY)) {
4021		gfx_v8_0_init_csb(adev);
4022		gfx_v8_0_init_save_restore_list(adev);
4023		gfx_v8_0_enable_save_restore_machine(adev);
4024		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4025		gfx_v8_0_init_power_gating(adev);
4026		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4027		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4028			cz_enable_sck_slow_down_on_power_up(adev, true);
4029			cz_enable_sck_slow_down_on_power_down(adev, true);
4030		} else {
4031			cz_enable_sck_slow_down_on_power_up(adev, false);
4032			cz_enable_sck_slow_down_on_power_down(adev, false);
4033		}
4034		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4035			cz_enable_cp_power_gating(adev, true);
4036		else
4037			cz_enable_cp_power_gating(adev, false);
4038	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4039		   (adev->asic_type == CHIP_POLARIS12)) {
4040		gfx_v8_0_init_csb(adev);
4041		gfx_v8_0_init_save_restore_list(adev);
4042		gfx_v8_0_enable_save_restore_machine(adev);
4043		gfx_v8_0_init_power_gating(adev);
4044	}
4045
4046}
4047
4048static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4049{
4050	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4051
4052	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
 
4053	gfx_v8_0_wait_for_rlc_serdes(adev);
4054}
4055
4056static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4057{
4058	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4059	udelay(50);
4060
4061	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
 
 
 
 
4062	udelay(50);
4063}
4064
4065static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4066{
4067	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
 
 
 
4068
4069	/* carrizo do enable cp interrupt after cp inited */
4070	if (!(adev->flags & AMD_IS_APU))
4071		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4072
4073	udelay(50);
4074}
4075
4076static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4077{
4078	const struct rlc_firmware_header_v2_0 *hdr;
4079	const __le32 *fw_data;
4080	unsigned i, fw_size;
4081
4082	if (!adev->gfx.rlc_fw)
4083		return -EINVAL;
4084
4085	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4086	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4087
4088	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4089			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4090	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4091
4092	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4093	for (i = 0; i < fw_size; i++)
4094		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4095	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4096
4097	return 0;
4098}
4099
4100static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4101{
4102	int r;
4103	u32 tmp;
4104
4105	gfx_v8_0_rlc_stop(adev);
4106
4107	/* disable CG */
4108	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4109	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4110		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4111	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4112	if (adev->asic_type == CHIP_POLARIS11 ||
4113	    adev->asic_type == CHIP_POLARIS10 ||
4114	    adev->asic_type == CHIP_POLARIS12) {
4115		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4116		tmp &= ~0x3;
4117		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4118	}
4119
4120	/* disable PG */
4121	WREG32(mmRLC_PG_CNTL, 0);
4122
4123	gfx_v8_0_rlc_reset(adev);
4124	gfx_v8_0_init_pg(adev);
4125
4126	if (!adev->pp_enabled) {
4127		if (!adev->firmware.smu_load) {
4128			/* legacy rlc firmware loading */
4129			r = gfx_v8_0_rlc_load_microcode(adev);
4130			if (r)
4131				return r;
4132		} else {
4133			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4134							AMDGPU_UCODE_ID_RLC_G);
4135			if (r)
4136				return -EINVAL;
4137		}
4138	}
4139
4140	gfx_v8_0_rlc_start(adev);
4141
4142	return 0;
4143}
4144
4145static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4146{
4147	int i;
4148	u32 tmp = RREG32(mmCP_ME_CNTL);
4149
4150	if (enable) {
4151		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4152		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4153		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4154	} else {
4155		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4156		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4157		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4158		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4159			adev->gfx.gfx_ring[i].ready = false;
4160	}
4161	WREG32(mmCP_ME_CNTL, tmp);
4162	udelay(50);
4163}
4164
4165static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4166{
4167	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4168	const struct gfx_firmware_header_v1_0 *ce_hdr;
4169	const struct gfx_firmware_header_v1_0 *me_hdr;
4170	const __le32 *fw_data;
4171	unsigned i, fw_size;
4172
4173	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4174		return -EINVAL;
4175
4176	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4177		adev->gfx.pfp_fw->data;
4178	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4179		adev->gfx.ce_fw->data;
4180	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4181		adev->gfx.me_fw->data;
4182
4183	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4184	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4185	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4186
4187	gfx_v8_0_cp_gfx_enable(adev, false);
4188
4189	/* PFP */
4190	fw_data = (const __le32 *)
4191		(adev->gfx.pfp_fw->data +
4192		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4193	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4194	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4195	for (i = 0; i < fw_size; i++)
4196		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4197	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4198
4199	/* CE */
4200	fw_data = (const __le32 *)
4201		(adev->gfx.ce_fw->data +
4202		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4203	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4204	WREG32(mmCP_CE_UCODE_ADDR, 0);
4205	for (i = 0; i < fw_size; i++)
4206		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4207	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4208
4209	/* ME */
4210	fw_data = (const __le32 *)
4211		(adev->gfx.me_fw->data +
4212		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4213	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4214	WREG32(mmCP_ME_RAM_WADDR, 0);
4215	for (i = 0; i < fw_size; i++)
4216		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4217	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4218
4219	return 0;
4220}
4221
4222static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4223{
4224	u32 count = 0;
4225	const struct cs_section_def *sect = NULL;
4226	const struct cs_extent_def *ext = NULL;
4227
4228	/* begin clear state */
4229	count += 2;
4230	/* context control state */
4231	count += 3;
4232
4233	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4234		for (ext = sect->section; ext->extent != NULL; ++ext) {
4235			if (sect->id == SECT_CONTEXT)
4236				count += 2 + ext->reg_count;
4237			else
4238				return 0;
4239		}
4240	}
4241	/* pa_sc_raster_config/pa_sc_raster_config1 */
4242	count += 4;
4243	/* end clear state */
4244	count += 2;
4245	/* clear state */
4246	count += 2;
4247
4248	return count;
4249}
4250
4251static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4252{
4253	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4254	const struct cs_section_def *sect = NULL;
4255	const struct cs_extent_def *ext = NULL;
4256	int r, i;
4257
4258	/* init the CP */
4259	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4260	WREG32(mmCP_ENDIAN_SWAP, 0);
4261	WREG32(mmCP_DEVICE_ID, 1);
4262
4263	gfx_v8_0_cp_gfx_enable(adev, true);
4264
4265	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4266	if (r) {
4267		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4268		return r;
4269	}
4270
4271	/* clear state buffer */
4272	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4273	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4274
4275	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4276	amdgpu_ring_write(ring, 0x80000000);
4277	amdgpu_ring_write(ring, 0x80000000);
4278
4279	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4280		for (ext = sect->section; ext->extent != NULL; ++ext) {
4281			if (sect->id == SECT_CONTEXT) {
4282				amdgpu_ring_write(ring,
4283				       PACKET3(PACKET3_SET_CONTEXT_REG,
4284					       ext->reg_count));
4285				amdgpu_ring_write(ring,
4286				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4287				for (i = 0; i < ext->reg_count; i++)
4288					amdgpu_ring_write(ring, ext->extent[i]);
4289			}
4290		}
4291	}
4292
4293	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4294	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4295	switch (adev->asic_type) {
4296	case CHIP_TONGA:
4297	case CHIP_POLARIS10:
4298		amdgpu_ring_write(ring, 0x16000012);
4299		amdgpu_ring_write(ring, 0x0000002A);
4300		break;
4301	case CHIP_POLARIS11:
4302	case CHIP_POLARIS12:
4303		amdgpu_ring_write(ring, 0x16000012);
4304		amdgpu_ring_write(ring, 0x00000000);
4305		break;
4306	case CHIP_FIJI:
4307		amdgpu_ring_write(ring, 0x3a00161a);
4308		amdgpu_ring_write(ring, 0x0000002e);
4309		break;
 
4310	case CHIP_CARRIZO:
4311		amdgpu_ring_write(ring, 0x00000002);
4312		amdgpu_ring_write(ring, 0x00000000);
4313		break;
4314	case CHIP_TOPAZ:
4315		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4316				0x00000000 : 0x00000002);
4317		amdgpu_ring_write(ring, 0x00000000);
4318		break;
4319	case CHIP_STONEY:
4320		amdgpu_ring_write(ring, 0x00000000);
4321		amdgpu_ring_write(ring, 0x00000000);
4322		break;
4323	default:
4324		BUG();
4325	}
4326
4327	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4328	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4329
4330	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4331	amdgpu_ring_write(ring, 0);
4332
4333	/* init the CE partitions */
4334	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336	amdgpu_ring_write(ring, 0x8000);
4337	amdgpu_ring_write(ring, 0x8000);
4338
4339	amdgpu_ring_commit(ring);
4340
4341	return 0;
4342}
4343
4344static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4345{
4346	struct amdgpu_ring *ring;
4347	u32 tmp;
4348	u32 rb_bufsz;
4349	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4350	int r;
4351
4352	/* Set the write pointer delay */
4353	WREG32(mmCP_RB_WPTR_DELAY, 0);
4354
4355	/* set the RB to use vmid 0 */
4356	WREG32(mmCP_RB_VMID, 0);
4357
4358	/* Set ring buffer size */
4359	ring = &adev->gfx.gfx_ring[0];
4360	rb_bufsz = order_base_2(ring->ring_size / 8);
4361	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4362	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4363	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4364	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4365#ifdef __BIG_ENDIAN
4366	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4367#endif
4368	WREG32(mmCP_RB0_CNTL, tmp);
4369
4370	/* Initialize the ring buffer's read and write pointers */
4371	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4372	ring->wptr = 0;
4373	WREG32(mmCP_RB0_WPTR, ring->wptr);
4374
4375	/* set the wb address wether it's enabled or not */
4376	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4377	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4378	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4379
4380	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4381	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4382	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4383	mdelay(1);
4384	WREG32(mmCP_RB0_CNTL, tmp);
4385
4386	rb_addr = ring->gpu_addr >> 8;
4387	WREG32(mmCP_RB0_BASE, rb_addr);
4388	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4389
4390	/* no gfx doorbells on iceland */
4391	if (adev->asic_type != CHIP_TOPAZ) {
4392		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4393		if (ring->use_doorbell) {
4394			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4395					    DOORBELL_OFFSET, ring->doorbell_index);
4396			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4397					    DOORBELL_HIT, 0);
4398			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4399					    DOORBELL_EN, 1);
4400		} else {
4401			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4402					    DOORBELL_EN, 0);
4403		}
4404		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4405
4406		if (adev->asic_type == CHIP_TONGA) {
4407			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4408					    DOORBELL_RANGE_LOWER,
4409					    AMDGPU_DOORBELL_GFX_RING0);
4410			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4411
4412			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4413			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4414		}
4415
4416	}
4417
4418	/* start the ring */
4419	gfx_v8_0_cp_gfx_start(adev);
4420	ring->ready = true;
4421	r = amdgpu_ring_test_ring(ring);
4422	if (r)
4423		ring->ready = false;
 
 
4424
4425	return r;
4426}
4427
4428static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4429{
4430	int i;
4431
4432	if (enable) {
4433		WREG32(mmCP_MEC_CNTL, 0);
4434	} else {
4435		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4436		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4437			adev->gfx.compute_ring[i].ready = false;
4438	}
4439	udelay(50);
4440}
4441
4442static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4443{
4444	const struct gfx_firmware_header_v1_0 *mec_hdr;
4445	const __le32 *fw_data;
4446	unsigned i, fw_size;
4447
4448	if (!adev->gfx.mec_fw)
4449		return -EINVAL;
4450
4451	gfx_v8_0_cp_compute_enable(adev, false);
4452
4453	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4454	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4455
4456	fw_data = (const __le32 *)
4457		(adev->gfx.mec_fw->data +
4458		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4459	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4460
4461	/* MEC1 */
4462	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4463	for (i = 0; i < fw_size; i++)
4464		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4465	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4466
4467	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4468	if (adev->gfx.mec2_fw) {
4469		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4470
4471		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4472		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4473
4474		fw_data = (const __le32 *)
4475			(adev->gfx.mec2_fw->data +
4476			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4477		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4478
4479		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4480		for (i = 0; i < fw_size; i++)
4481			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4482		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4483	}
4484
4485	return 0;
4486}
4487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4488static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4489{
4490	int i, r;
4491
4492	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4493		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4494
4495		if (ring->mqd_obj) {
4496			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4497			if (unlikely(r != 0))
4498				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4499
4500			amdgpu_bo_unpin(ring->mqd_obj);
4501			amdgpu_bo_unreserve(ring->mqd_obj);
4502
4503			amdgpu_bo_unref(&ring->mqd_obj);
4504			ring->mqd_obj = NULL;
4505		}
4506	}
4507}
4508
4509static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4510{
4511	int r, i, j;
4512	u32 tmp;
4513	bool use_doorbell = true;
4514	u64 hqd_gpu_addr;
4515	u64 mqd_gpu_addr;
4516	u64 eop_gpu_addr;
4517	u64 wb_gpu_addr;
4518	u32 *buf;
4519	struct vi_mqd *mqd;
4520
4521	/* init the queues.  */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4522	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4523		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4524
4525		if (ring->mqd_obj == NULL) {
4526			r = amdgpu_bo_create(adev,
4527					     sizeof(struct vi_mqd),
4528					     PAGE_SIZE, true,
4529					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4530					     NULL, &ring->mqd_obj);
4531			if (r) {
4532				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4533				return r;
4534			}
4535		}
4536
4537		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4538		if (unlikely(r != 0)) {
4539			gfx_v8_0_cp_compute_fini(adev);
4540			return r;
4541		}
4542		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4543				  &mqd_gpu_addr);
4544		if (r) {
4545			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4546			gfx_v8_0_cp_compute_fini(adev);
4547			return r;
4548		}
4549		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4550		if (r) {
4551			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4552			gfx_v8_0_cp_compute_fini(adev);
4553			return r;
4554		}
4555
4556		/* init the mqd struct */
4557		memset(buf, 0, sizeof(struct vi_mqd));
4558
4559		mqd = (struct vi_mqd *)buf;
4560		mqd->header = 0xC0310800;
4561		mqd->compute_pipelinestat_enable = 0x00000001;
4562		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4563		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4564		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4565		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4566		mqd->compute_misc_reserved = 0x00000003;
4567
4568		mutex_lock(&adev->srbm_mutex);
4569		vi_srbm_select(adev, ring->me,
4570			       ring->pipe,
4571			       ring->queue, 0);
4572
4573		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4574		eop_gpu_addr >>= 8;
4575
4576		/* write the EOP addr */
4577		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4578		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4579
4580		/* set the VMID assigned */
4581		WREG32(mmCP_HQD_VMID, 0);
4582
4583		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4584		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4585		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4586				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4587		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4588
4589		/* disable wptr polling */
4590		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4591		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4592		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4593
4594		mqd->cp_hqd_eop_base_addr_lo =
4595			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4596		mqd->cp_hqd_eop_base_addr_hi =
4597			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4598
4599		/* enable doorbell? */
4600		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4601		if (use_doorbell) {
4602			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4603		} else {
4604			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4605		}
4606		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4607		mqd->cp_hqd_pq_doorbell_control = tmp;
4608
4609		/* disable the queue if it's active */
4610		mqd->cp_hqd_dequeue_request = 0;
4611		mqd->cp_hqd_pq_rptr = 0;
4612		mqd->cp_hqd_pq_wptr= 0;
4613		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4614			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4615			for (j = 0; j < adev->usec_timeout; j++) {
4616				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4617					break;
4618				udelay(1);
4619			}
4620			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4621			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4622			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4623		}
4624
4625		/* set the pointer to the MQD */
4626		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4627		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4628		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4629		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4630
4631		/* set MQD vmid to 0 */
4632		tmp = RREG32(mmCP_MQD_CONTROL);
4633		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4634		WREG32(mmCP_MQD_CONTROL, tmp);
4635		mqd->cp_mqd_control = tmp;
4636
4637		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4638		hqd_gpu_addr = ring->gpu_addr >> 8;
4639		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4640		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4641		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4642		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4643
4644		/* set up the HQD, this is similar to CP_RB0_CNTL */
4645		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4646		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4647				    (order_base_2(ring->ring_size / 4) - 1));
4648		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4649			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4650#ifdef __BIG_ENDIAN
4651		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4652#endif
4653		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4654		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4655		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4656		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4657		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4658		mqd->cp_hqd_pq_control = tmp;
4659
4660		/* set the wb address wether it's enabled or not */
4661		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4662		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4663		mqd->cp_hqd_pq_rptr_report_addr_hi =
4664			upper_32_bits(wb_gpu_addr) & 0xffff;
4665		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4666		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4667		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4668		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4669
4670		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4671		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4672		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4673		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4674		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4675		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4676		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4677
4678		/* enable the doorbell if requested */
4679		if (use_doorbell) {
4680			if ((adev->asic_type == CHIP_CARRIZO) ||
4681			    (adev->asic_type == CHIP_FIJI) ||
4682			    (adev->asic_type == CHIP_STONEY) ||
4683			    (adev->asic_type == CHIP_POLARIS11) ||
4684			    (adev->asic_type == CHIP_POLARIS10) ||
4685			    (adev->asic_type == CHIP_POLARIS12)) {
4686				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4687				       AMDGPU_DOORBELL_KIQ << 2);
4688				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4689				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4690			}
4691			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4692			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4693					    DOORBELL_OFFSET, ring->doorbell_index);
4694			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4695			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4696			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4697			mqd->cp_hqd_pq_doorbell_control = tmp;
4698
4699		} else {
4700			mqd->cp_hqd_pq_doorbell_control = 0;
4701		}
4702		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4703		       mqd->cp_hqd_pq_doorbell_control);
4704
4705		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4706		ring->wptr = 0;
4707		mqd->cp_hqd_pq_wptr = ring->wptr;
4708		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4709		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4710
4711		/* set the vmid for the queue */
4712		mqd->cp_hqd_vmid = 0;
4713		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4714
4715		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4716		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4717		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4718		mqd->cp_hqd_persistent_state = tmp;
4719		if (adev->asic_type == CHIP_STONEY ||
4720			adev->asic_type == CHIP_POLARIS11 ||
4721			adev->asic_type == CHIP_POLARIS10 ||
4722			adev->asic_type == CHIP_POLARIS12) {
4723			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4724			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4725			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4726		}
4727
4728		/* activate the queue */
4729		mqd->cp_hqd_active = 1;
4730		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4731
4732		vi_srbm_select(adev, 0, 0, 0, 0);
4733		mutex_unlock(&adev->srbm_mutex);
4734
4735		amdgpu_bo_kunmap(ring->mqd_obj);
4736		amdgpu_bo_unreserve(ring->mqd_obj);
4737	}
4738
4739	if (use_doorbell) {
4740		tmp = RREG32(mmCP_PQ_STATUS);
4741		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4742		WREG32(mmCP_PQ_STATUS, tmp);
4743	}
4744
4745	gfx_v8_0_cp_compute_enable(adev, true);
4746
4747	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4748		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4749
4750		ring->ready = true;
4751		r = amdgpu_ring_test_ring(ring);
4752		if (r)
4753			ring->ready = false;
4754	}
4755
4756	return 0;
4757}
4758
4759static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4760{
4761	int r;
4762
4763	if (!(adev->flags & AMD_IS_APU))
4764		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4765
4766	if (!adev->pp_enabled) {
4767		if (!adev->firmware.smu_load) {
4768			/* legacy firmware loading */
4769			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4770			if (r)
4771				return r;
4772
4773			r = gfx_v8_0_cp_compute_load_microcode(adev);
4774			if (r)
4775				return r;
4776		} else {
4777			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4778							AMDGPU_UCODE_ID_CP_CE);
4779			if (r)
4780				return -EINVAL;
4781
4782			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4783							AMDGPU_UCODE_ID_CP_PFP);
4784			if (r)
4785				return -EINVAL;
4786
4787			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4788							AMDGPU_UCODE_ID_CP_ME);
4789			if (r)
4790				return -EINVAL;
4791
4792			if (adev->asic_type == CHIP_TOPAZ) {
4793				r = gfx_v8_0_cp_compute_load_microcode(adev);
4794				if (r)
4795					return r;
4796			} else {
4797				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4798										 AMDGPU_UCODE_ID_CP_MEC1);
4799				if (r)
4800					return -EINVAL;
4801			}
4802		}
4803	}
4804
4805	r = gfx_v8_0_cp_gfx_resume(adev);
4806	if (r)
4807		return r;
4808
4809	r = gfx_v8_0_cp_compute_resume(adev);
4810	if (r)
4811		return r;
4812
4813	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4814
4815	return 0;
4816}
4817
4818static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4819{
4820	gfx_v8_0_cp_gfx_enable(adev, enable);
4821	gfx_v8_0_cp_compute_enable(adev, enable);
4822}
4823
/*
 * gfx_v8_0_hw_init - hardware init callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Programs the golden registers, runs the GPU init, then resumes the RLC
 * followed by the CP.  Returns 0 on success or the error code of the first
 * resume step that fails.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	return gfx_v8_0_cp_resume(adev);
}
4840
4841static int gfx_v8_0_hw_fini(void *handle)
4842{
4843	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844
4845	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4846	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4847	if (amdgpu_sriov_vf(adev)) {
4848		pr_debug("For SRIOV client, shouldn't do anything.\n");
4849		return 0;
4850	}
4851	gfx_v8_0_cp_enable(adev, false);
4852	gfx_v8_0_rlc_stop(adev);
4853	gfx_v8_0_cp_compute_fini(adev);
4854
4855	amdgpu_set_powergating_state(adev,
4856			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4857
4858	return 0;
4859}
4860
/* Suspend callback: identical to the hardware teardown path. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4867
/* Resume callback: identical to the hardware init path. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4874
4875static bool gfx_v8_0_is_idle(void *handle)
4876{
4877	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4878
4879	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4880		return false;
4881	else
4882		return true;
4883}
4884
4885static int gfx_v8_0_wait_for_idle(void *handle)
4886{
4887	unsigned i;
 
4888	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4889
4890	for (i = 0; i < adev->usec_timeout; i++) {
4891		if (gfx_v8_0_is_idle(handle))
4892			return 0;
4893
 
 
4894		udelay(1);
4895	}
4896	return -ETIMEDOUT;
4897}
4898
4899static bool gfx_v8_0_check_soft_reset(void *handle)
4900{
 
4901	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4902	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4903	u32 tmp;
 
4904
4905	/* GRBM_STATUS */
4906	tmp = RREG32(mmGRBM_STATUS);
4907	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4908		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4909		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4910		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4911		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4912		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4913		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4914		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4915						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4916		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4917						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
 
 
 
 
 
4918		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4919						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4920	}
4921
4922	/* GRBM_STATUS2 */
4923	tmp = RREG32(mmGRBM_STATUS2);
4924	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4925		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4926						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4927
4928	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4929	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4930	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4931		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4932						SOFT_RESET_CPF, 1);
4933		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4934						SOFT_RESET_CPC, 1);
4935		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4936						SOFT_RESET_CPG, 1);
4937		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4938						SOFT_RESET_GRBM, 1);
4939	}
4940
4941	/* SRBM_STATUS */
4942	tmp = RREG32(mmSRBM_STATUS);
4943	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4944		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4945						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4946	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4947		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4948						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4949
4950	if (grbm_soft_reset || srbm_soft_reset) {
4951		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4952		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4953		return true;
4954	} else {
4955		adev->gfx.grbm_soft_reset = 0;
4956		adev->gfx.srbm_soft_reset = 0;
4957		return false;
4958	}
4959}
4960
4961static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
4962				  struct amdgpu_ring *ring)
4963{
4964	int i;
4965
4966	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4967	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4968		u32 tmp;
4969		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
4970		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
4971				    DEQUEUE_REQ, 2);
4972		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
4973		for (i = 0; i < adev->usec_timeout; i++) {
4974			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4975				break;
4976			udelay(1);
4977		}
4978	}
4979}
4980
4981static int gfx_v8_0_pre_soft_reset(void *handle)
4982{
4983	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4984	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4985
4986	if ((!adev->gfx.grbm_soft_reset) &&
4987	    (!adev->gfx.srbm_soft_reset))
4988		return 0;
4989
4990	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4991	srbm_soft_reset = adev->gfx.srbm_soft_reset;
4992
4993	/* stop the rlc */
4994	gfx_v8_0_rlc_stop(adev);
4995
4996	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4997	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4998		/* Disable GFX parsing/prefetching */
4999		gfx_v8_0_cp_gfx_enable(adev, false);
5000
5001	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5002	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5003	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5004	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5005		int i;
5006
5007		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5008			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5009
5010			gfx_v8_0_inactive_hqd(adev, ring);
5011		}
5012		/* Disable MEC parsing/prefetching */
5013		gfx_v8_0_cp_compute_enable(adev, false);
5014	}
5015
5016       return 0;
5017}
 
 
 
 
 
5018
5019static int gfx_v8_0_soft_reset(void *handle)
5020{
5021	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5022	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5023	u32 tmp;
5024
5025	if ((!adev->gfx.grbm_soft_reset) &&
5026	    (!adev->gfx.srbm_soft_reset))
5027		return 0;
 
 
 
5028
5029	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5030	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5031
5032	if (grbm_soft_reset || srbm_soft_reset) {
5033		tmp = RREG32(mmGMCON_DEBUG);
5034		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5035		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5036		WREG32(mmGMCON_DEBUG, tmp);
5037		udelay(50);
5038	}
5039
5040	if (grbm_soft_reset) {
5041		tmp = RREG32(mmGRBM_SOFT_RESET);
5042		tmp |= grbm_soft_reset;
5043		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5044		WREG32(mmGRBM_SOFT_RESET, tmp);
5045		tmp = RREG32(mmGRBM_SOFT_RESET);
5046
5047		udelay(50);
5048
5049		tmp &= ~grbm_soft_reset;
5050		WREG32(mmGRBM_SOFT_RESET, tmp);
5051		tmp = RREG32(mmGRBM_SOFT_RESET);
5052	}
5053
5054	if (srbm_soft_reset) {
5055		tmp = RREG32(mmSRBM_SOFT_RESET);
5056		tmp |= srbm_soft_reset;
5057		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5058		WREG32(mmSRBM_SOFT_RESET, tmp);
5059		tmp = RREG32(mmSRBM_SOFT_RESET);
 
 
5060
 
5061		udelay(50);
5062
5063		tmp &= ~srbm_soft_reset;
5064		WREG32(mmSRBM_SOFT_RESET, tmp);
5065		tmp = RREG32(mmSRBM_SOFT_RESET);
5066	}
5067
5068	if (grbm_soft_reset || srbm_soft_reset) {
5069		tmp = RREG32(mmGMCON_DEBUG);
5070		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5071		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5072		WREG32(mmGMCON_DEBUG, tmp);
5073	}
5074
5075	/* Wait a little for things to settle down */
5076	udelay(50);
5077
5078	return 0;
5079}
5080
5081static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5082			      struct amdgpu_ring *ring)
5083{
5084	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5085	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5086	WREG32(mmCP_HQD_PQ_RPTR, 0);
5087	WREG32(mmCP_HQD_PQ_WPTR, 0);
5088	vi_srbm_select(adev, 0, 0, 0, 0);
5089}
5090
5091static int gfx_v8_0_post_soft_reset(void *handle)
5092{
5093	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5094	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5095
5096	if ((!adev->gfx.grbm_soft_reset) &&
5097	    (!adev->gfx.srbm_soft_reset))
5098		return 0;
5099
5100	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5101	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5102
5103	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5104	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5105		gfx_v8_0_cp_gfx_resume(adev);
5106
5107	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5108	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5109	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5110	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5111		int i;
5112
5113		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5114			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5115
5116			gfx_v8_0_init_hqd(adev, ring);
5117		}
5118		gfx_v8_0_cp_compute_resume(adev);
5119	}
5120	gfx_v8_0_rlc_start(adev);
5121
5122	return 0;
5123}
5124
5125/**
5126 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5127 *
5128 * @adev: amdgpu_device pointer
5129 *
5130 * Fetches a GPU clock counter snapshot.
5131 * Returns the 64 bit clock counter snapshot.
5132 */
5133static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5134{
5135	uint64_t clock;
5136
5137	mutex_lock(&adev->gfx.gpu_clock_mutex);
5138	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5139	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5140		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5141	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5142	return clock;
5143}
5144
5145static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5146					  uint32_t vmid,
5147					  uint32_t gds_base, uint32_t gds_size,
5148					  uint32_t gws_base, uint32_t gws_size,
5149					  uint32_t oa_base, uint32_t oa_size)
5150{
5151	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5152	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5153
5154	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5155	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5156
5157	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5158	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5159
5160	/* GDS Base */
5161	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5162	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5163				WRITE_DATA_DST_SEL(0)));
5164	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5165	amdgpu_ring_write(ring, 0);
5166	amdgpu_ring_write(ring, gds_base);
5167
5168	/* GDS Size */
5169	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5170	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5171				WRITE_DATA_DST_SEL(0)));
5172	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5173	amdgpu_ring_write(ring, 0);
5174	amdgpu_ring_write(ring, gds_size);
5175
5176	/* GWS */
5177	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5178	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5179				WRITE_DATA_DST_SEL(0)));
5180	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5181	amdgpu_ring_write(ring, 0);
5182	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5183
5184	/* OA */
5185	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5186	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5187				WRITE_DATA_DST_SEL(0)));
5188	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5189	amdgpu_ring_write(ring, 0);
5190	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5191}
5192
5193static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5194{
5195	WREG32(mmSQ_IND_INDEX,
5196		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5197		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5198		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5199		(SQ_IND_INDEX__FORCE_READ_MASK));
5200	return RREG32(mmSQ_IND_DATA);
5201}
5202
5203static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5204			   uint32_t wave, uint32_t thread,
5205			   uint32_t regno, uint32_t num, uint32_t *out)
5206{
5207	WREG32(mmSQ_IND_INDEX,
5208		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5209		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5210		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5211		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5212		(SQ_IND_INDEX__FORCE_READ_MASK) |
5213		(SQ_IND_INDEX__AUTO_INCR_MASK));
5214	while (num--)
5215		*(out++) = RREG32(mmSQ_IND_DATA);
5216}
5217
5218static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5219{
5220	/* type 0 wave data */
5221	dst[(*no_fields)++] = 0;
5222	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5223	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5224	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5225	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5226	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5227	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5228	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5229	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5230	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5231	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5232	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5233	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5234	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5235	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5236	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5237	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5238	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5239	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5240}
5241
5242static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5243				     uint32_t wave, uint32_t start,
5244				     uint32_t size, uint32_t *dst)
5245{
5246	wave_read_regs(
5247		adev, simd, wave, 0,
5248		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5249}
5250
5251
5252static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5253	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5254	.select_se_sh = &gfx_v8_0_select_se_sh,
5255	.read_wave_data = &gfx_v8_0_read_wave_data,
5256	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5257};
5258
5259static int gfx_v8_0_early_init(void *handle)
5260{
5261	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5262
5263	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5264	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5265	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5266	gfx_v8_0_set_ring_funcs(adev);
5267	gfx_v8_0_set_irq_funcs(adev);
5268	gfx_v8_0_set_gds_init(adev);
5269	gfx_v8_0_set_rlc_funcs(adev);
5270
5271	return 0;
5272}
5273
5274static int gfx_v8_0_late_init(void *handle)
5275{
5276	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5277	int r;
5278
5279	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5280	if (r)
5281		return r;
5282
5283	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5284	if (r)
5285		return r;
5286
5287	/* requires IBs so do in late init after IB pool is initialized */
5288	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5289	if (r)
5290		return r;
5291
5292	amdgpu_set_powergating_state(adev,
5293			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5294
5295	return 0;
5296}
5297
5298static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5299						       bool enable)
5300{
5301	if ((adev->asic_type == CHIP_POLARIS11) ||
5302	    (adev->asic_type == CHIP_POLARIS12))
5303		/* Send msg to SMU via Powerplay */
5304		amdgpu_set_powergating_state(adev,
5305					     AMD_IP_BLOCK_TYPE_SMC,
5306					     enable ?
5307					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5308
5309	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5310}
5311
5312static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5313							bool enable)
5314{
5315	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5316}
5317
5318static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5319		bool enable)
5320{
5321	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5322}
5323
5324static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5325					  bool enable)
5326{
5327	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5328}
5329
5330static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5331						bool enable)
5332{
5333	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5334
5335	/* Read any GFX register to wake up GFX. */
5336	if (!enable)
5337		RREG32(mmDB_RENDER_CONTROL);
5338}
5339
5340static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5341					  bool enable)
5342{
5343	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5344		cz_enable_gfx_cg_power_gating(adev, true);
5345		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5346			cz_enable_gfx_pipeline_power_gating(adev, true);
5347	} else {
5348		cz_enable_gfx_cg_power_gating(adev, false);
5349		cz_enable_gfx_pipeline_power_gating(adev, false);
5350	}
5351}
5352
5353static int gfx_v8_0_set_powergating_state(void *handle,
5354					  enum amd_powergating_state state)
5355{
5356	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5357	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5358
5359	switch (adev->asic_type) {
5360	case CHIP_CARRIZO:
5361	case CHIP_STONEY:
5362
5363		cz_update_gfx_cg_power_gating(adev, enable);
5364
5365		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5366			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5367		else
5368			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5369
5370		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5371			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5372		else
5373			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5374		break;
5375	case CHIP_POLARIS11:
5376	case CHIP_POLARIS12:
5377		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5378			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5379		else
5380			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5381
5382		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5383			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5384		else
5385			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5386
5387		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5388			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5389		else
5390			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5391		break;
5392	default:
5393		break;
5394	}
5395
5396	return 0;
5397}
5398
5399static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5400				     uint32_t reg_addr, uint32_t cmd)
5401{
5402	uint32_t data;
5403
5404	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5405
5406	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5407	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5408
5409	data = RREG32(mmRLC_SERDES_WR_CTRL);
5410	if (adev->asic_type == CHIP_STONEY)
5411		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5412			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5413			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5414			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5415			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5416			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5417			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5418			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5419			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5420	else
5421		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5422			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5423			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5424			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5425			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5426			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5427			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5428			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5429			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5430			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5431			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5432	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5433		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5434		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5435		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5436
5437	WREG32(mmRLC_SERDES_WR_CTRL, data);
5438}
5439
/* MESSAGE field values written to RLC_GPR_REG2 by the cz safe-mode helpers */
#define MSG_EXIT_RLC_SAFE_MODE      0
#define MSG_ENTER_RLC_SAFE_MODE     1

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE is bits 4:1 */
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5446
5447static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5448{
5449	u32 data = 0;
5450	unsigned i;
5451
5452	data = RREG32(mmRLC_CNTL);
5453	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5454		return;
5455
5456	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5457	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5458			       AMD_PG_SUPPORT_GFX_DMG))) {
5459		data |= RLC_GPR_REG2__REQ_MASK;
5460		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5461		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5462		WREG32(mmRLC_GPR_REG2, data);
5463
5464		for (i = 0; i < adev->usec_timeout; i++) {
5465			if ((RREG32(mmRLC_GPM_STAT) &
5466			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5467			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5468			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5469			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5470				break;
5471			udelay(1);
5472		}
5473
5474		for (i = 0; i < adev->usec_timeout; i++) {
5475			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5476				break;
5477			udelay(1);
5478		}
5479		adev->gfx.rlc.in_safe_mode = true;
5480	}
5481}
5482
5483static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5484{
5485	u32 data;
5486	unsigned i;
5487
5488	data = RREG32(mmRLC_CNTL);
5489	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5490		return;
5491
5492	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5493	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5494			       AMD_PG_SUPPORT_GFX_DMG))) {
5495		data |= RLC_GPR_REG2__REQ_MASK;
5496		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5497		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5498		WREG32(mmRLC_GPR_REG2, data);
5499		adev->gfx.rlc.in_safe_mode = false;
5500	}
5501
5502	for (i = 0; i < adev->usec_timeout; i++) {
5503		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5504			break;
5505		udelay(1);
5506	}
5507}
5508
5509static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5510{
5511	u32 data;
5512	unsigned i;
5513
5514	data = RREG32(mmRLC_CNTL);
5515	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5516		return;
5517
5518	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5519		data |= RLC_SAFE_MODE__CMD_MASK;
5520		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5521		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5522		WREG32(mmRLC_SAFE_MODE, data);
5523
5524		for (i = 0; i < adev->usec_timeout; i++) {
5525			if ((RREG32(mmRLC_GPM_STAT) &
5526			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5527			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5528			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5529			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5530				break;
5531			udelay(1);
5532		}
5533
5534		for (i = 0; i < adev->usec_timeout; i++) {
5535			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5536				break;
5537			udelay(1);
5538		}
5539		adev->gfx.rlc.in_safe_mode = true;
5540	}
5541}
5542
5543static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5544{
5545	u32 data = 0;
5546	unsigned i;
5547
5548	data = RREG32(mmRLC_CNTL);
5549	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5550		return;
5551
5552	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5553		if (adev->gfx.rlc.in_safe_mode) {
5554			data |= RLC_SAFE_MODE__CMD_MASK;
5555			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5556			WREG32(mmRLC_SAFE_MODE, data);
5557			adev->gfx.rlc.in_safe_mode = false;
5558		}
5559	}
5560
5561	for (i = 0; i < adev->usec_timeout; i++) {
5562		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5563			break;
5564		udelay(1);
5565	}
5566}
5567
5568static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5569{
5570	adev->gfx.rlc.in_safe_mode = true;
5571}
5572
5573static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5574{
5575	adev->gfx.rlc.in_safe_mode = false;
5576}
5577
5578static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5579	.enter_safe_mode = cz_enter_rlc_safe_mode,
5580	.exit_safe_mode = cz_exit_rlc_safe_mode
5581};
5582
5583static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5584	.enter_safe_mode = iceland_enter_rlc_safe_mode,
5585	.exit_safe_mode = iceland_exit_rlc_safe_mode
5586};
5587
5588static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5589	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5590	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5591};
5592
5593static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5594						      bool enable)
5595{
5596	uint32_t temp, data;
5597
5598	adev->gfx.rlc.funcs->enter_safe_mode(adev);
5599
5600	/* It is disabled by HW by default */
5601	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5602		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5603			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5604				/* 1 - RLC memory Light sleep */
5605				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
 
5606
5607			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5608				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5609		}
 
 
5610
5611		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5612		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5613		if (adev->flags & AMD_IS_APU)
5614			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5615				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5616				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5617		else
5618			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5619				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5620				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5621				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5622
5623		if (temp != data)
5624			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5625
5626		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5627		gfx_v8_0_wait_for_rlc_serdes(adev);
5628
5629		/* 5 - clear mgcg override */
5630		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5631
5632		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5633			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5634			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5635			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5636			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5637			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5638			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5639			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5640			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5641				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5642			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5643			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5644			if (temp != data)
5645				WREG32(mmCGTS_SM_CTRL_REG, data);
5646		}
5647		udelay(50);
5648
5649		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5650		gfx_v8_0_wait_for_rlc_serdes(adev);
5651	} else {
5652		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5653		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5654		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5655				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5656				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5657				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5658		if (temp != data)
5659			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5660
5661		/* 2 - disable MGLS in RLC */
5662		data = RREG32(mmRLC_MEM_SLP_CNTL);
5663		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5664			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5665			WREG32(mmRLC_MEM_SLP_CNTL, data);
5666		}
5667
5668		/* 3 - disable MGLS in CP */
5669		data = RREG32(mmCP_MEM_SLP_CNTL);
5670		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5671			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5672			WREG32(mmCP_MEM_SLP_CNTL, data);
5673		}
5674
5675		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5676		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5677		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5678				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5679		if (temp != data)
5680			WREG32(mmCGTS_SM_CTRL_REG, data);
5681
5682		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5683		gfx_v8_0_wait_for_rlc_serdes(adev);
5684
5685		/* 6 - set mgcg override */
5686		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5687
5688		udelay(50);
5689
5690		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5691		gfx_v8_0_wait_for_rlc_serdes(adev);
5692	}
5693
5694	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5695}
5696
5697static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5698						      bool enable)
5699{
5700	uint32_t temp, temp1, data, data1;
5701
5702	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5703
5704	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 
 
 
5705
5706	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5707		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5708		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5709		if (temp1 != data1)
5710			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5711
5712		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5713		gfx_v8_0_wait_for_rlc_serdes(adev);
5714
5715		/* 2 - clear cgcg override */
5716		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5717
5718		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5719		gfx_v8_0_wait_for_rlc_serdes(adev);
5720
5721		/* 3 - write cmd to set CGLS */
5722		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5723
5724		/* 4 - enable cgcg */
5725		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5726
5727		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5728			/* enable cgls*/
5729			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5730
5731			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5732			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5733
5734			if (temp1 != data1)
5735				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5736		} else {
5737			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5738		}
5739
5740		if (temp != data)
5741			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5742
5743		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5744		 * Cmp_busy/GFX_Idle interrupts
5745		 */
5746		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5747	} else {
5748		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5749		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5750
5751		/* TEST CGCG */
5752		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5753		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5754				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5755		if (temp1 != data1)
5756			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5757
5758		/* read gfx register to wake up cgcg */
5759		RREG32(mmCB_CGTT_SCLK_CTRL);
5760		RREG32(mmCB_CGTT_SCLK_CTRL);
5761		RREG32(mmCB_CGTT_SCLK_CTRL);
5762		RREG32(mmCB_CGTT_SCLK_CTRL);
5763
5764		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5765		gfx_v8_0_wait_for_rlc_serdes(adev);
5766
5767		/* write cmd to Set CGCG Overrride */
5768		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5769
5770		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5771		gfx_v8_0_wait_for_rlc_serdes(adev);
5772
5773		/* write cmd to Clear CGLS */
5774		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5775
5776		/* disable cgcg, cgls should be disabled too. */
5777		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5778			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5779		if (temp != data)
5780			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5781	}
5782
5783	gfx_v8_0_wait_for_rlc_serdes(adev);
5784
5785	adev->gfx.rlc.funcs->exit_safe_mode(adev);
5786}
5787static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5788					    bool enable)
5789{
5790	if (enable) {
5791		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5792		 * ===  MGCG + MGLS + TS(CG/LS) ===
5793		 */
5794		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5795		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5796	} else {
5797		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5798		 * ===  CGCG + CGLS ===
5799		 */
5800		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5801		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5802	}
5803	return 0;
5804}
5805
5806static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5807					  enum amd_clockgating_state state)
5808{
5809	uint32_t msg_id, pp_state = 0;
5810	uint32_t pp_support_state = 0;
5811	void *pp_handle = adev->powerplay.pp_handle;
5812
5813	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5814		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5815			pp_support_state = PP_STATE_SUPPORT_LS;
5816			pp_state = PP_STATE_LS;
5817		}
5818		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5819			pp_support_state |= PP_STATE_SUPPORT_CG;
5820			pp_state |= PP_STATE_CG;
5821		}
5822		if (state == AMD_CG_STATE_UNGATE)
5823			pp_state = 0;
5824
5825		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5826				PP_BLOCK_GFX_CG,
5827				pp_support_state,
5828				pp_state);
5829		amd_set_clockgating_by_smu(pp_handle, msg_id);
5830	}
5831
5832	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5833		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5834			pp_support_state = PP_STATE_SUPPORT_LS;
5835			pp_state = PP_STATE_LS;
5836		}
5837
5838		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5839			pp_support_state |= PP_STATE_SUPPORT_CG;
5840			pp_state |= PP_STATE_CG;
5841		}
5842
5843		if (state == AMD_CG_STATE_UNGATE)
5844			pp_state = 0;
5845
5846		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5847				PP_BLOCK_GFX_MG,
5848				pp_support_state,
5849				pp_state);
5850		amd_set_clockgating_by_smu(pp_handle, msg_id);
5851	}
5852
5853	return 0;
5854}
5855
5856static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5857					  enum amd_clockgating_state state)
5858{
5859
5860	uint32_t msg_id, pp_state = 0;
5861	uint32_t pp_support_state = 0;
5862	void *pp_handle = adev->powerplay.pp_handle;
5863
5864	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5865		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5866			pp_support_state = PP_STATE_SUPPORT_LS;
5867			pp_state = PP_STATE_LS;
5868		}
5869		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5870			pp_support_state |= PP_STATE_SUPPORT_CG;
5871			pp_state |= PP_STATE_CG;
5872		}
5873		if (state == AMD_CG_STATE_UNGATE)
5874			pp_state = 0;
5875
5876		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5877				PP_BLOCK_GFX_CG,
5878				pp_support_state,
5879				pp_state);
5880		amd_set_clockgating_by_smu(pp_handle, msg_id);
5881	}
5882
5883	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5884		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5885			pp_support_state = PP_STATE_SUPPORT_LS;
5886			pp_state = PP_STATE_LS;
5887		}
5888		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5889			pp_support_state |= PP_STATE_SUPPORT_CG;
5890			pp_state |= PP_STATE_CG;
5891		}
5892		if (state == AMD_CG_STATE_UNGATE)
5893			pp_state = 0;
5894
5895		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5896				PP_BLOCK_GFX_3D,
5897				pp_support_state,
5898				pp_state);
5899		amd_set_clockgating_by_smu(pp_handle, msg_id);
5900	}
5901
5902	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5903		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5904			pp_support_state = PP_STATE_SUPPORT_LS;
5905			pp_state = PP_STATE_LS;
5906		}
5907
5908		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5909			pp_support_state |= PP_STATE_SUPPORT_CG;
5910			pp_state |= PP_STATE_CG;
5911		}
5912
5913		if (state == AMD_CG_STATE_UNGATE)
5914			pp_state = 0;
5915
5916		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5917				PP_BLOCK_GFX_MG,
5918				pp_support_state,
5919				pp_state);
5920		amd_set_clockgating_by_smu(pp_handle, msg_id);
5921	}
5922
5923	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5924		pp_support_state = PP_STATE_SUPPORT_LS;
5925
5926		if (state == AMD_CG_STATE_UNGATE)
5927			pp_state = 0;
5928		else
5929			pp_state = PP_STATE_LS;
5930
5931		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5932				PP_BLOCK_GFX_RLC,
5933				pp_support_state,
5934				pp_state);
5935		amd_set_clockgating_by_smu(pp_handle, msg_id);
5936	}
5937
5938	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5939		pp_support_state = PP_STATE_SUPPORT_LS;
5940
5941		if (state == AMD_CG_STATE_UNGATE)
5942			pp_state = 0;
5943		else
5944			pp_state = PP_STATE_LS;
5945		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5946			PP_BLOCK_GFX_CP,
5947			pp_support_state,
5948			pp_state);
5949		amd_set_clockgating_by_smu(pp_handle, msg_id);
5950	}
5951
5952	return 0;
5953}
5954
5955static int gfx_v8_0_set_clockgating_state(void *handle,
5956					  enum amd_clockgating_state state)
5957{
5958	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5959
5960	switch (adev->asic_type) {
5961	case CHIP_FIJI:
5962	case CHIP_CARRIZO:
5963	case CHIP_STONEY:
5964		gfx_v8_0_update_gfx_clock_gating(adev,
5965						 state == AMD_CG_STATE_GATE ? true : false);
5966		break;
5967	case CHIP_TONGA:
5968		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5969		break;
5970	case CHIP_POLARIS10:
5971	case CHIP_POLARIS11:
5972		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5973		break;
5974	default:
5975		break;
5976	}
5977	return 0;
5978}
5979
5980static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5981{
5982	return ring->adev->wb.wb[ring->rptr_offs];
 
 
 
 
5983}
5984
5985static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5986{
5987	struct amdgpu_device *adev = ring->adev;
 
5988
5989	if (ring->use_doorbell)
5990		/* XXX check if swapping is necessary on BE */
5991		return ring->adev->wb.wb[ring->wptr_offs];
5992	else
5993		return RREG32(mmCP_RB0_WPTR);
 
 
5994}
5995
5996static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5997{
5998	struct amdgpu_device *adev = ring->adev;
5999
6000	if (ring->use_doorbell) {
6001		/* XXX check if swapping is necessary on BE */
6002		adev->wb.wb[ring->wptr_offs] = ring->wptr;
6003		WDOORBELL32(ring->doorbell_index, ring->wptr);
6004	} else {
6005		WREG32(mmCP_RB0_WPTR, ring->wptr);
6006		(void)RREG32(mmCP_RB0_WPTR);
6007	}
6008}
6009
6010static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6011{
6012	u32 ref_and_mask, reg_mem_engine;
6013
6014	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
6015		switch (ring->me) {
6016		case 1:
6017			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6018			break;
6019		case 2:
6020			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6021			break;
6022		default:
6023			return;
6024		}
6025		reg_mem_engine = 0;
6026	} else {
6027		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6028		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6029	}
6030
6031	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6032	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6033				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6034				 reg_mem_engine));
6035	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6036	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6037	amdgpu_ring_write(ring, ref_and_mask);
6038	amdgpu_ring_write(ring, ref_and_mask);
6039	amdgpu_ring_write(ring, 0x20); /* poll interval */
6040}
6041
6042static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6043{
6044	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6045	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6046		EVENT_INDEX(4));
6047
6048	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6049	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6050		EVENT_INDEX(0));
6051}
6052
6053
6054static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6055{
6056	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6057	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6058				 WRITE_DATA_DST_SEL(0) |
6059				 WR_CONFIRM));
6060	amdgpu_ring_write(ring, mmHDP_DEBUG0);
6061	amdgpu_ring_write(ring, 0);
6062	amdgpu_ring_write(ring, 1);
6063
6064}
6065
6066static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6067				      struct amdgpu_ib *ib,
6068				      unsigned vm_id, bool ctx_switch)
6069{
 
6070	u32 header, control = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6071
6072	if (ib->flags & AMDGPU_IB_FLAG_CE)
6073		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6074	else
6075		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6076
6077	control |= ib->length_dw | (vm_id << 24);
6078
6079	amdgpu_ring_write(ring, header);
6080	amdgpu_ring_write(ring,
6081#ifdef __BIG_ENDIAN
6082			  (2 << 0) |
6083#endif
6084			  (ib->gpu_addr & 0xFFFFFFFC));
6085	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6086	amdgpu_ring_write(ring, control);
6087}
6088
6089static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6090					  struct amdgpu_ib *ib,
6091					  unsigned vm_id, bool ctx_switch)
6092{
6093	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
 
 
 
 
 
 
 
 
 
 
 
6094
6095	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 
 
6096	amdgpu_ring_write(ring,
6097#ifdef __BIG_ENDIAN
6098				(2 << 0) |
6099#endif
6100				(ib->gpu_addr & 0xFFFFFFFC));
6101	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6102	amdgpu_ring_write(ring, control);
6103}
6104
6105static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6106					 u64 seq, unsigned flags)
6107{
6108	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6109	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6110
6111	/* EVENT_WRITE_EOP - flush caches, send int */
6112	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6113	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6114				 EOP_TC_ACTION_EN |
6115				 EOP_TC_WB_ACTION_EN |
6116				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6117				 EVENT_INDEX(5)));
6118	amdgpu_ring_write(ring, addr & 0xfffffffc);
6119	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6120			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6121	amdgpu_ring_write(ring, lower_32_bits(seq));
6122	amdgpu_ring_write(ring, upper_32_bits(seq));
6123
6124}
6125
6126static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6127{
6128	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6129	uint32_t seq = ring->fence_drv.sync_seq;
6130	uint64_t addr = ring->fence_drv.gpu_addr;
6131
6132	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6133	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6134				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6135				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6136	amdgpu_ring_write(ring, addr & 0xfffffffc);
6137	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6138	amdgpu_ring_write(ring, seq);
6139	amdgpu_ring_write(ring, 0xffffffff);
6140	amdgpu_ring_write(ring, 4); /* poll interval */
 
 
 
 
 
 
 
 
6141}
6142
6143static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6144					unsigned vm_id, uint64_t pd_addr)
6145{
6146	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6147
6148	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6149	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6150				 WRITE_DATA_DST_SEL(0)) |
6151				 WR_CONFIRM);
6152	if (vm_id < 8) {
6153		amdgpu_ring_write(ring,
6154				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6155	} else {
6156		amdgpu_ring_write(ring,
6157				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6158	}
6159	amdgpu_ring_write(ring, 0);
6160	amdgpu_ring_write(ring, pd_addr >> 12);
6161
6162	/* bits 0-15 are the VM contexts0-15 */
6163	/* invalidate the cache */
6164	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6165	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6166				 WRITE_DATA_DST_SEL(0)));
6167	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6168	amdgpu_ring_write(ring, 0);
6169	amdgpu_ring_write(ring, 1 << vm_id);
6170
6171	/* wait for the invalidate to complete */
6172	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6173	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6174				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6175				 WAIT_REG_MEM_ENGINE(0))); /* me */
6176	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6177	amdgpu_ring_write(ring, 0);
6178	amdgpu_ring_write(ring, 0); /* ref */
6179	amdgpu_ring_write(ring, 0); /* mask */
6180	amdgpu_ring_write(ring, 0x20); /* poll interval */
6181
6182	/* compute doesn't have PFP */
6183	if (usepfp) {
6184		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6185		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6186		amdgpu_ring_write(ring, 0x0);
6187		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6188		amdgpu_ring_insert_nop(ring, 128);
 
 
6189	}
6190}
6191
 
 
 
 
 
6192static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6193{
6194	return ring->adev->wb.wb[ring->wptr_offs];
6195}
6196
6197static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6198{
6199	struct amdgpu_device *adev = ring->adev;
6200
6201	/* XXX check if swapping is necessary on BE */
6202	adev->wb.wb[ring->wptr_offs] = ring->wptr;
6203	WDOORBELL32(ring->doorbell_index, ring->wptr);
6204}
6205
6206static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6207					     u64 addr, u64 seq,
6208					     unsigned flags)
6209{
6210	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6211	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6212
6213	/* RELEASE_MEM - flush caches, send int */
6214	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6215	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6216				 EOP_TC_ACTION_EN |
6217				 EOP_TC_WB_ACTION_EN |
6218				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6219				 EVENT_INDEX(5)));
6220	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6221	amdgpu_ring_write(ring, addr & 0xfffffffc);
6222	amdgpu_ring_write(ring, upper_32_bits(addr));
6223	amdgpu_ring_write(ring, lower_32_bits(seq));
6224	amdgpu_ring_write(ring, upper_32_bits(seq));
6225}
6226
6227static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6228{
6229	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6230	amdgpu_ring_write(ring, 0);
6231}
6232
6233static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6234{
6235	uint32_t dw2 = 0;
6236
6237	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6238	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6239		gfx_v8_0_ring_emit_vgt_flush(ring);
6240		/* set load_global_config & load_global_uconfig */
6241		dw2 |= 0x8001;
6242		/* set load_cs_sh_regs */
6243		dw2 |= 0x01000000;
6244		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6245		dw2 |= 0x10002;
6246
6247		/* set load_ce_ram if preamble presented */
6248		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6249			dw2 |= 0x10000000;
6250	} else {
6251		/* still load_ce_ram if this is the first time preamble presented
6252		 * although there is no context switch happens.
6253		 */
6254		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6255			dw2 |= 0x10000000;
6256	}
6257
6258	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6259	amdgpu_ring_write(ring, dw2);
6260	amdgpu_ring_write(ring, 0);
6261}
6262
6263static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6264						 enum amdgpu_interrupt_state state)
6265{
6266	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6267		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6268}
6269
6270static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6271						     int me, int pipe,
6272						     enum amdgpu_interrupt_state state)
6273{
 
 
6274	/*
6275	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6276	 * handles the setting of interrupts for this specific pipe. All other
6277	 * pipes' interrupts are set by amdkfd.
6278	 */
6279
6280	if (me == 1) {
6281		switch (pipe) {
6282		case 0:
 
6283			break;
6284		default:
6285			DRM_DEBUG("invalid pipe %d\n", pipe);
6286			return;
6287		}
6288	} else {
6289		DRM_DEBUG("invalid me %d\n", me);
6290		return;
6291	}
6292
6293	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6294		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6295}
6296
6297static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6298					     struct amdgpu_irq_src *source,
6299					     unsigned type,
6300					     enum amdgpu_interrupt_state state)
6301{
6302	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6303		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6304
6305	return 0;
6306}
6307
6308static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6309					      struct amdgpu_irq_src *source,
6310					      unsigned type,
6311					      enum amdgpu_interrupt_state state)
6312{
6313	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6314		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6315
6316	return 0;
6317}
6318
6319static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6320					    struct amdgpu_irq_src *src,
6321					    unsigned type,
6322					    enum amdgpu_interrupt_state state)
6323{
6324	switch (type) {
6325	case AMDGPU_CP_IRQ_GFX_EOP:
6326		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6327		break;
6328	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6329		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6330		break;
6331	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6332		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6333		break;
6334	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6335		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6336		break;
6337	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6338		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6339		break;
6340	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6341		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6342		break;
6343	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6344		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6345		break;
6346	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6347		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6348		break;
6349	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6350		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6351		break;
6352	default:
6353		break;
6354	}
6355	return 0;
6356}
6357
6358static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6359			    struct amdgpu_irq_src *source,
6360			    struct amdgpu_iv_entry *entry)
6361{
6362	int i;
6363	u8 me_id, pipe_id, queue_id;
6364	struct amdgpu_ring *ring;
6365
6366	DRM_DEBUG("IH: CP EOP\n");
6367	me_id = (entry->ring_id & 0x0c) >> 2;
6368	pipe_id = (entry->ring_id & 0x03) >> 0;
6369	queue_id = (entry->ring_id & 0x70) >> 4;
6370
6371	switch (me_id) {
6372	case 0:
6373		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6374		break;
6375	case 1:
6376	case 2:
6377		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6378			ring = &adev->gfx.compute_ring[i];
6379			/* Per-queue interrupt is supported for MEC starting from VI.
6380			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6381			  */
6382			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6383				amdgpu_fence_process(ring);
6384		}
6385		break;
6386	}
6387	return 0;
6388}
6389
6390static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6391				 struct amdgpu_irq_src *source,
6392				 struct amdgpu_iv_entry *entry)
6393{
6394	DRM_ERROR("Illegal register access in command stream\n");
6395	schedule_work(&adev->reset_work);
6396	return 0;
6397}
6398
6399static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6400				  struct amdgpu_irq_src *source,
6401				  struct amdgpu_iv_entry *entry)
6402{
6403	DRM_ERROR("Illegal instruction in command stream\n");
6404	schedule_work(&adev->reset_work);
6405	return 0;
6406}
6407
6408static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6409	.name = "gfx_v8_0",
6410	.early_init = gfx_v8_0_early_init,
6411	.late_init = gfx_v8_0_late_init,
6412	.sw_init = gfx_v8_0_sw_init,
6413	.sw_fini = gfx_v8_0_sw_fini,
6414	.hw_init = gfx_v8_0_hw_init,
6415	.hw_fini = gfx_v8_0_hw_fini,
6416	.suspend = gfx_v8_0_suspend,
6417	.resume = gfx_v8_0_resume,
6418	.is_idle = gfx_v8_0_is_idle,
6419	.wait_for_idle = gfx_v8_0_wait_for_idle,
6420	.check_soft_reset = gfx_v8_0_check_soft_reset,
6421	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6422	.soft_reset = gfx_v8_0_soft_reset,
6423	.post_soft_reset = gfx_v8_0_post_soft_reset,
6424	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6425	.set_powergating_state = gfx_v8_0_set_powergating_state,
6426};
6427
6428static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6429	.type = AMDGPU_RING_TYPE_GFX,
6430	.align_mask = 0xff,
6431	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6432	.get_rptr = gfx_v8_0_ring_get_rptr,
6433	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6434	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6435	.emit_frame_size =
6436		20 + /* gfx_v8_0_ring_emit_gds_switch */
6437		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6438		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6439		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6440		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6441		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6442		2 + /* gfx_v8_ring_emit_sb */
6443		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
6444	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6445	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6446	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6447	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6448	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6449	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6450	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6451	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6452	.test_ring = gfx_v8_0_ring_test_ring,
6453	.test_ib = gfx_v8_0_ring_test_ib,
6454	.insert_nop = amdgpu_ring_insert_nop,
6455	.pad_ib = amdgpu_ring_generic_pad_ib,
6456	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6457	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6458};
6459
6460static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6461	.type = AMDGPU_RING_TYPE_COMPUTE,
6462	.align_mask = 0xff,
6463	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6464	.get_rptr = gfx_v8_0_ring_get_rptr,
6465	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6466	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6467	.emit_frame_size =
6468		20 + /* gfx_v8_0_ring_emit_gds_switch */
6469		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6470		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6471		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6472		17 + /* gfx_v8_0_ring_emit_vm_flush */
6473		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6474	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
6475	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6476	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6477	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6478	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6479	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6480	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6481	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6482	.test_ring = gfx_v8_0_ring_test_ring,
6483	.test_ib = gfx_v8_0_ring_test_ib,
6484	.insert_nop = amdgpu_ring_insert_nop,
6485	.pad_ib = amdgpu_ring_generic_pad_ib,
6486};
6487
6488static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6489{
6490	int i;
6491
6492	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6493		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6494
6495	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6496		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6497}
6498
6499static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6500	.set = gfx_v8_0_set_eop_interrupt_state,
6501	.process = gfx_v8_0_eop_irq,
6502};
6503
6504static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6505	.set = gfx_v8_0_set_priv_reg_fault_state,
6506	.process = gfx_v8_0_priv_reg_irq,
6507};
6508
6509static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6510	.set = gfx_v8_0_set_priv_inst_fault_state,
6511	.process = gfx_v8_0_priv_inst_irq,
6512};
6513
6514static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6515{
6516	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6517	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6518
6519	adev->gfx.priv_reg_irq.num_types = 1;
6520	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6521
6522	adev->gfx.priv_inst_irq.num_types = 1;
6523	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6524}
6525
6526static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6527{
6528	switch (adev->asic_type) {
6529	case CHIP_TOPAZ:
6530		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6531		break;
6532	case CHIP_STONEY:
6533	case CHIP_CARRIZO:
6534		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6535		break;
6536	default:
6537		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6538		break;
6539	}
6540}
6541
6542static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6543{
6544	/* init asci gds info */
6545	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6546	adev->gds.gws.total_size = 64;
6547	adev->gds.oa.total_size = 16;
6548
6549	if (adev->gds.mem.total_size == 64 * 1024) {
6550		adev->gds.mem.gfx_partition_size = 4096;
6551		adev->gds.mem.cs_partition_size = 4096;
6552
6553		adev->gds.gws.gfx_partition_size = 4;
6554		adev->gds.gws.cs_partition_size = 4;
6555
6556		adev->gds.oa.gfx_partition_size = 4;
6557		adev->gds.oa.cs_partition_size = 1;
6558	} else {
6559		adev->gds.mem.gfx_partition_size = 1024;
6560		adev->gds.mem.cs_partition_size = 1024;
6561
6562		adev->gds.gws.gfx_partition_size = 16;
6563		adev->gds.gws.cs_partition_size = 16;
6564
6565		adev->gds.oa.gfx_partition_size = 4;
6566		adev->gds.oa.cs_partition_size = 4;
6567	}
6568}
6569
6570static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6571						 u32 bitmap)
6572{
6573	u32 data;
6574
6575	if (!bitmap)
6576		return;
6577
6578	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6579	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6580
6581	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6582}
6583
6584static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6585{
6586	u32 data, mask;
6587
6588	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6589		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
 
 
 
6590
6591	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6592
6593	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6594}
6595
6596static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 
6597{
6598	int i, j, k, counter, active_cu_number = 0;
6599	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6600	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6601	unsigned disable_masks[4 * 2];
6602
6603	memset(cu_info, 0, sizeof(*cu_info));
 
6604
6605	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6606
6607	mutex_lock(&adev->grbm_idx_mutex);
6608	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6609		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6610			mask = 1;
6611			ao_bitmap = 0;
6612			counter = 0;
6613			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6614			if (i < 4 && j < 2)
6615				gfx_v8_0_set_user_cu_inactive_bitmap(
6616					adev, disable_masks[i * 2 + j]);
6617			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6618			cu_info->bitmap[i][j] = bitmap;
6619
6620			for (k = 0; k < 16; k ++) {
6621				if (bitmap & mask) {
6622					if (counter < 2)
6623						ao_bitmap |= mask;
6624					counter ++;
6625				}
6626				mask <<= 1;
6627			}
6628			active_cu_number += counter;
6629			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6630		}
6631	}
6632	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6633	mutex_unlock(&adev->grbm_idx_mutex);
6634
6635	cu_info->number = active_cu_number;
6636	cu_info->ao_cu_mask = ao_cu_mask;
6637}
6638
6639const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
6640{
6641	.type = AMD_IP_BLOCK_TYPE_GFX,
6642	.major = 8,
6643	.minor = 0,
6644	.rev = 0,
6645	.funcs = &gfx_v8_0_ip_funcs,
6646};
6647
6648const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
6649{
6650	.type = AMD_IP_BLOCK_TYPE_GFX,
6651	.major = 8,
6652	.minor = 1,
6653	.rev = 0,
6654	.funcs = &gfx_v8_0_ip_funcs,
6655};
v4.6
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23#include <linux/firmware.h>
  24#include "drmP.h"
  25#include "amdgpu.h"
  26#include "amdgpu_gfx.h"
  27#include "vi.h"
 
  28#include "vid.h"
  29#include "amdgpu_ucode.h"
 
 
  30#include "clearstate_vi.h"
  31
  32#include "gmc/gmc_8_2_d.h"
  33#include "gmc/gmc_8_2_sh_mask.h"
  34
  35#include "oss/oss_3_0_d.h"
  36#include "oss/oss_3_0_sh_mask.h"
  37
  38#include "bif/bif_5_0_d.h"
  39#include "bif/bif_5_0_sh_mask.h"
  40
  41#include "gca/gfx_8_0_d.h"
  42#include "gca/gfx_8_0_enum.h"
  43#include "gca/gfx_8_0_sh_mask.h"
  44#include "gca/gfx_8_0_enum.h"
  45
  46#include "dce/dce_10_0_d.h"
  47#include "dce/dce_10_0_sh_mask.h"
  48
 
 
  /* Ring counts (presumably the number of gfx / compute rings set up by this file -- confirm at the users). */
  49#define GFX8_NUM_GFX_RINGS     1
  50#define GFX8_NUM_COMPUTE_RINGS 8
  51
  /* Per-ASIC golden values for GB_ADDR_CONFIG; Topaz and Carrizo share one value. */
  52#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  53#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 
  54#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  55
  /*
   * Field helpers: each shifts x to the bit position given by the matching
   * GB_TILE_MODE0__*__SHIFT or GB_MACROTILE_MODE0__*__SHIFT constant.  No
   * masking is done, so x must already fit the field.
   */
  56#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  57#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  58#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  59#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  60#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  61#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  62#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  63#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  64#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  65
  /*
   * Single-bit masks (bits 0 through 5), one per override field; going by the
   * name prefix they apply to the RLC_CGTT_MGCG_OVERRIDE register value.
   */
  66#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  67#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  68#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  69#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  70#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  71#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  72
  73/* BPM SERDES command values: 1 selects "set", 0 selects "clear" (CLE). */
  74#define SET_BPM_SERDES_CMD    1
  75#define CLE_BPM_SERDES_CMD    0
  76
  77/* BPM register addresses; values run consecutively from 0, BPM_REG_FGCG_MAX is the count. */
  78enum {
  79	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  80	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  81	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  82	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  83	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  84	BPM_REG_FGCG_MAX
  85};
  86
 
 
  /*
   * Firmware images declared for this module, grouped per ASIC.  The names
   * follow the "amdgpu/<chip>_<block>.bin" pattern that
   * gfx_v8_0_init_microcode() builds with snprintf().  Stoney and Topaz list
   * no mec2 image.
   */
  87MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
  88MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
  89MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
  90MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
  91MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
  92MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
  93
  94MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
  95MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
  96MODULE_FIRMWARE("amdgpu/stoney_me.bin");
  97MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
  98MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
  99
 100MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 101MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 102MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 103MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 104MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 105MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 106
 107MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 108MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 109MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 110MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 111MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 112
 113MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 114MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 115MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 116MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 117MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 118MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 /*
  * GDS register offsets, one row per VMID (0..15).  Each row lists that
  * VMID's BASE, SIZE, GWS and OA registers, in the member order of
  * struct amdgpu_gds_reg_offset (declared outside this file section).
  */
 120static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 121{
 122	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 123	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 124	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 125	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 126	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 127	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 128	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 129	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 130	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 131	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 132	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 133	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 134	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 135	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 136	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 137	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 138};
 139
 /*
  * Tonga golden settings; programmed second (after tonga_mgcg_cgcg_init) by
  * gfx_v8_0_init_golden_registers() for CHIP_TONGA.
  * Flat list of u32 triples: a register offset followed by two values
  * (presumably a mask and the value applied under it -- confirm against
  * amdgpu_program_register_sequence()).
  */
 140static const u32 golden_settings_tonga_a11[] =
 141{
 142	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 143	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 144	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 145	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 146	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 147	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 148	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 
 149	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 150	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 151	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 152	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 153	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 154	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 155	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 156	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 157};
 158
 /*
  * Tonga common golden registers; the last of the three tables that
  * gfx_v8_0_init_golden_registers() programs for CHIP_TONGA.
  * Same u32 triple layout as the other golden tables (register offset plus
  * two values).
  */
 159static const u32 tonga_golden_common_all[] =
 160{
 161	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 162	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 163	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 164	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 165	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 166	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 167	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 168	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 169};
 170
 /*
  * Tonga MGCG/CGCG init sequence; the first table programmed by
  * gfx_v8_0_init_golden_registers() for CHIP_TONGA.
  * u32 triples (register offset plus two values).  Covers the CGTT clock
  * control registers and the per-CU CGTS registers for CU0..CU7.
  */
 171static const u32 tonga_mgcg_cgcg_init[] =
 172{
 173	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 174	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 175	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 176	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 177	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 178	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 179	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 180	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 181	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 182	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 183	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 184	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 185	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 186	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 187	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 188	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 189	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 190	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 191	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 192	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 193	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 194	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 195	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 196	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 197	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 198	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 199	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 200	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 201	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 202	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 203	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 204	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 205	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 206	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 207	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 208	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 209	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 210	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 211	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 212	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 213	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 214	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 215	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 216	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 217	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 218	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 219	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 220	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 221	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 222	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 223	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 224	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 225	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 226	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 227	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 228	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 229	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 230	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 231	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 232	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 233	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 234	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 235	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 236	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 237	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 238	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 239	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 240	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 241	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 242	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 243	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 244	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 245	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 246	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 247	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 248};
 249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 /*
  * Fiji common golden registers; the last of the three tables that
  * gfx_v8_0_init_golden_registers() programs for CHIP_FIJI.
  * u32 triples (register offset plus two values).  mmGRBM_GFX_INDEX is
  * written a second time before mmSPI_CONFIG_CNTL_1.
  */
 250static const u32 fiji_golden_common_all[] =
 251{
 252	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 253	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 254	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 255	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 256	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 257	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 258	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 259	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 260	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 261	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 262};
 263
 /*
  * Fiji golden settings; programmed second (after fiji_mgcg_cgcg_init) by
  * gfx_v8_0_init_golden_registers() for CHIP_FIJI.
  * u32 triples (register offset plus two values).
  */
 264static const u32 golden_settings_fiji_a10[] =
 265{
 266	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 267	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 268	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 269	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 270	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 271	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 272	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 273	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 274	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 275	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 276	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 277};
 278
 /*
  * Fiji MGCG/CGCG init sequence; the first table programmed by
  * gfx_v8_0_init_golden_registers() for CHIP_FIJI.
  * u32 triples (register offset plus two values).  Unlike the Tonga,
  * Iceland and Carrizo variants, this table has no per-CU CGTS_CUn_*
  * entries.
  */
 279static const u32 fiji_mgcg_cgcg_init[] =
 280{
 281	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 282	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 283	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 284	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 285	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 286	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 287	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 288	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 289	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 290	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 291	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 292	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 293	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 294	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 295	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 296	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 297	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 298	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 299	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 300	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 301	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 302	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 303	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 304	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 305	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 306	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 307	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 308	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 309	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 310	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 311	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 312	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 313	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 314	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 315	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 316};
 317
 /*
  * Iceland golden settings; programmed second (after
  * iceland_mgcg_cgcg_init) by gfx_v8_0_init_golden_registers() for
  * CHIP_TOPAZ.
  * u32 triples (register offset plus two values).
  */
 318static const u32 golden_settings_iceland_a11[] =
 319{
 320	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 321	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 322	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 323	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 324	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 325	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 326	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 327	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 
 328	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 329	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 330	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 331	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 332	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 333	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 334	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 335};
 336
 /*
  * Iceland common golden registers; the last of the three tables that
  * gfx_v8_0_init_golden_registers() programs for CHIP_TOPAZ.
  * u32 triples (register offset plus two values).
  */
 337static const u32 iceland_golden_common_all[] =
 338{
 339	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 340	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 341	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 342	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 343	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 344	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 345	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 346	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 347};
 348
 /*
  * Iceland MGCG/CGCG init sequence; the first table programmed by
  * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ.
  * u32 triples (register offset plus two values).  Per-CU CGTS entries
  * cover CU0..CU5 only, and the table has no mmCP_MEM_SLP_CNTL entry.
  */
 349static const u32 iceland_mgcg_cgcg_init[] =
 350{
 351	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 352	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 353	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 354	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 355	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 356	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 357	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 358	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 359	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 360	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 361	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 362	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 363	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 364	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 365	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 366	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 367	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 368	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 369	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 370	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 371	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 372	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 373	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 374	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 375	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 376	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 377	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 378	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 379	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 380	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 381	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 382	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 383	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 384	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 385	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 386	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 387	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 388	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 389	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 390	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 391	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 392	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 393	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 394	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 395	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 396	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 397	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 398	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 399	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 400	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 401	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 402	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 403	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 404	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 405	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 406	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 407	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 408	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 409	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 410	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 411	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 412	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 413	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 414	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 415};
 416
 /*
  * Carrizo golden settings; programmed second (after cz_mgcg_cgcg_init) by
  * gfx_v8_0_init_golden_registers() for CHIP_CARRIZO.
  * u32 triples (register offset plus two values).
  */
 417static const u32 cz_golden_settings_a11[] =
 418{
 419	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 421	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 422	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 423	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 
 424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 425	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 
 426	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 427	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 428	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 429};
 430
 /*
  * Carrizo common golden registers; the last of the three tables that
  * gfx_v8_0_init_golden_registers() programs for CHIP_CARRIZO.
  * u32 triples (register offset plus two values).
  */
 431static const u32 cz_golden_common_all[] =
 432{
 433	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 434	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 435	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 436	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 437	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 438	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 439	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 440	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 441};
 442
 /*
  * Carrizo MGCG/CGCG init sequence; the first table programmed by
  * gfx_v8_0_init_golden_registers() for CHIP_CARRIZO.
  * u32 triples (register offset plus two values).  Covers the CGTT clock
  * control registers and the per-CU CGTS registers for CU0..CU7.
  */
 443static const u32 cz_mgcg_cgcg_init[] =
 444{
 445	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 446	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 447	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 451	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 452	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 453	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 454	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 455	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 456	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 459	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 460	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 461	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 462	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 463	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 464	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 465	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 466	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 467	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 468	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 469	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 470	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 471	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 472	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 473	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 474	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 475	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 476	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 477	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 478	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 479	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 480	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 481	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 482	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 483	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 484	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 485	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 486	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 487	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 488	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 489	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 490	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 491	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 492	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 493	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 494	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 495	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 496	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 497	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 498	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 499	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 500	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 501	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 502	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 503	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 504	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 505	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 506	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 507	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 508	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 509	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 510	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 511	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 512	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 513	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 514	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 515	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 516	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 517	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 518	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 519	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 520};
 521
 /*
  * Stoney golden settings; programmed second (after stoney_mgcg_cgcg_init)
  * by gfx_v8_0_init_golden_registers() for CHIP_STONEY.
  * u32 triples (register offset plus two values).
  */
 522static const u32 stoney_golden_settings_a11[] =
 523{
 524	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 525	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 526	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 527	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 528	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 529	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 530  	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 531	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 532	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 533	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 534};
 535
 /*
  * Stoney common golden registers; the last of the three tables that
  * gfx_v8_0_init_golden_registers() programs for CHIP_STONEY.
  * u32 triples (register offset plus two values).
  */
 536static const u32 stoney_golden_common_all[] =
 537{
 538	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 539	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 540	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 541	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 542	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 543	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 544	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 545	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 546};
 547
 /*
  * Stoney MGCG/CGCG init sequence; the first table programmed by
  * gfx_v8_0_init_golden_registers() for CHIP_STONEY.
  * u32 triples (register offset plus two values).  Much shorter than the
  * other ASICs' tables: no CGTT_* or per-CU CGTS entries.
  */
 548static const u32 stoney_mgcg_cgcg_init[] =
 549{
 550	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 551	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 552	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 553	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 554	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 555	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
 556};
 557
 /* Forward declarations of file-local setup helpers; their definitions are not in this part of the file. */
 558static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 559static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 560static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 
 
 
 561
 562static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 563{
 564	switch (adev->asic_type) {
 565	case CHIP_TOPAZ:
 566		amdgpu_program_register_sequence(adev,
 567						 iceland_mgcg_cgcg_init,
 568						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
 569		amdgpu_program_register_sequence(adev,
 570						 golden_settings_iceland_a11,
 571						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
 572		amdgpu_program_register_sequence(adev,
 573						 iceland_golden_common_all,
 574						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
 575		break;
 576	case CHIP_FIJI:
 577		amdgpu_program_register_sequence(adev,
 578						 fiji_mgcg_cgcg_init,
 579						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
 580		amdgpu_program_register_sequence(adev,
 581						 golden_settings_fiji_a10,
 582						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
 583		amdgpu_program_register_sequence(adev,
 584						 fiji_golden_common_all,
 585						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
 586		break;
 587
 588	case CHIP_TONGA:
 589		amdgpu_program_register_sequence(adev,
 590						 tonga_mgcg_cgcg_init,
 591						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
 592		amdgpu_program_register_sequence(adev,
 593						 golden_settings_tonga_a11,
 594						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 595		amdgpu_program_register_sequence(adev,
 596						 tonga_golden_common_all,
 597						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
 598		break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 599	case CHIP_CARRIZO:
 600		amdgpu_program_register_sequence(adev,
 601						 cz_mgcg_cgcg_init,
 602						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
 603		amdgpu_program_register_sequence(adev,
 604						 cz_golden_settings_a11,
 605						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
 606		amdgpu_program_register_sequence(adev,
 607						 cz_golden_common_all,
 608						 (const u32)ARRAY_SIZE(cz_golden_common_all));
 609		break;
 610	case CHIP_STONEY:
 611		amdgpu_program_register_sequence(adev,
 612						 stoney_mgcg_cgcg_init,
 613						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
 614		amdgpu_program_register_sequence(adev,
 615						 stoney_golden_settings_a11,
 616						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
 617		amdgpu_program_register_sequence(adev,
 618						 stoney_golden_common_all,
 619						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
 620		break;
 621	default:
 622		break;
 623	}
 624}
 625
 626static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 627{
 628	int i;
 629
 630	adev->gfx.scratch.num_reg = 7;
 631	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 632	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
 633		adev->gfx.scratch.free[i] = true;
 634		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
 635	}
 636}
 637
 638static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 639{
 640	struct amdgpu_device *adev = ring->adev;
 641	uint32_t scratch;
 642	uint32_t tmp = 0;
 643	unsigned i;
 644	int r;
 645
 646	r = amdgpu_gfx_scratch_get(adev, &scratch);
 647	if (r) {
 648		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 649		return r;
 650	}
 651	WREG32(scratch, 0xCAFEDEAD);
 652	r = amdgpu_ring_alloc(ring, 3);
 653	if (r) {
 654		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 655			  ring->idx, r);
 656		amdgpu_gfx_scratch_free(adev, scratch);
 657		return r;
 658	}
 659	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 660	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 661	amdgpu_ring_write(ring, 0xDEADBEEF);
 662	amdgpu_ring_commit(ring);
 663
 664	for (i = 0; i < adev->usec_timeout; i++) {
 665		tmp = RREG32(scratch);
 666		if (tmp == 0xDEADBEEF)
 667			break;
 668		DRM_UDELAY(1);
 669	}
 670	if (i < adev->usec_timeout) {
 671		DRM_INFO("ring test on %d succeeded in %d usecs\n",
 672			 ring->idx, i);
 673	} else {
 674		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 675			  ring->idx, scratch, tmp);
 676		r = -EINVAL;
 677	}
 678	amdgpu_gfx_scratch_free(adev, scratch);
 679	return r;
 680}
 681
 682static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 683{
 684	struct amdgpu_device *adev = ring->adev;
 685	struct amdgpu_ib ib;
 686	struct fence *f = NULL;
 687	uint32_t scratch;
 688	uint32_t tmp = 0;
 689	unsigned i;
 690	int r;
 691
 692	r = amdgpu_gfx_scratch_get(adev, &scratch);
 693	if (r) {
 694		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
 695		return r;
 696	}
 697	WREG32(scratch, 0xCAFEDEAD);
 698	memset(&ib, 0, sizeof(ib));
 699	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 700	if (r) {
 701		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 702		goto err1;
 703	}
 704	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
 705	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
 706	ib.ptr[2] = 0xDEADBEEF;
 707	ib.length_dw = 3;
 708
 709	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 710	if (r)
 711		goto err2;
 712
 713	r = fence_wait(f, false);
 714	if (r) {
 715		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
 
 
 
 
 716		goto err2;
 717	}
 718	for (i = 0; i < adev->usec_timeout; i++) {
 719		tmp = RREG32(scratch);
 720		if (tmp == 0xDEADBEEF)
 721			break;
 722		DRM_UDELAY(1);
 723	}
 724	if (i < adev->usec_timeout) {
 725		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
 726			 ring->idx, i);
 727		goto err2;
 728	} else {
 729		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 730			  scratch, tmp);
 731		r = -EINVAL;
 732	}
 733err2:
 734	fence_put(f);
 735	amdgpu_ib_free(adev, &ib, NULL);
 736	fence_put(f);
 737err1:
 738	amdgpu_gfx_scratch_free(adev, scratch);
 739	return r;
 740}
 741
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 742static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 743{
 744	const char *chip_name;
 745	char fw_name[30];
 746	int err;
 747	struct amdgpu_firmware_info *info = NULL;
 748	const struct common_firmware_header *header = NULL;
 749	const struct gfx_firmware_header_v1_0 *cp_hdr;
 
 
 750
 751	DRM_DEBUG("\n");
 752
 753	switch (adev->asic_type) {
 754	case CHIP_TOPAZ:
 755		chip_name = "topaz";
 756		break;
 757	case CHIP_TONGA:
 758		chip_name = "tonga";
 759		break;
 760	case CHIP_CARRIZO:
 761		chip_name = "carrizo";
 762		break;
 763	case CHIP_FIJI:
 764		chip_name = "fiji";
 765		break;
 
 
 
 
 
 
 
 
 
 766	case CHIP_STONEY:
 767		chip_name = "stoney";
 768		break;
 769	default:
 770		BUG();
 771	}
 772
 773	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 774	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 775	if (err)
 776		goto out;
 777	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 778	if (err)
 779		goto out;
 780	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 781	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 782	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 783
 784	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 785	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 786	if (err)
 787		goto out;
 788	err = amdgpu_ucode_validate(adev->gfx.me_fw);
 789	if (err)
 790		goto out;
 791	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 792	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 793	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 794
 795	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 796	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 797	if (err)
 798		goto out;
 799	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 800	if (err)
 801		goto out;
 802	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 803	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 804	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 805
 806	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 807	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 808	if (err)
 809		goto out;
 810	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 811	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
 812	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 813	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 814
 815	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 816	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 817	if (err)
 818		goto out;
 819	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
 820	if (err)
 821		goto out;
 822	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 823	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 824	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 825
 826	if ((adev->asic_type != CHIP_STONEY) &&
 827	    (adev->asic_type != CHIP_TOPAZ)) {
 828		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
 829		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 830		if (!err) {
 831			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
 832			if (err)
 833				goto out;
 834			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
 835				adev->gfx.mec2_fw->data;
 836			adev->gfx.mec2_fw_version =
 837				le32_to_cpu(cp_hdr->header.ucode_version);
 838			adev->gfx.mec2_feature_version =
 839				le32_to_cpu(cp_hdr->ucode_feature_version);
 840		} else {
 841			err = 0;
 842			adev->gfx.mec2_fw = NULL;
 843		}
 844	}
 845
 846	if (adev->firmware.smu_load) {
 847		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
 848		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
 849		info->fw = adev->gfx.pfp_fw;
 850		header = (const struct common_firmware_header *)info->fw->data;
 851		adev->firmware.fw_size +=
 852			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 853
 854		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
 855		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
 856		info->fw = adev->gfx.me_fw;
 857		header = (const struct common_firmware_header *)info->fw->data;
 858		adev->firmware.fw_size +=
 859			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 860
 861		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
 862		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
 863		info->fw = adev->gfx.ce_fw;
 864		header = (const struct common_firmware_header *)info->fw->data;
 865		adev->firmware.fw_size +=
 866			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 867
 868		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
 869		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
 870		info->fw = adev->gfx.rlc_fw;
 871		header = (const struct common_firmware_header *)info->fw->data;
 872		adev->firmware.fw_size +=
 873			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 874
 875		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
 876		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
 877		info->fw = adev->gfx.mec_fw;
 878		header = (const struct common_firmware_header *)info->fw->data;
 879		adev->firmware.fw_size +=
 880			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 881
 
 
 
 
 
 
 
 
 
 
 
 
 
 882		if (adev->gfx.mec2_fw) {
 883			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
 884			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
 885			info->fw = adev->gfx.mec2_fw;
 886			header = (const struct common_firmware_header *)info->fw->data;
 887			adev->firmware.fw_size +=
 888				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 889		}
 890
 891	}
 892
 893out:
 894	if (err) {
 895		dev_err(adev->dev,
 896			"gfx8: Failed to load firmware \"%s\"\n",
 897			fw_name);
 898		release_firmware(adev->gfx.pfp_fw);
 899		adev->gfx.pfp_fw = NULL;
 900		release_firmware(adev->gfx.me_fw);
 901		adev->gfx.me_fw = NULL;
 902		release_firmware(adev->gfx.ce_fw);
 903		adev->gfx.ce_fw = NULL;
 904		release_firmware(adev->gfx.rlc_fw);
 905		adev->gfx.rlc_fw = NULL;
 906		release_firmware(adev->gfx.mec_fw);
 907		adev->gfx.mec_fw = NULL;
 908		release_firmware(adev->gfx.mec2_fw);
 909		adev->gfx.mec2_fw = NULL;
 910	}
 911	return err;
 912}
 913
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 914static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 915{
 916	int r;
 917
 918	if (adev->gfx.mec.hpd_eop_obj) {
 919		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 920		if (unlikely(r != 0))
 921			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 922		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 923		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 924
 925		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 926		adev->gfx.mec.hpd_eop_obj = NULL;
 927	}
 928}
 929
 930#define MEC_HPD_SIZE 2048
 931
 932static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 933{
 934	int r;
 935	u32 *hpd;
 936
 937	/*
 938	 * we assign only 1 pipe because all other pipes will
 939	 * be handled by KFD
 940	 */
 941	adev->gfx.mec.num_mec = 1;
 942	adev->gfx.mec.num_pipe = 1;
 943	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
 944
 945	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 946		r = amdgpu_bo_create(adev,
 947				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
 948				     PAGE_SIZE, true,
 949				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 950				     &adev->gfx.mec.hpd_eop_obj);
 951		if (r) {
 952			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 953			return r;
 954		}
 955	}
 956
 957	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 958	if (unlikely(r != 0)) {
 959		gfx_v8_0_mec_fini(adev);
 960		return r;
 961	}
 962	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 963			  &adev->gfx.mec.hpd_eop_gpu_addr);
 964	if (r) {
 965		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 966		gfx_v8_0_mec_fini(adev);
 967		return r;
 968	}
 969	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
 970	if (r) {
 971		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
 972		gfx_v8_0_mec_fini(adev);
 973		return r;
 974	}
 975
 976	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
 977
 978	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 979	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 980
 981	return 0;
 982}
 983
/*
 * Compute shader image used by gfx_v8_0_do_edc_gpr_workarounds(): the words
 * are copied verbatim into the IB at vgpr_offset and the "VGPR" dispatch
 * points COMPUTE_PGM_LO/HI at that copy.
 * NOTE(review): presumably the program initializes the VGPRs, per the name;
 * the encoding is not explained anywhere in this file section.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1020
/*
 * Compute shader image used by gfx_v8_0_do_edc_gpr_workarounds(): the words
 * are copied verbatim into the IB at sgpr_offset, and both the "SGPR1" and
 * "SGPR2" dispatches point COMPUTE_PGM_LO/HI at that single copy.
 * NOTE(review): presumably the program initializes the SGPRs, per the name.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1045
/*
 * Register state for the "VGPR" dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Flat list of (register, value) pairs;
 * the consumer walks it two entries at a time and emits one SET_SH_REG
 * packet per pair.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1065
/*
 * Register state for the "SGPR1" dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(), as (register, value) pairs emitted one
 * SET_SH_REG packet per pair.  Differs from sgpr2_init_regs only in the
 * COMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here, 0xf0 there).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1085
/*
 * Register state for the "SGPR2" dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(), as (register, value) pairs emitted one
 * SET_SH_REG packet per pair.  Differs from sgpr1_init_regs only in the
 * COMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here, 0x0f there).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1105
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back,
 * one RREG32 each with the result discarded, after re-enabling EDC; the
 * caller's comment states the read-back is what clears the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1134
1135static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1136{
1137	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1138	struct amdgpu_ib ib;
1139	struct fence *f = NULL;
1140	int r, i;
1141	u32 tmp;
1142	unsigned total_size, vgpr_offset, sgpr_offset;
1143	u64 gpu_addr;
1144
1145	/* only supported on CZ */
1146	if (adev->asic_type != CHIP_CARRIZO)
1147		return 0;
1148
1149	/* bail if the compute ring is not ready */
1150	if (!ring->ready)
1151		return 0;
1152
1153	tmp = RREG32(mmGB_EDC_MODE);
1154	WREG32(mmGB_EDC_MODE, 0);
1155
1156	total_size =
1157		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1158	total_size +=
1159		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1160	total_size +=
1161		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1162	total_size = ALIGN(total_size, 256);
1163	vgpr_offset = total_size;
1164	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1165	sgpr_offset = total_size;
1166	total_size += sizeof(sgpr_init_compute_shader);
1167
1168	/* allocate an indirect buffer to put the commands in */
1169	memset(&ib, 0, sizeof(ib));
1170	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1171	if (r) {
1172		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1173		return r;
1174	}
1175
1176	/* load the compute shaders */
1177	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1178		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1179
1180	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1181		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1182
1183	/* init the ib length to 0 */
1184	ib.length_dw = 0;
1185
1186	/* VGPR */
1187	/* write the register state for the compute dispatch */
1188	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1189		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1190		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1191		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1192	}
1193	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1194	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1195	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1196	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1197	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1198	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1199
1200	/* write dispatch packet */
1201	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1202	ib.ptr[ib.length_dw++] = 8; /* x */
1203	ib.ptr[ib.length_dw++] = 1; /* y */
1204	ib.ptr[ib.length_dw++] = 1; /* z */
1205	ib.ptr[ib.length_dw++] =
1206		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1207
1208	/* write CS partial flush packet */
1209	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1210	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1211
1212	/* SGPR1 */
1213	/* write the register state for the compute dispatch */
1214	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1215		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1216		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1217		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1218	}
1219	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1220	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1221	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1222	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1223	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1224	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1225
1226	/* write dispatch packet */
1227	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1228	ib.ptr[ib.length_dw++] = 8; /* x */
1229	ib.ptr[ib.length_dw++] = 1; /* y */
1230	ib.ptr[ib.length_dw++] = 1; /* z */
1231	ib.ptr[ib.length_dw++] =
1232		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1233
1234	/* write CS partial flush packet */
1235	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1236	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1237
1238	/* SGPR2 */
1239	/* write the register state for the compute dispatch */
1240	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1241		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1242		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1243		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1244	}
1245	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1246	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1247	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1248	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1249	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1250	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1251
1252	/* write dispatch packet */
1253	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1254	ib.ptr[ib.length_dw++] = 8; /* x */
1255	ib.ptr[ib.length_dw++] = 1; /* y */
1256	ib.ptr[ib.length_dw++] = 1; /* z */
1257	ib.ptr[ib.length_dw++] =
1258		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1259
1260	/* write CS partial flush packet */
1261	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1262	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1263
1264	/* shedule the ib on the ring */
1265	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1266	if (r) {
1267		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1268		goto fail;
1269	}
1270
1271	/* wait for the GPU to finish processing the IB */
1272	r = fence_wait(f, false);
1273	if (r) {
1274		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1275		goto fail;
1276	}
1277
1278	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1279	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1280	WREG32(mmGB_EDC_MODE, tmp);
1281
1282	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1283	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1284	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1285
1286
1287	/* read back registers to clear the counters */
1288	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1289		RREG32(sec_ded_counter_registers[i]);
1290
1291fail:
1292	fence_put(f);
1293	amdgpu_ib_free(adev, &ib, NULL);
1294	fence_put(f);
1295
1296	return r;
1297}
1298
1299static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1300{
1301	u32 gb_addr_config;
1302	u32 mc_shared_chmap, mc_arb_ramcfg;
1303	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1304	u32 tmp;
 
1305
1306	switch (adev->asic_type) {
1307	case CHIP_TOPAZ:
1308		adev->gfx.config.max_shader_engines = 1;
1309		adev->gfx.config.max_tile_pipes = 2;
1310		adev->gfx.config.max_cu_per_sh = 6;
1311		adev->gfx.config.max_sh_per_se = 1;
1312		adev->gfx.config.max_backends_per_se = 2;
1313		adev->gfx.config.max_texture_channel_caches = 2;
1314		adev->gfx.config.max_gprs = 256;
1315		adev->gfx.config.max_gs_threads = 32;
1316		adev->gfx.config.max_hw_contexts = 8;
1317
1318		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1319		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1320		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1321		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1322		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1323		break;
1324	case CHIP_FIJI:
1325		adev->gfx.config.max_shader_engines = 4;
1326		adev->gfx.config.max_tile_pipes = 16;
1327		adev->gfx.config.max_cu_per_sh = 16;
1328		adev->gfx.config.max_sh_per_se = 1;
1329		adev->gfx.config.max_backends_per_se = 4;
1330		adev->gfx.config.max_texture_channel_caches = 16;
1331		adev->gfx.config.max_gprs = 256;
1332		adev->gfx.config.max_gs_threads = 32;
1333		adev->gfx.config.max_hw_contexts = 8;
1334
1335		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1336		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1337		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1338		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1339		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1340		break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1341	case CHIP_TONGA:
1342		adev->gfx.config.max_shader_engines = 4;
1343		adev->gfx.config.max_tile_pipes = 8;
1344		adev->gfx.config.max_cu_per_sh = 8;
1345		adev->gfx.config.max_sh_per_se = 1;
1346		adev->gfx.config.max_backends_per_se = 2;
1347		adev->gfx.config.max_texture_channel_caches = 8;
1348		adev->gfx.config.max_gprs = 256;
1349		adev->gfx.config.max_gs_threads = 32;
1350		adev->gfx.config.max_hw_contexts = 8;
1351
1352		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1353		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1354		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1355		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1356		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1357		break;
1358	case CHIP_CARRIZO:
1359		adev->gfx.config.max_shader_engines = 1;
1360		adev->gfx.config.max_tile_pipes = 2;
1361		adev->gfx.config.max_sh_per_se = 1;
1362		adev->gfx.config.max_backends_per_se = 2;
1363
1364		switch (adev->pdev->revision) {
1365		case 0xc4:
1366		case 0x84:
1367		case 0xc8:
1368		case 0xcc:
1369		case 0xe1:
1370		case 0xe3:
1371			/* B10 */
1372			adev->gfx.config.max_cu_per_sh = 8;
1373			break;
1374		case 0xc5:
1375		case 0x81:
1376		case 0x85:
1377		case 0xc9:
1378		case 0xcd:
1379		case 0xe2:
1380		case 0xe4:
1381			/* B8 */
1382			adev->gfx.config.max_cu_per_sh = 6;
1383			break;
1384		case 0xc6:
1385		case 0xca:
1386		case 0xce:
1387		case 0x88:
1388			/* B6 */
1389			adev->gfx.config.max_cu_per_sh = 6;
1390			break;
1391		case 0xc7:
1392		case 0x87:
1393		case 0xcb:
1394		case 0xe5:
1395		case 0x89:
1396		default:
1397			/* B4 */
1398			adev->gfx.config.max_cu_per_sh = 4;
1399			break;
1400		}
1401
1402		adev->gfx.config.max_texture_channel_caches = 2;
1403		adev->gfx.config.max_gprs = 256;
1404		adev->gfx.config.max_gs_threads = 32;
1405		adev->gfx.config.max_hw_contexts = 8;
1406
1407		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1408		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1409		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1410		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1411		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1412		break;
1413	case CHIP_STONEY:
1414		adev->gfx.config.max_shader_engines = 1;
1415		adev->gfx.config.max_tile_pipes = 2;
1416		adev->gfx.config.max_sh_per_se = 1;
1417		adev->gfx.config.max_backends_per_se = 1;
1418
1419		switch (adev->pdev->revision) {
1420		case 0xc0:
1421		case 0xc1:
1422		case 0xc2:
1423		case 0xc4:
1424		case 0xc8:
1425		case 0xc9:
1426			adev->gfx.config.max_cu_per_sh = 3;
1427			break;
1428		case 0xd0:
1429		case 0xd1:
1430		case 0xd2:
1431		default:
1432			adev->gfx.config.max_cu_per_sh = 2;
1433			break;
1434		}
1435
1436		adev->gfx.config.max_texture_channel_caches = 2;
1437		adev->gfx.config.max_gprs = 256;
1438		adev->gfx.config.max_gs_threads = 16;
1439		adev->gfx.config.max_hw_contexts = 8;
1440
1441		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1442		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1443		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1444		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1445		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1446		break;
1447	default:
1448		adev->gfx.config.max_shader_engines = 2;
1449		adev->gfx.config.max_tile_pipes = 4;
1450		adev->gfx.config.max_cu_per_sh = 2;
1451		adev->gfx.config.max_sh_per_se = 1;
1452		adev->gfx.config.max_backends_per_se = 2;
1453		adev->gfx.config.max_texture_channel_caches = 4;
1454		adev->gfx.config.max_gprs = 256;
1455		adev->gfx.config.max_gs_threads = 32;
1456		adev->gfx.config.max_hw_contexts = 8;
1457
1458		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1459		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1460		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1461		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1462		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1463		break;
1464	}
1465
1466	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1467	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1468	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1469
1470	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1471	adev->gfx.config.mem_max_burst_length_bytes = 256;
1472	if (adev->flags & AMD_IS_APU) {
1473		/* Get memory bank mapping mode. */
1474		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1475		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1476		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1477
1478		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1479		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1480		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1481
1482		/* Validate settings in case only one DIMM installed. */
1483		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1484			dimm00_addr_map = 0;
1485		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1486			dimm01_addr_map = 0;
1487		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1488			dimm10_addr_map = 0;
1489		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1490			dimm11_addr_map = 0;
1491
1492		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1493		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1494		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1495			adev->gfx.config.mem_row_size_in_kb = 2;
1496		else
1497			adev->gfx.config.mem_row_size_in_kb = 1;
1498	} else {
1499		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1500		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1501		if (adev->gfx.config.mem_row_size_in_kb > 4)
1502			adev->gfx.config.mem_row_size_in_kb = 4;
1503	}
1504
1505	adev->gfx.config.shader_engine_tile_size = 32;
1506	adev->gfx.config.num_gpus = 1;
1507	adev->gfx.config.multi_gpu_tile_size = 64;
1508
1509	/* fix up row size */
1510	switch (adev->gfx.config.mem_row_size_in_kb) {
1511	case 1:
1512	default:
1513		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1514		break;
1515	case 2:
1516		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1517		break;
1518	case 4:
1519		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1520		break;
1521	}
1522	adev->gfx.config.gb_addr_config = gb_addr_config;
 
 
1523}
1524
1525static int gfx_v8_0_sw_init(void *handle)
1526{
1527	int i, r;
1528	struct amdgpu_ring *ring;
1529	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1530
1531	/* EOP Event */
1532	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1533	if (r)
1534		return r;
1535
1536	/* Privileged reg */
1537	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1538	if (r)
1539		return r;
1540
1541	/* Privileged inst */
1542	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1543	if (r)
1544		return r;
1545
1546	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1547
1548	gfx_v8_0_scratch_init(adev);
1549
1550	r = gfx_v8_0_init_microcode(adev);
1551	if (r) {
1552		DRM_ERROR("Failed to load gfx firmware!\n");
1553		return r;
1554	}
1555
 
 
 
 
 
 
1556	r = gfx_v8_0_mec_init(adev);
1557	if (r) {
1558		DRM_ERROR("Failed to init MEC BOs!\n");
1559		return r;
1560	}
1561
1562	/* set up the gfx ring */
1563	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1564		ring = &adev->gfx.gfx_ring[i];
1565		ring->ring_obj = NULL;
1566		sprintf(ring->name, "gfx");
1567		/* no gfx doorbells on iceland */
1568		if (adev->asic_type != CHIP_TOPAZ) {
1569			ring->use_doorbell = true;
1570			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1571		}
1572
1573		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1574				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1575				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1576				     AMDGPU_RING_TYPE_GFX);
1577		if (r)
1578			return r;
1579	}
1580
1581	/* set up the compute queues */
1582	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1583		unsigned irq_type;
1584
1585		/* max 32 queues per MEC */
1586		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1587			DRM_ERROR("Too many (%d) compute rings!\n", i);
1588			break;
1589		}
1590		ring = &adev->gfx.compute_ring[i];
1591		ring->ring_obj = NULL;
1592		ring->use_doorbell = true;
1593		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1594		ring->me = 1; /* first MEC */
1595		ring->pipe = i / 8;
1596		ring->queue = i % 8;
1597		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1598		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1599		/* type-2 packets are deprecated on MEC, use type-3 instead */
1600		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1601				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1602				     &adev->gfx.eop_irq, irq_type,
1603				     AMDGPU_RING_TYPE_COMPUTE);
1604		if (r)
1605			return r;
1606	}
1607
1608	/* reserve GDS, GWS and OA resource for gfx */
1609	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1610			PAGE_SIZE, true,
1611			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1612			NULL, &adev->gds.gds_gfx_bo);
1613	if (r)
1614		return r;
1615
1616	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1617		PAGE_SIZE, true,
1618		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1619		NULL, &adev->gds.gws_gfx_bo);
1620	if (r)
1621		return r;
1622
1623	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1624			PAGE_SIZE, true,
1625			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1626			NULL, &adev->gds.oa_gfx_bo);
1627	if (r)
1628		return r;
1629
1630	adev->gfx.ce_ram_size = 0x8000;
1631
1632	gfx_v8_0_gpu_early_init(adev);
 
 
1633
1634	return 0;
1635}
1636
1637static int gfx_v8_0_sw_fini(void *handle)
1638{
1639	int i;
1640	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1641
1642	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1643	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1644	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1645
1646	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1647		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1648	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1649		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1650
1651	gfx_v8_0_mec_fini(adev);
 
 
1652
1653	return 0;
1654}
1655
1656static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1657{
1658	uint32_t *modearray, *mod2array;
1659	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1660	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1661	u32 reg_offset;
1662
1663	modearray = adev->gfx.config.tile_mode_array;
1664	mod2array = adev->gfx.config.macrotile_mode_array;
1665
1666	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1667		modearray[reg_offset] = 0;
1668
1669	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1670		mod2array[reg_offset] = 0;
1671
1672	switch (adev->asic_type) {
1673	case CHIP_TOPAZ:
1674		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675				PIPE_CONFIG(ADDR_SURF_P2) |
1676				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1677				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1679				PIPE_CONFIG(ADDR_SURF_P2) |
1680				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1681				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1683				PIPE_CONFIG(ADDR_SURF_P2) |
1684				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1685				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1687				PIPE_CONFIG(ADDR_SURF_P2) |
1688				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1689				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1690		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1691				PIPE_CONFIG(ADDR_SURF_P2) |
1692				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1693				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1694		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1695				PIPE_CONFIG(ADDR_SURF_P2) |
1696				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1697				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1698		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1699				PIPE_CONFIG(ADDR_SURF_P2) |
1700				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1701				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1702		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1703				PIPE_CONFIG(ADDR_SURF_P2));
1704		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1705				PIPE_CONFIG(ADDR_SURF_P2) |
1706				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1707				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1709				 PIPE_CONFIG(ADDR_SURF_P2) |
1710				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1711				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713				 PIPE_CONFIG(ADDR_SURF_P2) |
1714				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1715				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1717				 PIPE_CONFIG(ADDR_SURF_P2) |
1718				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1720		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1721				 PIPE_CONFIG(ADDR_SURF_P2) |
1722				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1723				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1724		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1725				 PIPE_CONFIG(ADDR_SURF_P2) |
1726				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1727				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1728		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1729				 PIPE_CONFIG(ADDR_SURF_P2) |
1730				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1731				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1732		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1733				 PIPE_CONFIG(ADDR_SURF_P2) |
1734				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1735				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1737				 PIPE_CONFIG(ADDR_SURF_P2) |
1738				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1739				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1741				 PIPE_CONFIG(ADDR_SURF_P2) |
1742				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1745				 PIPE_CONFIG(ADDR_SURF_P2) |
1746				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1749				 PIPE_CONFIG(ADDR_SURF_P2) |
1750				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1751				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1752		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1753				 PIPE_CONFIG(ADDR_SURF_P2) |
1754				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1755				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1756		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1757				 PIPE_CONFIG(ADDR_SURF_P2) |
1758				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1759				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1760		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1761				 PIPE_CONFIG(ADDR_SURF_P2) |
1762				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1763				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1764		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1765				 PIPE_CONFIG(ADDR_SURF_P2) |
1766				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1767				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1768		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1769				 PIPE_CONFIG(ADDR_SURF_P2) |
1770				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1771				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1772		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1773				 PIPE_CONFIG(ADDR_SURF_P2) |
1774				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1775				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1776
1777		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1778				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1779				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780				NUM_BANKS(ADDR_SURF_8_BANK));
1781		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1782				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1783				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784				NUM_BANKS(ADDR_SURF_8_BANK));
1785		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1786				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1787				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788				NUM_BANKS(ADDR_SURF_8_BANK));
1789		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1790				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1791				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792				NUM_BANKS(ADDR_SURF_8_BANK));
1793		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1794				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1795				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1796				NUM_BANKS(ADDR_SURF_8_BANK));
1797		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1798				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1799				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1800				NUM_BANKS(ADDR_SURF_8_BANK));
1801		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1802				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1803				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1804				NUM_BANKS(ADDR_SURF_8_BANK));
1805		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1806				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1807				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808				NUM_BANKS(ADDR_SURF_16_BANK));
1809		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1810				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1811				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812				NUM_BANKS(ADDR_SURF_16_BANK));
1813		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1814				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1815				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1816				 NUM_BANKS(ADDR_SURF_16_BANK));
1817		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1818				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1819				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1820				 NUM_BANKS(ADDR_SURF_16_BANK));
1821		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1822				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1823				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1824				 NUM_BANKS(ADDR_SURF_16_BANK));
1825		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1826				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1827				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1828				 NUM_BANKS(ADDR_SURF_16_BANK));
1829		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1830				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1831				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1832				 NUM_BANKS(ADDR_SURF_8_BANK));
1833
1834		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1835			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1836			    reg_offset != 23)
1837				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1838
1839		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1840			if (reg_offset != 7)
1841				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1842
1843		break;
1844	case CHIP_FIJI:
1845		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1848				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1850				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1852				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1854				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1856				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1858				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1859				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1860				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1862				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1863				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1864				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1865		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1867				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1868				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1869		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1870				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1871				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1872				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1873		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1875				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1876				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1877		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1878				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1879		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1882				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1886				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1888				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1890				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1891		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1893				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1894				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1896				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1897				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1899		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1900				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1903		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1904				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1906				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1907		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1908				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1910				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1911		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1912				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1913				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1914				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1915		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1916				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1918				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1920				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1921				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1926				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1928				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1932				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1936				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1937				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1938				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1939		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1940				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1942				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1943		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1944				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1946				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1947		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1948				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1949				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1950				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1951		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1952				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1953				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1954				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1955		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1956				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1957				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1958				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1959		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1960				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1961				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1962				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1963		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1964				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1965				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1966				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1967
1968		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1970				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1971				NUM_BANKS(ADDR_SURF_8_BANK));
1972		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1974				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1975				NUM_BANKS(ADDR_SURF_8_BANK));
1976		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1978				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1979				NUM_BANKS(ADDR_SURF_8_BANK));
1980		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1982				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983				NUM_BANKS(ADDR_SURF_8_BANK));
1984		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1986				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1987				NUM_BANKS(ADDR_SURF_8_BANK));
1988		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1990				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991				NUM_BANKS(ADDR_SURF_8_BANK));
1992		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995				NUM_BANKS(ADDR_SURF_8_BANK));
1996		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1998				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999				NUM_BANKS(ADDR_SURF_8_BANK));
2000		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2002				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003				NUM_BANKS(ADDR_SURF_8_BANK));
2004		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2006				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007				 NUM_BANKS(ADDR_SURF_8_BANK));
2008		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2009				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2010				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2011				 NUM_BANKS(ADDR_SURF_8_BANK));
2012		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2013				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2014				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2015				 NUM_BANKS(ADDR_SURF_8_BANK));
2016		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2017				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2018				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2019				 NUM_BANKS(ADDR_SURF_8_BANK));
2020		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2021				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2022				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2023				 NUM_BANKS(ADDR_SURF_4_BANK));
2024
2025		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2027
2028		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2029			if (reg_offset != 7)
2030				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2031
2032		break;
2033	case CHIP_TONGA:
2034		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2037				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2039				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2041				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2045				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2047				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2048				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2049				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2051				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2052				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2053				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2054		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2055				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2056				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2057				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2058		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2059				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2060				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2061				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2062		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2063				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2064				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2065				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2066		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2067				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2068		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2075				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2077				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2079				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2080		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2083				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2088		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2093				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2095				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2097				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2099				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2100		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2101				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2102				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2103				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2104		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2105				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2107				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2109				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2110				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2115				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2117				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2121				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2125				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2126				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2127				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2128		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2129				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2132		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2133				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2135				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2136		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2137				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2138				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2139				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2140		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2142				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2143				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2146				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2147				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2150				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2151				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2153				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2154				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2155				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2156
2157		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2159				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2160				NUM_BANKS(ADDR_SURF_16_BANK));
2161		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2163				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2164				NUM_BANKS(ADDR_SURF_16_BANK));
2165		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2167				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2168				NUM_BANKS(ADDR_SURF_16_BANK));
2169		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2171				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172				NUM_BANKS(ADDR_SURF_16_BANK));
2173		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176				NUM_BANKS(ADDR_SURF_16_BANK));
2177		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2179				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2180				NUM_BANKS(ADDR_SURF_16_BANK));
2181		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2184				NUM_BANKS(ADDR_SURF_16_BANK));
2185		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2187				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2188				NUM_BANKS(ADDR_SURF_16_BANK));
2189		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2192				NUM_BANKS(ADDR_SURF_16_BANK));
2193		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2195				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196				 NUM_BANKS(ADDR_SURF_16_BANK));
2197		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2200				 NUM_BANKS(ADDR_SURF_16_BANK));
2201		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2203				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2204				 NUM_BANKS(ADDR_SURF_8_BANK));
2205		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2208				 NUM_BANKS(ADDR_SURF_4_BANK));
2209		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2212				 NUM_BANKS(ADDR_SURF_4_BANK));
2213
2214		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2215			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2216
2217		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2218			if (reg_offset != 7)
2219				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2220
2221		break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2222	case CHIP_STONEY:
2223		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224				PIPE_CONFIG(ADDR_SURF_P2) |
2225				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2226				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228				PIPE_CONFIG(ADDR_SURF_P2) |
2229				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2230				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232				PIPE_CONFIG(ADDR_SURF_P2) |
2233				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2234				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236				PIPE_CONFIG(ADDR_SURF_P2) |
2237				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2238				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240				PIPE_CONFIG(ADDR_SURF_P2) |
2241				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244				PIPE_CONFIG(ADDR_SURF_P2) |
2245				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2246				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2247		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248				PIPE_CONFIG(ADDR_SURF_P2) |
2249				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2250				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2252				PIPE_CONFIG(ADDR_SURF_P2));
2253		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2254				PIPE_CONFIG(ADDR_SURF_P2) |
2255				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258				 PIPE_CONFIG(ADDR_SURF_P2) |
2259				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262				 PIPE_CONFIG(ADDR_SURF_P2) |
2263				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2264				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266				 PIPE_CONFIG(ADDR_SURF_P2) |
2267				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270				 PIPE_CONFIG(ADDR_SURF_P2) |
2271				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2274				 PIPE_CONFIG(ADDR_SURF_P2) |
2275				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278				 PIPE_CONFIG(ADDR_SURF_P2) |
2279				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282				 PIPE_CONFIG(ADDR_SURF_P2) |
2283				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286				 PIPE_CONFIG(ADDR_SURF_P2) |
2287				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290				 PIPE_CONFIG(ADDR_SURF_P2) |
2291				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294				 PIPE_CONFIG(ADDR_SURF_P2) |
2295				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298				 PIPE_CONFIG(ADDR_SURF_P2) |
2299				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2302				 PIPE_CONFIG(ADDR_SURF_P2) |
2303				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2306				 PIPE_CONFIG(ADDR_SURF_P2) |
2307				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2308				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2310				 PIPE_CONFIG(ADDR_SURF_P2) |
2311				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314				 PIPE_CONFIG(ADDR_SURF_P2) |
2315				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318				 PIPE_CONFIG(ADDR_SURF_P2) |
2319				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322				 PIPE_CONFIG(ADDR_SURF_P2) |
2323				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325
2326		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2328				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2329				NUM_BANKS(ADDR_SURF_8_BANK));
2330		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2332				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2333				NUM_BANKS(ADDR_SURF_8_BANK));
2334		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337				NUM_BANKS(ADDR_SURF_8_BANK));
2338		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2340				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341				NUM_BANKS(ADDR_SURF_8_BANK));
2342		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2344				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345				NUM_BANKS(ADDR_SURF_8_BANK));
2346		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2348				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349				NUM_BANKS(ADDR_SURF_8_BANK));
2350		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2352				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353				NUM_BANKS(ADDR_SURF_8_BANK));
2354		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2355				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2356				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357				NUM_BANKS(ADDR_SURF_16_BANK));
2358		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2359				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2360				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361				NUM_BANKS(ADDR_SURF_16_BANK));
2362		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2363				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365				 NUM_BANKS(ADDR_SURF_16_BANK));
2366		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2367				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2368				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2369				 NUM_BANKS(ADDR_SURF_16_BANK));
2370		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2373				 NUM_BANKS(ADDR_SURF_16_BANK));
2374		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377				 NUM_BANKS(ADDR_SURF_16_BANK));
2378		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381				 NUM_BANKS(ADDR_SURF_8_BANK));
2382
2383		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2384			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2385			    reg_offset != 23)
2386				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2387
2388		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2389			if (reg_offset != 7)
2390				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2391
2392		break;
2393	default:
2394		dev_warn(adev->dev,
2395			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2396			 adev->asic_type);
2397
2398	case CHIP_CARRIZO:
2399		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400				PIPE_CONFIG(ADDR_SURF_P2) |
2401				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2402				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404				PIPE_CONFIG(ADDR_SURF_P2) |
2405				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2406				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408				PIPE_CONFIG(ADDR_SURF_P2) |
2409				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412				PIPE_CONFIG(ADDR_SURF_P2) |
2413				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2414				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416				PIPE_CONFIG(ADDR_SURF_P2) |
2417				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2418				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420				PIPE_CONFIG(ADDR_SURF_P2) |
2421				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424				PIPE_CONFIG(ADDR_SURF_P2) |
2425				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2428				PIPE_CONFIG(ADDR_SURF_P2));
2429		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430				PIPE_CONFIG(ADDR_SURF_P2) |
2431				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434				 PIPE_CONFIG(ADDR_SURF_P2) |
2435				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438				 PIPE_CONFIG(ADDR_SURF_P2) |
2439				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2442				 PIPE_CONFIG(ADDR_SURF_P2) |
2443				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446				 PIPE_CONFIG(ADDR_SURF_P2) |
2447				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2448				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2450				 PIPE_CONFIG(ADDR_SURF_P2) |
2451				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2452				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454				 PIPE_CONFIG(ADDR_SURF_P2) |
2455				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2457		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2458				 PIPE_CONFIG(ADDR_SURF_P2) |
2459				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2462				 PIPE_CONFIG(ADDR_SURF_P2) |
2463				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2464				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2466				 PIPE_CONFIG(ADDR_SURF_P2) |
2467				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2470				 PIPE_CONFIG(ADDR_SURF_P2) |
2471				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2474				 PIPE_CONFIG(ADDR_SURF_P2) |
2475				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2476				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2478				 PIPE_CONFIG(ADDR_SURF_P2) |
2479				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2482				 PIPE_CONFIG(ADDR_SURF_P2) |
2483				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2486				 PIPE_CONFIG(ADDR_SURF_P2) |
2487				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2490				 PIPE_CONFIG(ADDR_SURF_P2) |
2491				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2492				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2493		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494				 PIPE_CONFIG(ADDR_SURF_P2) |
2495				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2496				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2498				 PIPE_CONFIG(ADDR_SURF_P2) |
2499				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2501
2502		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505				NUM_BANKS(ADDR_SURF_8_BANK));
2506		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2509				NUM_BANKS(ADDR_SURF_8_BANK));
2510		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513				NUM_BANKS(ADDR_SURF_8_BANK));
2514		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2517				NUM_BANKS(ADDR_SURF_8_BANK));
2518		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521				NUM_BANKS(ADDR_SURF_8_BANK));
2522		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525				NUM_BANKS(ADDR_SURF_8_BANK));
2526		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529				NUM_BANKS(ADDR_SURF_8_BANK));
2530		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2531				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2532				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533				NUM_BANKS(ADDR_SURF_16_BANK));
2534		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2535				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537				NUM_BANKS(ADDR_SURF_16_BANK));
2538		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2539				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2541				 NUM_BANKS(ADDR_SURF_16_BANK));
2542		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2543				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2545				 NUM_BANKS(ADDR_SURF_16_BANK));
2546		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2549				 NUM_BANKS(ADDR_SURF_16_BANK));
2550		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2553				 NUM_BANKS(ADDR_SURF_16_BANK));
2554		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2556				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557				 NUM_BANKS(ADDR_SURF_8_BANK));
2558
2559		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2560			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2561			    reg_offset != 23)
2562				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2563
2564		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2565			if (reg_offset != 7)
2566				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2567
2568		break;
2569	}
2570}
2571
2572void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
 
2573{
2574	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2575
2576	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2577		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
 
 
 
 
2578		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2579	} else if (se_num == 0xffffffff) {
2580		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2581		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2582	} else if (sh_num == 0xffffffff) {
2583		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2584		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2585	} else {
2586		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2587		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2588	}
2589	WREG32(mmGRBM_GFX_INDEX, data);
2590}
2591
/*
 * Build a mask with the low @bit_width bits set.
 *
 * The shift is done in 64 bits so that a width of 32 yields 0xffffffff;
 * the result is then truncated to 32 bits.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	const unsigned long long span = 1ULL << bit_width;

	return (u32)(span - 1ULL);
}
2596
2597static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2598{
2599	u32 data, mask;
2600
2601	data = RREG32(mmCC_RB_BACKEND_DISABLE);
2602	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2603
2604	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2605	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2606
2607	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2608				       adev->gfx.config.max_sh_per_se);
2609
2610	return (~data) & mask;
2611}
2612
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2613static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2614{
2615	int i, j;
2616	u32 data;
 
2617	u32 active_rbs = 0;
2618	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2619					adev->gfx.config.max_sh_per_se;
 
2620
2621	mutex_lock(&adev->grbm_idx_mutex);
2622	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2623		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2624			gfx_v8_0_select_se_sh(adev, i, j);
2625			data = gfx_v8_0_get_rb_active_bitmap(adev);
2626			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2627					       rb_bitmap_width_per_sh);
2628		}
2629	}
2630	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2631	mutex_unlock(&adev->grbm_idx_mutex);
2632
2633	adev->gfx.config.backend_enable_mask = active_rbs;
2634	adev->gfx.config.num_rbs = hweight32(active_rbs);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2635}
2636
2637/**
2638 * gfx_v8_0_init_compute_vmid - gart enable
2639 *
2640 * @rdev: amdgpu_device pointer
2641 *
2642 * Initialize compute vmid sh_mem registers
2643 *
2644 */
2645#define DEFAULT_SH_MEM_BASES	(0x6000)
2646#define FIRST_COMPUTE_VMID	(8)
2647#define LAST_COMPUTE_VMID	(16)
2648static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2649{
2650	int i;
2651	uint32_t sh_mem_config;
2652	uint32_t sh_mem_bases;
2653
2654	/*
2655	 * Configure apertures:
2656	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2657	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2658	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2659	 */
2660	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2661
2662	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2663			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2664			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2665			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2666			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2667			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2668
2669	mutex_lock(&adev->srbm_mutex);
2670	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2671		vi_srbm_select(adev, 0, 0, 0, i);
2672		/* CP and shaders */
2673		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2674		WREG32(mmSH_MEM_APE1_BASE, 1);
2675		WREG32(mmSH_MEM_APE1_LIMIT, 0);
2676		WREG32(mmSH_MEM_BASES, sh_mem_bases);
2677	}
2678	vi_srbm_select(adev, 0, 0, 0, 0);
2679	mutex_unlock(&adev->srbm_mutex);
2680}
2681
2682static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2683{
2684	u32 tmp;
2685	int i;
2686
2687	tmp = RREG32(mmGRBM_CNTL);
2688	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2689	WREG32(mmGRBM_CNTL, tmp);
2690
2691	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2692	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2693	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2694
2695	gfx_v8_0_tiling_mode_table_init(adev);
2696
2697	gfx_v8_0_setup_rb(adev);
 
2698
2699	/* XXX SH_MEM regs */
2700	/* where to put LDS, scratch, GPUVM in FSA64 space */
2701	mutex_lock(&adev->srbm_mutex);
2702	for (i = 0; i < 16; i++) {
2703		vi_srbm_select(adev, 0, 0, 0, i);
2704		/* CP and shaders */
2705		if (i == 0) {
2706			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2707			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2708			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2709					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2710			WREG32(mmSH_MEM_CONFIG, tmp);
2711		} else {
2712			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2713			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2714			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2715					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2716			WREG32(mmSH_MEM_CONFIG, tmp);
2717		}
2718
2719		WREG32(mmSH_MEM_APE1_BASE, 1);
2720		WREG32(mmSH_MEM_APE1_LIMIT, 0);
2721		WREG32(mmSH_MEM_BASES, 0);
2722	}
2723	vi_srbm_select(adev, 0, 0, 0, 0);
2724	mutex_unlock(&adev->srbm_mutex);
2725
2726	gfx_v8_0_init_compute_vmid(adev);
2727
2728	mutex_lock(&adev->grbm_idx_mutex);
2729	/*
2730	 * making sure that the following register writes will be broadcasted
2731	 * to all the shaders
2732	 */
2733	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2734
2735	WREG32(mmPA_SC_FIFO_SIZE,
2736		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
2737			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2738		   (adev->gfx.config.sc_prim_fifo_size_backend <<
2739			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2740		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
2741			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2742		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2743			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2744	mutex_unlock(&adev->grbm_idx_mutex);
2745
2746}
2747
2748static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2749{
2750	u32 i, j, k;
2751	u32 mask;
2752
2753	mutex_lock(&adev->grbm_idx_mutex);
2754	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2755		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2756			gfx_v8_0_select_se_sh(adev, i, j);
2757			for (k = 0; k < adev->usec_timeout; k++) {
2758				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2759					break;
2760				udelay(1);
2761			}
2762		}
2763	}
2764	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2765	mutex_unlock(&adev->grbm_idx_mutex);
2766
2767	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2768		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2769		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2770		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2771	for (k = 0; k < adev->usec_timeout; k++) {
2772		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2773			break;
2774		udelay(1);
2775	}
2776}
2777
2778static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2779					       bool enable)
2780{
2781	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2782
2783	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2784	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2785	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2786	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2787
2788	WREG32(mmCP_INT_CNTL_RING0, tmp);
2789}
2790
2791void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2792{
2793	u32 tmp = RREG32(mmRLC_CNTL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2794
2795	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2796	WREG32(mmRLC_CNTL, tmp);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2797
2798	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2799
2800	gfx_v8_0_wait_for_rlc_serdes(adev);
2801}
2802
2803static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2804{
2805	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
 
2806
2807	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2808	WREG32(mmGRBM_SOFT_RESET, tmp);
2809	udelay(50);
2810	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2811	WREG32(mmGRBM_SOFT_RESET, tmp);
2812	udelay(50);
2813}
2814
2815static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2816{
2817	u32 tmp = RREG32(mmRLC_CNTL);
2818
2819	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2820	WREG32(mmRLC_CNTL, tmp);
2821
2822	/* carrizo do enable cp interrupt after cp inited */
2823	if (!(adev->flags & AMD_IS_APU))
2824		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2825
2826	udelay(50);
2827}
2828
2829static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2830{
2831	const struct rlc_firmware_header_v2_0 *hdr;
2832	const __le32 *fw_data;
2833	unsigned i, fw_size;
2834
2835	if (!adev->gfx.rlc_fw)
2836		return -EINVAL;
2837
2838	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2839	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2840
2841	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2842			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2843	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2844
2845	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2846	for (i = 0; i < fw_size; i++)
2847		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2848	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2849
2850	return 0;
2851}
2852
2853static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2854{
2855	int r;
 
2856
2857	gfx_v8_0_rlc_stop(adev);
2858
2859	/* disable CG */
2860	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
 
 
 
 
 
 
 
 
 
 
2861
2862	/* disable PG */
2863	WREG32(mmRLC_PG_CNTL, 0);
2864
2865	gfx_v8_0_rlc_reset(adev);
 
2866
2867	if (!adev->pp_enabled) {
2868		if (!adev->firmware.smu_load) {
2869			/* legacy rlc firmware loading */
2870			r = gfx_v8_0_rlc_load_microcode(adev);
2871			if (r)
2872				return r;
2873		} else {
2874			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2875							AMDGPU_UCODE_ID_RLC_G);
2876			if (r)
2877				return -EINVAL;
2878		}
2879	}
2880
2881	gfx_v8_0_rlc_start(adev);
2882
2883	return 0;
2884}
2885
2886static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2887{
2888	int i;
2889	u32 tmp = RREG32(mmCP_ME_CNTL);
2890
2891	if (enable) {
2892		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2893		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2894		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2895	} else {
2896		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2897		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2898		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2899		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2900			adev->gfx.gfx_ring[i].ready = false;
2901	}
2902	WREG32(mmCP_ME_CNTL, tmp);
2903	udelay(50);
2904}
2905
2906static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2907{
2908	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2909	const struct gfx_firmware_header_v1_0 *ce_hdr;
2910	const struct gfx_firmware_header_v1_0 *me_hdr;
2911	const __le32 *fw_data;
2912	unsigned i, fw_size;
2913
2914	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2915		return -EINVAL;
2916
2917	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2918		adev->gfx.pfp_fw->data;
2919	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2920		adev->gfx.ce_fw->data;
2921	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2922		adev->gfx.me_fw->data;
2923
2924	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2925	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2926	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2927
2928	gfx_v8_0_cp_gfx_enable(adev, false);
2929
2930	/* PFP */
2931	fw_data = (const __le32 *)
2932		(adev->gfx.pfp_fw->data +
2933		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2934	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2935	WREG32(mmCP_PFP_UCODE_ADDR, 0);
2936	for (i = 0; i < fw_size; i++)
2937		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2938	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2939
2940	/* CE */
2941	fw_data = (const __le32 *)
2942		(adev->gfx.ce_fw->data +
2943		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2944	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2945	WREG32(mmCP_CE_UCODE_ADDR, 0);
2946	for (i = 0; i < fw_size; i++)
2947		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2948	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2949
2950	/* ME */
2951	fw_data = (const __le32 *)
2952		(adev->gfx.me_fw->data +
2953		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2954	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2955	WREG32(mmCP_ME_RAM_WADDR, 0);
2956	for (i = 0; i < fw_size; i++)
2957		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2958	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2959
2960	return 0;
2961}
2962
2963static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2964{
2965	u32 count = 0;
2966	const struct cs_section_def *sect = NULL;
2967	const struct cs_extent_def *ext = NULL;
2968
2969	/* begin clear state */
2970	count += 2;
2971	/* context control state */
2972	count += 3;
2973
2974	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2975		for (ext = sect->section; ext->extent != NULL; ++ext) {
2976			if (sect->id == SECT_CONTEXT)
2977				count += 2 + ext->reg_count;
2978			else
2979				return 0;
2980		}
2981	}
2982	/* pa_sc_raster_config/pa_sc_raster_config1 */
2983	count += 4;
2984	/* end clear state */
2985	count += 2;
2986	/* clear state */
2987	count += 2;
2988
2989	return count;
2990}
2991
2992static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2993{
2994	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2995	const struct cs_section_def *sect = NULL;
2996	const struct cs_extent_def *ext = NULL;
2997	int r, i;
2998
2999	/* init the CP */
3000	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3001	WREG32(mmCP_ENDIAN_SWAP, 0);
3002	WREG32(mmCP_DEVICE_ID, 1);
3003
3004	gfx_v8_0_cp_gfx_enable(adev, true);
3005
3006	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3007	if (r) {
3008		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3009		return r;
3010	}
3011
3012	/* clear state buffer */
3013	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3014	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3015
3016	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3017	amdgpu_ring_write(ring, 0x80000000);
3018	amdgpu_ring_write(ring, 0x80000000);
3019
3020	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3021		for (ext = sect->section; ext->extent != NULL; ++ext) {
3022			if (sect->id == SECT_CONTEXT) {
3023				amdgpu_ring_write(ring,
3024				       PACKET3(PACKET3_SET_CONTEXT_REG,
3025					       ext->reg_count));
3026				amdgpu_ring_write(ring,
3027				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3028				for (i = 0; i < ext->reg_count; i++)
3029					amdgpu_ring_write(ring, ext->extent[i]);
3030			}
3031		}
3032	}
3033
3034	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3035	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3036	switch (adev->asic_type) {
3037	case CHIP_TONGA:
 
3038		amdgpu_ring_write(ring, 0x16000012);
3039		amdgpu_ring_write(ring, 0x0000002A);
3040		break;
 
 
 
 
 
3041	case CHIP_FIJI:
3042		amdgpu_ring_write(ring, 0x3a00161a);
3043		amdgpu_ring_write(ring, 0x0000002e);
3044		break;
3045	case CHIP_TOPAZ:
3046	case CHIP_CARRIZO:
3047		amdgpu_ring_write(ring, 0x00000002);
3048		amdgpu_ring_write(ring, 0x00000000);
3049		break;
 
 
 
 
 
3050	case CHIP_STONEY:
3051		amdgpu_ring_write(ring, 0x00000000);
3052		amdgpu_ring_write(ring, 0x00000000);
3053		break;
3054	default:
3055		BUG();
3056	}
3057
3058	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3059	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3060
3061	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3062	amdgpu_ring_write(ring, 0);
3063
3064	/* init the CE partitions */
3065	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3066	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3067	amdgpu_ring_write(ring, 0x8000);
3068	amdgpu_ring_write(ring, 0x8000);
3069
3070	amdgpu_ring_commit(ring);
3071
3072	return 0;
3073}
3074
3075static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3076{
3077	struct amdgpu_ring *ring;
3078	u32 tmp;
3079	u32 rb_bufsz;
3080	u64 rb_addr, rptr_addr;
3081	int r;
3082
3083	/* Set the write pointer delay */
3084	WREG32(mmCP_RB_WPTR_DELAY, 0);
3085
3086	/* set the RB to use vmid 0 */
3087	WREG32(mmCP_RB_VMID, 0);
3088
3089	/* Set ring buffer size */
3090	ring = &adev->gfx.gfx_ring[0];
3091	rb_bufsz = order_base_2(ring->ring_size / 8);
3092	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3093	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3094	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3095	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3096#ifdef __BIG_ENDIAN
3097	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3098#endif
3099	WREG32(mmCP_RB0_CNTL, tmp);
3100
3101	/* Initialize the ring buffer's read and write pointers */
3102	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3103	ring->wptr = 0;
3104	WREG32(mmCP_RB0_WPTR, ring->wptr);
3105
3106	/* set the wb address wether it's enabled or not */
3107	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3108	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3109	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3110
 
 
 
3111	mdelay(1);
3112	WREG32(mmCP_RB0_CNTL, tmp);
3113
3114	rb_addr = ring->gpu_addr >> 8;
3115	WREG32(mmCP_RB0_BASE, rb_addr);
3116	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3117
3118	/* no gfx doorbells on iceland */
3119	if (adev->asic_type != CHIP_TOPAZ) {
3120		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3121		if (ring->use_doorbell) {
3122			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3123					    DOORBELL_OFFSET, ring->doorbell_index);
3124			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
 
 
3125					    DOORBELL_EN, 1);
3126		} else {
3127			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3128					    DOORBELL_EN, 0);
3129		}
3130		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3131
3132		if (adev->asic_type == CHIP_TONGA) {
3133			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3134					    DOORBELL_RANGE_LOWER,
3135					    AMDGPU_DOORBELL_GFX_RING0);
3136			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3137
3138			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3139			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3140		}
3141
3142	}
3143
3144	/* start the ring */
3145	gfx_v8_0_cp_gfx_start(adev);
3146	ring->ready = true;
3147	r = amdgpu_ring_test_ring(ring);
3148	if (r) {
3149		ring->ready = false;
3150		return r;
3151	}
3152
3153	return 0;
3154}
3155
3156static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3157{
3158	int i;
3159
3160	if (enable) {
3161		WREG32(mmCP_MEC_CNTL, 0);
3162	} else {
3163		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3164		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3165			adev->gfx.compute_ring[i].ready = false;
3166	}
3167	udelay(50);
3168}
3169
3170static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3171{
3172	const struct gfx_firmware_header_v1_0 *mec_hdr;
3173	const __le32 *fw_data;
3174	unsigned i, fw_size;
3175
3176	if (!adev->gfx.mec_fw)
3177		return -EINVAL;
3178
3179	gfx_v8_0_cp_compute_enable(adev, false);
3180
3181	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3182	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3183
3184	fw_data = (const __le32 *)
3185		(adev->gfx.mec_fw->data +
3186		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3187	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3188
3189	/* MEC1 */
3190	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3191	for (i = 0; i < fw_size; i++)
3192		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3193	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3194
3195	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3196	if (adev->gfx.mec2_fw) {
3197		const struct gfx_firmware_header_v1_0 *mec2_hdr;
3198
3199		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3200		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3201
3202		fw_data = (const __le32 *)
3203			(adev->gfx.mec2_fw->data +
3204			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3205		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3206
3207		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3208		for (i = 0; i < fw_size; i++)
3209			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3210		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3211	}
3212
3213	return 0;
3214}
3215
/*
 * struct vi_mqd - MQD image used for the VI compute queues.
 *
 * Every named member is one 32-bit dword; the trailing comment on each
 * member gives its dword index ("ordinal") within the image, 0-255.  The
 * named part is followed by a further 256-dword area reserved for the
 * ucode, so the whole structure is 512 dwords (2048 bytes).
 *
 * gfx_v8_0_cp_compute_resume() allocates one buffer object of
 * sizeof(struct vi_mqd) per compute ring, zeroes it, and fills in the
 * header, a few compute_* defaults and the cp_mqd_* / cp_hqd_* members
 * alongside the matching mmCP_MQD_* / mmCP_HQD_* register writes.
 *
 * NOTE(review): the member order is presumably dictated by the CP
 * microcode; confirm before inserting, removing or reordering members.
 */
3216struct vi_mqd {
3217	uint32_t header;  /* ordinal0 */
	/* ordinals 1-38: compute_* dispatch state */
3218	uint32_t compute_dispatch_initiator;  /* ordinal1 */
3219	uint32_t compute_dim_x;  /* ordinal2 */
3220	uint32_t compute_dim_y;  /* ordinal3 */
3221	uint32_t compute_dim_z;  /* ordinal4 */
3222	uint32_t compute_start_x;  /* ordinal5 */
3223	uint32_t compute_start_y;  /* ordinal6 */
3224	uint32_t compute_start_z;  /* ordinal7 */
3225	uint32_t compute_num_thread_x;  /* ordinal8 */
3226	uint32_t compute_num_thread_y;  /* ordinal9 */
3227	uint32_t compute_num_thread_z;  /* ordinal10 */
3228	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3229	uint32_t compute_perfcount_enable;  /* ordinal12 */
3230	uint32_t compute_pgm_lo;  /* ordinal13 */
3231	uint32_t compute_pgm_hi;  /* ordinal14 */
3232	uint32_t compute_tba_lo;  /* ordinal15 */
3233	uint32_t compute_tba_hi;  /* ordinal16 */
3234	uint32_t compute_tma_lo;  /* ordinal17 */
3235	uint32_t compute_tma_hi;  /* ordinal18 */
3236	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3237	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3238	uint32_t compute_vmid;  /* ordinal21 */
3239	uint32_t compute_resource_limits;  /* ordinal22 */
3240	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3241	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3242	uint32_t compute_tmpring_size;  /* ordinal25 */
3243	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3244	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3245	uint32_t compute_restart_x;  /* ordinal28 */
3246	uint32_t compute_restart_y;  /* ordinal29 */
3247	uint32_t compute_restart_z;  /* ordinal30 */
3248	uint32_t compute_thread_trace_enable;  /* ordinal31 */
3249	uint32_t compute_misc_reserved;  /* ordinal32 */
3250	uint32_t compute_dispatch_id;  /* ordinal33 */
3251	uint32_t compute_threadgroup_id;  /* ordinal34 */
3252	uint32_t compute_relaunch;  /* ordinal35 */
3253	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3254	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3255	uint32_t compute_wave_restore_control;  /* ordinal38 */
3256	uint32_t reserved9;  /* ordinal39 */
3257	uint32_t reserved10;  /* ordinal40 */
3258	uint32_t reserved11;  /* ordinal41 */
3259	uint32_t reserved12;  /* ordinal42 */
3260	uint32_t reserved13;  /* ordinal43 */
3261	uint32_t reserved14;  /* ordinal44 */
3262	uint32_t reserved15;  /* ordinal45 */
3263	uint32_t reserved16;  /* ordinal46 */
3264	uint32_t reserved17;  /* ordinal47 */
3265	uint32_t reserved18;  /* ordinal48 */
3266	uint32_t reserved19;  /* ordinal49 */
3267	uint32_t reserved20;  /* ordinal50 */
3268	uint32_t reserved21;  /* ordinal51 */
3269	uint32_t reserved22;  /* ordinal52 */
3270	uint32_t reserved23;  /* ordinal53 */
3271	uint32_t reserved24;  /* ordinal54 */
3272	uint32_t reserved25;  /* ordinal55 */
3273	uint32_t reserved26;  /* ordinal56 */
3274	uint32_t reserved27;  /* ordinal57 */
3275	uint32_t reserved28;  /* ordinal58 */
3276	uint32_t reserved29;  /* ordinal59 */
3277	uint32_t reserved30;  /* ordinal60 */
3278	uint32_t reserved31;  /* ordinal61 */
3279	uint32_t reserved32;  /* ordinal62 */
3280	uint32_t reserved33;  /* ordinal63 */
3281	uint32_t reserved34;  /* ordinal64 */
	/* ordinals 65-80: compute_user_data_0 .. compute_user_data_15 */
3282	uint32_t compute_user_data_0;  /* ordinal65 */
3283	uint32_t compute_user_data_1;  /* ordinal66 */
3284	uint32_t compute_user_data_2;  /* ordinal67 */
3285	uint32_t compute_user_data_3;  /* ordinal68 */
3286	uint32_t compute_user_data_4;  /* ordinal69 */
3287	uint32_t compute_user_data_5;  /* ordinal70 */
3288	uint32_t compute_user_data_6;  /* ordinal71 */
3289	uint32_t compute_user_data_7;  /* ordinal72 */
3290	uint32_t compute_user_data_8;  /* ordinal73 */
3291	uint32_t compute_user_data_9;  /* ordinal74 */
3292	uint32_t compute_user_data_10;  /* ordinal75 */
3293	uint32_t compute_user_data_11;  /* ordinal76 */
3294	uint32_t compute_user_data_12;  /* ordinal77 */
3295	uint32_t compute_user_data_13;  /* ordinal78 */
3296	uint32_t compute_user_data_14;  /* ordinal79 */
3297	uint32_t compute_user_data_15;  /* ordinal80 */
3298	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3299	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3300	uint32_t reserved35;  /* ordinal83 */
3301	uint32_t reserved36;  /* ordinal84 */
3302	uint32_t reserved37;  /* ordinal85 */
	/* ordinals 86-105: cp_mqd_* lo/hi time stamps and connect-end counters */
3303	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3304	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3305	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3306	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3307	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3308	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3309	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3310	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3311	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3312	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3313	uint32_t reserved38;  /* ordinal96 */
3314	uint32_t reserved39;  /* ordinal97 */
3315	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3316	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3317	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3318	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3319	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3320	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3321	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3322	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3323	uint32_t reserved40;  /* ordinal106 */
3324	uint32_t reserved41;  /* ordinal107 */
3325	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3326	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3327	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3328	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3329	uint32_t reserved42;  /* ordinal112 */
3330	uint32_t reserved43;  /* ordinal113 */
3331	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3332	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3333	uint32_t cp_packet_id_lo;  /* ordinal116 */
3334	uint32_t cp_packet_id_hi;  /* ordinal117 */
3335	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3336	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3337	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3338	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3339	uint32_t gds_save_mask_lo;  /* ordinal122 */
3340	uint32_t gds_save_mask_hi;  /* ordinal123 */
3341	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3342	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3343	uint32_t reserved44;  /* ordinal126 */
3344	uint32_t reserved45;  /* ordinal127 */
	/*
	 * ordinals 128-181: cp_mqd_* / cp_hqd_* members; several of these
	 * are written by gfx_v8_0_cp_compute_resume() together with the
	 * register of the same name
	 */
3345	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3346	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3347	uint32_t cp_hqd_active;  /* ordinal130 */
3348	uint32_t cp_hqd_vmid;  /* ordinal131 */
3349	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3350	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3351	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3352	uint32_t cp_hqd_quantum;  /* ordinal135 */
3353	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3354	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3355	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3356	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3357	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3358	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3359	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3360	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3361	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3362	uint32_t cp_hqd_pq_control;  /* ordinal145 */
3363	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3364	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3365	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3366	uint32_t cp_hqd_ib_control;  /* ordinal149 */
3367	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3368	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3369	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3370	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3371	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3372	uint32_t cp_hqd_msg_type;  /* ordinal155 */
3373	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3374	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3375	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3376	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3377	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3378	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3379	uint32_t cp_mqd_control;  /* ordinal162 */
3380	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3381	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3382	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3383	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3384	uint32_t cp_hqd_eop_control;  /* ordinal167 */
3385	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3386	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3387	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3388	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3389	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3390	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3391	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3392	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3393	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3394	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3395	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3396	uint32_t cp_hqd_error;  /* ordinal179 */
3397	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3398	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3399	uint32_t reserved46;  /* ordinal182 */
3400	uint32_t reserved47;  /* ordinal183 */
3401	uint32_t reserved48;  /* ordinal184 */
3402	uint32_t reserved49;  /* ordinal185 */
3403	uint32_t reserved50;  /* ordinal186 */
3404	uint32_t reserved51;  /* ordinal187 */
3405	uint32_t reserved52;  /* ordinal188 */
3406	uint32_t reserved53;  /* ordinal189 */
3407	uint32_t reserved54;  /* ordinal190 */
3408	uint32_t reserved55;  /* ordinal191 */
	/* ordinals 192-224: iqtimer packet, one header dword plus dw0..dw31 */
3409	uint32_t iqtimer_pkt_header;  /* ordinal192 */
3410	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3411	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3412	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3413	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3414	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3415	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3416	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3417	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3418	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3419	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3420	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3421	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3422	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3423	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3424	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3425	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3426	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3427	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3428	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3429	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3430	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3431	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3432	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3433	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3434	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3435	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3436	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3437	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3438	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3439	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3440	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3441	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3442	uint32_t reserved56;  /* ordinal225 */
3443	uint32_t reserved57;  /* ordinal226 */
3444	uint32_t reserved58;  /* ordinal227 */
	/* ordinals 228-235: set_resources packet, header plus dw1..dw7 */
3445	uint32_t set_resources_header;  /* ordinal228 */
3446	uint32_t set_resources_dw1;  /* ordinal229 */
3447	uint32_t set_resources_dw2;  /* ordinal230 */
3448	uint32_t set_resources_dw3;  /* ordinal231 */
3449	uint32_t set_resources_dw4;  /* ordinal232 */
3450	uint32_t set_resources_dw5;  /* ordinal233 */
3451	uint32_t set_resources_dw6;  /* ordinal234 */
3452	uint32_t set_resources_dw7;  /* ordinal235 */
3453	uint32_t reserved59;  /* ordinal236 */
3454	uint32_t reserved60;  /* ordinal237 */
3455	uint32_t reserved61;  /* ordinal238 */
3456	uint32_t reserved62;  /* ordinal239 */
3457	uint32_t reserved63;  /* ordinal240 */
3458	uint32_t reserved64;  /* ordinal241 */
3459	uint32_t reserved65;  /* ordinal242 */
3460	uint32_t reserved66;  /* ordinal243 */
3461	uint32_t reserved67;  /* ordinal244 */
3462	uint32_t reserved68;  /* ordinal245 */
3463	uint32_t reserved69;  /* ordinal246 */
3464	uint32_t reserved70;  /* ordinal247 */
3465	uint32_t reserved71;  /* ordinal248 */
3466	uint32_t reserved72;  /* ordinal249 */
3467	uint32_t reserved73;  /* ordinal250 */
3468	uint32_t reserved74;  /* ordinal251 */
3469	uint32_t reserved75;  /* ordinal252 */
3470	uint32_t reserved76;  /* ordinal253 */
3471	uint32_t reserved77;  /* ordinal254 */
3472	uint32_t reserved78;  /* ordinal255 */
3473
3474	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3475};
3476
3477static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3478{
3479	int i, r;
3480
3481	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3482		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3483
3484		if (ring->mqd_obj) {
3485			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3486			if (unlikely(r != 0))
3487				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3488
3489			amdgpu_bo_unpin(ring->mqd_obj);
3490			amdgpu_bo_unreserve(ring->mqd_obj);
3491
3492			amdgpu_bo_unref(&ring->mqd_obj);
3493			ring->mqd_obj = NULL;
3494		}
3495	}
3496}
3497
/*
 * gfx_v8_0_cp_compute_resume - set up and activate the compute queues
 * @adev: amdgpu device
 *
 * Runs in three stages:
 *  1. For every MEC pipe, program the EOP buffer address, VMID 0 and the
 *     EOP size (under srbm_mutex, selecting the pipe via vi_srbm_select()).
 *  2. For every compute ring, create (if needed), reserve, pin and map its
 *     MQD buffer object, fill in a struct vi_mqd and write the same values
 *     to the CP_MQD_* / CP_HQD_* registers of the ring's me/pipe/queue,
 *     finishing with CP_HQD_ACTIVE = 1.
 *  3. Set CP_PQ_STATUS.DOORBELL_ENABLE, un-halt the MEC via
 *     gfx_v8_0_cp_compute_enable() and ring-test every compute ring.
 *
 * Returns 0, or the error from amdgpu_bo_create()/amdgpu_bo_reserve()/
 * amdgpu_bo_pin()/amdgpu_bo_kmap().  A failing ring test is not reported
 * to the caller; it only leaves that ring's ->ready flag false.
 */
3498static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3499{
3500	int r, i, j;
3501	u32 tmp;
	/* never changed below, so every "else" of a use_doorbell test is dead */
3502	bool use_doorbell = true;
3503	u64 hqd_gpu_addr;
3504	u64 mqd_gpu_addr;
3505	u64 eop_gpu_addr;
3506	u64 wb_gpu_addr;
3507	u32 *buf;
3508	struct vi_mqd *mqd;
3509
3510	/* init the pipes */
3511	mutex_lock(&adev->srbm_mutex);
3512	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* flat index -> (me, pipe); hard-codes 4 pipes per ME */
3513		int me = (i < 4) ? 1 : 2;
3514		int pipe = (i < 4) ? i : (i - 4);
3515
		/* one MEC_HPD_SIZE slice per pipe; registers take addr >> 8 */
3516		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3517		eop_gpu_addr >>= 8;
3518
3519		vi_srbm_select(adev, me, pipe, 0, 0);
3520
3521		/* write the EOP addr */
3522		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3523		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3524
3525		/* set the VMID assigned */
3526		WREG32(mmCP_HQD_VMID, 0);
3527
3528		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3529		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3530		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3531				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
3532		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3533	}
	/* restore the default selection before dropping the lock */
3534	vi_srbm_select(adev, 0, 0, 0, 0);
3535	mutex_unlock(&adev->srbm_mutex);
3536
3537	/* init the queues: one MQD per entry of adev->gfx.compute_ring[] */
3538	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3539		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3540
		/* an MQD object left from an earlier call is reused */
3541		if (ring->mqd_obj == NULL) {
3542			r = amdgpu_bo_create(adev,
3543					     sizeof(struct vi_mqd),
3544					     PAGE_SIZE, true,
3545					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3546					     NULL, &ring->mqd_obj);
3547			if (r) {
3548				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3549				return r;
3550			}
3551		}
3552
		/*
		 * NOTE(review): the pin and kmap failure paths below call
		 * gfx_v8_0_cp_compute_fini() while this ring's MQD object is
		 * still reserved here, and that function reserves each MQD
		 * object itself -- confirm this is safe.
		 */
3553		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3554		if (unlikely(r != 0)) {
3555			gfx_v8_0_cp_compute_fini(adev);
3556			return r;
3557		}
3558		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3559				  &mqd_gpu_addr);
3560		if (r) {
3561			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3562			gfx_v8_0_cp_compute_fini(adev);
3563			return r;
3564		}
3565		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3566		if (r) {
3567			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3568			gfx_v8_0_cp_compute_fini(adev);
3569			return r;
3570		}
3571
3572		/* init the mqd struct */
3573		memset(buf, 0, sizeof(struct vi_mqd));
3574
3575		mqd = (struct vi_mqd *)buf;
3576		mqd->header = 0xC0310800;
3577		mqd->compute_pipelinestat_enable = 0x00000001;
3578		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3579		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3580		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3581		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3582		mqd->compute_misc_reserved = 0x00000003;
3583
		/*
		 * From here until the matching vi_srbm_select(adev, 0, 0, 0, 0)
		 * the HQD register accesses are made with this ring's
		 * me/pipe/queue selected.
		 */
3584		mutex_lock(&adev->srbm_mutex);
3585		vi_srbm_select(adev, ring->me,
3586			       ring->pipe,
3587			       ring->queue, 0);
3588
3589		/* disable wptr polling */
3590		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3591		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3592		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3593
		/* copy the EOP base currently in the registers into the MQD */
3594		mqd->cp_hqd_eop_base_addr_lo =
3595			RREG32(mmCP_HQD_EOP_BASE_ADDR);
3596		mqd->cp_hqd_eop_base_addr_hi =
3597			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3598
3599		/* enable doorbell? */
3600		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3601		if (use_doorbell) {
3602			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3603		} else {
3604			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3605		}
3606		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3607		mqd->cp_hqd_pq_doorbell_control = tmp;
3608
3609		/* disable the queue if it's active */
3610		mqd->cp_hqd_dequeue_request = 0;
3611		mqd->cp_hqd_pq_rptr = 0;
3612		mqd->cp_hqd_pq_wptr= 0;
3613		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3614			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* poll up to usec_timeout us; a timeout is not reported */
3615			for (j = 0; j < adev->usec_timeout; j++) {
3616				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3617					break;
3618				udelay(1);
3619			}
3620			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3621			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3622			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3623		}
3624
3625		/* set the pointer to the MQD */
3626		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3627		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3628		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3629		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3630
3631		/* set MQD vmid to 0 */
3632		tmp = RREG32(mmCP_MQD_CONTROL);
3633		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3634		WREG32(mmCP_MQD_CONTROL, tmp);
3635		mqd->cp_mqd_control = tmp;
3636
3637		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3638		hqd_gpu_addr = ring->gpu_addr >> 8;
3639		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3640		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3641		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3642		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3643
3644		/* set up the HQD, this is similar to CP_RB0_CNTL */
3645		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3646		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3647				    (order_base_2(ring->ring_size / 4) - 1));
		/*
		 * NOTE(review): unlike the other REG_SET_FIELD() calls in this
		 * function, the value passed for RPTR_BLOCK_SIZE is shifted
		 * left by 8 before being handed to the macro -- verify against
		 * the REG_SET_FIELD definition that this yields the intended
		 * field value.
		 */
3648		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3649			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3650#ifdef __BIG_ENDIAN
3651		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3652#endif
3653		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3654		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3655		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3656		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3657		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3658		mqd->cp_hqd_pq_control = tmp;
3659
3660		/* set the wb address whether it's enabled or not */
3661		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3662		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3663		mqd->cp_hqd_pq_rptr_report_addr_hi =
3664			upper_32_bits(wb_gpu_addr) & 0xffff;
3665		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3666		       mqd->cp_hqd_pq_rptr_report_addr_lo);
3667		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3668		       mqd->cp_hqd_pq_rptr_report_addr_hi);
3669
3670		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3671		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3672		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3673		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3674		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3675		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3676		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3677
3678		/* enable the doorbell if requested */
3679		if (use_doorbell) {
			/* the MEC doorbell range is only written on these ASICs */
3680			if ((adev->asic_type == CHIP_CARRIZO) ||
3681			    (adev->asic_type == CHIP_FIJI) ||
3682			    (adev->asic_type == CHIP_STONEY)) {
3683				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3684				       AMDGPU_DOORBELL_KIQ << 2);
3685				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3686				       AMDGPU_DOORBELL_MEC_RING7 << 2);
3687			}
3688			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3689			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3690					    DOORBELL_OFFSET, ring->doorbell_index);
3691			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3692			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3693			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3694			mqd->cp_hqd_pq_doorbell_control = tmp;
3695
3696		} else {
3697			mqd->cp_hqd_pq_doorbell_control = 0;
3698		}
3699		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3700		       mqd->cp_hqd_pq_doorbell_control);
3701
3702		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3703		ring->wptr = 0;
3704		mqd->cp_hqd_pq_wptr = ring->wptr;
3705		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		/* the read pointer is read back from the register, not written */
3706		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3707
3708		/* set the vmid for the queue */
3709		mqd->cp_hqd_vmid = 0;
3710		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3711
3712		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3713		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3714		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3715		mqd->cp_hqd_persistent_state = tmp;
		/* Stoney only: set GENERIC2_INT_ENABLE in CP_ME1_PIPE3_INT_CNTL */
3716		if (adev->asic_type == CHIP_STONEY) {
3717			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3718			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3719			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3720		}
3721
3722		/* activate the queue */
3723		mqd->cp_hqd_active = 1;
3724		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3725
3726		vi_srbm_select(adev, 0, 0, 0, 0);
3727		mutex_unlock(&adev->srbm_mutex);
3728
		/* the object stays pinned; only the CPU mapping and reservation go */
3729		amdgpu_bo_kunmap(ring->mqd_obj);
3730		amdgpu_bo_unreserve(ring->mqd_obj);
3731	}
3732
3733	if (use_doorbell) {
3734		tmp = RREG32(mmCP_PQ_STATUS);
3735		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3736		WREG32(mmCP_PQ_STATUS, tmp);
3737	}
3738
3739	gfx_v8_0_cp_compute_enable(adev, true);
3740
	/* a ring that fails its test is marked not ready; no error is returned */
3741	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3742		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3743
3744		ring->ready = true;
3745		r = amdgpu_ring_test_ring(ring);
3746		if (r)
3747			ring->ready = false;
3748	}
3749
3750	return 0;
3751}
3752
3753static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3754{
3755	int r;
3756
3757	if (!(adev->flags & AMD_IS_APU))
3758		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3759
3760	if (!adev->pp_enabled) {
3761		if (!adev->firmware.smu_load) {
3762			/* legacy firmware loading */
3763			r = gfx_v8_0_cp_gfx_load_microcode(adev);
3764			if (r)
3765				return r;
3766
3767			r = gfx_v8_0_cp_compute_load_microcode(adev);
3768			if (r)
3769				return r;
3770		} else {
3771			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3772							AMDGPU_UCODE_ID_CP_CE);
3773			if (r)
3774				return -EINVAL;
3775
3776			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3777							AMDGPU_UCODE_ID_CP_PFP);
3778			if (r)
3779				return -EINVAL;
3780
3781			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3782							AMDGPU_UCODE_ID_CP_ME);
3783			if (r)
3784				return -EINVAL;
3785
3786			if (adev->asic_type == CHIP_TOPAZ) {
3787				r = gfx_v8_0_cp_compute_load_microcode(adev);
3788				if (r)
3789					return r;
3790			} else {
3791				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3792										 AMDGPU_UCODE_ID_CP_MEC1);
3793				if (r)
3794					return -EINVAL;
3795			}
3796		}
3797	}
3798
3799	r = gfx_v8_0_cp_gfx_resume(adev);
3800	if (r)
3801		return r;
3802
3803	r = gfx_v8_0_cp_compute_resume(adev);
3804	if (r)
3805		return r;
3806
3807	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3808
3809	return 0;
3810}
3811
3812static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3813{
3814	gfx_v8_0_cp_gfx_enable(adev, enable);
3815	gfx_v8_0_cp_compute_enable(adev, enable);
3816}
3817
/*
 * gfx_v8_0_hw_init - bring up the gfx block
 * @handle: the struct amdgpu_device, passed as an opaque pointer
 *
 * Applies the golden register settings, runs the gpu init, then resumes
 * the RLC followed by the CP.
 *
 * Returns 0 on success or the error of the first resume step that fails.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	/* the CP resume result is the overall result */
	return gfx_v8_0_cp_resume(adev);
}
3837
3838static int gfx_v8_0_hw_fini(void *handle)
3839{
3840	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3841
3842	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3843	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 
 
 
 
3844	gfx_v8_0_cp_enable(adev, false);
3845	gfx_v8_0_rlc_stop(adev);
3846	gfx_v8_0_cp_compute_fini(adev);
3847
 
 
 
3848	return 0;
3849}
3850
/*
 * gfx_v8_0_suspend - suspend callback
 * @handle: the struct amdgpu_device, passed as an opaque pointer
 *
 * Suspend is the same as a hardware teardown; returns what
 * gfx_v8_0_hw_fini() returns.
 */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
3857
/*
 * gfx_v8_0_resume - resume callback
 * @handle: the struct amdgpu_device, passed as an opaque pointer
 *
 * Resume is the same as a hardware bring-up; returns what
 * gfx_v8_0_hw_init() returns.
 */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
3864
3865static bool gfx_v8_0_is_idle(void *handle)
3866{
3867	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3868
3869	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3870		return false;
3871	else
3872		return true;
3873}
3874
3875static int gfx_v8_0_wait_for_idle(void *handle)
3876{
3877	unsigned i;
3878	u32 tmp;
3879	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3880
3881	for (i = 0; i < adev->usec_timeout; i++) {
3882		/* read MC_STATUS */
3883		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3884
3885		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3886			return 0;
3887		udelay(1);
3888	}
3889	return -ETIMEDOUT;
3890}
3891
3892static void gfx_v8_0_print_status(void *handle)
3893{
3894	int i;
3895	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3896
3897	dev_info(adev->dev, "GFX 8.x registers\n");
3898	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3899		 RREG32(mmGRBM_STATUS));
3900	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3901		 RREG32(mmGRBM_STATUS2));
3902	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3903		 RREG32(mmGRBM_STATUS_SE0));
3904	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3905		 RREG32(mmGRBM_STATUS_SE1));
3906	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3907		 RREG32(mmGRBM_STATUS_SE2));
3908	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3909		 RREG32(mmGRBM_STATUS_SE3));
3910	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3911	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3912		 RREG32(mmCP_STALLED_STAT1));
3913	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3914		 RREG32(mmCP_STALLED_STAT2));
3915	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3916		 RREG32(mmCP_STALLED_STAT3));
3917	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3918		 RREG32(mmCP_CPF_BUSY_STAT));
3919	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3920		 RREG32(mmCP_CPF_STALLED_STAT1));
3921	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3922	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3923	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3924		 RREG32(mmCP_CPC_STALLED_STAT1));
3925	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3926
3927	for (i = 0; i < 32; i++) {
3928		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3929			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3930	}
3931	for (i = 0; i < 16; i++) {
3932		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3933			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3934	}
3935	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3936		dev_info(adev->dev, "  se: %d\n", i);
3937		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3938		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3939			 RREG32(mmPA_SC_RASTER_CONFIG));
3940		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3941			 RREG32(mmPA_SC_RASTER_CONFIG_1));
3942	}
3943	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3944
3945	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3946		 RREG32(mmGB_ADDR_CONFIG));
3947	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3948		 RREG32(mmHDP_ADDR_CONFIG));
3949	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3950		 RREG32(mmDMIF_ADDR_CALC));
3951
3952	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
3953		 RREG32(mmCP_MEQ_THRESHOLDS));
3954	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
3955		 RREG32(mmSX_DEBUG_1));
3956	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
3957		 RREG32(mmTA_CNTL_AUX));
3958	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
3959		 RREG32(mmSPI_CONFIG_CNTL));
3960	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
3961		 RREG32(mmSQ_CONFIG));
3962	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
3963		 RREG32(mmDB_DEBUG));
3964	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
3965		 RREG32(mmDB_DEBUG2));
3966	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
3967		 RREG32(mmDB_DEBUG3));
3968	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
3969		 RREG32(mmCB_HW_CONTROL));
3970	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
3971		 RREG32(mmSPI_CONFIG_CNTL_1));
3972	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
3973		 RREG32(mmPA_SC_FIFO_SIZE));
3974	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
3975		 RREG32(mmVGT_NUM_INSTANCES));
3976	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
3977		 RREG32(mmCP_PERFMON_CNTL));
3978	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3979		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3980	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
3981		 RREG32(mmVGT_CACHE_INVALIDATION));
3982	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
3983		 RREG32(mmVGT_GS_VERTEX_REUSE));
3984	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3985		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3986	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
3987		 RREG32(mmPA_CL_ENHANCE));
3988	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
3989		 RREG32(mmPA_SC_ENHANCE));
3990
3991	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
3992		 RREG32(mmCP_ME_CNTL));
3993	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
3994		 RREG32(mmCP_MAX_CONTEXT));
3995	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
3996		 RREG32(mmCP_ENDIAN_SWAP));
3997	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
3998		 RREG32(mmCP_DEVICE_ID));
3999
4000	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4001		 RREG32(mmCP_SEM_WAIT_TIMER));
4002
4003	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4004		 RREG32(mmCP_RB_WPTR_DELAY));
4005	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4006		 RREG32(mmCP_RB_VMID));
4007	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4008		 RREG32(mmCP_RB0_CNTL));
4009	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4010		 RREG32(mmCP_RB0_WPTR));
4011	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4012		 RREG32(mmCP_RB0_RPTR_ADDR));
4013	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4014		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4015	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4016		 RREG32(mmCP_RB0_CNTL));
4017	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4018		 RREG32(mmCP_RB0_BASE));
4019	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4020		 RREG32(mmCP_RB0_BASE_HI));
4021	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4022		 RREG32(mmCP_MEC_CNTL));
4023	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4024		 RREG32(mmCP_CPF_DEBUG));
4025
4026	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4027		 RREG32(mmSCRATCH_ADDR));
4028	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4029		 RREG32(mmSCRATCH_UMSK));
4030
4031	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4032		 RREG32(mmCP_INT_CNTL_RING0));
4033	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4034		 RREG32(mmRLC_LB_CNTL));
4035	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4036		 RREG32(mmRLC_CNTL));
4037	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4038		 RREG32(mmRLC_CGCG_CGLS_CTRL));
4039	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4040		 RREG32(mmRLC_LB_CNTR_INIT));
4041	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4042		 RREG32(mmRLC_LB_CNTR_MAX));
4043	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4044		 RREG32(mmRLC_LB_INIT_CU_MASK));
4045	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4046		 RREG32(mmRLC_LB_PARAMS));
4047	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4048		 RREG32(mmRLC_LB_CNTL));
4049	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4050		 RREG32(mmRLC_MC_CNTL));
4051	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4052		 RREG32(mmRLC_UCODE_CNTL));
4053
4054	mutex_lock(&adev->srbm_mutex);
4055	for (i = 0; i < 16; i++) {
4056		vi_srbm_select(adev, 0, 0, 0, i);
4057		dev_info(adev->dev, "  VM %d:\n", i);
4058		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4059			 RREG32(mmSH_MEM_CONFIG));
4060		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4061			 RREG32(mmSH_MEM_APE1_BASE));
4062		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4063			 RREG32(mmSH_MEM_APE1_LIMIT));
4064		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4065			 RREG32(mmSH_MEM_BASES));
4066	}
4067	vi_srbm_select(adev, 0, 0, 0, 0);
4068	mutex_unlock(&adev->srbm_mutex);
4069}
4070
4071static int gfx_v8_0_soft_reset(void *handle)
4072{
4073	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4074	u32 tmp;
4075	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4076
4077	/* GRBM_STATUS */
4078	tmp = RREG32(mmGRBM_STATUS);
4079	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4080		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4081		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4082		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4083		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4084		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
 
4085		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4086						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4087		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4088						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4089	}
4090
4091	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4092		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4093						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4094		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4095						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4096	}
4097
4098	/* GRBM_STATUS2 */
4099	tmp = RREG32(mmGRBM_STATUS2);
4100	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4101		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4102						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4103
 
 
 
 
 
 
 
 
 
 
 
 
 
4104	/* SRBM_STATUS */
4105	tmp = RREG32(mmSRBM_STATUS);
4106	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4107		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4108						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
 
 
 
4109
4110	if (grbm_soft_reset || srbm_soft_reset) {
4111		gfx_v8_0_print_status((void *)adev);
4112		/* stop the rlc */
4113		gfx_v8_0_rlc_stop(adev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4114
 
 
4115		/* Disable GFX parsing/prefetching */
4116		gfx_v8_0_cp_gfx_enable(adev, false);
4117
 
 
 
 
 
 
 
 
 
 
 
4118		/* Disable MEC parsing/prefetching */
4119		gfx_v8_0_cp_compute_enable(adev, false);
 
4120
4121		if (grbm_soft_reset || srbm_soft_reset) {
4122			tmp = RREG32(mmGMCON_DEBUG);
4123			tmp = REG_SET_FIELD(tmp,
4124					    GMCON_DEBUG, GFX_STALL, 1);
4125			tmp = REG_SET_FIELD(tmp,
4126					    GMCON_DEBUG, GFX_CLEAR, 1);
4127			WREG32(mmGMCON_DEBUG, tmp);
4128
4129			udelay(50);
4130		}
 
 
 
4131
4132		if (grbm_soft_reset) {
4133			tmp = RREG32(mmGRBM_SOFT_RESET);
4134			tmp |= grbm_soft_reset;
4135			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4136			WREG32(mmGRBM_SOFT_RESET, tmp);
4137			tmp = RREG32(mmGRBM_SOFT_RESET);
4138
4139			udelay(50);
 
4140
4141			tmp &= ~grbm_soft_reset;
4142			WREG32(mmGRBM_SOFT_RESET, tmp);
4143			tmp = RREG32(mmGRBM_SOFT_RESET);
4144		}
 
 
 
4145
4146		if (srbm_soft_reset) {
4147			tmp = RREG32(mmSRBM_SOFT_RESET);
4148			tmp |= srbm_soft_reset;
4149			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4150			WREG32(mmSRBM_SOFT_RESET, tmp);
4151			tmp = RREG32(mmSRBM_SOFT_RESET);
4152
4153			udelay(50);
4154
4155			tmp &= ~srbm_soft_reset;
4156			WREG32(mmSRBM_SOFT_RESET, tmp);
4157			tmp = RREG32(mmSRBM_SOFT_RESET);
4158		}
4159
4160		if (grbm_soft_reset || srbm_soft_reset) {
4161			tmp = RREG32(mmGMCON_DEBUG);
4162			tmp = REG_SET_FIELD(tmp,
4163					    GMCON_DEBUG, GFX_STALL, 0);
4164			tmp = REG_SET_FIELD(tmp,
4165					    GMCON_DEBUG, GFX_CLEAR, 0);
4166			WREG32(mmGMCON_DEBUG, tmp);
4167		}
4168
4169		/* Wait a little for things to settle down */
4170		udelay(50);
4171		gfx_v8_0_print_status((void *)adev);
 
 
 
4172	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4173	return 0;
4174}
4175
4176/**
4177 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4178 *
4179 * @adev: amdgpu_device pointer
4180 *
4181 * Fetches a GPU clock counter snapshot.
4182 * Returns the 64 bit clock counter snapshot.
4183 */
4184uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4185{
4186	uint64_t clock;
4187
4188	mutex_lock(&adev->gfx.gpu_clock_mutex);
4189	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4190	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4191		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4192	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4193	return clock;
4194}
4195
4196static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4197					  uint32_t vmid,
4198					  uint32_t gds_base, uint32_t gds_size,
4199					  uint32_t gws_base, uint32_t gws_size,
4200					  uint32_t oa_base, uint32_t oa_size)
4201{
4202	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4203	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4204
4205	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4206	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4207
4208	oa_base = oa_base >> AMDGPU_OA_SHIFT;
4209	oa_size = oa_size >> AMDGPU_OA_SHIFT;
4210
4211	/* GDS Base */
4212	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4213	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4214				WRITE_DATA_DST_SEL(0)));
4215	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4216	amdgpu_ring_write(ring, 0);
4217	amdgpu_ring_write(ring, gds_base);
4218
4219	/* GDS Size */
4220	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4221	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4222				WRITE_DATA_DST_SEL(0)));
4223	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4224	amdgpu_ring_write(ring, 0);
4225	amdgpu_ring_write(ring, gds_size);
4226
4227	/* GWS */
4228	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4229	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4230				WRITE_DATA_DST_SEL(0)));
4231	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4232	amdgpu_ring_write(ring, 0);
4233	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4234
4235	/* OA */
4236	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4237	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4238				WRITE_DATA_DST_SEL(0)));
4239	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4240	amdgpu_ring_write(ring, 0);
4241	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4242}
4243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4244static int gfx_v8_0_early_init(void *handle)
4245{
4246	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4247
4248	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4249	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
 
4250	gfx_v8_0_set_ring_funcs(adev);
4251	gfx_v8_0_set_irq_funcs(adev);
4252	gfx_v8_0_set_gds_init(adev);
 
4253
4254	return 0;
4255}
4256
4257static int gfx_v8_0_late_init(void *handle)
4258{
4259	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4260	int r;
4261
4262	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4263	if (r)
4264		return r;
4265
4266	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4267	if (r)
4268		return r;
4269
4270	/* requires IBs so do in late init after IB pool is initialized */
4271	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4272	if (r)
4273		return r;
4274
 
 
 
4275	return 0;
4276}
4277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4278static int gfx_v8_0_set_powergating_state(void *handle,
4279					  enum amd_powergating_state state)
4280{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4281	return 0;
4282}
4283
4284static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4285		uint32_t reg_addr, uint32_t cmd)
4286{
4287	uint32_t data;
4288
4289	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4290
4291	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4292	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4293
4294	data = RREG32(mmRLC_SERDES_WR_CTRL);
4295	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4296			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4297			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4298			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4299			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4300			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4301			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4302			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4303			RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4304			RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4305			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
 
 
 
 
 
 
 
 
 
 
 
4306	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4307			(cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4308			(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4309			(0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4310
4311	WREG32(mmRLC_SERDES_WR_CTRL, data);
4312}
4313
4314static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4315		bool enable)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4316{
4317	uint32_t temp, data;
4318
 
 
4319	/* It is disabled by HW by default */
4320	if (enable) {
4321		/* 1 - RLC memory Light sleep */
4322		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4323		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4324		if (temp != data)
4325			WREG32(mmRLC_MEM_SLP_CNTL, data);
4326
4327		/* 2 - CP memory Light sleep */
4328		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4329		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4330		if (temp != data)
4331			WREG32(mmCP_MEM_SLP_CNTL, data);
4332
4333		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
4334		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4335		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4336				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4337				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4338				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
 
 
 
 
 
4339
4340		if (temp != data)
4341			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4342
4343		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4344		gfx_v8_0_wait_for_rlc_serdes(adev);
4345
4346		/* 5 - clear mgcg override */
4347		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4348
4349		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
4350		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4351		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4352		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4353		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4354		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4355		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4356		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4357		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4358		if (temp != data)
4359			WREG32(mmCGTS_SM_CTRL_REG, data);
 
 
 
 
4360		udelay(50);
4361
4362		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4363		gfx_v8_0_wait_for_rlc_serdes(adev);
4364	} else {
4365		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4366		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4367		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4368				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4369				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4370				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4371		if (temp != data)
4372			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4373
4374		/* 2 - disable MGLS in RLC */
4375		data = RREG32(mmRLC_MEM_SLP_CNTL);
4376		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4377			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4378			WREG32(mmRLC_MEM_SLP_CNTL, data);
4379		}
4380
4381		/* 3 - disable MGLS in CP */
4382		data = RREG32(mmCP_MEM_SLP_CNTL);
4383		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4384			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4385			WREG32(mmCP_MEM_SLP_CNTL, data);
4386		}
4387
4388		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4389		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4390		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4391				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4392		if (temp != data)
4393			WREG32(mmCGTS_SM_CTRL_REG, data);
4394
4395		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4396		gfx_v8_0_wait_for_rlc_serdes(adev);
4397
4398		/* 6 - set mgcg override */
4399		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4400
4401		udelay(50);
4402
4403		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4404		gfx_v8_0_wait_for_rlc_serdes(adev);
4405	}
 
 
4406}
4407
4408static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4409		bool enable)
4410{
4411	uint32_t temp, temp1, data, data1;
4412
4413	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4414
4415	if (enable) {
4416		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4417		 * Cmp_busy/GFX_Idle interrupts
4418		 */
4419		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4420
 
4421		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4422		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4423		if (temp1 != data1)
4424			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4425
4426		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4427		gfx_v8_0_wait_for_rlc_serdes(adev);
4428
4429		/* 3 - clear cgcg override */
4430		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4431
4432		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4433		gfx_v8_0_wait_for_rlc_serdes(adev);
4434
4435		/* 4 - write cmd to set CGLS */
4436		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4437
4438		/* 5 - enable cgcg */
4439		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4440
4441		/* enable cgls*/
4442		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
 
4443
4444		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4445		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4446
4447		if (temp1 != data1)
4448			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
 
 
 
4449
4450		if (temp != data)
4451			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 
 
 
 
 
4452	} else {
4453		/* disable cntx_empty_int_enable & GFX Idle interrupt */
4454		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4455
4456		/* TEST CGCG */
4457		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4458		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4459				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4460		if (temp1 != data1)
4461			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4462
4463		/* read gfx register to wake up cgcg */
4464		RREG32(mmCB_CGTT_SCLK_CTRL);
4465		RREG32(mmCB_CGTT_SCLK_CTRL);
4466		RREG32(mmCB_CGTT_SCLK_CTRL);
4467		RREG32(mmCB_CGTT_SCLK_CTRL);
4468
4469		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4470		gfx_v8_0_wait_for_rlc_serdes(adev);
4471
4472		/* write cmd to Set CGCG Overrride */
4473		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4474
4475		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4476		gfx_v8_0_wait_for_rlc_serdes(adev);
4477
4478		/* write cmd to Clear CGLS */
4479		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4480
4481		/* disable cgcg, cgls should be disabled too. */
4482		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4483				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4484		if (temp != data)
4485			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4486	}
 
 
 
 
4487}
4488static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4489		bool enable)
4490{
4491	if (enable) {
4492		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4493		 * ===  MGCG + MGLS + TS(CG/LS) ===
4494		 */
4495		fiji_update_medium_grain_clock_gating(adev, enable);
4496		fiji_update_coarse_grain_clock_gating(adev, enable);
4497	} else {
4498		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4499		 * ===  CGCG + CGLS ===
4500		 */
4501		fiji_update_coarse_grain_clock_gating(adev, enable);
4502		fiji_update_medium_grain_clock_gating(adev, enable);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4503	}
 
4504	return 0;
4505}
4506
4507static int gfx_v8_0_set_clockgating_state(void *handle,
4508					  enum amd_clockgating_state state)
4509{
4510	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4511
4512	switch (adev->asic_type) {
4513	case CHIP_FIJI:
4514		fiji_update_gfx_clock_gating(adev,
4515				state == AMD_CG_STATE_GATE ? true : false);
 
 
 
 
 
 
 
 
 
4516		break;
4517	default:
4518		break;
4519	}
4520	return 0;
4521}
4522
4523static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4524{
4525	u32 rptr;
4526
4527	rptr = ring->adev->wb.wb[ring->rptr_offs];
4528
4529	return rptr;
4530}
4531
4532static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4533{
4534	struct amdgpu_device *adev = ring->adev;
4535	u32 wptr;
4536
4537	if (ring->use_doorbell)
4538		/* XXX check if swapping is necessary on BE */
4539		wptr = ring->adev->wb.wb[ring->wptr_offs];
4540	else
4541		wptr = RREG32(mmCP_RB0_WPTR);
4542
4543	return wptr;
4544}
4545
4546static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4547{
4548	struct amdgpu_device *adev = ring->adev;
4549
4550	if (ring->use_doorbell) {
4551		/* XXX check if swapping is necessary on BE */
4552		adev->wb.wb[ring->wptr_offs] = ring->wptr;
4553		WDOORBELL32(ring->doorbell_index, ring->wptr);
4554	} else {
4555		WREG32(mmCP_RB0_WPTR, ring->wptr);
4556		(void)RREG32(mmCP_RB0_WPTR);
4557	}
4558}
4559
4560static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4561{
4562	u32 ref_and_mask, reg_mem_engine;
4563
4564	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4565		switch (ring->me) {
4566		case 1:
4567			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4568			break;
4569		case 2:
4570			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4571			break;
4572		default:
4573			return;
4574		}
4575		reg_mem_engine = 0;
4576	} else {
4577		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4578		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4579	}
4580
4581	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4582	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4583				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
4584				 reg_mem_engine));
4585	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4586	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4587	amdgpu_ring_write(ring, ref_and_mask);
4588	amdgpu_ring_write(ring, ref_and_mask);
4589	amdgpu_ring_write(ring, 0x20); /* poll interval */
4590}
4591
 
 
 
 
 
 
 
 
 
 
 
 
4592static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
4593{
4594	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4595	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4596				 WRITE_DATA_DST_SEL(0) |
4597				 WR_CONFIRM));
4598	amdgpu_ring_write(ring, mmHDP_DEBUG0);
4599	amdgpu_ring_write(ring, 0);
4600	amdgpu_ring_write(ring, 1);
4601
4602}
4603
4604static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4605				  struct amdgpu_ib *ib)
 
4606{
4607	bool need_ctx_switch = ring->current_ctx != ib->ctx;
4608	u32 header, control = 0;
4609	u32 next_rptr = ring->wptr + 5;
4610
4611	/* drop the CE preamble IB for the same context */
4612	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4613		return;
4614
4615	if (need_ctx_switch)
4616		next_rptr += 2;
4617
4618	next_rptr += 4;
4619	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4620	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4621	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4622	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4623	amdgpu_ring_write(ring, next_rptr);
4624
4625	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
4626	if (need_ctx_switch) {
4627		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4628		amdgpu_ring_write(ring, 0);
4629	}
4630
4631	if (ib->flags & AMDGPU_IB_FLAG_CE)
4632		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4633	else
4634		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4635
4636	control |= ib->length_dw | (ib->vm_id << 24);
4637
4638	amdgpu_ring_write(ring, header);
4639	amdgpu_ring_write(ring,
4640#ifdef __BIG_ENDIAN
4641			  (2 << 0) |
4642#endif
4643			  (ib->gpu_addr & 0xFFFFFFFC));
4644	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4645	amdgpu_ring_write(ring, control);
4646}
4647
4648static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4649				  struct amdgpu_ib *ib)
 
4650{
4651	u32 header, control = 0;
4652	u32 next_rptr = ring->wptr + 5;
4653
4654	control |= INDIRECT_BUFFER_VALID;
4655
4656	next_rptr += 4;
4657	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4658	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4659	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4660	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4661	amdgpu_ring_write(ring, next_rptr);
4662
4663	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4664
4665	control |= ib->length_dw | (ib->vm_id << 24);
4666
4667	amdgpu_ring_write(ring, header);
4668	amdgpu_ring_write(ring,
4669#ifdef __BIG_ENDIAN
4670					  (2 << 0) |
4671#endif
4672					  (ib->gpu_addr & 0xFFFFFFFC));
4673	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4674	amdgpu_ring_write(ring, control);
4675}
4676
4677static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4678					 u64 seq, unsigned flags)
4679{
4680	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4681	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4682
4683	/* EVENT_WRITE_EOP - flush caches, send int */
4684	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4685	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4686				 EOP_TC_ACTION_EN |
 
4687				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4688				 EVENT_INDEX(5)));
4689	amdgpu_ring_write(ring, addr & 0xfffffffc);
4690	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4691			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4692	amdgpu_ring_write(ring, lower_32_bits(seq));
4693	amdgpu_ring_write(ring, upper_32_bits(seq));
4694
4695}
4696
4697static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4698{
4699	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4700	uint32_t seq = ring->fence_drv.sync_seq;
4701	uint64_t addr = ring->fence_drv.gpu_addr;
4702
4703	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4704	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4705				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
4706				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
4707	amdgpu_ring_write(ring, addr & 0xfffffffc);
4708	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4709	amdgpu_ring_write(ring, seq);
4710	amdgpu_ring_write(ring, 0xffffffff);
4711	amdgpu_ring_write(ring, 4); /* poll interval */
4712
4713	if (usepfp) {
4714		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
4715		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4716		amdgpu_ring_write(ring, 0);
4717		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4718		amdgpu_ring_write(ring, 0);
4719	}
4720}
4721
4722static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4723					unsigned vm_id, uint64_t pd_addr)
4724{
4725	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4726
4727	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4728	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4729				 WRITE_DATA_DST_SEL(0)) |
4730				 WR_CONFIRM);
4731	if (vm_id < 8) {
4732		amdgpu_ring_write(ring,
4733				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4734	} else {
4735		amdgpu_ring_write(ring,
4736				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4737	}
4738	amdgpu_ring_write(ring, 0);
4739	amdgpu_ring_write(ring, pd_addr >> 12);
4740
4741	/* bits 0-15 are the VM contexts0-15 */
4742	/* invalidate the cache */
4743	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4744	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4745				 WRITE_DATA_DST_SEL(0)));
4746	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4747	amdgpu_ring_write(ring, 0);
4748	amdgpu_ring_write(ring, 1 << vm_id);
4749
4750	/* wait for the invalidate to complete */
4751	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4752	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4753				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
4754				 WAIT_REG_MEM_ENGINE(0))); /* me */
4755	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4756	amdgpu_ring_write(ring, 0);
4757	amdgpu_ring_write(ring, 0); /* ref */
4758	amdgpu_ring_write(ring, 0); /* mask */
4759	amdgpu_ring_write(ring, 0x20); /* poll interval */
4760
4761	/* compute doesn't have PFP */
4762	if (usepfp) {
4763		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4764		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4765		amdgpu_ring_write(ring, 0x0);
4766		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4767		amdgpu_ring_write(ring, 0);
4768		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4769		amdgpu_ring_write(ring, 0);
4770	}
4771}
4772
4773static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4774{
4775	return ring->adev->wb.wb[ring->rptr_offs];
4776}
4777
4778static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4779{
4780	return ring->adev->wb.wb[ring->wptr_offs];
4781}
4782
4783static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4784{
4785	struct amdgpu_device *adev = ring->adev;
4786
4787	/* XXX check if swapping is necessary on BE */
4788	adev->wb.wb[ring->wptr_offs] = ring->wptr;
4789	WDOORBELL32(ring->doorbell_index, ring->wptr);
4790}
4791
4792static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4793					     u64 addr, u64 seq,
4794					     unsigned flags)
4795{
4796	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4797	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4798
4799	/* RELEASE_MEM - flush caches, send int */
4800	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4801	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4802				 EOP_TC_ACTION_EN |
4803				 EOP_TC_WB_ACTION_EN |
4804				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4805				 EVENT_INDEX(5)));
4806	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4807	amdgpu_ring_write(ring, addr & 0xfffffffc);
4808	amdgpu_ring_write(ring, upper_32_bits(addr));
4809	amdgpu_ring_write(ring, lower_32_bits(seq));
4810	amdgpu_ring_write(ring, upper_32_bits(seq));
4811}
4812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4813static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4814						 enum amdgpu_interrupt_state state)
4815{
4816	u32 cp_int_cntl;
4817
4818	switch (state) {
4819	case AMDGPU_IRQ_STATE_DISABLE:
4820		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4821		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4822					    TIME_STAMP_INT_ENABLE, 0);
4823		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4824		break;
4825	case AMDGPU_IRQ_STATE_ENABLE:
4826		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4827		cp_int_cntl =
4828			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4829				      TIME_STAMP_INT_ENABLE, 1);
4830		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4831		break;
4832	default:
4833		break;
4834	}
4835}
4836
4837static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4838						     int me, int pipe,
4839						     enum amdgpu_interrupt_state state)
4840{
4841	u32 mec_int_cntl, mec_int_cntl_reg;
4842
4843	/*
4844	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4845	 * handles the setting of interrupts for this specific pipe. All other
4846	 * pipes' interrupts are set by amdkfd.
4847	 */
4848
4849	if (me == 1) {
4850		switch (pipe) {
4851		case 0:
4852			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4853			break;
4854		default:
4855			DRM_DEBUG("invalid pipe %d\n", pipe);
4856			return;
4857		}
4858	} else {
4859		DRM_DEBUG("invalid me %d\n", me);
4860		return;
4861	}
4862
4863	switch (state) {
4864	case AMDGPU_IRQ_STATE_DISABLE:
4865		mec_int_cntl = RREG32(mec_int_cntl_reg);
4866		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4867					     TIME_STAMP_INT_ENABLE, 0);
4868		WREG32(mec_int_cntl_reg, mec_int_cntl);
4869		break;
4870	case AMDGPU_IRQ_STATE_ENABLE:
4871		mec_int_cntl = RREG32(mec_int_cntl_reg);
4872		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4873					     TIME_STAMP_INT_ENABLE, 1);
4874		WREG32(mec_int_cntl_reg, mec_int_cntl);
4875		break;
4876	default:
4877		break;
4878	}
4879}
4880
4881static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4882					     struct amdgpu_irq_src *source,
4883					     unsigned type,
4884					     enum amdgpu_interrupt_state state)
4885{
4886	u32 cp_int_cntl;
4887
4888	switch (state) {
4889	case AMDGPU_IRQ_STATE_DISABLE:
4890		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4891		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4892					    PRIV_REG_INT_ENABLE, 0);
4893		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4894		break;
4895	case AMDGPU_IRQ_STATE_ENABLE:
4896		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4897		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4898					    PRIV_REG_INT_ENABLE, 1);
4899		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4900		break;
4901	default:
4902		break;
4903	}
4904
4905	return 0;
4906}
4907
4908static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4909					      struct amdgpu_irq_src *source,
4910					      unsigned type,
4911					      enum amdgpu_interrupt_state state)
4912{
4913	u32 cp_int_cntl;
4914
4915	switch (state) {
4916	case AMDGPU_IRQ_STATE_DISABLE:
4917		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4918		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4919					    PRIV_INSTR_INT_ENABLE, 0);
4920		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4921		break;
4922	case AMDGPU_IRQ_STATE_ENABLE:
4923		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4924		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4925					    PRIV_INSTR_INT_ENABLE, 1);
4926		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4927		break;
4928	default:
4929		break;
4930	}
4931
4932	return 0;
4933}
4934
4935static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4936					    struct amdgpu_irq_src *src,
4937					    unsigned type,
4938					    enum amdgpu_interrupt_state state)
4939{
4940	switch (type) {
4941	case AMDGPU_CP_IRQ_GFX_EOP:
4942		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4943		break;
4944	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4945		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4946		break;
4947	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4948		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4949		break;
4950	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4951		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4952		break;
4953	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4954		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4955		break;
4956	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4957		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4958		break;
4959	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4960		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4961		break;
4962	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4963		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4964		break;
4965	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4966		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4967		break;
4968	default:
4969		break;
4970	}
4971	return 0;
4972}
4973
4974static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4975			    struct amdgpu_irq_src *source,
4976			    struct amdgpu_iv_entry *entry)
4977{
4978	int i;
4979	u8 me_id, pipe_id, queue_id;
4980	struct amdgpu_ring *ring;
4981
4982	DRM_DEBUG("IH: CP EOP\n");
4983	me_id = (entry->ring_id & 0x0c) >> 2;
4984	pipe_id = (entry->ring_id & 0x03) >> 0;
4985	queue_id = (entry->ring_id & 0x70) >> 4;
4986
4987	switch (me_id) {
4988	case 0:
4989		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4990		break;
4991	case 1:
4992	case 2:
4993		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4994			ring = &adev->gfx.compute_ring[i];
4995			/* Per-queue interrupt is supported for MEC starting from VI.
4996			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
4997			  */
4998			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4999				amdgpu_fence_process(ring);
5000		}
5001		break;
5002	}
5003	return 0;
5004}
5005
5006static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5007				 struct amdgpu_irq_src *source,
5008				 struct amdgpu_iv_entry *entry)
5009{
5010	DRM_ERROR("Illegal register access in command stream\n");
5011	schedule_work(&adev->reset_work);
5012	return 0;
5013}
5014
5015static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5016				  struct amdgpu_irq_src *source,
5017				  struct amdgpu_iv_entry *entry)
5018{
5019	DRM_ERROR("Illegal instruction in command stream\n");
5020	schedule_work(&adev->reset_work);
5021	return 0;
5022}
5023
5024const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
 
5025	.early_init = gfx_v8_0_early_init,
5026	.late_init = gfx_v8_0_late_init,
5027	.sw_init = gfx_v8_0_sw_init,
5028	.sw_fini = gfx_v8_0_sw_fini,
5029	.hw_init = gfx_v8_0_hw_init,
5030	.hw_fini = gfx_v8_0_hw_fini,
5031	.suspend = gfx_v8_0_suspend,
5032	.resume = gfx_v8_0_resume,
5033	.is_idle = gfx_v8_0_is_idle,
5034	.wait_for_idle = gfx_v8_0_wait_for_idle,
 
 
5035	.soft_reset = gfx_v8_0_soft_reset,
5036	.print_status = gfx_v8_0_print_status,
5037	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
5038	.set_powergating_state = gfx_v8_0_set_powergating_state,
5039};
5040
5041static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5042	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
 
 
 
5043	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5044	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5045	.parse_cs = NULL,
 
 
 
 
 
 
 
 
 
5046	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5047	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5048	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5049	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5050	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5051	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5052	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5053	.test_ring = gfx_v8_0_ring_test_ring,
5054	.test_ib = gfx_v8_0_ring_test_ib,
5055	.insert_nop = amdgpu_ring_insert_nop,
5056	.pad_ib = amdgpu_ring_generic_pad_ib,
 
 
5057};
5058
5059static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5060	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
 
 
 
5061	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
5062	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
5063	.parse_cs = NULL,
 
 
 
 
 
 
 
5064	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
5065	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
5066	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5067	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5068	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5069	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5070	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5071	.test_ring = gfx_v8_0_ring_test_ring,
5072	.test_ib = gfx_v8_0_ring_test_ib,
5073	.insert_nop = amdgpu_ring_insert_nop,
5074	.pad_ib = amdgpu_ring_generic_pad_ib,
5075};
5076
5077static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5078{
5079	int i;
5080
5081	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5082		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5083
5084	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5085		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5086}
5087
5088static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5089	.set = gfx_v8_0_set_eop_interrupt_state,
5090	.process = gfx_v8_0_eop_irq,
5091};
5092
5093static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5094	.set = gfx_v8_0_set_priv_reg_fault_state,
5095	.process = gfx_v8_0_priv_reg_irq,
5096};
5097
5098static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5099	.set = gfx_v8_0_set_priv_inst_fault_state,
5100	.process = gfx_v8_0_priv_inst_irq,
5101};
5102
5103static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5104{
5105	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5106	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5107
5108	adev->gfx.priv_reg_irq.num_types = 1;
5109	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5110
5111	adev->gfx.priv_inst_irq.num_types = 1;
5112	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5113}
5114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5115static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5116{
5117	/* init asci gds info */
5118	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5119	adev->gds.gws.total_size = 64;
5120	adev->gds.oa.total_size = 16;
5121
5122	if (adev->gds.mem.total_size == 64 * 1024) {
5123		adev->gds.mem.gfx_partition_size = 4096;
5124		adev->gds.mem.cs_partition_size = 4096;
5125
5126		adev->gds.gws.gfx_partition_size = 4;
5127		adev->gds.gws.cs_partition_size = 4;
5128
5129		adev->gds.oa.gfx_partition_size = 4;
5130		adev->gds.oa.cs_partition_size = 1;
5131	} else {
5132		adev->gds.mem.gfx_partition_size = 1024;
5133		adev->gds.mem.cs_partition_size = 1024;
5134
5135		adev->gds.gws.gfx_partition_size = 16;
5136		adev->gds.gws.cs_partition_size = 16;
5137
5138		adev->gds.oa.gfx_partition_size = 4;
5139		adev->gds.oa.cs_partition_size = 4;
5140	}
5141}
5142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5143static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5144{
5145	u32 data, mask;
5146
5147	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5148	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5149
5150	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5151	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5152
5153	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5154
5155	return (~data) & mask;
5156}
5157
5158int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5159			 struct amdgpu_cu_info *cu_info)
5160{
5161	int i, j, k, counter, active_cu_number = 0;
5162	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 
 
5163
5164	if (!adev || !cu_info)
5165		return -EINVAL;
5166
5167	memset(cu_info, 0, sizeof(*cu_info));
5168
5169	mutex_lock(&adev->grbm_idx_mutex);
5170	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5171		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5172			mask = 1;
5173			ao_bitmap = 0;
5174			counter = 0;
5175			gfx_v8_0_select_se_sh(adev, i, j);
 
 
 
5176			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5177			cu_info->bitmap[i][j] = bitmap;
5178
5179			for (k = 0; k < 16; k ++) {
5180				if (bitmap & mask) {
5181					if (counter < 2)
5182						ao_bitmap |= mask;
5183					counter ++;
5184				}
5185				mask <<= 1;
5186			}
5187			active_cu_number += counter;
5188			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5189		}
5190	}
5191	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5192	mutex_unlock(&adev->grbm_idx_mutex);
5193
5194	cu_info->number = active_cu_number;
5195	cu_info->ao_cu_mask = ao_cu_mask;
 
 
 
 
 
 
 
 
 
 
5196
5197	return 0;
5198}