Linux Audio

Check our new training course

Open-source upstreaming

Need help getting support for your hardware into upstream Linux?
Loading...
v4.6
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
 
 
 
  23#include <linux/firmware.h>
  24#include "drmP.h"
 
 
  25#include "amdgpu.h"
  26#include "amdgpu_gfx.h"
 
  27#include "vi.h"
 
  28#include "vid.h"
  29#include "amdgpu_ucode.h"
 
 
  30#include "clearstate_vi.h"
  31
  32#include "gmc/gmc_8_2_d.h"
  33#include "gmc/gmc_8_2_sh_mask.h"
  34
  35#include "oss/oss_3_0_d.h"
  36#include "oss/oss_3_0_sh_mask.h"
  37
  38#include "bif/bif_5_0_d.h"
  39#include "bif/bif_5_0_sh_mask.h"
  40
  41#include "gca/gfx_8_0_d.h"
  42#include "gca/gfx_8_0_enum.h"
  43#include "gca/gfx_8_0_sh_mask.h"
  44#include "gca/gfx_8_0_enum.h"
  45
  46#include "dce/dce_10_0_d.h"
  47#include "dce/dce_10_0_sh_mask.h"
  48
 
 
 
 
  49#define GFX8_NUM_GFX_RINGS     1
  50#define GFX8_NUM_COMPUTE_RINGS 8
  51
  52#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  53#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 
  54#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  55
  56#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  57#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  58#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  59#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  60#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  61#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  62#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  63#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  64#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  65
  66#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  67#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  68#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  69#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  70#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  71#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  72
  73/* BPM SERDES CMD */
  74#define SET_BPM_SERDES_CMD    1
  75#define CLE_BPM_SERDES_CMD    0
  76
  77/* BPM Register Address*/
  78enum {
  79	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  80	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  81	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  82	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  83	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  84	BPM_REG_FGCG_MAX
  85};
  86
 
 
  87MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
  88MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
  89MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
  90MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
  91MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
  92MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
  93
  94MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
  95MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
  96MODULE_FIRMWARE("amdgpu/stoney_me.bin");
  97MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
  98MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
  99
 100MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 101MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 102MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 103MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 104MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 105MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 106
 107MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 108MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 109MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 110MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 111MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 112
 113MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 114MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 115MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 116MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 117MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 118MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 120static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 121{
 122	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 123	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 124	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 125	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 126	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 127	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 128	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 129	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 130	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 131	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 132	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 133	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 134	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 135	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 136	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 137	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 138};
 139
 140static const u32 golden_settings_tonga_a11[] =
 141{
 142	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 143	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 144	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 145	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 146	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 147	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 148	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 
 149	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 150	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 151	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 152	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 153	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 154	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 155	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 156	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 157};
 158
 159static const u32 tonga_golden_common_all[] =
 160{
 161	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 162	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 163	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 164	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 165	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 166	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 167	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 168	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 169};
 170
 171static const u32 tonga_mgcg_cgcg_init[] =
 172{
 173	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 174	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 175	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 176	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 177	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 178	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 179	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 180	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 181	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 182	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 183	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 184	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 185	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 186	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 187	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 188	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 189	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 190	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 191	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 192	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 193	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 194	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 195	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 196	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 197	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 198	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 199	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 200	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 201	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 202	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 203	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 204	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 205	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 206	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 207	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 208	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 209	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 210	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 211	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 212	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 213	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 214	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 215	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 216	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 217	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 218	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 219	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 220	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 221	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 222	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 223	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 224	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 225	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 226	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 227	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 228	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 229	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 230	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 231	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 232	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 233	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 234	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 235	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 236	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 237	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 238	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 239	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 240	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 241	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 242	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 243	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 244	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 245	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 246	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 247	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 248};
 249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 250static const u32 fiji_golden_common_all[] =
 251{
 252	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 253	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 254	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 255	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 256	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 257	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 258	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 259	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 260	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 261	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 262};
 263
 264static const u32 golden_settings_fiji_a10[] =
 265{
 266	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 267	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 268	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 269	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 270	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 271	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 272	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 273	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 274	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 275	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 276	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 277};
 278
 279static const u32 fiji_mgcg_cgcg_init[] =
 280{
 281	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 282	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 283	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 284	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 285	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 286	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 287	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 288	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 289	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 290	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 291	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 292	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 293	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 294	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 295	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 296	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 297	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 298	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 299	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 300	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 301	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 302	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 303	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 304	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 305	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 306	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 307	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 308	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 309	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 310	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 311	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 312	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 313	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 314	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 315	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 316};
 317
 318static const u32 golden_settings_iceland_a11[] =
 319{
 320	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 321	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 322	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 323	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 324	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 325	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 326	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 327	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 
 328	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 329	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 330	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 331	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 332	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 333	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 334	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 335};
 336
 337static const u32 iceland_golden_common_all[] =
 338{
 339	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 340	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 341	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 342	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 343	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 344	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 345	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 346	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 347};
 348
 349static const u32 iceland_mgcg_cgcg_init[] =
 350{
 351	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 352	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 353	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 354	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 355	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 356	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 357	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 358	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 359	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 360	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 361	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 362	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 363	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 364	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 365	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 366	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 367	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 368	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 369	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 370	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 371	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 372	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 373	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 374	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 375	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 376	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 377	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 378	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 379	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 380	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 381	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 382	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 383	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 384	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 385	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 386	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 387	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 388	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 389	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 390	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 391	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 392	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 393	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 394	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 395	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 396	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 397	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 398	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 399	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 400	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 401	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 402	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 403	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 404	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 405	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 406	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 407	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 408	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 409	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 410	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 411	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 412	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 413	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 414	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 415};
 416
 417static const u32 cz_golden_settings_a11[] =
 418{
 419	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 421	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 422	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 423	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 
 424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 425	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 
 426	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 427	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 428	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 429};
 430
 431static const u32 cz_golden_common_all[] =
 432{
 433	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 434	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 435	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 436	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 437	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 438	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 439	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 440	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
 441};
 442
 443static const u32 cz_mgcg_cgcg_init[] =
 444{
 445	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 446	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 447	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 451	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 452	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 453	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 454	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 455	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 456	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 459	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 460	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 461	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 462	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 463	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 464	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 465	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 466	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 467	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 468	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 469	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 470	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 471	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 472	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 473	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 474	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 475	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 476	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 477	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 478	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 479	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 480	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 481	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 482	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 483	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 484	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 485	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 486	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 487	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 488	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 489	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 490	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 491	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 492	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 493	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 494	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 495	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 496	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 497	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 498	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 499	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 500	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 501	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 502	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 503	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 504	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 505	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 506	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 507	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 508	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 509	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 510	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 511	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 512	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 513	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 514	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 515	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 516	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 517	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 518	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 519	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 520};
 521
 522static const u32 stoney_golden_settings_a11[] =
 523{
 524	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 525	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 526	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 527	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 528	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 529	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 530  	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 531	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 532	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 533	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 534};
 535
 536static const u32 stoney_golden_common_all[] =
 537{
 538	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 539	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 540	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 541	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 542	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 543	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 544	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
 545	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
 546};
 547
 548static const u32 stoney_mgcg_cgcg_init[] =
 549{
 550	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 551	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 552	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 553	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 554	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 555	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
 
 
 
 
 
 
 
 
 
 
 556};
 557
 558static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 559static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 560static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 
 
 
 
 
 
 
 
 561
 562static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 563{
 
 
 564	switch (adev->asic_type) {
 565	case CHIP_TOPAZ:
 566		amdgpu_program_register_sequence(adev,
 567						 iceland_mgcg_cgcg_init,
 568						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
 569		amdgpu_program_register_sequence(adev,
 570						 golden_settings_iceland_a11,
 571						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
 572		amdgpu_program_register_sequence(adev,
 573						 iceland_golden_common_all,
 574						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
 575		break;
 576	case CHIP_FIJI:
 577		amdgpu_program_register_sequence(adev,
 578						 fiji_mgcg_cgcg_init,
 579						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
 580		amdgpu_program_register_sequence(adev,
 581						 golden_settings_fiji_a10,
 582						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
 583		amdgpu_program_register_sequence(adev,
 584						 fiji_golden_common_all,
 585						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
 586		break;
 587
 588	case CHIP_TONGA:
 589		amdgpu_program_register_sequence(adev,
 590						 tonga_mgcg_cgcg_init,
 591						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
 592		amdgpu_program_register_sequence(adev,
 593						 golden_settings_tonga_a11,
 594						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
 595		amdgpu_program_register_sequence(adev,
 596						 tonga_golden_common_all,
 597						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 598		break;
 599	case CHIP_CARRIZO:
 600		amdgpu_program_register_sequence(adev,
 601						 cz_mgcg_cgcg_init,
 602						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
 603		amdgpu_program_register_sequence(adev,
 604						 cz_golden_settings_a11,
 605						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
 606		amdgpu_program_register_sequence(adev,
 607						 cz_golden_common_all,
 608						 (const u32)ARRAY_SIZE(cz_golden_common_all));
 609		break;
 610	case CHIP_STONEY:
 611		amdgpu_program_register_sequence(adev,
 612						 stoney_mgcg_cgcg_init,
 613						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
 614		amdgpu_program_register_sequence(adev,
 615						 stoney_golden_settings_a11,
 616						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
 617		amdgpu_program_register_sequence(adev,
 618						 stoney_golden_common_all,
 619						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
 620		break;
 621	default:
 622		break;
 623	}
 624}
 625
 626static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
 627{
 628	int i;
 629
 630	adev->gfx.scratch.num_reg = 7;
 631	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
 632	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
 633		adev->gfx.scratch.free[i] = true;
 634		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
 635	}
 636}
 637
 638static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 639{
 640	struct amdgpu_device *adev = ring->adev;
 641	uint32_t scratch;
 642	uint32_t tmp = 0;
 643	unsigned i;
 644	int r;
 645
 646	r = amdgpu_gfx_scratch_get(adev, &scratch);
 647	if (r) {
 648		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 649		return r;
 650	}
 651	WREG32(scratch, 0xCAFEDEAD);
 652	r = amdgpu_ring_alloc(ring, 3);
 653	if (r) {
 654		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 655			  ring->idx, r);
 656		amdgpu_gfx_scratch_free(adev, scratch);
 657		return r;
 658	}
 659	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 660	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 661	amdgpu_ring_write(ring, 0xDEADBEEF);
 662	amdgpu_ring_commit(ring);
 663
 664	for (i = 0; i < adev->usec_timeout; i++) {
 665		tmp = RREG32(scratch);
 666		if (tmp == 0xDEADBEEF)
 667			break;
 668		DRM_UDELAY(1);
 669	}
 670	if (i < adev->usec_timeout) {
 671		DRM_INFO("ring test on %d succeeded in %d usecs\n",
 672			 ring->idx, i);
 673	} else {
 674		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 675			  ring->idx, scratch, tmp);
 676		r = -EINVAL;
 677	}
 678	amdgpu_gfx_scratch_free(adev, scratch);
 
 
 
 679	return r;
 680}
 681
 682static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 683{
 684	struct amdgpu_device *adev = ring->adev;
 685	struct amdgpu_ib ib;
 686	struct fence *f = NULL;
 687	uint32_t scratch;
 688	uint32_t tmp = 0;
 689	unsigned i;
 690	int r;
 691
 692	r = amdgpu_gfx_scratch_get(adev, &scratch);
 693	if (r) {
 694		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
 
 
 
 
 695		return r;
 696	}
 697	WREG32(scratch, 0xCAFEDEAD);
 
 698	memset(&ib, 0, sizeof(ib));
 699	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 700	if (r) {
 701		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 702		goto err1;
 703	}
 704	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
 705	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
 706	ib.ptr[2] = 0xDEADBEEF;
 707	ib.length_dw = 3;
 
 
 708
 709	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 710	if (r)
 711		goto err2;
 712
 713	r = fence_wait(f, false);
 714	if (r) {
 715		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
 716		goto err2;
 717	}
 718	for (i = 0; i < adev->usec_timeout; i++) {
 719		tmp = RREG32(scratch);
 720		if (tmp == 0xDEADBEEF)
 721			break;
 722		DRM_UDELAY(1);
 723	}
 724	if (i < adev->usec_timeout) {
 725		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
 726			 ring->idx, i);
 727		goto err2;
 728	} else {
 729		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 730			  scratch, tmp);
 731		r = -EINVAL;
 732	}
 
 
 
 
 
 
 
 733err2:
 734	fence_put(f);
 735	amdgpu_ib_free(adev, &ib, NULL);
 736	fence_put(f);
 737err1:
 738	amdgpu_gfx_scratch_free(adev, scratch);
 739	return r;
 740}
 741
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 742static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 743{
 744	const char *chip_name;
 745	char fw_name[30];
 746	int err;
 747	struct amdgpu_firmware_info *info = NULL;
 748	const struct common_firmware_header *header = NULL;
 749	const struct gfx_firmware_header_v1_0 *cp_hdr;
 
 
 750
 751	DRM_DEBUG("\n");
 752
 753	switch (adev->asic_type) {
 754	case CHIP_TOPAZ:
 755		chip_name = "topaz";
 756		break;
 757	case CHIP_TONGA:
 758		chip_name = "tonga";
 759		break;
 760	case CHIP_CARRIZO:
 761		chip_name = "carrizo";
 762		break;
 763	case CHIP_FIJI:
 764		chip_name = "fiji";
 765		break;
 766	case CHIP_STONEY:
 767		chip_name = "stoney";
 768		break;
 
 
 
 
 
 
 
 
 
 
 
 
 769	default:
 770		BUG();
 771	}
 772
 773	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 774	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 775	if (err)
 776		goto out;
 777	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 
 
 
 
 
 
 778	if (err)
 779		goto out;
 780	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 781	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 782	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 783
 784	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 785	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 786	if (err)
 787		goto out;
 788	err = amdgpu_ucode_validate(adev->gfx.me_fw);
 
 
 
 
 
 
 789	if (err)
 790		goto out;
 791	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 792	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 
 793	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 794
 795	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 796	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 797	if (err)
 798		goto out;
 799	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 
 
 
 
 
 
 800	if (err)
 801		goto out;
 802	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 803	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 804	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 805
 806	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 807	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 
 
 
 
 
 
 
 
 
 
 
 808	if (err)
 809		goto out;
 810	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 811	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
 812	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 813	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 814
 815	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 816	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 817	if (err)
 818		goto out;
 819	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 820	if (err)
 821		goto out;
 822	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 823	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 824	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 825
 826	if ((adev->asic_type != CHIP_STONEY) &&
 827	    (adev->asic_type != CHIP_TOPAZ)) {
 828		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
 829		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 
 
 
 
 
 
 
 
 
 830		if (!err) {
 831			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
 832			if (err)
 833				goto out;
 834			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
 835				adev->gfx.mec2_fw->data;
 836			adev->gfx.mec2_fw_version =
 837				le32_to_cpu(cp_hdr->header.ucode_version);
 838			adev->gfx.mec2_feature_version =
 839				le32_to_cpu(cp_hdr->ucode_feature_version);
 840		} else {
 841			err = 0;
 842			adev->gfx.mec2_fw = NULL;
 843		}
 844	}
 845
 846	if (adev->firmware.smu_load) {
 847		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
 848		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
 849		info->fw = adev->gfx.pfp_fw;
 850		header = (const struct common_firmware_header *)info->fw->data;
 851		adev->firmware.fw_size +=
 852			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 853
 854		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
 855		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
 856		info->fw = adev->gfx.me_fw;
 857		header = (const struct common_firmware_header *)info->fw->data;
 858		adev->firmware.fw_size +=
 859			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 860
 861		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
 862		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
 863		info->fw = adev->gfx.ce_fw;
 864		header = (const struct common_firmware_header *)info->fw->data;
 865		adev->firmware.fw_size +=
 866			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 867
 868		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
 869		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
 870		info->fw = adev->gfx.rlc_fw;
 871		header = (const struct common_firmware_header *)info->fw->data;
 872		adev->firmware.fw_size +=
 873			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
 874
 875		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
 876		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
 877		info->fw = adev->gfx.mec_fw;
 
 878		header = (const struct common_firmware_header *)info->fw->data;
 879		adev->firmware.fw_size +=
 880			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 881
 882		if (adev->gfx.mec2_fw) {
 883			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
 884			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
 885			info->fw = adev->gfx.mec2_fw;
 886			header = (const struct common_firmware_header *)info->fw->data;
 887			adev->firmware.fw_size +=
 888				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 889		}
 890
 891	}
 892
 893out:
 894	if (err) {
 895		dev_err(adev->dev,
 896			"gfx8: Failed to load firmware \"%s\"\n",
 897			fw_name);
 898		release_firmware(adev->gfx.pfp_fw);
 899		adev->gfx.pfp_fw = NULL;
 900		release_firmware(adev->gfx.me_fw);
 901		adev->gfx.me_fw = NULL;
 902		release_firmware(adev->gfx.ce_fw);
 903		adev->gfx.ce_fw = NULL;
 904		release_firmware(adev->gfx.rlc_fw);
 905		adev->gfx.rlc_fw = NULL;
 906		release_firmware(adev->gfx.mec_fw);
 907		adev->gfx.mec_fw = NULL;
 908		release_firmware(adev->gfx.mec2_fw);
 909		adev->gfx.mec2_fw = NULL;
 910	}
 911	return err;
 912}
 913
 914static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 
 915{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 916	int r;
 917
 918	if (adev->gfx.mec.hpd_eop_obj) {
 919		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 920		if (unlikely(r != 0))
 921			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 922		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 923		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 924
 925		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 926		adev->gfx.mec.hpd_eop_obj = NULL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 927	}
 
 
 
 
 
 
 928}
 929
/*
 * Size unit for the MEC HPD EOP buffer object: gfx_v8_0_mec_init() creates
 * and clears num_mec * num_pipe * MEC_HPD_SIZE * 2 (presumably bytes --
 * TODO confirm against amdgpu_bo_create()).
 */
 930#define MEC_HPD_SIZE 2048
 
 
 
 931
 932static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 933{
 934	int r;
 935	u32 *hpd;
 
 936
 937	/*
 938	 * we assign only 1 pipe because all other pipes will
 939	 * be handled by KFD
 940	 */
 941	adev->gfx.mec.num_mec = 1;
 942	adev->gfx.mec.num_pipe = 1;
 943	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
 944
 945	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 946		r = amdgpu_bo_create(adev,
 947				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
 948				     PAGE_SIZE, true,
 949				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 950				     &adev->gfx.mec.hpd_eop_obj);
 951		if (r) {
 952			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 953			return r;
 954		}
 955	}
 956
 957	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 958	if (unlikely(r != 0)) {
 959		gfx_v8_0_mec_fini(adev);
 960		return r;
 961	}
 962	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 963			  &adev->gfx.mec.hpd_eop_gpu_addr);
 964	if (r) {
 965		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 966		gfx_v8_0_mec_fini(adev);
 967		return r;
 968	}
 969	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
 970	if (r) {
 971		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
 972		gfx_v8_0_mec_fini(adev);
 973		return r;
 974	}
 975
 976	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
 977
 978	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 979	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 980
 981	return 0;
 982}
 983
/*
 * Raw shader dwords for the VGPR pass of gfx_v8_0_do_edc_gpr_workarounds():
 * the table is copied into the workaround IB at vgpr_offset and that
 * address is programmed as the compute program for the first dispatch.
 * NOTE(review): presumably pre-assembled GPU machine code that writes every
 * VGPR -- the encoding cannot be verified from this file.
 */
 984static const u32 vgpr_init_compute_shader[] =
 985{
 986	0x7e000209, 0x7e020208,
 987	0x7e040207, 0x7e060206,
 988	0x7e080205, 0x7e0a0204,
 989	0x7e0c0203, 0x7e0e0202,
 990	0x7e100201, 0x7e120200,
 991	0x7e140209, 0x7e160208,
 992	0x7e180207, 0x7e1a0206,
 993	0x7e1c0205, 0x7e1e0204,
 994	0x7e200203, 0x7e220202,
 995	0x7e240201, 0x7e260200,
 996	0x7e280209, 0x7e2a0208,
 997	0x7e2c0207, 0x7e2e0206,
 998	0x7e300205, 0x7e320204,
 999	0x7e340203, 0x7e360202,
1000	0x7e380201, 0x7e3a0200,
1001	0x7e3c0209, 0x7e3e0208,
1002	0x7e400207, 0x7e420206,
1003	0x7e440205, 0x7e460204,
1004	0x7e480203, 0x7e4a0202,
1005	0x7e4c0201, 0x7e4e0200,
1006	0x7e500209, 0x7e520208,
1007	0x7e540207, 0x7e560206,
1008	0x7e580205, 0x7e5a0204,
1009	0x7e5c0203, 0x7e5e0202,
1010	0x7e600201, 0x7e620200,
1011	0x7e640209, 0x7e660208,
1012	0x7e680207, 0x7e6a0206,
1013	0x7e6c0205, 0x7e6e0204,
1014	0x7e700203, 0x7e720202,
1015	0x7e740201, 0x7e760200,
1016	0x7e780209, 0x7e7a0208,
1017	0x7e7c0207, 0x7e7e0206,
1018	0xbf8a0000, 0xbf810000,
1019};
1020
/*
 * Raw shader dwords for the two SGPR passes of
 * gfx_v8_0_do_edc_gpr_workarounds(): the table is copied into the
 * workaround IB at sgpr_offset and that address is programmed as the
 * compute program for both the SGPR1 and the SGPR2 dispatch.
 * NOTE(review): presumably pre-assembled GPU machine code that writes the
 * SGPRs -- the encoding cannot be verified from this file.
 */
1021static const u32 sgpr_init_compute_shader[] =
1022{
1023	0xbe8a0100, 0xbe8c0102,
1024	0xbe8e0104, 0xbe900106,
1025	0xbe920108, 0xbe940100,
1026	0xbe960102, 0xbe980104,
1027	0xbe9a0106, 0xbe9c0108,
1028	0xbe9e0100, 0xbea00102,
1029	0xbea20104, 0xbea40106,
1030	0xbea60108, 0xbea80100,
1031	0xbeaa0102, 0xbeac0104,
1032	0xbeae0106, 0xbeb00108,
1033	0xbeb20100, 0xbeb40102,
1034	0xbeb60104, 0xbeb80106,
1035	0xbeba0108, 0xbebc0100,
1036	0xbebe0102, 0xbec00104,
1037	0xbec20106, 0xbec40108,
1038	0xbec60100, 0xbec80102,
1039	0xbee60004, 0xbee70005,
1040	0xbeea0006, 0xbeeb0007,
1041	0xbee80008, 0xbee90009,
1042	0xbefc0000, 0xbf8a0000,
1043	0xbf810000, 0x00000000,
1044};
1045
/*
 * (register, value) pairs for the VGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds(): each pair is emitted as one
 * SET_SH_REG packet ahead of the first dispatch.  The table must keep an
 * even number of entries because it is walked two dwords at a time.
 */
1046static const u32 vgpr_init_regs[] =
1047{
1048	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1049	mmCOMPUTE_RESOURCE_LIMITS, 0,
1050	mmCOMPUTE_NUM_THREAD_X, 256*4,
1051	mmCOMPUTE_NUM_THREAD_Y, 1,
1052	mmCOMPUTE_NUM_THREAD_Z, 1,

1053	mmCOMPUTE_PGM_RSRC2, 20,
1054	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1055	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1056	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1057	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1058	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1059	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1060	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1061	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1062	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1063	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1064};
1065
/*
 * (register, value) pairs for the first SGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds(), emitted as SET_SH_REG packets ahead
 * of the second dispatch.  Differs from sgpr2_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here, 0xf0 there).
 */
1066static const u32 sgpr1_init_regs[] =
1067{
1068	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1069	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1070	mmCOMPUTE_NUM_THREAD_X, 256*5,
1071	mmCOMPUTE_NUM_THREAD_Y, 1,
1072	mmCOMPUTE_NUM_THREAD_Z, 1,

1073	mmCOMPUTE_PGM_RSRC2, 20,
1074	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1075	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1076	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1077	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1078	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1079	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1080	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1081	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1082	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1083	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1084};
1085
/*
 * (register, value) pairs for the second SGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds(), emitted as SET_SH_REG packets ahead
 * of the third dispatch.  Differs from sgpr1_init_regs only in the
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here, 0x0f there).
 */
1086static const u32 sgpr2_init_regs[] =
1087{
1088	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1089	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1090	mmCOMPUTE_NUM_THREAD_X, 256*5,
1091	mmCOMPUTE_NUM_THREAD_Y, 1,
1092	mmCOMPUTE_NUM_THREAD_Z, 1,

1093	mmCOMPUTE_PGM_RSRC2, 20,
1094	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1095	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1096	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1097	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1098	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1099	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1100	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1101	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1102	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1103	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1104};
1105
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back
 * (discarding the values) once the workaround dispatches have completed;
 * per the comment at the call site the read-back clears the counters.
 */
1106static const u32 sec_ded_counter_registers[] =
1107{
1108	mmCPC_EDC_ATC_CNT,
1109	mmCPC_EDC_SCRATCH_CNT,
1110	mmCPC_EDC_UCODE_CNT,
1111	mmCPF_EDC_ATC_CNT,
1112	mmCPF_EDC_ROQ_CNT,
1113	mmCPF_EDC_TAG_CNT,
1114	mmCPG_EDC_ATC_CNT,
1115	mmCPG_EDC_DMA_CNT,
1116	mmCPG_EDC_TAG_CNT,
1117	mmDC_EDC_CSINVOC_CNT,
1118	mmDC_EDC_RESTORE_CNT,
1119	mmDC_EDC_STATE_CNT,
1120	mmGDS_EDC_CNT,
1121	mmGDS_EDC_GRBM_CNT,
1122	mmGDS_EDC_OA_DED,
1123	mmSPI_EDC_CNT,
1124	mmSQC_ATC_EDC_GATCL1_CNT,
1125	mmSQC_EDC_CNT,
1126	mmSQ_EDC_DED_CNT,
1127	mmSQ_EDC_INFO,
1128	mmSQ_EDC_SEC_CNT,
1129	mmTCC_EDC_CNT,
1130	mmTCP_ATC_EDC_GATCL1_CNT,
1131	mmTCP_EDC_CNT,
1132	mmTD_EDC_CNT
1133};
1134
1135static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1136{
1137	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1138	struct amdgpu_ib ib;
1139	struct fence *f = NULL;
1140	int r, i;
1141	u32 tmp;
1142	unsigned total_size, vgpr_offset, sgpr_offset;
1143	u64 gpu_addr;
1144
1145	/* only supported on CZ */
1146	if (adev->asic_type != CHIP_CARRIZO)
1147		return 0;
1148
1149	/* bail if the compute ring is not ready */
1150	if (!ring->ready)
1151		return 0;
1152
1153	tmp = RREG32(mmGB_EDC_MODE);
1154	WREG32(mmGB_EDC_MODE, 0);
1155
1156	total_size =
1157		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1158	total_size +=
1159		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1160	total_size +=
1161		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1162	total_size = ALIGN(total_size, 256);
1163	vgpr_offset = total_size;
1164	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1165	sgpr_offset = total_size;
1166	total_size += sizeof(sgpr_init_compute_shader);
1167
1168	/* allocate an indirect buffer to put the commands in */
1169	memset(&ib, 0, sizeof(ib));
1170	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
 
1171	if (r) {
1172		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1173		return r;
1174	}
1175
1176	/* load the compute shaders */
1177	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1178		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1179
1180	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1181		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1182
1183	/* init the ib length to 0 */
1184	ib.length_dw = 0;
1185
1186	/* VGPR */
1187	/* write the register state for the compute dispatch */
1188	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1189		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1190		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1191		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1192	}
1193	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1194	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1195	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1196	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1197	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1198	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1199
1200	/* write dispatch packet */
1201	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1202	ib.ptr[ib.length_dw++] = 8; /* x */
1203	ib.ptr[ib.length_dw++] = 1; /* y */
1204	ib.ptr[ib.length_dw++] = 1; /* z */
1205	ib.ptr[ib.length_dw++] =
1206		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1207
1208	/* write CS partial flush packet */
1209	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1210	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1211
1212	/* SGPR1 */
1213	/* write the register state for the compute dispatch */
1214	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1215		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1216		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1217		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1218	}
1219	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1220	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1221	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1222	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1223	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1224	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1225
1226	/* write dispatch packet */
1227	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1228	ib.ptr[ib.length_dw++] = 8; /* x */
1229	ib.ptr[ib.length_dw++] = 1; /* y */
1230	ib.ptr[ib.length_dw++] = 1; /* z */
1231	ib.ptr[ib.length_dw++] =
1232		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1233
1234	/* write CS partial flush packet */
1235	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1236	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1237
1238	/* SGPR2 */
1239	/* write the register state for the compute dispatch */
1240	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1241		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1242		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1243		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1244	}
1245	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1246	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1247	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1248	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1249	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1250	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1251
1252	/* write dispatch packet */
1253	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1254	ib.ptr[ib.length_dw++] = 8; /* x */
1255	ib.ptr[ib.length_dw++] = 1; /* y */
1256	ib.ptr[ib.length_dw++] = 1; /* z */
1257	ib.ptr[ib.length_dw++] =
1258		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1259
1260	/* write CS partial flush packet */
1261	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1262	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1263
1264	/* shedule the ib on the ring */
1265	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1266	if (r) {
1267		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1268		goto fail;
1269	}
1270
1271	/* wait for the GPU to finish processing the IB */
1272	r = fence_wait(f, false);
1273	if (r) {
1274		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1275		goto fail;
1276	}
1277
1278	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1279	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1280	WREG32(mmGB_EDC_MODE, tmp);
1281
1282	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1283	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1284	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1285
1286
1287	/* read back registers to clear the counters */
1288	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1289		RREG32(sec_ded_counter_registers[i]);
1290
1291fail:
1292	fence_put(f);
1293	amdgpu_ib_free(adev, &ib, NULL);
1294	fence_put(f);
1295
1296	return r;
1297}
1298
1299static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1300{
1301	u32 gb_addr_config;
1302	u32 mc_shared_chmap, mc_arb_ramcfg;
1303	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1304	u32 tmp;
 
1305
1306	switch (adev->asic_type) {
1307	case CHIP_TOPAZ:
1308		adev->gfx.config.max_shader_engines = 1;
1309		adev->gfx.config.max_tile_pipes = 2;
1310		adev->gfx.config.max_cu_per_sh = 6;
1311		adev->gfx.config.max_sh_per_se = 1;
1312		adev->gfx.config.max_backends_per_se = 2;
1313		adev->gfx.config.max_texture_channel_caches = 2;
1314		adev->gfx.config.max_gprs = 256;
1315		adev->gfx.config.max_gs_threads = 32;
1316		adev->gfx.config.max_hw_contexts = 8;
1317
1318		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1319		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1320		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1321		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1322		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1323		break;
1324	case CHIP_FIJI:
1325		adev->gfx.config.max_shader_engines = 4;
1326		adev->gfx.config.max_tile_pipes = 16;
1327		adev->gfx.config.max_cu_per_sh = 16;
1328		adev->gfx.config.max_sh_per_se = 1;
1329		adev->gfx.config.max_backends_per_se = 4;
1330		adev->gfx.config.max_texture_channel_caches = 16;
1331		adev->gfx.config.max_gprs = 256;
1332		adev->gfx.config.max_gs_threads = 32;
1333		adev->gfx.config.max_hw_contexts = 8;
1334
1335		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1336		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1337		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1338		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1339		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1340		break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1341	case CHIP_TONGA:
1342		adev->gfx.config.max_shader_engines = 4;
1343		adev->gfx.config.max_tile_pipes = 8;
1344		adev->gfx.config.max_cu_per_sh = 8;
1345		adev->gfx.config.max_sh_per_se = 1;
1346		adev->gfx.config.max_backends_per_se = 2;
1347		adev->gfx.config.max_texture_channel_caches = 8;
1348		adev->gfx.config.max_gprs = 256;
1349		adev->gfx.config.max_gs_threads = 32;
1350		adev->gfx.config.max_hw_contexts = 8;
1351
1352		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1353		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1354		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1355		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1356		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1357		break;
1358	case CHIP_CARRIZO:
1359		adev->gfx.config.max_shader_engines = 1;
1360		adev->gfx.config.max_tile_pipes = 2;
1361		adev->gfx.config.max_sh_per_se = 1;
1362		adev->gfx.config.max_backends_per_se = 2;
1363
1364		switch (adev->pdev->revision) {
1365		case 0xc4:
1366		case 0x84:
1367		case 0xc8:
1368		case 0xcc:
1369		case 0xe1:
1370		case 0xe3:
1371			/* B10 */
1372			adev->gfx.config.max_cu_per_sh = 8;
1373			break;
1374		case 0xc5:
1375		case 0x81:
1376		case 0x85:
1377		case 0xc9:
1378		case 0xcd:
1379		case 0xe2:
1380		case 0xe4:
1381			/* B8 */
1382			adev->gfx.config.max_cu_per_sh = 6;
1383			break;
1384		case 0xc6:
1385		case 0xca:
1386		case 0xce:
1387		case 0x88:
1388			/* B6 */
1389			adev->gfx.config.max_cu_per_sh = 6;
1390			break;
1391		case 0xc7:
1392		case 0x87:
1393		case 0xcb:
1394		case 0xe5:
1395		case 0x89:
1396		default:
1397			/* B4 */
1398			adev->gfx.config.max_cu_per_sh = 4;
1399			break;
1400		}
1401
1402		adev->gfx.config.max_texture_channel_caches = 2;
1403		adev->gfx.config.max_gprs = 256;
1404		adev->gfx.config.max_gs_threads = 32;
1405		adev->gfx.config.max_hw_contexts = 8;
1406
1407		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1408		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1409		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1410		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1411		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1412		break;
1413	case CHIP_STONEY:
1414		adev->gfx.config.max_shader_engines = 1;
1415		adev->gfx.config.max_tile_pipes = 2;
1416		adev->gfx.config.max_sh_per_se = 1;
1417		adev->gfx.config.max_backends_per_se = 1;
1418
1419		switch (adev->pdev->revision) {
1420		case 0xc0:
1421		case 0xc1:
1422		case 0xc2:
1423		case 0xc4:
1424		case 0xc8:
1425		case 0xc9:
1426			adev->gfx.config.max_cu_per_sh = 3;
1427			break;
1428		case 0xd0:
1429		case 0xd1:
1430		case 0xd2:
1431		default:
1432			adev->gfx.config.max_cu_per_sh = 2;
1433			break;
1434		}
1435
1436		adev->gfx.config.max_texture_channel_caches = 2;
1437		adev->gfx.config.max_gprs = 256;
1438		adev->gfx.config.max_gs_threads = 16;
1439		adev->gfx.config.max_hw_contexts = 8;
1440
1441		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1442		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1443		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1444		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1445		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1446		break;
1447	default:
1448		adev->gfx.config.max_shader_engines = 2;
1449		adev->gfx.config.max_tile_pipes = 4;
1450		adev->gfx.config.max_cu_per_sh = 2;
1451		adev->gfx.config.max_sh_per_se = 1;
1452		adev->gfx.config.max_backends_per_se = 2;
1453		adev->gfx.config.max_texture_channel_caches = 4;
1454		adev->gfx.config.max_gprs = 256;
1455		adev->gfx.config.max_gs_threads = 32;
1456		adev->gfx.config.max_hw_contexts = 8;
1457
1458		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1459		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1460		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1461		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1462		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1463		break;
1464	}
1465
1466	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1467	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1468	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1469
 
 
 
 
 
1470	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1471	adev->gfx.config.mem_max_burst_length_bytes = 256;
1472	if (adev->flags & AMD_IS_APU) {
1473		/* Get memory bank mapping mode. */
1474		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1475		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1476		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1477
1478		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1479		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1480		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1481
1482		/* Validate settings in case only one DIMM installed. */
1483		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1484			dimm00_addr_map = 0;
1485		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1486			dimm01_addr_map = 0;
1487		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1488			dimm10_addr_map = 0;
1489		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1490			dimm11_addr_map = 0;
1491
1492		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1493		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1494		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1495			adev->gfx.config.mem_row_size_in_kb = 2;
1496		else
1497			adev->gfx.config.mem_row_size_in_kb = 1;
1498	} else {
1499		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1500		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1501		if (adev->gfx.config.mem_row_size_in_kb > 4)
1502			adev->gfx.config.mem_row_size_in_kb = 4;
1503	}
1504
1505	adev->gfx.config.shader_engine_tile_size = 32;
1506	adev->gfx.config.num_gpus = 1;
1507	adev->gfx.config.multi_gpu_tile_size = 64;
1508
1509	/* fix up row size */
1510	switch (adev->gfx.config.mem_row_size_in_kb) {
1511	case 1:
1512	default:
1513		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1514		break;
1515	case 2:
1516		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1517		break;
1518	case 4:
1519		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1520		break;
1521	}
1522	adev->gfx.config.gb_addr_config = gb_addr_config;
 
 
1523}
1524
1525static int gfx_v8_0_sw_init(void *handle)
 
1526{
1527	int i, r;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1528	struct amdgpu_ring *ring;
1529	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1530
1531	/* EOP Event */
1532	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1533	if (r)
1534		return r;
1535
1536	/* Privileged reg */
1537	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
 
1538	if (r)
1539		return r;
1540
1541	/* Privileged inst */
1542	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
 
1543	if (r)
1544		return r;
1545
1546	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
 
 
 
 
1547
1548	gfx_v8_0_scratch_init(adev);
 
 
 
 
 
 
 
 
 
 
1549
1550	r = gfx_v8_0_init_microcode(adev);
1551	if (r) {
1552		DRM_ERROR("Failed to load gfx firmware!\n");
1553		return r;
1554	}
1555
 
 
 
 
 
 
1556	r = gfx_v8_0_mec_init(adev);
1557	if (r) {
1558		DRM_ERROR("Failed to init MEC BOs!\n");
1559		return r;
1560	}
1561
1562	/* set up the gfx ring */
1563	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1564		ring = &adev->gfx.gfx_ring[i];
1565		ring->ring_obj = NULL;
1566		sprintf(ring->name, "gfx");
1567		/* no gfx doorbells on iceland */
1568		if (adev->asic_type != CHIP_TOPAZ) {
1569			ring->use_doorbell = true;
1570			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1571		}
1572
1573		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1574				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1575				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1576				     AMDGPU_RING_TYPE_GFX);
1577		if (r)
1578			return r;
1579	}
1580
1581	/* set up the compute queues */
1582	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1583		unsigned irq_type;
1584
1585		/* max 32 queues per MEC */
1586		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1587			DRM_ERROR("Too many (%d) compute rings!\n", i);
1588			break;
 
 
 
 
 
 
 
 
 
 
 
 
 
1589		}
1590		ring = &adev->gfx.compute_ring[i];
1591		ring->ring_obj = NULL;
1592		ring->use_doorbell = true;
1593		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1594		ring->me = 1; /* first MEC */
1595		ring->pipe = i / 8;
1596		ring->queue = i % 8;
1597		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1598		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1599		/* type-2 packets are deprecated on MEC, use type-3 instead */
1600		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1601				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1602				     &adev->gfx.eop_irq, irq_type,
1603				     AMDGPU_RING_TYPE_COMPUTE);
1604		if (r)
1605			return r;
1606	}
1607
1608	/* reserve GDS, GWS and OA resource for gfx */
1609	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1610			PAGE_SIZE, true,
1611			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1612			NULL, &adev->gds.gds_gfx_bo);
1613	if (r)
1614		return r;
 
1615
1616	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1617		PAGE_SIZE, true,
1618		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1619		NULL, &adev->gds.gws_gfx_bo);
1620	if (r)
1621		return r;
1622
1623	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1624			PAGE_SIZE, true,
1625			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1626			NULL, &adev->gds.oa_gfx_bo);
1627	if (r)
1628		return r;
1629
1630	adev->gfx.ce_ram_size = 0x8000;
1631
1632	gfx_v8_0_gpu_early_init(adev);
 
 
1633
1634	return 0;
1635}
1636
1637static int gfx_v8_0_sw_fini(void *handle)
1638{
 
1639	int i;
1640	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1641
1642	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1643	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1644	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1645
1646	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1647		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1648	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1649		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1650
 
 
 
 
1651	gfx_v8_0_mec_fini(adev);
 
 
 
 
 
 
 
 
 
 
 
1652
1653	return 0;
1654}
1655
1656static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1657{
1658	uint32_t *modearray, *mod2array;
1659	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1660	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1661	u32 reg_offset;
1662
1663	modearray = adev->gfx.config.tile_mode_array;
1664	mod2array = adev->gfx.config.macrotile_mode_array;
1665
1666	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1667		modearray[reg_offset] = 0;
1668
1669	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1670		mod2array[reg_offset] = 0;
1671
1672	switch (adev->asic_type) {
1673	case CHIP_TOPAZ:
1674		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675				PIPE_CONFIG(ADDR_SURF_P2) |
1676				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1677				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1679				PIPE_CONFIG(ADDR_SURF_P2) |
1680				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1681				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1683				PIPE_CONFIG(ADDR_SURF_P2) |
1684				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1685				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1687				PIPE_CONFIG(ADDR_SURF_P2) |
1688				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1689				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1690		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1691				PIPE_CONFIG(ADDR_SURF_P2) |
1692				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1693				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1694		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1695				PIPE_CONFIG(ADDR_SURF_P2) |
1696				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1697				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1698		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1699				PIPE_CONFIG(ADDR_SURF_P2) |
1700				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1701				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1702		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1703				PIPE_CONFIG(ADDR_SURF_P2));
1704		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1705				PIPE_CONFIG(ADDR_SURF_P2) |
1706				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1707				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1709				 PIPE_CONFIG(ADDR_SURF_P2) |
1710				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1711				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713				 PIPE_CONFIG(ADDR_SURF_P2) |
1714				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1715				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1717				 PIPE_CONFIG(ADDR_SURF_P2) |
1718				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1720		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1721				 PIPE_CONFIG(ADDR_SURF_P2) |
1722				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1723				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1724		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1725				 PIPE_CONFIG(ADDR_SURF_P2) |
1726				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1727				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1728		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1729				 PIPE_CONFIG(ADDR_SURF_P2) |
1730				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1731				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1732		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1733				 PIPE_CONFIG(ADDR_SURF_P2) |
1734				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1735				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1737				 PIPE_CONFIG(ADDR_SURF_P2) |
1738				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1739				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1741				 PIPE_CONFIG(ADDR_SURF_P2) |
1742				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1745				 PIPE_CONFIG(ADDR_SURF_P2) |
1746				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1749				 PIPE_CONFIG(ADDR_SURF_P2) |
1750				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1751				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1752		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1753				 PIPE_CONFIG(ADDR_SURF_P2) |
1754				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1755				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1756		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1757				 PIPE_CONFIG(ADDR_SURF_P2) |
1758				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1759				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1760		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1761				 PIPE_CONFIG(ADDR_SURF_P2) |
1762				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1763				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1764		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1765				 PIPE_CONFIG(ADDR_SURF_P2) |
1766				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1767				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1768		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1769				 PIPE_CONFIG(ADDR_SURF_P2) |
1770				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1771				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1772		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1773				 PIPE_CONFIG(ADDR_SURF_P2) |
1774				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1775				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1776
1777		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1778				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1779				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780				NUM_BANKS(ADDR_SURF_8_BANK));
1781		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1782				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1783				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784				NUM_BANKS(ADDR_SURF_8_BANK));
1785		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1786				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1787				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788				NUM_BANKS(ADDR_SURF_8_BANK));
1789		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1790				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1791				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792				NUM_BANKS(ADDR_SURF_8_BANK));
1793		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1794				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1795				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1796				NUM_BANKS(ADDR_SURF_8_BANK));
1797		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1798				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1799				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1800				NUM_BANKS(ADDR_SURF_8_BANK));
1801		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1802				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1803				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1804				NUM_BANKS(ADDR_SURF_8_BANK));
1805		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1806				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1807				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808				NUM_BANKS(ADDR_SURF_16_BANK));
1809		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1810				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1811				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812				NUM_BANKS(ADDR_SURF_16_BANK));
1813		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1814				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1815				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1816				 NUM_BANKS(ADDR_SURF_16_BANK));
1817		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1818				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1819				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1820				 NUM_BANKS(ADDR_SURF_16_BANK));
1821		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1822				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1823				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1824				 NUM_BANKS(ADDR_SURF_16_BANK));
1825		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1826				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1827				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1828				 NUM_BANKS(ADDR_SURF_16_BANK));
1829		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1830				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1831				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1832				 NUM_BANKS(ADDR_SURF_8_BANK));
1833
1834		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1835			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1836			    reg_offset != 23)
1837				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1838
1839		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1840			if (reg_offset != 7)
1841				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1842
1843		break;
1844	case CHIP_FIJI:
 
1845		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1848				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1850				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1852				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1854				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1856				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1858				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1859				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1860				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1862				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1863				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1864				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1865		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1867				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1868				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1869		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1870				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1871				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1872				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1873		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1875				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1876				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1877		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1878				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1879		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1882				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1886				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1888				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1890				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1891		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1893				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1894				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1896				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1897				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1899		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1900				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1903		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1904				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1906				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1907		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1908				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1910				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1911		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1912				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1913				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1914				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1915		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1916				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1918				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1920				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1921				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1926				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1928				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1932				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1936				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1937				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1938				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1939		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1940				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1942				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1943		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1944				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1946				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1947		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1948				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1949				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1950				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1951		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1952				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1953				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1954				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1955		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1956				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1957				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1958				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1959		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1960				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1961				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1962				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1963		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1964				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1965				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1966				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1967
1968		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1970				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1971				NUM_BANKS(ADDR_SURF_8_BANK));
1972		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1974				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1975				NUM_BANKS(ADDR_SURF_8_BANK));
1976		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1978				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1979				NUM_BANKS(ADDR_SURF_8_BANK));
1980		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1982				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983				NUM_BANKS(ADDR_SURF_8_BANK));
1984		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1986				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1987				NUM_BANKS(ADDR_SURF_8_BANK));
1988		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1990				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991				NUM_BANKS(ADDR_SURF_8_BANK));
1992		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995				NUM_BANKS(ADDR_SURF_8_BANK));
1996		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1998				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999				NUM_BANKS(ADDR_SURF_8_BANK));
2000		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2002				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003				NUM_BANKS(ADDR_SURF_8_BANK));
2004		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2006				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007				 NUM_BANKS(ADDR_SURF_8_BANK));
2008		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2009				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2010				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2011				 NUM_BANKS(ADDR_SURF_8_BANK));
2012		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2013				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2014				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2015				 NUM_BANKS(ADDR_SURF_8_BANK));
2016		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2017				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2018				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2019				 NUM_BANKS(ADDR_SURF_8_BANK));
2020		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2021				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2022				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2023				 NUM_BANKS(ADDR_SURF_4_BANK));
2024
2025		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2027
2028		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2029			if (reg_offset != 7)
2030				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2031
2032		break;
2033	case CHIP_TONGA:
2034		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2037				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2039				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2041				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2045				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2047				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2048				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2049				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2051				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2052				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2053				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2054		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2055				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2056				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2057				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2058		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2059				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2060				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2061				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2062		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2063				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2064				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2065				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2066		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2067				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2068		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2075				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2077				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2079				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2080		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2083				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2088		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2093				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2095				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2097				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2099				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2100		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2101				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2102				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2103				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2104		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2105				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2107				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2109				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2110				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2115				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2117				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2121				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2125				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2126				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2127				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2128		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2129				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2132		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2133				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2135				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2136		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2137				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2138				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2139				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2140		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2142				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2143				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2146				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2147				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2150				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2151				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2153				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2154				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2155				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2156
2157		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2159				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2160				NUM_BANKS(ADDR_SURF_16_BANK));
2161		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2163				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2164				NUM_BANKS(ADDR_SURF_16_BANK));
2165		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2167				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2168				NUM_BANKS(ADDR_SURF_16_BANK));
2169		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2171				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172				NUM_BANKS(ADDR_SURF_16_BANK));
2173		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176				NUM_BANKS(ADDR_SURF_16_BANK));
2177		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2179				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2180				NUM_BANKS(ADDR_SURF_16_BANK));
2181		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2184				NUM_BANKS(ADDR_SURF_16_BANK));
2185		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2187				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2188				NUM_BANKS(ADDR_SURF_16_BANK));
2189		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2192				NUM_BANKS(ADDR_SURF_16_BANK));
2193		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2195				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196				 NUM_BANKS(ADDR_SURF_16_BANK));
2197		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2200				 NUM_BANKS(ADDR_SURF_16_BANK));
2201		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2203				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2204				 NUM_BANKS(ADDR_SURF_8_BANK));
2205		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2208				 NUM_BANKS(ADDR_SURF_4_BANK));
2209		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2212				 NUM_BANKS(ADDR_SURF_4_BANK));
2213
2214		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2215			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2216
2217		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2218			if (reg_offset != 7)
2219				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2220
2221		break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2222	case CHIP_STONEY:
2223		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224				PIPE_CONFIG(ADDR_SURF_P2) |
2225				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2226				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228				PIPE_CONFIG(ADDR_SURF_P2) |
2229				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2230				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232				PIPE_CONFIG(ADDR_SURF_P2) |
2233				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2234				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236				PIPE_CONFIG(ADDR_SURF_P2) |
2237				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2238				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240				PIPE_CONFIG(ADDR_SURF_P2) |
2241				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244				PIPE_CONFIG(ADDR_SURF_P2) |
2245				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2246				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2247		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248				PIPE_CONFIG(ADDR_SURF_P2) |
2249				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2250				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2252				PIPE_CONFIG(ADDR_SURF_P2));
2253		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2254				PIPE_CONFIG(ADDR_SURF_P2) |
2255				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258				 PIPE_CONFIG(ADDR_SURF_P2) |
2259				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262				 PIPE_CONFIG(ADDR_SURF_P2) |
2263				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2264				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266				 PIPE_CONFIG(ADDR_SURF_P2) |
2267				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270				 PIPE_CONFIG(ADDR_SURF_P2) |
2271				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2274				 PIPE_CONFIG(ADDR_SURF_P2) |
2275				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278				 PIPE_CONFIG(ADDR_SURF_P2) |
2279				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282				 PIPE_CONFIG(ADDR_SURF_P2) |
2283				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286				 PIPE_CONFIG(ADDR_SURF_P2) |
2287				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290				 PIPE_CONFIG(ADDR_SURF_P2) |
2291				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294				 PIPE_CONFIG(ADDR_SURF_P2) |
2295				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298				 PIPE_CONFIG(ADDR_SURF_P2) |
2299				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2302				 PIPE_CONFIG(ADDR_SURF_P2) |
2303				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2306				 PIPE_CONFIG(ADDR_SURF_P2) |
2307				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2308				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2310				 PIPE_CONFIG(ADDR_SURF_P2) |
2311				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314				 PIPE_CONFIG(ADDR_SURF_P2) |
2315				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318				 PIPE_CONFIG(ADDR_SURF_P2) |
2319				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322				 PIPE_CONFIG(ADDR_SURF_P2) |
2323				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325
2326		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2328				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2329				NUM_BANKS(ADDR_SURF_8_BANK));
2330		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2332				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2333				NUM_BANKS(ADDR_SURF_8_BANK));
2334		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337				NUM_BANKS(ADDR_SURF_8_BANK));
2338		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2340				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341				NUM_BANKS(ADDR_SURF_8_BANK));
2342		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2344				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345				NUM_BANKS(ADDR_SURF_8_BANK));
2346		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2348				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349				NUM_BANKS(ADDR_SURF_8_BANK));
2350		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2352				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353				NUM_BANKS(ADDR_SURF_8_BANK));
2354		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2355				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2356				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357				NUM_BANKS(ADDR_SURF_16_BANK));
2358		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2359				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2360				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361				NUM_BANKS(ADDR_SURF_16_BANK));
2362		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2363				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365				 NUM_BANKS(ADDR_SURF_16_BANK));
2366		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2367				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2368				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2369				 NUM_BANKS(ADDR_SURF_16_BANK));
2370		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2373				 NUM_BANKS(ADDR_SURF_16_BANK));
2374		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377				 NUM_BANKS(ADDR_SURF_16_BANK));
2378		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381				 NUM_BANKS(ADDR_SURF_8_BANK));
2382
2383		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2384			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2385			    reg_offset != 23)
2386				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2387
2388		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2389			if (reg_offset != 7)
2390				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2391
2392		break;
2393	default:
2394		dev_warn(adev->dev,
2395			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2396			 adev->asic_type);
 
2397
2398	case CHIP_CARRIZO:
2399		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400				PIPE_CONFIG(ADDR_SURF_P2) |
2401				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2402				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404				PIPE_CONFIG(ADDR_SURF_P2) |
2405				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2406				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408				PIPE_CONFIG(ADDR_SURF_P2) |
2409				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412				PIPE_CONFIG(ADDR_SURF_P2) |
2413				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2414				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416				PIPE_CONFIG(ADDR_SURF_P2) |
2417				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2418				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420				PIPE_CONFIG(ADDR_SURF_P2) |
2421				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424				PIPE_CONFIG(ADDR_SURF_P2) |
2425				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2428				PIPE_CONFIG(ADDR_SURF_P2));
2429		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430				PIPE_CONFIG(ADDR_SURF_P2) |
2431				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434				 PIPE_CONFIG(ADDR_SURF_P2) |
2435				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438				 PIPE_CONFIG(ADDR_SURF_P2) |
2439				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2442				 PIPE_CONFIG(ADDR_SURF_P2) |
2443				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446				 PIPE_CONFIG(ADDR_SURF_P2) |
2447				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2448				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2450				 PIPE_CONFIG(ADDR_SURF_P2) |
2451				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2452				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454				 PIPE_CONFIG(ADDR_SURF_P2) |
2455				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2457		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2458				 PIPE_CONFIG(ADDR_SURF_P2) |
2459				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2462				 PIPE_CONFIG(ADDR_SURF_P2) |
2463				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2464				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2466				 PIPE_CONFIG(ADDR_SURF_P2) |
2467				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2470				 PIPE_CONFIG(ADDR_SURF_P2) |
2471				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2474				 PIPE_CONFIG(ADDR_SURF_P2) |
2475				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2476				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2478				 PIPE_CONFIG(ADDR_SURF_P2) |
2479				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2482				 PIPE_CONFIG(ADDR_SURF_P2) |
2483				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2486				 PIPE_CONFIG(ADDR_SURF_P2) |
2487				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2490				 PIPE_CONFIG(ADDR_SURF_P2) |
2491				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2492				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2493		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494				 PIPE_CONFIG(ADDR_SURF_P2) |
2495				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2496				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2498				 PIPE_CONFIG(ADDR_SURF_P2) |
2499				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2501
2502		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505				NUM_BANKS(ADDR_SURF_8_BANK));
2506		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2509				NUM_BANKS(ADDR_SURF_8_BANK));
2510		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513				NUM_BANKS(ADDR_SURF_8_BANK));
2514		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2517				NUM_BANKS(ADDR_SURF_8_BANK));
2518		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521				NUM_BANKS(ADDR_SURF_8_BANK));
2522		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525				NUM_BANKS(ADDR_SURF_8_BANK));
2526		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529				NUM_BANKS(ADDR_SURF_8_BANK));
2530		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2531				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2532				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533				NUM_BANKS(ADDR_SURF_16_BANK));
2534		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2535				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537				NUM_BANKS(ADDR_SURF_16_BANK));
2538		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2539				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2541				 NUM_BANKS(ADDR_SURF_16_BANK));
2542		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2543				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2545				 NUM_BANKS(ADDR_SURF_16_BANK));
2546		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2549				 NUM_BANKS(ADDR_SURF_16_BANK));
2550		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2553				 NUM_BANKS(ADDR_SURF_16_BANK));
2554		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2556				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557				 NUM_BANKS(ADDR_SURF_8_BANK));
2558
2559		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2560			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2561			    reg_offset != 23)
2562				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2563
2564		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2565			if (reg_offset != 7)
2566				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2567
2568		break;
2569	}
2570}
2571
2572void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
 
 
2573{
2574	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2575
2576	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2577		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2578		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2579	} else if (se_num == 0xffffffff) {
2580		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
 
2581		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2582	} else if (sh_num == 0xffffffff) {
2583		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2584		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2585	} else {
 
 
 
2586		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2587		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2588	}
2589	WREG32(mmGRBM_GFX_INDEX, data);
2590}
2591
2592static u32 gfx_v8_0_create_bitmask(u32 bit_width)
 
2593{
2594	return (u32)((1ULL << bit_width) - 1);
2595}
2596
2597static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2598{
2599	u32 data, mask;
2600
2601	data = RREG32(mmCC_RB_BACKEND_DISABLE);
2602	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2603
2604	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2605	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2606
2607	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2608				       adev->gfx.config.max_sh_per_se);
2609
2610	return (~data) & mask;
2611}
2612
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2613static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2614{
2615	int i, j;
2616	u32 data;
 
2617	u32 active_rbs = 0;
2618	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2619					adev->gfx.config.max_sh_per_se;
 
2620
2621	mutex_lock(&adev->grbm_idx_mutex);
2622	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2623		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2624			gfx_v8_0_select_se_sh(adev, i, j);
2625			data = gfx_v8_0_get_rb_active_bitmap(adev);
2626			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2627					       rb_bitmap_width_per_sh);
2628		}
2629	}
2630	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2631	mutex_unlock(&adev->grbm_idx_mutex);
2632
2633	adev->gfx.config.backend_enable_mask = active_rbs;
2634	adev->gfx.config.num_rbs = hweight32(active_rbs);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2635}
2636
 
2637/**
2638 * gfx_v8_0_init_compute_vmid - gart enable
2639 *
2640 * @rdev: amdgpu_device pointer
2641 *
2642 * Initialize compute vmid sh_mem registers
2643 *
2644 */
2645#define DEFAULT_SH_MEM_BASES	(0x6000)
2646#define FIRST_COMPUTE_VMID	(8)
2647#define LAST_COMPUTE_VMID	(16)
2648static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2649{
2650	int i;
2651	uint32_t sh_mem_config;
2652	uint32_t sh_mem_bases;
2653
2654	/*
2655	 * Configure apertures:
2656	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2657	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2658	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2659	 */
2660	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2661
2662	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2663			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2664			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2665			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2666			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2667			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2668
2669	mutex_lock(&adev->srbm_mutex);
2670	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2671		vi_srbm_select(adev, 0, 0, 0, i);
2672		/* CP and shaders */
2673		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2674		WREG32(mmSH_MEM_APE1_BASE, 1);
2675		WREG32(mmSH_MEM_APE1_LIMIT, 0);
2676		WREG32(mmSH_MEM_BASES, sh_mem_bases);
2677	}
2678	vi_srbm_select(adev, 0, 0, 0, 0);
2679	mutex_unlock(&adev->srbm_mutex);
 
 
 
 
 
 
 
 
 
2680}
2681
2682static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2683{
2684	u32 tmp;
2685	int i;
2686
2687	tmp = RREG32(mmGRBM_CNTL);
2688	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2689	WREG32(mmGRBM_CNTL, tmp);
 
 
 
 
 
 
 
 
 
 
2690
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2691	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2692	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2693	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2694
2695	gfx_v8_0_tiling_mode_table_init(adev);
2696
2697	gfx_v8_0_setup_rb(adev);
 
 
2698
2699	/* XXX SH_MEM regs */
2700	/* where to put LDS, scratch, GPUVM in FSA64 space */
 
 
 
 
 
 
 
 
2701	mutex_lock(&adev->srbm_mutex);
2702	for (i = 0; i < 16; i++) {
2703		vi_srbm_select(adev, 0, 0, 0, i);
2704		/* CP and shaders */
2705		if (i == 0) {
2706			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2707			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2708			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2709					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2710			WREG32(mmSH_MEM_CONFIG, tmp);
 
2711		} else {
2712			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2713			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2714			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2715					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2716			WREG32(mmSH_MEM_CONFIG, tmp);
 
 
2717		}
2718
2719		WREG32(mmSH_MEM_APE1_BASE, 1);
2720		WREG32(mmSH_MEM_APE1_LIMIT, 0);
2721		WREG32(mmSH_MEM_BASES, 0);
2722	}
2723	vi_srbm_select(adev, 0, 0, 0, 0);
2724	mutex_unlock(&adev->srbm_mutex);
2725
2726	gfx_v8_0_init_compute_vmid(adev);
 
2727
2728	mutex_lock(&adev->grbm_idx_mutex);
2729	/*
2730	 * making sure that the following register writes will be broadcasted
2731	 * to all the shaders
2732	 */
2733	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2734
2735	WREG32(mmPA_SC_FIFO_SIZE,
2736		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
2737			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2738		   (adev->gfx.config.sc_prim_fifo_size_backend <<
2739			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2740		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
2741			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2742		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2743			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
 
 
 
 
 
 
 
 
2744	mutex_unlock(&adev->grbm_idx_mutex);
2745
2746}
2747
2748static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2749{
2750	u32 i, j, k;
2751	u32 mask;
2752
2753	mutex_lock(&adev->grbm_idx_mutex);
2754	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2755		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2756			gfx_v8_0_select_se_sh(adev, i, j);
2757			for (k = 0; k < adev->usec_timeout; k++) {
2758				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2759					break;
2760				udelay(1);
2761			}
 
 
 
 
 
 
 
 
2762		}
2763	}
2764	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2765	mutex_unlock(&adev->grbm_idx_mutex);
2766
2767	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2768		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2769		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2770		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2771	for (k = 0; k < adev->usec_timeout; k++) {
2772		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2773			break;
2774		udelay(1);
2775	}
2776}
2777
2778static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2779					       bool enable)
2780{
2781	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2782
2783	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2784	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2785	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2786	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2787
2788	WREG32(mmCP_INT_CNTL_RING0, tmp);
2789}
2790
2791void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2792{
2793	u32 tmp = RREG32(mmRLC_CNTL);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2794
2795	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2796	WREG32(mmRLC_CNTL, tmp);
 
 
2797
2798	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2799
2800	gfx_v8_0_wait_for_rlc_serdes(adev);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2801}
2802
2803static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2804{
2805	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2806
2807	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2808	WREG32(mmGRBM_SOFT_RESET, tmp);
2809	udelay(50);
2810	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2811	WREG32(mmGRBM_SOFT_RESET, tmp);
2812	udelay(50);
2813}
2814
2815static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2816{
2817	u32 tmp = RREG32(mmRLC_CNTL);
 
2818
2819	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2820	WREG32(mmRLC_CNTL, tmp);
 
2821
2822	/* carrizo do enable cp interrupt after cp inited */
2823	if (!(adev->flags & AMD_IS_APU))
2824		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
 
 
 
 
 
 
 
2825
2826	udelay(50);
2827}
2828
2829static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
 
2830{
2831	const struct rlc_firmware_header_v2_0 *hdr;
2832	const __le32 *fw_data;
2833	unsigned i, fw_size;
2834
2835	if (!adev->gfx.rlc_fw)
2836		return -EINVAL;
 
 
 
2837
2838	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2839	amdgpu_ucode_print_rlc_hdr(&hdr->header);
 
 
2840
2841	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2842			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2843	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2844
2845	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2846	for (i = 0; i < fw_size; i++)
2847		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2848	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
 
 
 
 
 
 
 
 
 
 
2849
2850	return 0;
2851}
2852
2853static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2854{
2855	int r;
2856
2857	gfx_v8_0_rlc_stop(adev);
 
 
 
 
 
 
 
2858
2859	/* disable CG */
2860	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
 
2861
2862	/* disable PG */
2863	WREG32(mmRLC_PG_CNTL, 0);
 
2864
2865	gfx_v8_0_rlc_reset(adev);
2866
2867	if (!adev->pp_enabled) {
2868		if (!adev->firmware.smu_load) {
2869			/* legacy rlc firmware loading */
2870			r = gfx_v8_0_rlc_load_microcode(adev);
2871			if (r)
2872				return r;
2873		} else {
2874			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2875							AMDGPU_UCODE_ID_RLC_G);
2876			if (r)
2877				return -EINVAL;
2878		}
2879	}
2880
2881	gfx_v8_0_rlc_start(adev);
 
 
 
2882
2883	return 0;
2884}
2885
2886static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2887{
2888	int i;
2889	u32 tmp = RREG32(mmCP_ME_CNTL);
2890
2891	if (enable) {
2892		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2893		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2894		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2895	} else {
2896		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2897		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2898		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2899		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2900			adev->gfx.gfx_ring[i].ready = false;
2901	}
2902	WREG32(mmCP_ME_CNTL, tmp);
2903	udelay(50);
2904}
2905
2906static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2907{
2908	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2909	const struct gfx_firmware_header_v1_0 *ce_hdr;
2910	const struct gfx_firmware_header_v1_0 *me_hdr;
2911	const __le32 *fw_data;
2912	unsigned i, fw_size;
2913
2914	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2915		return -EINVAL;
2916
2917	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2918		adev->gfx.pfp_fw->data;
2919	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2920		adev->gfx.ce_fw->data;
2921	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2922		adev->gfx.me_fw->data;
2923
2924	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2925	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2926	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2927
2928	gfx_v8_0_cp_gfx_enable(adev, false);
2929
2930	/* PFP */
2931	fw_data = (const __le32 *)
2932		(adev->gfx.pfp_fw->data +
2933		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2934	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2935	WREG32(mmCP_PFP_UCODE_ADDR, 0);
2936	for (i = 0; i < fw_size; i++)
2937		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2938	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2939
2940	/* CE */
2941	fw_data = (const __le32 *)
2942		(adev->gfx.ce_fw->data +
2943		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2944	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2945	WREG32(mmCP_CE_UCODE_ADDR, 0);
2946	for (i = 0; i < fw_size; i++)
2947		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2948	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2949
2950	/* ME */
2951	fw_data = (const __le32 *)
2952		(adev->gfx.me_fw->data +
2953		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2954	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2955	WREG32(mmCP_ME_RAM_WADDR, 0);
2956	for (i = 0; i < fw_size; i++)
2957		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2958	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2959
2960	return 0;
2961}
2962
2963static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2964{
2965	u32 count = 0;
2966	const struct cs_section_def *sect = NULL;
2967	const struct cs_extent_def *ext = NULL;
2968
2969	/* begin clear state */
2970	count += 2;
2971	/* context control state */
2972	count += 3;
2973
2974	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2975		for (ext = sect->section; ext->extent != NULL; ++ext) {
2976			if (sect->id == SECT_CONTEXT)
2977				count += 2 + ext->reg_count;
2978			else
2979				return 0;
2980		}
2981	}
2982	/* pa_sc_raster_config/pa_sc_raster_config1 */
2983	count += 4;
2984	/* end clear state */
2985	count += 2;
2986	/* clear state */
2987	count += 2;
2988
2989	return count;
2990}
2991
2992static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2993{
2994	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2995	const struct cs_section_def *sect = NULL;
2996	const struct cs_extent_def *ext = NULL;
2997	int r, i;
2998
2999	/* init the CP */
3000	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3001	WREG32(mmCP_ENDIAN_SWAP, 0);
3002	WREG32(mmCP_DEVICE_ID, 1);
3003
3004	gfx_v8_0_cp_gfx_enable(adev, true);
3005
3006	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3007	if (r) {
3008		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3009		return r;
3010	}
3011
3012	/* clear state buffer */
3013	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3014	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3015
3016	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3017	amdgpu_ring_write(ring, 0x80000000);
3018	amdgpu_ring_write(ring, 0x80000000);
3019
3020	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3021		for (ext = sect->section; ext->extent != NULL; ++ext) {
3022			if (sect->id == SECT_CONTEXT) {
3023				amdgpu_ring_write(ring,
3024				       PACKET3(PACKET3_SET_CONTEXT_REG,
3025					       ext->reg_count));
3026				amdgpu_ring_write(ring,
3027				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3028				for (i = 0; i < ext->reg_count; i++)
3029					amdgpu_ring_write(ring, ext->extent[i]);
3030			}
3031		}
3032	}
3033
3034	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3035	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3036	switch (adev->asic_type) {
3037	case CHIP_TONGA:
3038		amdgpu_ring_write(ring, 0x16000012);
3039		amdgpu_ring_write(ring, 0x0000002A);
3040		break;
3041	case CHIP_FIJI:
3042		amdgpu_ring_write(ring, 0x3a00161a);
3043		amdgpu_ring_write(ring, 0x0000002e);
3044		break;
3045	case CHIP_TOPAZ:
3046	case CHIP_CARRIZO:
3047		amdgpu_ring_write(ring, 0x00000002);
3048		amdgpu_ring_write(ring, 0x00000000);
3049		break;
3050	case CHIP_STONEY:
3051		amdgpu_ring_write(ring, 0x00000000);
3052		amdgpu_ring_write(ring, 0x00000000);
3053		break;
3054	default:
3055		BUG();
3056	}
3057
3058	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3059	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3060
3061	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3062	amdgpu_ring_write(ring, 0);
3063
3064	/* init the CE partitions */
3065	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3066	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3067	amdgpu_ring_write(ring, 0x8000);
3068	amdgpu_ring_write(ring, 0x8000);
3069
3070	amdgpu_ring_commit(ring);
3071
3072	return 0;
3073}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3074
3075static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3076{
3077	struct amdgpu_ring *ring;
3078	u32 tmp;
3079	u32 rb_bufsz;
3080	u64 rb_addr, rptr_addr;
3081	int r;
3082
3083	/* Set the write pointer delay */
3084	WREG32(mmCP_RB_WPTR_DELAY, 0);
3085
3086	/* set the RB to use vmid 0 */
3087	WREG32(mmCP_RB_VMID, 0);
3088
3089	/* Set ring buffer size */
3090	ring = &adev->gfx.gfx_ring[0];
3091	rb_bufsz = order_base_2(ring->ring_size / 8);
3092	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3093	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3094	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3095	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3096#ifdef __BIG_ENDIAN
3097	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3098#endif
3099	WREG32(mmCP_RB0_CNTL, tmp);
3100
3101	/* Initialize the ring buffer's read and write pointers */
3102	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3103	ring->wptr = 0;
3104	WREG32(mmCP_RB0_WPTR, ring->wptr);
3105
3106	/* set the wb address wether it's enabled or not */
3107	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3108	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3109	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3110
 
 
 
3111	mdelay(1);
3112	WREG32(mmCP_RB0_CNTL, tmp);
3113
3114	rb_addr = ring->gpu_addr >> 8;
3115	WREG32(mmCP_RB0_BASE, rb_addr);
3116	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3117
3118	/* no gfx doorbells on iceland */
3119	if (adev->asic_type != CHIP_TOPAZ) {
3120		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3121		if (ring->use_doorbell) {
3122			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3123					    DOORBELL_OFFSET, ring->doorbell_index);
3124			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3125					    DOORBELL_EN, 1);
3126		} else {
3127			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3128					    DOORBELL_EN, 0);
3129		}
3130		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3131
3132		if (adev->asic_type == CHIP_TONGA) {
3133			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3134					    DOORBELL_RANGE_LOWER,
3135					    AMDGPU_DOORBELL_GFX_RING0);
3136			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3137
3138			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3139			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3140		}
3141
3142	}
3143
3144	/* start the ring */
 
3145	gfx_v8_0_cp_gfx_start(adev);
3146	ring->ready = true;
3147	r = amdgpu_ring_test_ring(ring);
3148	if (r) {
3149		ring->ready = false;
3150		return r;
3151	}
3152
3153	return 0;
3154}
3155
3156static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3157{
3158	int i;
3159
3160	if (enable) {
3161		WREG32(mmCP_MEC_CNTL, 0);
3162	} else {
3163		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3164		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3165			adev->gfx.compute_ring[i].ready = false;
3166	}
3167	udelay(50);
3168}
3169
3170static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 
3171{
3172	const struct gfx_firmware_header_v1_0 *mec_hdr;
3173	const __le32 *fw_data;
3174	unsigned i, fw_size;
3175
3176	if (!adev->gfx.mec_fw)
3177		return -EINVAL;
3178
3179	gfx_v8_0_cp_compute_enable(adev, false);
 
 
 
 
 
 
 
3180
3181	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3182	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
 
 
 
3183
3184	fw_data = (const __le32 *)
3185		(adev->gfx.mec_fw->data +
3186		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3187	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3188
3189	/* MEC1 */
3190	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3191	for (i = 0; i < fw_size; i++)
3192		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3193	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
 
3194
3195	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3196	if (adev->gfx.mec2_fw) {
3197		const struct gfx_firmware_header_v1_0 *mec2_hdr;
3198
3199		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3200		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3201
3202		fw_data = (const __le32 *)
3203			(adev->gfx.mec2_fw->data +
3204			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3205		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3206
3207		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3208		for (i = 0; i < fw_size; i++)
3209			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3210		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
 
 
 
 
 
3211	}
3212
 
 
3213	return 0;
3214}
3215
/*
 * struct vi_mqd - memory queue descriptor used for the VI compute queues.
 *
 * gfx_v8_0_cp_compute_resume() allocates one buffer object of
 * sizeof(struct vi_mqd) bytes per compute ring, zeroes it and fills in a
 * subset of the members with the same values it writes to the matching
 * CP_MQD_* / CP_HQD_* registers.
 *
 * Every named member is one 32-bit dword; the "ordinalN" comment is the
 * member's dword index from the start of the structure (0..255), followed
 * by a 256-dword area reserved for the microcode.
 *
 * NOTE(review): the member order and count are presumably dictated by the
 * CP microcode - confirm before adding, removing or reordering anything.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3476
3477static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3478{
3479	int i, r;
3480
3481	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3482		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
 
 
 
 
 
 
 
 
 
 
3483
3484		if (ring->mqd_obj) {
3485			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3486			if (unlikely(r != 0))
3487				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3488
3489			amdgpu_bo_unpin(ring->mqd_obj);
3490			amdgpu_bo_unreserve(ring->mqd_obj);
 
3491
3492			amdgpu_bo_unref(&ring->mqd_obj);
3493			ring->mqd_obj = NULL;
 
 
 
3494		}
3495	}
3496}
3497
3498static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3499{
3500	int r, i, j;
3501	u32 tmp;
3502	bool use_doorbell = true;
3503	u64 hqd_gpu_addr;
3504	u64 mqd_gpu_addr;
3505	u64 eop_gpu_addr;
3506	u64 wb_gpu_addr;
3507	u32 *buf;
3508	struct vi_mqd *mqd;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3509
3510	/* init the pipes */
3511	mutex_lock(&adev->srbm_mutex);
3512	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3513		int me = (i < 4) ? 1 : 2;
3514		int pipe = (i < 4) ? i : (i - 4);
3515
3516		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3517		eop_gpu_addr >>= 8;
3518
3519		vi_srbm_select(adev, me, pipe, 0, 0);
3520
3521		/* write the EOP addr */
3522		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3523		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3524
3525		/* set the VMID assigned */
3526		WREG32(mmCP_HQD_VMID, 0);
3527
3528		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3529		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3530		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3531				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
3532		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3533	}
3534	vi_srbm_select(adev, 0, 0, 0, 0);
3535	mutex_unlock(&adev->srbm_mutex);
3536
3537	/* init the queues.  Just two for now. */
3538	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3539		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3540
3541		if (ring->mqd_obj == NULL) {
3542			r = amdgpu_bo_create(adev,
3543					     sizeof(struct vi_mqd),
3544					     PAGE_SIZE, true,
3545					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3546					     NULL, &ring->mqd_obj);
3547			if (r) {
3548				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3549				return r;
3550			}
3551		}
3552
3553		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3554		if (unlikely(r != 0)) {
3555			gfx_v8_0_cp_compute_fini(adev);
3556			return r;
3557		}
3558		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3559				  &mqd_gpu_addr);
3560		if (r) {
3561			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3562			gfx_v8_0_cp_compute_fini(adev);
3563			return r;
3564		}
3565		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3566		if (r) {
3567			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3568			gfx_v8_0_cp_compute_fini(adev);
3569			return r;
3570		}
3571
3572		/* init the mqd struct */
3573		memset(buf, 0, sizeof(struct vi_mqd));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3574
3575		mqd = (struct vi_mqd *)buf;
3576		mqd->header = 0xC0310800;
3577		mqd->compute_pipelinestat_enable = 0x00000001;
3578		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3579		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3580		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3581		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3582		mqd->compute_misc_reserved = 0x00000003;
3583
3584		mutex_lock(&adev->srbm_mutex);
3585		vi_srbm_select(adev, ring->me,
3586			       ring->pipe,
3587			       ring->queue, 0);
3588
3589		/* disable wptr polling */
3590		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3591		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3592		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3593
3594		mqd->cp_hqd_eop_base_addr_lo =
3595			RREG32(mmCP_HQD_EOP_BASE_ADDR);
3596		mqd->cp_hqd_eop_base_addr_hi =
3597			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3598
3599		/* enable doorbell? */
3600		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3601		if (use_doorbell) {
3602			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3603		} else {
3604			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3605		}
3606		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3607		mqd->cp_hqd_pq_doorbell_control = tmp;
3608
3609		/* disable the queue if it's active */
3610		mqd->cp_hqd_dequeue_request = 0;
3611		mqd->cp_hqd_pq_rptr = 0;
3612		mqd->cp_hqd_pq_wptr= 0;
3613		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3614			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3615			for (j = 0; j < adev->usec_timeout; j++) {
3616				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3617					break;
3618				udelay(1);
3619			}
3620			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3621			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3622			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3623		}
3624
3625		/* set the pointer to the MQD */
3626		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3627		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3628		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3629		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3630
3631		/* set MQD vmid to 0 */
3632		tmp = RREG32(mmCP_MQD_CONTROL);
3633		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3634		WREG32(mmCP_MQD_CONTROL, tmp);
3635		mqd->cp_mqd_control = tmp;
3636
3637		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3638		hqd_gpu_addr = ring->gpu_addr >> 8;
3639		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3640		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3641		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3642		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3643
3644		/* set up the HQD, this is similar to CP_RB0_CNTL */
3645		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3646		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3647				    (order_base_2(ring->ring_size / 4) - 1));
3648		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3649			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3650#ifdef __BIG_ENDIAN
3651		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3652#endif
3653		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3654		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3655		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3656		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3657		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3658		mqd->cp_hqd_pq_control = tmp;
3659
3660		/* set the wb address wether it's enabled or not */
3661		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3662		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3663		mqd->cp_hqd_pq_rptr_report_addr_hi =
3664			upper_32_bits(wb_gpu_addr) & 0xffff;
3665		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3666		       mqd->cp_hqd_pq_rptr_report_addr_lo);
3667		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3668		       mqd->cp_hqd_pq_rptr_report_addr_hi);
3669
3670		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3671		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3672		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3673		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3674		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3675		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3676		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3677
3678		/* enable the doorbell if requested */
3679		if (use_doorbell) {
3680			if ((adev->asic_type == CHIP_CARRIZO) ||
3681			    (adev->asic_type == CHIP_FIJI) ||
3682			    (adev->asic_type == CHIP_STONEY)) {
3683				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3684				       AMDGPU_DOORBELL_KIQ << 2);
3685				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3686				       AMDGPU_DOORBELL_MEC_RING7 << 2);
3687			}
3688			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3689			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3690					    DOORBELL_OFFSET, ring->doorbell_index);
3691			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3692			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3693			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3694			mqd->cp_hqd_pq_doorbell_control = tmp;
3695
3696		} else {
3697			mqd->cp_hqd_pq_doorbell_control = 0;
3698		}
3699		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3700		       mqd->cp_hqd_pq_doorbell_control);
 
 
 
 
 
 
 
 
 
 
 
 
3701
3702		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3703		ring->wptr = 0;
3704		mqd->cp_hqd_pq_wptr = ring->wptr;
3705		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3706		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3707
3708		/* set the vmid for the queue */
3709		mqd->cp_hqd_vmid = 0;
3710		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3711
3712		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3713		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3714		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3715		mqd->cp_hqd_persistent_state = tmp;
3716		if (adev->asic_type == CHIP_STONEY) {
3717			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3718			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3719			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3720		}
 
3721
3722		/* activate the queue */
3723		mqd->cp_hqd_active = 1;
3724		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 
 
3725
 
 
 
 
 
 
 
 
 
 
 
 
 
3726		vi_srbm_select(adev, 0, 0, 0, 0);
3727		mutex_unlock(&adev->srbm_mutex);
3728
3729		amdgpu_bo_kunmap(ring->mqd_obj);
3730		amdgpu_bo_unreserve(ring->mqd_obj);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3731	}
 
 
 
 
 
 
 
 
 
 
3732
3733	if (use_doorbell) {
3734		tmp = RREG32(mmCP_PQ_STATUS);
3735		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3736		WREG32(mmCP_PQ_STATUS, tmp);
 
 
 
 
3737	}
3738
 
 
 
 
 
 
 
 
 
 
 
 
3739	gfx_v8_0_cp_compute_enable(adev, true);
3740
3741	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3742		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3743
3744		ring->ready = true;
3745		r = amdgpu_ring_test_ring(ring);
 
 
 
 
 
 
 
 
3746		if (r)
3747			ring->ready = false;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3748	}
3749
3750	return 0;
3751}
3752
3753static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3754{
3755	int r;
3756
3757	if (!(adev->flags & AMD_IS_APU))
3758		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3759
3760	if (!adev->pp_enabled) {
3761		if (!adev->firmware.smu_load) {
3762			/* legacy firmware loading */
3763			r = gfx_v8_0_cp_gfx_load_microcode(adev);
3764			if (r)
3765				return r;
3766
3767			r = gfx_v8_0_cp_compute_load_microcode(adev);
3768			if (r)
3769				return r;
3770		} else {
3771			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3772							AMDGPU_UCODE_ID_CP_CE);
3773			if (r)
3774				return -EINVAL;
3775
3776			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3777							AMDGPU_UCODE_ID_CP_PFP);
3778			if (r)
3779				return -EINVAL;
3780
3781			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3782							AMDGPU_UCODE_ID_CP_ME);
3783			if (r)
3784				return -EINVAL;
3785
3786			if (adev->asic_type == CHIP_TOPAZ) {
3787				r = gfx_v8_0_cp_compute_load_microcode(adev);
3788				if (r)
3789					return r;
3790			} else {
3791				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3792										 AMDGPU_UCODE_ID_CP_MEC1);
3793				if (r)
3794					return -EINVAL;
3795			}
3796		}
3797	}
3798
3799	r = gfx_v8_0_cp_gfx_resume(adev);
3800	if (r)
3801		return r;
3802
3803	r = gfx_v8_0_cp_compute_resume(adev);
 
 
 
 
3804	if (r)
3805		return r;
3806
3807	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3808
3809	return 0;
3810}
3811
3812static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3813{
3814	gfx_v8_0_cp_gfx_enable(adev, enable);
3815	gfx_v8_0_cp_compute_enable(adev, enable);
3816}
3817
/**
 * gfx_v8_0_hw_init - bring up the gfx block
 *
 * @handle: the struct amdgpu_device, passed as an opaque pointer
 *
 * Applies the golden register settings, runs the gfx init, then resumes
 * the RLC followed by the CP.  The CP is not touched if the RLC fails.
 *
 * Returns 0 on success or the first error from the RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (!r)
		r = gfx_v8_0_cp_resume(adev);

	return r;
}
3837
3838static int gfx_v8_0_hw_fini(void *handle)
3839{
3840	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
3841
3842	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3843	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3844	gfx_v8_0_cp_enable(adev, false);
3845	gfx_v8_0_rlc_stop(adev);
3846	gfx_v8_0_cp_compute_fini(adev);
3847
3848	return 0;
3849}
3850
/**
 * gfx_v8_0_suspend - suspend the gfx block
 *
 * @handle: the struct amdgpu_device, passed as an opaque pointer
 *
 * Suspend is the same as a hardware teardown; returns whatever
 * gfx_v8_0_hw_fini() returns.
 */
static int gfx_v8_0_suspend(void *handle)
{
	/* hw_fini takes the same opaque handle, so forward it untouched */
	return gfx_v8_0_hw_fini(handle);
}
3857
/**
 * gfx_v8_0_resume - resume the gfx block
 *
 * @handle: the struct amdgpu_device, passed as an opaque pointer
 *
 * Resume is the same as a hardware init; returns whatever
 * gfx_v8_0_hw_init() returns.
 */
static int gfx_v8_0_resume(void *handle)
{
	/* hw_init takes the same opaque handle, so forward it untouched */
	return gfx_v8_0_hw_init(handle);
}
3864
3865static bool gfx_v8_0_is_idle(void *handle)
3866{
3867	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3868
3869	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3870		return false;
3871	else
3872		return true;
3873}
3874
3875static int gfx_v8_0_wait_for_idle(void *handle)
3876{
3877	unsigned i;
3878	u32 tmp;
3879	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3880
3881	for (i = 0; i < adev->usec_timeout; i++) {
3882		/* read MC_STATUS */
3883		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3884
3885		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3886			return 0;
 
3887		udelay(1);
3888	}
3889	return -ETIMEDOUT;
3890}
3891
3892static void gfx_v8_0_print_status(void *handle)
3893{
3894	int i;
3895	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3896
3897	dev_info(adev->dev, "GFX 8.x registers\n");
3898	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3899		 RREG32(mmGRBM_STATUS));
3900	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3901		 RREG32(mmGRBM_STATUS2));
3902	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3903		 RREG32(mmGRBM_STATUS_SE0));
3904	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3905		 RREG32(mmGRBM_STATUS_SE1));
3906	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3907		 RREG32(mmGRBM_STATUS_SE2));
3908	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3909		 RREG32(mmGRBM_STATUS_SE3));
3910	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3911	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3912		 RREG32(mmCP_STALLED_STAT1));
3913	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3914		 RREG32(mmCP_STALLED_STAT2));
3915	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3916		 RREG32(mmCP_STALLED_STAT3));
3917	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3918		 RREG32(mmCP_CPF_BUSY_STAT));
3919	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3920		 RREG32(mmCP_CPF_STALLED_STAT1));
3921	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3922	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3923	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3924		 RREG32(mmCP_CPC_STALLED_STAT1));
3925	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3926
3927	for (i = 0; i < 32; i++) {
3928		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3929			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3930	}
3931	for (i = 0; i < 16; i++) {
3932		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3933			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3934	}
3935	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3936		dev_info(adev->dev, "  se: %d\n", i);
3937		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3938		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3939			 RREG32(mmPA_SC_RASTER_CONFIG));
3940		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3941			 RREG32(mmPA_SC_RASTER_CONFIG_1));
3942	}
3943	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3944
3945	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3946		 RREG32(mmGB_ADDR_CONFIG));
3947	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3948		 RREG32(mmHDP_ADDR_CONFIG));
3949	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3950		 RREG32(mmDMIF_ADDR_CALC));
3951
3952	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
3953		 RREG32(mmCP_MEQ_THRESHOLDS));
3954	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
3955		 RREG32(mmSX_DEBUG_1));
3956	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
3957		 RREG32(mmTA_CNTL_AUX));
3958	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
3959		 RREG32(mmSPI_CONFIG_CNTL));
3960	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
3961		 RREG32(mmSQ_CONFIG));
3962	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
3963		 RREG32(mmDB_DEBUG));
3964	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
3965		 RREG32(mmDB_DEBUG2));
3966	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
3967		 RREG32(mmDB_DEBUG3));
3968	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
3969		 RREG32(mmCB_HW_CONTROL));
3970	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
3971		 RREG32(mmSPI_CONFIG_CNTL_1));
3972	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
3973		 RREG32(mmPA_SC_FIFO_SIZE));
3974	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
3975		 RREG32(mmVGT_NUM_INSTANCES));
3976	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
3977		 RREG32(mmCP_PERFMON_CNTL));
3978	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3979		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3980	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
3981		 RREG32(mmVGT_CACHE_INVALIDATION));
3982	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
3983		 RREG32(mmVGT_GS_VERTEX_REUSE));
3984	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3985		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3986	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
3987		 RREG32(mmPA_CL_ENHANCE));
3988	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
3989		 RREG32(mmPA_SC_ENHANCE));
3990
3991	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
3992		 RREG32(mmCP_ME_CNTL));
3993	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
3994		 RREG32(mmCP_MAX_CONTEXT));
3995	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
3996		 RREG32(mmCP_ENDIAN_SWAP));
3997	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
3998		 RREG32(mmCP_DEVICE_ID));
3999
4000	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4001		 RREG32(mmCP_SEM_WAIT_TIMER));
4002
4003	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4004		 RREG32(mmCP_RB_WPTR_DELAY));
4005	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4006		 RREG32(mmCP_RB_VMID));
4007	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4008		 RREG32(mmCP_RB0_CNTL));
4009	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4010		 RREG32(mmCP_RB0_WPTR));
4011	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4012		 RREG32(mmCP_RB0_RPTR_ADDR));
4013	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4014		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4015	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4016		 RREG32(mmCP_RB0_CNTL));
4017	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4018		 RREG32(mmCP_RB0_BASE));
4019	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4020		 RREG32(mmCP_RB0_BASE_HI));
4021	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4022		 RREG32(mmCP_MEC_CNTL));
4023	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4024		 RREG32(mmCP_CPF_DEBUG));
4025
4026	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4027		 RREG32(mmSCRATCH_ADDR));
4028	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4029		 RREG32(mmSCRATCH_UMSK));
4030
4031	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4032		 RREG32(mmCP_INT_CNTL_RING0));
4033	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4034		 RREG32(mmRLC_LB_CNTL));
4035	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4036		 RREG32(mmRLC_CNTL));
4037	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4038		 RREG32(mmRLC_CGCG_CGLS_CTRL));
4039	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4040		 RREG32(mmRLC_LB_CNTR_INIT));
4041	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4042		 RREG32(mmRLC_LB_CNTR_MAX));
4043	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4044		 RREG32(mmRLC_LB_INIT_CU_MASK));
4045	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4046		 RREG32(mmRLC_LB_PARAMS));
4047	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4048		 RREG32(mmRLC_LB_CNTL));
4049	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4050		 RREG32(mmRLC_MC_CNTL));
4051	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4052		 RREG32(mmRLC_UCODE_CNTL));
4053
4054	mutex_lock(&adev->srbm_mutex);
4055	for (i = 0; i < 16; i++) {
4056		vi_srbm_select(adev, 0, 0, 0, i);
4057		dev_info(adev->dev, "  VM %d:\n", i);
4058		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4059			 RREG32(mmSH_MEM_CONFIG));
4060		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4061			 RREG32(mmSH_MEM_APE1_BASE));
4062		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4063			 RREG32(mmSH_MEM_APE1_LIMIT));
4064		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4065			 RREG32(mmSH_MEM_BASES));
 
 
 
 
 
4066	}
4067	vi_srbm_select(adev, 0, 0, 0, 0);
4068	mutex_unlock(&adev->srbm_mutex);
 
 
 
 
 
 
 
 
 
 
 
4069}
4070
4071static int gfx_v8_0_soft_reset(void *handle)
4072{
 
 
 
 
 
 
 
 
 
 
 
4073	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4074	u32 tmp;
4075	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4076
4077	/* GRBM_STATUS */
4078	tmp = RREG32(mmGRBM_STATUS);
4079	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4080		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4081		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4082		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4083		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4084		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
 
4085		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4086						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4087		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4088						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4089	}
4090
4091	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4092		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4093						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4094		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4095						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4096	}
4097
4098	/* GRBM_STATUS2 */
4099	tmp = RREG32(mmGRBM_STATUS2);
4100	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4101		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4102						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4103
 
 
 
 
 
 
 
 
 
 
 
 
 
4104	/* SRBM_STATUS */
4105	tmp = RREG32(mmSRBM_STATUS);
4106	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4107		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4108						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
 
 
 
4109
4110	if (grbm_soft_reset || srbm_soft_reset) {
4111		gfx_v8_0_print_status((void *)adev);
4112		/* stop the rlc */
4113		gfx_v8_0_rlc_stop(adev);
 
 
 
 
 
 
 
 
 
 
 
4114
 
 
 
 
 
 
 
 
 
 
 
4115		/* Disable GFX parsing/prefetching */
4116		gfx_v8_0_cp_gfx_enable(adev, false);
4117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4118		/* Disable MEC parsing/prefetching */
4119		gfx_v8_0_cp_compute_enable(adev, false);
 
4120
4121		if (grbm_soft_reset || srbm_soft_reset) {
4122			tmp = RREG32(mmGMCON_DEBUG);
4123			tmp = REG_SET_FIELD(tmp,
4124					    GMCON_DEBUG, GFX_STALL, 1);
4125			tmp = REG_SET_FIELD(tmp,
4126					    GMCON_DEBUG, GFX_CLEAR, 1);
4127			WREG32(mmGMCON_DEBUG, tmp);
4128
4129			udelay(50);
4130		}
 
 
 
4131
4132		if (grbm_soft_reset) {
4133			tmp = RREG32(mmGRBM_SOFT_RESET);
4134			tmp |= grbm_soft_reset;
4135			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4136			WREG32(mmGRBM_SOFT_RESET, tmp);
4137			tmp = RREG32(mmGRBM_SOFT_RESET);
4138
4139			udelay(50);
 
4140
4141			tmp &= ~grbm_soft_reset;
4142			WREG32(mmGRBM_SOFT_RESET, tmp);
4143			tmp = RREG32(mmGRBM_SOFT_RESET);
4144		}
 
 
 
4145
4146		if (srbm_soft_reset) {
4147			tmp = RREG32(mmSRBM_SOFT_RESET);
4148			tmp |= srbm_soft_reset;
4149			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4150			WREG32(mmSRBM_SOFT_RESET, tmp);
4151			tmp = RREG32(mmSRBM_SOFT_RESET);
4152
4153			udelay(50);
4154
4155			tmp &= ~srbm_soft_reset;
4156			WREG32(mmSRBM_SOFT_RESET, tmp);
4157			tmp = RREG32(mmSRBM_SOFT_RESET);
4158		}
4159
4160		if (grbm_soft_reset || srbm_soft_reset) {
4161			tmp = RREG32(mmGMCON_DEBUG);
4162			tmp = REG_SET_FIELD(tmp,
4163					    GMCON_DEBUG, GFX_STALL, 0);
4164			tmp = REG_SET_FIELD(tmp,
4165					    GMCON_DEBUG, GFX_CLEAR, 0);
4166			WREG32(mmGMCON_DEBUG, tmp);
4167		}
4168
4169		/* Wait a little for things to settle down */
4170		udelay(50);
4171		gfx_v8_0_print_status((void *)adev);
 
 
 
4172	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4173	return 0;
4174}
4175
4176/**
4177 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4178 *
4179 * @adev: amdgpu_device pointer
4180 *
4181 * Fetches a GPU clock counter snapshot.
4182 * Returns the 64 bit clock counter snapshot.
4183 */
4184uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4185{
4186	uint64_t clock;
4187
4188	mutex_lock(&adev->gfx.gpu_clock_mutex);
4189	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4190	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4191		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4192	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4193	return clock;
4194}
4195
4196static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4197					  uint32_t vmid,
4198					  uint32_t gds_base, uint32_t gds_size,
4199					  uint32_t gws_base, uint32_t gws_size,
4200					  uint32_t oa_base, uint32_t oa_size)
4201{
4202	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4203	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4204
4205	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4206	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4207
4208	oa_base = oa_base >> AMDGPU_OA_SHIFT;
4209	oa_size = oa_size >> AMDGPU_OA_SHIFT;
4210
4211	/* GDS Base */
4212	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4213	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4214				WRITE_DATA_DST_SEL(0)));
4215	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4216	amdgpu_ring_write(ring, 0);
4217	amdgpu_ring_write(ring, gds_base);
4218
4219	/* GDS Size */
4220	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4221	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4222				WRITE_DATA_DST_SEL(0)));
4223	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4224	amdgpu_ring_write(ring, 0);
4225	amdgpu_ring_write(ring, gds_size);
4226
4227	/* GWS */
4228	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4229	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4230				WRITE_DATA_DST_SEL(0)));
4231	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4232	amdgpu_ring_write(ring, 0);
4233	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4234
4235	/* OA */
4236	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4237	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4238				WRITE_DATA_DST_SEL(0)));
4239	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4240	amdgpu_ring_write(ring, 0);
4241	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4242}
4243
4244static int gfx_v8_0_early_init(void *handle)
4245{
4246	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4247
 
 
 
 
 
4248	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4249	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
 
 
4250	gfx_v8_0_set_ring_funcs(adev);
4251	gfx_v8_0_set_irq_funcs(adev);
4252	gfx_v8_0_set_gds_init(adev);
 
4253
4254	return 0;
4255}
4256
4257static int gfx_v8_0_late_init(void *handle)
4258{
4259	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4260	int r;
4261
4262	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4263	if (r)
4264		return r;
4265
4266	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4267	if (r)
4268		return r;
4269
4270	/* requires IBs so do in late init after IB pool is initialized */
4271	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4272	if (r)
4273		return r;
4274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4275	return 0;
4276}
4277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4278static int gfx_v8_0_set_powergating_state(void *handle,
4279					  enum amd_powergating_state state)
4280{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4281	return 0;
4282}
4283
4284static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
4285		uint32_t reg_addr, uint32_t cmd)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4286{
4287	uint32_t data;
4288
4289	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4290
4291	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4292	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4293
4294	data = RREG32(mmRLC_SERDES_WR_CTRL);
4295	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4296			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4297			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4298			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4299			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4300			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4301			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4302			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4303			RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4304			RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4305			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
 
 
 
 
 
 
 
 
 
 
 
4306	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4307			(cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4308			(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4309			(0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4310
4311	WREG32(mmRLC_SERDES_WR_CTRL, data);
4312}
4313
4314static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4315		bool enable)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4316{
4317	uint32_t temp, data;
4318
 
 
4319	/* It is disabled by HW by default */
4320	if (enable) {
4321		/* 1 - RLC memory Light sleep */
4322		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4323		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4324		if (temp != data)
4325			WREG32(mmRLC_MEM_SLP_CNTL, data);
4326
4327		/* 2 - CP memory Light sleep */
4328		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4329		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4330		if (temp != data)
4331			WREG32(mmCP_MEM_SLP_CNTL, data);
4332
4333		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
4334		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4335		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4336				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4337				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4338				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
 
 
 
 
 
4339
4340		if (temp != data)
4341			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4342
4343		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4344		gfx_v8_0_wait_for_rlc_serdes(adev);
4345
4346		/* 5 - clear mgcg override */
4347		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4348
4349		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
4350		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4351		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4352		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4353		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4354		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4355		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4356		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4357		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4358		if (temp != data)
4359			WREG32(mmCGTS_SM_CTRL_REG, data);
 
 
 
 
4360		udelay(50);
4361
4362		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4363		gfx_v8_0_wait_for_rlc_serdes(adev);
4364	} else {
4365		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4366		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4367		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4368				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4369				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4370				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4371		if (temp != data)
4372			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4373
4374		/* 2 - disable MGLS in RLC */
4375		data = RREG32(mmRLC_MEM_SLP_CNTL);
4376		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4377			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4378			WREG32(mmRLC_MEM_SLP_CNTL, data);
4379		}
4380
4381		/* 3 - disable MGLS in CP */
4382		data = RREG32(mmCP_MEM_SLP_CNTL);
4383		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4384			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4385			WREG32(mmCP_MEM_SLP_CNTL, data);
4386		}
4387
4388		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4389		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4390		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4391				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4392		if (temp != data)
4393			WREG32(mmCGTS_SM_CTRL_REG, data);
4394
4395		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4396		gfx_v8_0_wait_for_rlc_serdes(adev);
4397
4398		/* 6 - set mgcg override */
4399		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4400
4401		udelay(50);
4402
4403		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4404		gfx_v8_0_wait_for_rlc_serdes(adev);
4405	}
 
 
4406}
4407
4408static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4409		bool enable)
4410{
4411	uint32_t temp, temp1, data, data1;
4412
4413	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4414
4415	if (enable) {
4416		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4417		 * Cmp_busy/GFX_Idle interrupts
4418		 */
4419		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4420
 
4421		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4422		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4423		if (temp1 != data1)
4424			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4425
4426		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4427		gfx_v8_0_wait_for_rlc_serdes(adev);
4428
4429		/* 3 - clear cgcg override */
4430		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4431
4432		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4433		gfx_v8_0_wait_for_rlc_serdes(adev);
4434
4435		/* 4 - write cmd to set CGLS */
4436		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4437
4438		/* 5 - enable cgcg */
4439		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4440
4441		/* enable cgls*/
4442		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
 
4443
4444		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4445		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4446
4447		if (temp1 != data1)
4448			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
 
 
 
4449
4450		if (temp != data)
4451			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 
 
 
 
 
4452	} else {
4453		/* disable cntx_empty_int_enable & GFX Idle interrupt */
4454		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4455
4456		/* TEST CGCG */
4457		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4458		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4459				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4460		if (temp1 != data1)
4461			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4462
4463		/* read gfx register to wake up cgcg */
4464		RREG32(mmCB_CGTT_SCLK_CTRL);
4465		RREG32(mmCB_CGTT_SCLK_CTRL);
4466		RREG32(mmCB_CGTT_SCLK_CTRL);
4467		RREG32(mmCB_CGTT_SCLK_CTRL);
4468
4469		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4470		gfx_v8_0_wait_for_rlc_serdes(adev);
4471
4472		/* write cmd to Set CGCG Overrride */
4473		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4474
4475		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4476		gfx_v8_0_wait_for_rlc_serdes(adev);
4477
4478		/* write cmd to Clear CGLS */
4479		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4480
4481		/* disable cgcg, cgls should be disabled too. */
4482		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4483				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4484		if (temp != data)
4485			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 
 
4486	}
 
 
 
 
4487}
4488static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4489		bool enable)
4490{
4491	if (enable) {
4492		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4493		 * ===  MGCG + MGLS + TS(CG/LS) ===
4494		 */
4495		fiji_update_medium_grain_clock_gating(adev, enable);
4496		fiji_update_coarse_grain_clock_gating(adev, enable);
4497	} else {
4498		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4499		 * ===  CGCG + CGLS ===
4500		 */
4501		fiji_update_coarse_grain_clock_gating(adev, enable);
4502		fiji_update_medium_grain_clock_gating(adev, enable);
4503	}
4504	return 0;
4505}
4506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4507static int gfx_v8_0_set_clockgating_state(void *handle,
4508					  enum amd_clockgating_state state)
4509{
4510	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4511
 
 
 
4512	switch (adev->asic_type) {
4513	case CHIP_FIJI:
4514		fiji_update_gfx_clock_gating(adev,
4515				state == AMD_CG_STATE_GATE ? true : false);
 
 
 
 
 
 
 
 
 
 
 
4516		break;
4517	default:
4518		break;
4519	}
4520	return 0;
4521}
4522
4523static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4524{
4525	u32 rptr;
4526
4527	rptr = ring->adev->wb.wb[ring->rptr_offs];
4528
4529	return rptr;
4530}
4531
4532static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4533{
4534	struct amdgpu_device *adev = ring->adev;
4535	u32 wptr;
4536
4537	if (ring->use_doorbell)
4538		/* XXX check if swapping is necessary on BE */
4539		wptr = ring->adev->wb.wb[ring->wptr_offs];
4540	else
4541		wptr = RREG32(mmCP_RB0_WPTR);
4542
4543	return wptr;
4544}
4545
4546static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4547{
4548	struct amdgpu_device *adev = ring->adev;
4549
4550	if (ring->use_doorbell) {
4551		/* XXX check if swapping is necessary on BE */
4552		adev->wb.wb[ring->wptr_offs] = ring->wptr;
4553		WDOORBELL32(ring->doorbell_index, ring->wptr);
4554	} else {
4555		WREG32(mmCP_RB0_WPTR, ring->wptr);
4556		(void)RREG32(mmCP_RB0_WPTR);
4557	}
4558}
4559
4560static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4561{
4562	u32 ref_and_mask, reg_mem_engine;
4563
4564	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
 
4565		switch (ring->me) {
4566		case 1:
4567			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4568			break;
4569		case 2:
4570			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4571			break;
4572		default:
4573			return;
4574		}
4575		reg_mem_engine = 0;
4576	} else {
4577		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4578		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4579	}
4580
4581	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4582	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4583				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
4584				 reg_mem_engine));
4585	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4586	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4587	amdgpu_ring_write(ring, ref_and_mask);
4588	amdgpu_ring_write(ring, ref_and_mask);
4589	amdgpu_ring_write(ring, 0x20); /* poll interval */
4590}
4591
4592static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
4593{
4594	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4595	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4596				 WRITE_DATA_DST_SEL(0) |
4597				 WR_CONFIRM));
4598	amdgpu_ring_write(ring, mmHDP_DEBUG0);
4599	amdgpu_ring_write(ring, 0);
4600	amdgpu_ring_write(ring, 1);
4601
4602}
4603
4604static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4605				  struct amdgpu_ib *ib)
 
 
4606{
4607	bool need_ctx_switch = ring->current_ctx != ib->ctx;
4608	u32 header, control = 0;
4609	u32 next_rptr = ring->wptr + 5;
4610
4611	/* drop the CE preamble IB for the same context */
4612	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4613		return;
4614
4615	if (need_ctx_switch)
4616		next_rptr += 2;
4617
4618	next_rptr += 4;
4619	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4620	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4621	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4622	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4623	amdgpu_ring_write(ring, next_rptr);
4624
4625	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
4626	if (need_ctx_switch) {
4627		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4628		amdgpu_ring_write(ring, 0);
4629	}
4630
4631	if (ib->flags & AMDGPU_IB_FLAG_CE)
4632		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4633	else
4634		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4635
4636	control |= ib->length_dw | (ib->vm_id << 24);
 
 
 
 
 
 
 
4637
4638	amdgpu_ring_write(ring, header);
4639	amdgpu_ring_write(ring,
4640#ifdef __BIG_ENDIAN
4641			  (2 << 0) |
4642#endif
4643			  (ib->gpu_addr & 0xFFFFFFFC));
4644	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4645	amdgpu_ring_write(ring, control);
4646}
4647
4648static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4649				  struct amdgpu_ib *ib)
4650{
4651	u32 header, control = 0;
4652	u32 next_rptr = ring->wptr + 5;
4653
4654	control |= INDIRECT_BUFFER_VALID;
4655
4656	next_rptr += 4;
4657	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4658	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4659	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4660	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4661	amdgpu_ring_write(ring, next_rptr);
4662
4663	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4664
4665	control |= ib->length_dw | (ib->vm_id << 24);
 
 
 
 
 
4666
4667	amdgpu_ring_write(ring, header);
4668	amdgpu_ring_write(ring,
4669#ifdef __BIG_ENDIAN
4670					  (2 << 0) |
4671#endif
4672					  (ib->gpu_addr & 0xFFFFFFFC));
4673	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4674	amdgpu_ring_write(ring, control);
4675}
4676
4677static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4678					 u64 seq, unsigned flags)
4679{
4680	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4681	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
4682
4683	/* EVENT_WRITE_EOP - flush caches, send int */
 
 
4684	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4685	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4686				 EOP_TC_ACTION_EN |
 
4687				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4688				 EVENT_INDEX(5)));
4689	amdgpu_ring_write(ring, addr & 0xfffffffc);
4690	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4691			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4692	amdgpu_ring_write(ring, lower_32_bits(seq));
4693	amdgpu_ring_write(ring, upper_32_bits(seq));
4694
4695}
4696
4697static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4698{
4699	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4700	uint32_t seq = ring->fence_drv.sync_seq;
4701	uint64_t addr = ring->fence_drv.gpu_addr;
4702
4703	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4704	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4705				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
4706				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
4707	amdgpu_ring_write(ring, addr & 0xfffffffc);
4708	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4709	amdgpu_ring_write(ring, seq);
4710	amdgpu_ring_write(ring, 0xffffffff);
4711	amdgpu_ring_write(ring, 4); /* poll interval */
4712
4713	if (usepfp) {
4714		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
4715		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4716		amdgpu_ring_write(ring, 0);
4717		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4718		amdgpu_ring_write(ring, 0);
4719	}
4720}
4721
4722static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4723					unsigned vm_id, uint64_t pd_addr)
4724{
4725	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4726
4727	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4728	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4729				 WRITE_DATA_DST_SEL(0)) |
4730				 WR_CONFIRM);
4731	if (vm_id < 8) {
4732		amdgpu_ring_write(ring,
4733				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4734	} else {
4735		amdgpu_ring_write(ring,
4736				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4737	}
4738	amdgpu_ring_write(ring, 0);
4739	amdgpu_ring_write(ring, pd_addr >> 12);
4740
4741	/* bits 0-15 are the VM contexts0-15 */
4742	/* invalidate the cache */
4743	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4744	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4745				 WRITE_DATA_DST_SEL(0)));
4746	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4747	amdgpu_ring_write(ring, 0);
4748	amdgpu_ring_write(ring, 1 << vm_id);
4749
4750	/* wait for the invalidate to complete */
4751	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4752	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4753				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
4754				 WAIT_REG_MEM_ENGINE(0))); /* me */
4755	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4756	amdgpu_ring_write(ring, 0);
4757	amdgpu_ring_write(ring, 0); /* ref */
4758	amdgpu_ring_write(ring, 0); /* mask */
4759	amdgpu_ring_write(ring, 0x20); /* poll interval */
4760
4761	/* compute doesn't have PFP */
4762	if (usepfp) {
4763		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4764		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4765		amdgpu_ring_write(ring, 0x0);
4766		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4767		amdgpu_ring_write(ring, 0);
4768		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4769		amdgpu_ring_write(ring, 0);
4770	}
4771}
4772
4773static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4774{
4775	return ring->adev->wb.wb[ring->rptr_offs];
4776}
4777
4778static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4779{
4780	return ring->adev->wb.wb[ring->wptr_offs];
4781}
4782
4783static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4784{
4785	struct amdgpu_device *adev = ring->adev;
4786
4787	/* XXX check if swapping is necessary on BE */
4788	adev->wb.wb[ring->wptr_offs] = ring->wptr;
4789	WDOORBELL32(ring->doorbell_index, ring->wptr);
4790}
4791
4792static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4793					     u64 addr, u64 seq,
4794					     unsigned flags)
4795{
4796	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4797	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4798
4799	/* RELEASE_MEM - flush caches, send int */
4800	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4801	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4802				 EOP_TC_ACTION_EN |
4803				 EOP_TC_WB_ACTION_EN |
4804				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4805				 EVENT_INDEX(5)));
4806	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4807	amdgpu_ring_write(ring, addr & 0xfffffffc);
4808	amdgpu_ring_write(ring, upper_32_bits(addr));
4809	amdgpu_ring_write(ring, lower_32_bits(seq));
4810	amdgpu_ring_write(ring, upper_32_bits(seq));
4811}
4812
4813static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4814						 enum amdgpu_interrupt_state state)
4815{
4816	u32 cp_int_cntl;
 
4817
4818	switch (state) {
4819	case AMDGPU_IRQ_STATE_DISABLE:
4820		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4821		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4822					    TIME_STAMP_INT_ENABLE, 0);
4823		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4824		break;
4825	case AMDGPU_IRQ_STATE_ENABLE:
4826		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4827		cp_int_cntl =
4828			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4829				      TIME_STAMP_INT_ENABLE, 1);
4830		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4831		break;
4832	default:
 
4833		break;
4834	}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4835}
4836
4837static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4838						     int me, int pipe,
4839						     enum amdgpu_interrupt_state state)
4840{
4841	u32 mec_int_cntl, mec_int_cntl_reg;
4842
4843	/*
4844	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4845	 * handles the setting of interrupts for this specific pipe. All other
4846	 * pipes' interrupts are set by amdkfd.
4847	 */
4848
4849	if (me == 1) {
4850		switch (pipe) {
4851		case 0:
4852			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4853			break;
 
 
 
 
 
 
 
 
 
4854		default:
4855			DRM_DEBUG("invalid pipe %d\n", pipe);
4856			return;
4857		}
4858	} else {
4859		DRM_DEBUG("invalid me %d\n", me);
4860		return;
4861	}
4862
4863	switch (state) {
4864	case AMDGPU_IRQ_STATE_DISABLE:
4865		mec_int_cntl = RREG32(mec_int_cntl_reg);
4866		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4867					     TIME_STAMP_INT_ENABLE, 0);
4868		WREG32(mec_int_cntl_reg, mec_int_cntl);
4869		break;
4870	case AMDGPU_IRQ_STATE_ENABLE:
4871		mec_int_cntl = RREG32(mec_int_cntl_reg);
4872		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4873					     TIME_STAMP_INT_ENABLE, 1);
4874		WREG32(mec_int_cntl_reg, mec_int_cntl);
4875		break;
4876	default:
4877		break;
4878	}
4879}
4880
4881static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4882					     struct amdgpu_irq_src *source,
4883					     unsigned type,
4884					     enum amdgpu_interrupt_state state)
4885{
4886	u32 cp_int_cntl;
4887
4888	switch (state) {
4889	case AMDGPU_IRQ_STATE_DISABLE:
4890		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4891		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4892					    PRIV_REG_INT_ENABLE, 0);
4893		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4894		break;
4895	case AMDGPU_IRQ_STATE_ENABLE:
4896		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4897		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4898					    PRIV_REG_INT_ENABLE, 1);
4899		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4900		break;
4901	default:
4902		break;
4903	}
4904
4905	return 0;
4906}
4907
4908static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4909					      struct amdgpu_irq_src *source,
4910					      unsigned type,
4911					      enum amdgpu_interrupt_state state)
4912{
4913	u32 cp_int_cntl;
4914
4915	switch (state) {
4916	case AMDGPU_IRQ_STATE_DISABLE:
4917		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4918		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4919					    PRIV_INSTR_INT_ENABLE, 0);
4920		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4921		break;
4922	case AMDGPU_IRQ_STATE_ENABLE:
4923		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4924		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4925					    PRIV_INSTR_INT_ENABLE, 1);
4926		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4927		break;
4928	default:
4929		break;
4930	}
4931
4932	return 0;
4933}
4934
4935static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4936					    struct amdgpu_irq_src *src,
4937					    unsigned type,
4938					    enum amdgpu_interrupt_state state)
4939{
4940	switch (type) {
4941	case AMDGPU_CP_IRQ_GFX_EOP:
4942		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4943		break;
4944	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4945		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4946		break;
4947	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4948		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4949		break;
4950	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4951		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4952		break;
4953	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4954		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4955		break;
4956	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4957		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4958		break;
4959	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4960		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4961		break;
4962	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4963		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4964		break;
4965	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4966		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4967		break;
4968	default:
4969		break;
4970	}
4971	return 0;
4972}
4973
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4974static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4975			    struct amdgpu_irq_src *source,
4976			    struct amdgpu_iv_entry *entry)
4977{
4978	int i;
4979	u8 me_id, pipe_id, queue_id;
4980	struct amdgpu_ring *ring;
4981
4982	DRM_DEBUG("IH: CP EOP\n");
4983	me_id = (entry->ring_id & 0x0c) >> 2;
4984	pipe_id = (entry->ring_id & 0x03) >> 0;
4985	queue_id = (entry->ring_id & 0x70) >> 4;
4986
4987	switch (me_id) {
4988	case 0:
4989		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4990		break;
4991	case 1:
4992	case 2:
4993		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4994			ring = &adev->gfx.compute_ring[i];
4995			/* Per-queue interrupt is supported for MEC starting from VI.
4996			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
4997			  */
4998			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4999				amdgpu_fence_process(ring);
5000		}
5001		break;
5002	}
5003	return 0;
5004}
5005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5006static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5007				 struct amdgpu_irq_src *source,
5008				 struct amdgpu_iv_entry *entry)
5009{
5010	DRM_ERROR("Illegal register access in command stream\n");
5011	schedule_work(&adev->reset_work);
5012	return 0;
5013}
5014
5015static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5016				  struct amdgpu_irq_src *source,
5017				  struct amdgpu_iv_entry *entry)
5018{
5019	DRM_ERROR("Illegal instruction in command stream\n");
5020	schedule_work(&adev->reset_work);
5021	return 0;
5022}
5023
5024const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5025	.early_init = gfx_v8_0_early_init,
5026	.late_init = gfx_v8_0_late_init,
5027	.sw_init = gfx_v8_0_sw_init,
5028	.sw_fini = gfx_v8_0_sw_fini,
5029	.hw_init = gfx_v8_0_hw_init,
5030	.hw_fini = gfx_v8_0_hw_fini,
5031	.suspend = gfx_v8_0_suspend,
5032	.resume = gfx_v8_0_resume,
5033	.is_idle = gfx_v8_0_is_idle,
5034	.wait_for_idle = gfx_v8_0_wait_for_idle,
 
 
5035	.soft_reset = gfx_v8_0_soft_reset,
5036	.print_status = gfx_v8_0_print_status,
5037	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
5038	.set_powergating_state = gfx_v8_0_set_powergating_state,
 
5039};
5040
5041static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5042	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
 
 
 
 
5043	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5044	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5045	.parse_cs = NULL,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5046	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5047	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5048	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5049	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5050	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5051	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5052	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5053	.test_ring = gfx_v8_0_ring_test_ring,
5054	.test_ib = gfx_v8_0_ring_test_ib,
5055	.insert_nop = amdgpu_ring_insert_nop,
5056	.pad_ib = amdgpu_ring_generic_pad_ib,
 
 
 
 
 
 
 
5057};
5058
5059static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5060	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
 
 
 
 
5061	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
5062	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
5063	.parse_cs = NULL,
 
 
 
 
 
 
 
 
 
 
5064	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
5065	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
5066	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5067	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5068	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5069	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5070	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5071	.test_ring = gfx_v8_0_ring_test_ring,
5072	.test_ib = gfx_v8_0_ring_test_ib,
5073	.insert_nop = amdgpu_ring_insert_nop,
5074	.pad_ib = amdgpu_ring_generic_pad_ib,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5075};
5076
5077static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5078{
5079	int i;
5080
 
 
5081	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5082		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5083
5084	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5085		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5086}
5087
5088static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5089	.set = gfx_v8_0_set_eop_interrupt_state,
5090	.process = gfx_v8_0_eop_irq,
5091};
5092
5093static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5094	.set = gfx_v8_0_set_priv_reg_fault_state,
5095	.process = gfx_v8_0_priv_reg_irq,
5096};
5097
5098static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5099	.set = gfx_v8_0_set_priv_inst_fault_state,
5100	.process = gfx_v8_0_priv_inst_irq,
5101};
5102
 
 
 
 
 
 
 
 
 
 
5103static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5104{
5105	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5106	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5107
5108	adev->gfx.priv_reg_irq.num_types = 1;
5109	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5110
5111	adev->gfx.priv_inst_irq.num_types = 1;
5112	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
 
 
 
 
 
 
 
 
 
 
 
5113}
5114
5115static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5116{
5117	/* init asci gds info */
5118	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5119	adev->gds.gws.total_size = 64;
5120	adev->gds.oa.total_size = 16;
5121
5122	if (adev->gds.mem.total_size == 64 * 1024) {
5123		adev->gds.mem.gfx_partition_size = 4096;
5124		adev->gds.mem.cs_partition_size = 4096;
5125
5126		adev->gds.gws.gfx_partition_size = 4;
5127		adev->gds.gws.cs_partition_size = 4;
 
 
5128
5129		adev->gds.oa.gfx_partition_size = 4;
5130		adev->gds.oa.cs_partition_size = 1;
5131	} else {
5132		adev->gds.mem.gfx_partition_size = 1024;
5133		adev->gds.mem.cs_partition_size = 1024;
5134
5135		adev->gds.gws.gfx_partition_size = 16;
5136		adev->gds.gws.cs_partition_size = 16;
5137
5138		adev->gds.oa.gfx_partition_size = 4;
5139		adev->gds.oa.cs_partition_size = 4;
5140	}
5141}
5142
5143static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5144{
5145	u32 data, mask;
5146
5147	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5148	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5149
5150	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5151	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5152
5153	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5154
5155	return (~data) & mask;
5156}
5157
5158int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5159			 struct amdgpu_cu_info *cu_info)
5160{
5161	int i, j, k, counter, active_cu_number = 0;
5162	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5163
5164	if (!adev || !cu_info)
5165		return -EINVAL;
5166
5167	memset(cu_info, 0, sizeof(*cu_info));
5168
 
 
 
 
 
 
 
5169	mutex_lock(&adev->grbm_idx_mutex);
5170	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5171		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5172			mask = 1;
5173			ao_bitmap = 0;
5174			counter = 0;
5175			gfx_v8_0_select_se_sh(adev, i, j);
 
 
 
5176			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5177			cu_info->bitmap[i][j] = bitmap;
5178
5179			for (k = 0; k < 16; k ++) {
5180				if (bitmap & mask) {
5181					if (counter < 2)
5182						ao_bitmap |= mask;
5183					counter ++;
5184				}
5185				mask <<= 1;
5186			}
5187			active_cu_number += counter;
5188			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 
 
5189		}
5190	}
5191	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5192	mutex_unlock(&adev->grbm_idx_mutex);
5193
5194	cu_info->number = active_cu_number;
5195	cu_info->ao_cu_mask = ao_cu_mask;
 
 
 
 
 
 
5196
5197	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5198}
v6.13.7
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "amdgpu_ring.h"
  33#include "vi.h"
  34#include "vi_structs.h"
  35#include "vid.h"
  36#include "amdgpu_ucode.h"
  37#include "amdgpu_atombios.h"
  38#include "atombios_i2c.h"
  39#include "clearstate_vi.h"
  40
  41#include "gmc/gmc_8_2_d.h"
  42#include "gmc/gmc_8_2_sh_mask.h"
  43
  44#include "oss/oss_3_0_d.h"
  45#include "oss/oss_3_0_sh_mask.h"
  46
  47#include "bif/bif_5_0_d.h"
  48#include "bif/bif_5_0_sh_mask.h"
 
  49#include "gca/gfx_8_0_d.h"
  50#include "gca/gfx_8_0_enum.h"
  51#include "gca/gfx_8_0_sh_mask.h"
 
  52
  53#include "dce/dce_10_0_d.h"
  54#include "dce/dce_10_0_sh_mask.h"
  55
  56#include "smu/smu_7_1_3_d.h"
  57
  58#include "ivsrcid/ivsrcid_vislands30.h"
  59
  60#define GFX8_NUM_GFX_RINGS     1
  61#define GFX8_MEC_HPD_SIZE 4096
  62
  63#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
  64#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
  65#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
  66#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
  67
  68#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
  69#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
  70#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
  71#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
  72#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
  73#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
  74#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
  75#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
  76#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
  77
  78#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
  79#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
  80#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
  81#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
  82#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
  83#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
  84
  85/* BPM SERDES CMD */
  86#define SET_BPM_SERDES_CMD    1
  87#define CLE_BPM_SERDES_CMD    0
  88
  89/* BPM Register Address*/
  90enum {
  91	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
  92	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
  93	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
  94	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
  95	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
  96	BPM_REG_FGCG_MAX
  97};
  98
  99#define RLC_FormatDirectRegListLength        14
 100
 101MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
 102MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
 103MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
 104MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
 105MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
 106MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
 107
 108MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
 109MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
 110MODULE_FIRMWARE("amdgpu/stoney_me.bin");
 111MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
 112MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
 113
 114MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
 115MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
 116MODULE_FIRMWARE("amdgpu/tonga_me.bin");
 117MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
 118MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
 119MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
 122MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/topaz_me.bin");
 124MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
 125MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
 126
 127MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
 128MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
 129MODULE_FIRMWARE("amdgpu/fiji_me.bin");
 130MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
 131MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 132MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 133
 134MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 135MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 136MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
 137MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
 138MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
 139MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
 140MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
 141MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
 142MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
 143MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 144MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 145
 146MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
 147MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
 148MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
 149MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
 150MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
 151MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
 152MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
 153MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
 154MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
 155MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
 156MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
 157
 158MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 159MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 160MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
 161MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
 162MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
 163MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
 164MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
 165MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
 166MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
 167MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 168MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 169
 170MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
 171MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
 172MODULE_FIRMWARE("amdgpu/vegam_me.bin");
 173MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
 174MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
 175MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
 176
 177static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 178{
 179	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
 180	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
 181	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
 182	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
 183	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
 184	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
 185	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
 186	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
 187	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
 188	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
 189	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
 190	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
 191	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
 192	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
 193	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
 194	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
 195};
 196
 197static const u32 golden_settings_tonga_a11[] =
 198{
 199	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
 200	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 201	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 202	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 203	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 204	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
 205	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 206	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 207	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 208	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 209	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 210	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 211	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
 212	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
 213	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
 214	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 215};
 216
 217static const u32 tonga_golden_common_all[] =
 218{
 219	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 220	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 221	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 222	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 223	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 224	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 225	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 226	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 227};
 228
 229static const u32 tonga_mgcg_cgcg_init[] =
 230{
 231	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 232	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 233	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 234	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 235	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 236	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 237	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 238	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 239	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 240	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 241	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 242	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 243	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 244	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 245	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 246	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 247	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 248	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 249	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 250	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 251	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 252	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 253	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 254	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 255	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 256	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 257	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 258	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 259	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 260	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 261	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 262	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 263	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 264	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 265	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 266	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 267	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 268	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 269	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 270	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 271	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 272	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 273	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 274	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 275	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 276	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 277	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 278	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 279	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 280	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 281	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 282	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 283	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 284	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 285	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 286	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 287	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 288	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 289	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 290	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 291	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 292	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 293	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 294	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 295	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 296	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 297	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 298	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 299	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 300	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 301	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 302	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 303	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 304	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 305	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 306};
 307
 308static const u32 golden_settings_vegam_a11[] =
 309{
 310	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 311	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
 312	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 313	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 314	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 315	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 316	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
 317	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
 318	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 319	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 320	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 321	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 322	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 323	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 324	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 325	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
 326	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 327};
 328
 329static const u32 vegam_golden_common_all[] =
 330{
 331	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 332	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 333	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 334	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 335	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 336	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 337};
 338
 339static const u32 golden_settings_polaris11_a11[] =
 340{
 341	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
 342	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 343	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 344	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 345	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 346	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 347	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 348	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 349	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 350	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 351	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 352	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 353	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 354	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 355	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 356	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
 357	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 358};
 359
 360static const u32 polaris11_golden_common_all[] =
 361{
 362	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 363	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
 364	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 365	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 366	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 367	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 368};
 369
 370static const u32 golden_settings_polaris10_a11[] =
 371{
 372	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
 373	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
 374	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 375	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 376	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 377	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 378	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 379	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
 380	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
 381	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 382	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
 383	mmSQ_CONFIG, 0x07f80000, 0x07180000,
 384	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 385	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 386	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 387	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 388	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 389};
 390
 391static const u32 polaris10_golden_common_all[] =
 392{
 393	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 394	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
 395	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
 396	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 397	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 398	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 399	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 400	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 401};
 402
 403static const u32 fiji_golden_common_all[] =
 404{
 405	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 406	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 407	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
 408	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 409	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 410	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 411	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 412	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 413	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 414	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 415};
 416
 417static const u32 golden_settings_fiji_a10[] =
 418{
 419	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 420	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 421	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 422	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 423	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 424	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 425	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 426	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 427	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 428	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
 429	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 430};
 431
 432static const u32 fiji_mgcg_cgcg_init[] =
 433{
 434	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 435	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 436	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 437	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 438	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 439	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 440	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
 441	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 442	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 443	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 444	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 445	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 446	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 447	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 448	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 449	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 450	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 451	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 452	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 453	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 454	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 455	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 456	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 457	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 458	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 459	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 460	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 461	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 462	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 463	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 464	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 465	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 466	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 467	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 468	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 469};
 470
 471static const u32 golden_settings_iceland_a11[] =
 472{
 473	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 474	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 475	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
 476	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 477	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 478	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 479	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 480	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 481	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 482	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 483	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 484	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 485	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 486	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
 487	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 488	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
 489};
 490
 491static const u32 iceland_golden_common_all[] =
 492{
 493	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 494	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 495	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 496	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 497	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 498	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 499	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 500	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 501};
 502
 503static const u32 iceland_mgcg_cgcg_init[] =
 504{
 505	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 506	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 507	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 508	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 509	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
 510	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
 511	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
 512	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 513	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 514	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 515	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 516	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 517	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 518	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 519	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 520	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 521	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 522	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 523	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 524	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 525	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 526	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 527	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
 528	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 529	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 530	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 531	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 532	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 533	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 534	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 535	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 536	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 537	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 538	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 539	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 540	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 541	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 542	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 543	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 544	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 545	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 546	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 547	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 548	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 549	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 550	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 551	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 552	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 553	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 554	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 555	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 556	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 557	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 558	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
 559	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 560	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 561	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 562	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 563	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 564	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 565	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 566	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 567	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 568	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
 569};
 570
 571static const u32 cz_golden_settings_a11[] =
 572{
 573	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
 574	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 575	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 576	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 577	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 578	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 579	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 580	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
 581	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 582	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 583	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 584	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
 585};
 586
 587static const u32 cz_golden_common_all[] =
 588{
 589	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 590	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
 591	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 592	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
 593	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 594	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 595	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 596	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
 597};
 598
 599static const u32 cz_mgcg_cgcg_init[] =
 600{
 601	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 602	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 603	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 604	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 605	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 606	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 607	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
 608	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 609	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 610	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
 611	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
 612	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
 613	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
 614	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
 615	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
 616	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
 617	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
 618	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
 619	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
 620	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
 621	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
 622	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
 623	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
 624	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
 625	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
 626	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
 627	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
 628	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 629	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 630	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
 631	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 632	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 633	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 634	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 635	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 636	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 637	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 638	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 639	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
 640	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 641	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 642	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 643	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 644	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
 645	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 646	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 647	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 648	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 649	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
 650	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 651	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 652	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 653	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 654	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
 655	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 656	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 657	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 658	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 659	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
 660	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 661	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 662	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 663	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 664	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
 665	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 666	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 667	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
 668	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
 669	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
 670	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
 671	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
 672	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 673	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 674	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 675	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 676};
 677
 678static const u32 stoney_golden_settings_a11[] =
 679{
 680	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 681	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 682	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
 683	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 684	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 685	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 686	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 687	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 688	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
 689	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
 690};
 691
 692static const u32 stoney_golden_common_all[] =
 693{
 694	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 695	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
 696	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
 697	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
 698	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 699	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 700	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
 701	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
 702};
 703
 704static const u32 stoney_mgcg_cgcg_init[] =
 705{
 706	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 707	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
 708	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 709	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
 710	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
 711};
 712
 713
 714static const char * const sq_edc_source_names[] = {
 715	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
 716	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
 717	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
 718	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
 719	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
 720	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
 721	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
 722};
 723
 724static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
 725static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
 726static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
 727static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
 728static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
 729static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
 730static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
 731static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 732
 733#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
 734#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L
 735
 736static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 737{
 738	uint32_t data;
 739
 740	switch (adev->asic_type) {
 741	case CHIP_TOPAZ:
 742		amdgpu_device_program_register_sequence(adev,
 743							iceland_mgcg_cgcg_init,
 744							ARRAY_SIZE(iceland_mgcg_cgcg_init));
 745		amdgpu_device_program_register_sequence(adev,
 746							golden_settings_iceland_a11,
 747							ARRAY_SIZE(golden_settings_iceland_a11));
 748		amdgpu_device_program_register_sequence(adev,
 749							iceland_golden_common_all,
 750							ARRAY_SIZE(iceland_golden_common_all));
 751		break;
 752	case CHIP_FIJI:
 753		amdgpu_device_program_register_sequence(adev,
 754							fiji_mgcg_cgcg_init,
 755							ARRAY_SIZE(fiji_mgcg_cgcg_init));
 756		amdgpu_device_program_register_sequence(adev,
 757							golden_settings_fiji_a10,
 758							ARRAY_SIZE(golden_settings_fiji_a10));
 759		amdgpu_device_program_register_sequence(adev,
 760							fiji_golden_common_all,
 761							ARRAY_SIZE(fiji_golden_common_all));
 762		break;
 763
 764	case CHIP_TONGA:
 765		amdgpu_device_program_register_sequence(adev,
 766							tonga_mgcg_cgcg_init,
 767							ARRAY_SIZE(tonga_mgcg_cgcg_init));
 768		amdgpu_device_program_register_sequence(adev,
 769							golden_settings_tonga_a11,
 770							ARRAY_SIZE(golden_settings_tonga_a11));
 771		amdgpu_device_program_register_sequence(adev,
 772							tonga_golden_common_all,
 773							ARRAY_SIZE(tonga_golden_common_all));
 774		break;
 775	case CHIP_VEGAM:
 776		amdgpu_device_program_register_sequence(adev,
 777							golden_settings_vegam_a11,
 778							ARRAY_SIZE(golden_settings_vegam_a11));
 779		amdgpu_device_program_register_sequence(adev,
 780							vegam_golden_common_all,
 781							ARRAY_SIZE(vegam_golden_common_all));
 782		break;
 783	case CHIP_POLARIS11:
 784	case CHIP_POLARIS12:
 785		amdgpu_device_program_register_sequence(adev,
 786							golden_settings_polaris11_a11,
 787							ARRAY_SIZE(golden_settings_polaris11_a11));
 788		amdgpu_device_program_register_sequence(adev,
 789							polaris11_golden_common_all,
 790							ARRAY_SIZE(polaris11_golden_common_all));
 791		break;
 792	case CHIP_POLARIS10:
 793		amdgpu_device_program_register_sequence(adev,
 794							golden_settings_polaris10_a11,
 795							ARRAY_SIZE(golden_settings_polaris10_a11));
 796		amdgpu_device_program_register_sequence(adev,
 797							polaris10_golden_common_all,
 798							ARRAY_SIZE(polaris10_golden_common_all));
 799		data = RREG32_SMC(ixCG_ACLK_CNTL);
 800		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
 801		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
 802		WREG32_SMC(ixCG_ACLK_CNTL, data);
 803		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
 804		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
 805		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
 806		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
 807			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
 808			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
 809		}
 810		break;
 811	case CHIP_CARRIZO:
 812		amdgpu_device_program_register_sequence(adev,
 813							cz_mgcg_cgcg_init,
 814							ARRAY_SIZE(cz_mgcg_cgcg_init));
 815		amdgpu_device_program_register_sequence(adev,
 816							cz_golden_settings_a11,
 817							ARRAY_SIZE(cz_golden_settings_a11));
 818		amdgpu_device_program_register_sequence(adev,
 819							cz_golden_common_all,
 820							ARRAY_SIZE(cz_golden_common_all));
 821		break;
 822	case CHIP_STONEY:
 823		amdgpu_device_program_register_sequence(adev,
 824							stoney_mgcg_cgcg_init,
 825							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 826		amdgpu_device_program_register_sequence(adev,
 827							stoney_golden_settings_a11,
 828							ARRAY_SIZE(stoney_golden_settings_a11));
 829		amdgpu_device_program_register_sequence(adev,
 830							stoney_golden_common_all,
 831							ARRAY_SIZE(stoney_golden_common_all));
 832		break;
 833	default:
 834		break;
 835	}
 836}
 837
 
 
 
 
 
 
 
 
 
 
 
 
 838static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 839{
 840	struct amdgpu_device *adev = ring->adev;
 
 841	uint32_t tmp = 0;
 842	unsigned i;
 843	int r;
 844
 845	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
 
 
 
 
 
 846	r = amdgpu_ring_alloc(ring, 3);
 847	if (r)
 
 
 
 848		return r;
 849
 850	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 851	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
 852	amdgpu_ring_write(ring, 0xDEADBEEF);
 853	amdgpu_ring_commit(ring);
 854
 855	for (i = 0; i < adev->usec_timeout; i++) {
 856		tmp = RREG32(mmSCRATCH_REG0);
 857		if (tmp == 0xDEADBEEF)
 858			break;
 859		udelay(1);
 
 
 
 
 
 
 
 
 860	}
 861
 862	if (i >= adev->usec_timeout)
 863		r = -ETIMEDOUT;
 864
 865	return r;
 866}
 867
 868static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 869{
 870	struct amdgpu_device *adev = ring->adev;
 871	struct amdgpu_ib ib;
 872	struct dma_fence *f = NULL;
 
 
 
 
 873
 874	unsigned int index;
 875	uint64_t gpu_addr;
 876	uint32_t tmp;
 877	long r;
 878
 879	r = amdgpu_device_wb_get(adev, &index);
 880	if (r)
 881		return r;
 882
 883	gpu_addr = adev->wb.gpu_addr + (index * 4);
 884	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 885	memset(&ib, 0, sizeof(ib));
 886
 887	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
 888	if (r)
 889		goto err1;
 890
 891	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 892	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 893	ib.ptr[2] = lower_32_bits(gpu_addr);
 894	ib.ptr[3] = upper_32_bits(gpu_addr);
 895	ib.ptr[4] = 0xDEADBEEF;
 896	ib.length_dw = 5;
 897
 898	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 899	if (r)
 900		goto err2;
 901
 902	r = dma_fence_wait_timeout(f, false, timeout);
 903	if (r == 0) {
 904		r = -ETIMEDOUT;
 905		goto err2;
 906	} else if (r < 0) {
 
 
 
 
 
 
 
 
 
 907		goto err2;
 
 
 
 
 908	}
 909
 910	tmp = adev->wb.wb[index];
 911	if (tmp == 0xDEADBEEF)
 912		r = 0;
 913	else
 914		r = -EINVAL;
 915
 916err2:
 
 917	amdgpu_ib_free(adev, &ib, NULL);
 918	dma_fence_put(f);
 919err1:
 920	amdgpu_device_wb_free(adev, index);
 921	return r;
 922}
 923
 924
 925static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
 926{
 927	amdgpu_ucode_release(&adev->gfx.pfp_fw);
 928	amdgpu_ucode_release(&adev->gfx.me_fw);
 929	amdgpu_ucode_release(&adev->gfx.ce_fw);
 930	amdgpu_ucode_release(&adev->gfx.rlc_fw);
 931	amdgpu_ucode_release(&adev->gfx.mec_fw);
 932	if ((adev->asic_type != CHIP_STONEY) &&
 933	    (adev->asic_type != CHIP_TOPAZ))
 934		amdgpu_ucode_release(&adev->gfx.mec2_fw);
 935
 936	kfree(adev->gfx.rlc.register_list_format);
 937}
 938
 939static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 940{
 941	const char *chip_name;
 
 942	int err;
 943	struct amdgpu_firmware_info *info = NULL;
 944	const struct common_firmware_header *header = NULL;
 945	const struct gfx_firmware_header_v1_0 *cp_hdr;
 946	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 947	unsigned int *tmp = NULL, i;
 948
 949	DRM_DEBUG("\n");
 950
 951	switch (adev->asic_type) {
 952	case CHIP_TOPAZ:
 953		chip_name = "topaz";
 954		break;
 955	case CHIP_TONGA:
 956		chip_name = "tonga";
 957		break;
 958	case CHIP_CARRIZO:
 959		chip_name = "carrizo";
 960		break;
 961	case CHIP_FIJI:
 962		chip_name = "fiji";
 963		break;
 964	case CHIP_STONEY:
 965		chip_name = "stoney";
 966		break;
 967	case CHIP_POLARIS10:
 968		chip_name = "polaris10";
 969		break;
 970	case CHIP_POLARIS11:
 971		chip_name = "polaris11";
 972		break;
 973	case CHIP_POLARIS12:
 974		chip_name = "polaris12";
 975		break;
 976	case CHIP_VEGAM:
 977		chip_name = "vegam";
 978		break;
 979	default:
 980		BUG();
 981	}
 982
 983	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
 984		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
 985					   "amdgpu/%s_pfp_2.bin", chip_name);
 986		if (err == -ENODEV) {
 987			err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
 988						   "amdgpu/%s_pfp.bin", chip_name);
 989		}
 990	} else {
 991		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
 992					   "amdgpu/%s_pfp.bin", chip_name);
 993	}
 994	if (err)
 995		goto out;
 996	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 997	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 998	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 999
1000	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1001		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1002					   "amdgpu/%s_me_2.bin", chip_name);
1003		if (err == -ENODEV) {
1004			err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1005						   "amdgpu/%s_me.bin", chip_name);
1006		}
1007	} else {
1008		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1009					   "amdgpu/%s_me.bin", chip_name);
1010	}
1011	if (err)
1012		goto out;
1013	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1014	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015
1016	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1017
1018	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1019		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1020					   "amdgpu/%s_ce_2.bin", chip_name);
1021		if (err == -ENODEV) {
1022			err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1023						   "amdgpu/%s_ce.bin", chip_name);
1024		}
1025	} else {
1026		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1027					   "amdgpu/%s_ce.bin", chip_name);
1028	}
1029	if (err)
1030		goto out;
1031	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1032	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1033	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1034
1035	/*
1036	 * Support for MCBP/Virtualization in combination with chained IBs is
1037	 * formal released on feature version #46
1038	 */
1039	if (adev->gfx.ce_feature_version >= 46 &&
1040	    adev->gfx.pfp_feature_version >= 46) {
1041		adev->virt.chained_ib_support = true;
1042		DRM_INFO("Chained IB support enabled!\n");
1043	} else
1044		adev->virt.chained_ib_support = false;
1045
1046	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1047				   "amdgpu/%s_rlc.bin", chip_name);
1048	if (err)
1049		goto out;
1050	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1051	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1052	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1053
1054	adev->gfx.rlc.save_and_restore_offset =
1055			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1056	adev->gfx.rlc.clear_state_descriptor_offset =
1057			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1058	adev->gfx.rlc.avail_scratch_ram_locations =
1059			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1060	adev->gfx.rlc.reg_restore_list_size =
1061			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1062	adev->gfx.rlc.reg_list_format_start =
1063			le32_to_cpu(rlc_hdr->reg_list_format_start);
1064	adev->gfx.rlc.reg_list_format_separate_start =
1065			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1066	adev->gfx.rlc.starting_offsets_start =
1067			le32_to_cpu(rlc_hdr->starting_offsets_start);
1068	adev->gfx.rlc.reg_list_format_size_bytes =
1069			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1070	adev->gfx.rlc.reg_list_size_bytes =
1071			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1072
1073	adev->gfx.rlc.register_list_format =
1074			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1075					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1076
1077	if (!adev->gfx.rlc.register_list_format) {
1078		err = -ENOMEM;
 
1079		goto out;
1080	}
1081
1082	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1083			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1084	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1085		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1086
1087	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1088
1089	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1090			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1091	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1092		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1093
1094	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1095		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1096					   "amdgpu/%s_mec_2.bin", chip_name);
1097		if (err == -ENODEV) {
1098			err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1099						   "amdgpu/%s_mec.bin", chip_name);
1100		}
1101	} else {
1102		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1103					   "amdgpu/%s_mec.bin", chip_name);
1104	}
1105	if (err)
1106		goto out;
1107	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1108	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1109	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1110
1111	if ((adev->asic_type != CHIP_STONEY) &&
1112	    (adev->asic_type != CHIP_TOPAZ)) {
1113		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1114			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1115						   "amdgpu/%s_mec2_2.bin", chip_name);
1116			if (err == -ENODEV) {
1117				err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1118							   "amdgpu/%s_mec2.bin", chip_name);
1119			}
1120		} else {
1121			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1122						   "amdgpu/%s_mec2.bin", chip_name);
1123		}
1124		if (!err) {
 
 
 
1125			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1126				adev->gfx.mec2_fw->data;
1127			adev->gfx.mec2_fw_version =
1128				le32_to_cpu(cp_hdr->header.ucode_version);
1129			adev->gfx.mec2_feature_version =
1130				le32_to_cpu(cp_hdr->ucode_feature_version);
1131		} else {
1132			err = 0;
1133			adev->gfx.mec2_fw = NULL;
1134		}
1135	}
1136
1137	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1138	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1139	info->fw = adev->gfx.pfp_fw;
1140	header = (const struct common_firmware_header *)info->fw->data;
1141	adev->firmware.fw_size +=
1142		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1143
1144	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1145	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1146	info->fw = adev->gfx.me_fw;
1147	header = (const struct common_firmware_header *)info->fw->data;
1148	adev->firmware.fw_size +=
1149		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1150
1151	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1152	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1153	info->fw = adev->gfx.ce_fw;
1154	header = (const struct common_firmware_header *)info->fw->data;
1155	adev->firmware.fw_size +=
1156		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1157
1158	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1159	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1160	info->fw = adev->gfx.rlc_fw;
1161	header = (const struct common_firmware_header *)info->fw->data;
1162	adev->firmware.fw_size +=
1163		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1164
1165	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1166	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1167	info->fw = adev->gfx.mec_fw;
1168	header = (const struct common_firmware_header *)info->fw->data;
1169	adev->firmware.fw_size +=
1170		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1171
1172	/* we need account JT in */
1173	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1174	adev->firmware.fw_size +=
1175		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
 
 
 
 
 
 
 
 
 
1176
1177	if (amdgpu_sriov_vf(adev)) {
1178		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1179		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1180		info->fw = adev->gfx.mec_fw;
1181		adev->firmware.fw_size +=
1182			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1183	}
1184
1185	if (adev->gfx.mec2_fw) {
1186		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1187		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1188		info->fw = adev->gfx.mec2_fw;
1189		header = (const struct common_firmware_header *)info->fw->data;
1190		adev->firmware.fw_size +=
1191			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
 
 
 
 
 
 
 
 
 
1192	}
1193
1194out:
1195	if (err) {
1196		dev_err(adev->dev, "gfx8: Failed to load firmware %s gfx firmware\n", chip_name);
1197		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1198		amdgpu_ucode_release(&adev->gfx.me_fw);
1199		amdgpu_ucode_release(&adev->gfx.ce_fw);
1200		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1201		amdgpu_ucode_release(&adev->gfx.mec_fw);
1202		amdgpu_ucode_release(&adev->gfx.mec2_fw);
 
 
 
 
 
 
 
 
1203	}
1204	return err;
1205}
1206
1207static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1208				    volatile u32 *buffer)
1209{
1210	u32 count = 0, i;
1211	const struct cs_section_def *sect = NULL;
1212	const struct cs_extent_def *ext = NULL;
1213
1214	if (adev->gfx.rlc.cs_data == NULL)
1215		return;
1216	if (buffer == NULL)
1217		return;
1218
1219	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1220	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1221
1222	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1223	buffer[count++] = cpu_to_le32(0x80000000);
1224	buffer[count++] = cpu_to_le32(0x80000000);
1225
1226	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1227		for (ext = sect->section; ext->extent != NULL; ++ext) {
1228			if (sect->id == SECT_CONTEXT) {
1229				buffer[count++] =
1230					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1231				buffer[count++] = cpu_to_le32(ext->reg_index -
1232						PACKET3_SET_CONTEXT_REG_START);
1233				for (i = 0; i < ext->reg_count; i++)
1234					buffer[count++] = cpu_to_le32(ext->extent[i]);
1235			} else {
1236				return;
1237			}
1238		}
1239	}
1240
1241	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1242	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1243			PACKET3_SET_CONTEXT_REG_START);
1244	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1245	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1246
1247	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1248	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1249
1250	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1251	buffer[count++] = cpu_to_le32(0);
1252}
1253
1254static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1255{
1256	if (adev->asic_type == CHIP_CARRIZO)
1257		return 5;
1258	else
1259		return 4;
1260}
1261
1262static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1263{
1264	const struct cs_section_def *cs_data;
1265	int r;
1266
1267	adev->gfx.rlc.cs_data = vi_cs_data;
 
 
 
 
 
1268
1269	cs_data = adev->gfx.rlc.cs_data;
1270
1271	if (cs_data) {
1272		/* init clear state block */
1273		r = amdgpu_gfx_rlc_init_csb(adev);
1274		if (r)
1275			return r;
1276	}
1277
1278	if ((adev->asic_type == CHIP_CARRIZO) ||
1279	    (adev->asic_type == CHIP_STONEY)) {
1280		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1281		r = amdgpu_gfx_rlc_init_cpt(adev);
1282		if (r)
1283			return r;
1284	}
1285
1286	/* init spm vmid with 0xf */
1287	if (adev->gfx.rlc.funcs->update_spm_vmid)
1288		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
1289
1290	return 0;
1291}
1292
1293static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1294{
1295	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1296}
1297
1298static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1299{
1300	int r;
1301	u32 *hpd;
1302	size_t mec_hpd_size;
1303
1304	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1305
1306	/* take ownership of the relevant compute queues */
1307	amdgpu_gfx_compute_queue_acquire(adev);
1308
1309	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1310	if (mec_hpd_size) {
1311		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1312					      AMDGPU_GEM_DOMAIN_VRAM |
1313					      AMDGPU_GEM_DOMAIN_GTT,
1314					      &adev->gfx.mec.hpd_eop_obj,
1315					      &adev->gfx.mec.hpd_eop_gpu_addr,
1316					      (void **)&hpd);
 
1317		if (r) {
1318			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1319			return r;
1320		}
 
1321
1322		memset(hpd, 0, mec_hpd_size);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1323
1324		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1325		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1326	}
 
1327
1328	return 0;
1329}
1330
1331static const u32 vgpr_init_compute_shader[] =
1332{
1333	0x7e000209, 0x7e020208,
1334	0x7e040207, 0x7e060206,
1335	0x7e080205, 0x7e0a0204,
1336	0x7e0c0203, 0x7e0e0202,
1337	0x7e100201, 0x7e120200,
1338	0x7e140209, 0x7e160208,
1339	0x7e180207, 0x7e1a0206,
1340	0x7e1c0205, 0x7e1e0204,
1341	0x7e200203, 0x7e220202,
1342	0x7e240201, 0x7e260200,
1343	0x7e280209, 0x7e2a0208,
1344	0x7e2c0207, 0x7e2e0206,
1345	0x7e300205, 0x7e320204,
1346	0x7e340203, 0x7e360202,
1347	0x7e380201, 0x7e3a0200,
1348	0x7e3c0209, 0x7e3e0208,
1349	0x7e400207, 0x7e420206,
1350	0x7e440205, 0x7e460204,
1351	0x7e480203, 0x7e4a0202,
1352	0x7e4c0201, 0x7e4e0200,
1353	0x7e500209, 0x7e520208,
1354	0x7e540207, 0x7e560206,
1355	0x7e580205, 0x7e5a0204,
1356	0x7e5c0203, 0x7e5e0202,
1357	0x7e600201, 0x7e620200,
1358	0x7e640209, 0x7e660208,
1359	0x7e680207, 0x7e6a0206,
1360	0x7e6c0205, 0x7e6e0204,
1361	0x7e700203, 0x7e720202,
1362	0x7e740201, 0x7e760200,
1363	0x7e780209, 0x7e7a0208,
1364	0x7e7c0207, 0x7e7e0206,
1365	0xbf8a0000, 0xbf810000,
1366};
1367
1368static const u32 sgpr_init_compute_shader[] =
1369{
1370	0xbe8a0100, 0xbe8c0102,
1371	0xbe8e0104, 0xbe900106,
1372	0xbe920108, 0xbe940100,
1373	0xbe960102, 0xbe980104,
1374	0xbe9a0106, 0xbe9c0108,
1375	0xbe9e0100, 0xbea00102,
1376	0xbea20104, 0xbea40106,
1377	0xbea60108, 0xbea80100,
1378	0xbeaa0102, 0xbeac0104,
1379	0xbeae0106, 0xbeb00108,
1380	0xbeb20100, 0xbeb40102,
1381	0xbeb60104, 0xbeb80106,
1382	0xbeba0108, 0xbebc0100,
1383	0xbebe0102, 0xbec00104,
1384	0xbec20106, 0xbec40108,
1385	0xbec60100, 0xbec80102,
1386	0xbee60004, 0xbee70005,
1387	0xbeea0006, 0xbeeb0007,
1388	0xbee80008, 0xbee90009,
1389	0xbefc0000, 0xbf8a0000,
1390	0xbf810000, 0x00000000,
1391};
1392
1393static const u32 vgpr_init_regs[] =
1394{
1395	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1396	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1397	mmCOMPUTE_NUM_THREAD_X, 256*4,
1398	mmCOMPUTE_NUM_THREAD_Y, 1,
1399	mmCOMPUTE_NUM_THREAD_Z, 1,
1400	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1401	mmCOMPUTE_PGM_RSRC2, 20,
1402	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1403	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1404	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1405	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1406	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1407	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1408	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1409	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1410	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1411	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1412};
1413
1414static const u32 sgpr1_init_regs[] =
1415{
1416	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1417	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1418	mmCOMPUTE_NUM_THREAD_X, 256*5,
1419	mmCOMPUTE_NUM_THREAD_Y, 1,
1420	mmCOMPUTE_NUM_THREAD_Z, 1,
1421	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1422	mmCOMPUTE_PGM_RSRC2, 20,
1423	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1424	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1425	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1426	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1427	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1428	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1429	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1430	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1431	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1432	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1433};
1434
1435static const u32 sgpr2_init_regs[] =
1436{
1437	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1438	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1439	mmCOMPUTE_NUM_THREAD_X, 256*5,
1440	mmCOMPUTE_NUM_THREAD_Y, 1,
1441	mmCOMPUTE_NUM_THREAD_Z, 1,
1442	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1443	mmCOMPUTE_PGM_RSRC2, 20,
1444	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1445	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1446	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1447	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1448	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1449	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1450	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1451	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1452	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1453	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1454};
1455
1456static const u32 sec_ded_counter_registers[] =
1457{
1458	mmCPC_EDC_ATC_CNT,
1459	mmCPC_EDC_SCRATCH_CNT,
1460	mmCPC_EDC_UCODE_CNT,
1461	mmCPF_EDC_ATC_CNT,
1462	mmCPF_EDC_ROQ_CNT,
1463	mmCPF_EDC_TAG_CNT,
1464	mmCPG_EDC_ATC_CNT,
1465	mmCPG_EDC_DMA_CNT,
1466	mmCPG_EDC_TAG_CNT,
1467	mmDC_EDC_CSINVOC_CNT,
1468	mmDC_EDC_RESTORE_CNT,
1469	mmDC_EDC_STATE_CNT,
1470	mmGDS_EDC_CNT,
1471	mmGDS_EDC_GRBM_CNT,
1472	mmGDS_EDC_OA_DED,
1473	mmSPI_EDC_CNT,
1474	mmSQC_ATC_EDC_GATCL1_CNT,
1475	mmSQC_EDC_CNT,
1476	mmSQ_EDC_DED_CNT,
1477	mmSQ_EDC_INFO,
1478	mmSQ_EDC_SEC_CNT,
1479	mmTCC_EDC_CNT,
1480	mmTCP_ATC_EDC_GATCL1_CNT,
1481	mmTCP_EDC_CNT,
1482	mmTD_EDC_CNT
1483};
1484
1485static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1486{
1487	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1488	struct amdgpu_ib ib;
1489	struct dma_fence *f = NULL;
1490	int r, i;
1491	u32 tmp;
1492	unsigned total_size, vgpr_offset, sgpr_offset;
1493	u64 gpu_addr;
1494
1495	/* only supported on CZ */
1496	if (adev->asic_type != CHIP_CARRIZO)
1497		return 0;
1498
1499	/* bail if the compute ring is not ready */
1500	if (!ring->sched.ready)
1501		return 0;
1502
1503	tmp = RREG32(mmGB_EDC_MODE);
1504	WREG32(mmGB_EDC_MODE, 0);
1505
1506	total_size =
1507		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1508	total_size +=
1509		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1510	total_size +=
1511		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1512	total_size = ALIGN(total_size, 256);
1513	vgpr_offset = total_size;
1514	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1515	sgpr_offset = total_size;
1516	total_size += sizeof(sgpr_init_compute_shader);
1517
1518	/* allocate an indirect buffer to put the commands in */
1519	memset(&ib, 0, sizeof(ib));
1520	r = amdgpu_ib_get(adev, NULL, total_size,
1521					AMDGPU_IB_POOL_DIRECT, &ib);
1522	if (r) {
1523		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1524		return r;
1525	}
1526
1527	/* load the compute shaders */
1528	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1529		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1530
1531	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1532		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1533
1534	/* init the ib length to 0 */
1535	ib.length_dw = 0;
1536
1537	/* VGPR */
1538	/* write the register state for the compute dispatch */
1539	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1540		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1541		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1542		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1543	}
1544	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1545	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1546	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1547	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1548	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1549	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1550
1551	/* write dispatch packet */
1552	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1553	ib.ptr[ib.length_dw++] = 8; /* x */
1554	ib.ptr[ib.length_dw++] = 1; /* y */
1555	ib.ptr[ib.length_dw++] = 1; /* z */
1556	ib.ptr[ib.length_dw++] =
1557		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1558
1559	/* write CS partial flush packet */
1560	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1561	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1562
1563	/* SGPR1 */
1564	/* write the register state for the compute dispatch */
1565	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1566		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1567		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1568		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1569	}
1570	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1571	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1572	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1573	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1574	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1575	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1576
1577	/* write dispatch packet */
1578	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1579	ib.ptr[ib.length_dw++] = 8; /* x */
1580	ib.ptr[ib.length_dw++] = 1; /* y */
1581	ib.ptr[ib.length_dw++] = 1; /* z */
1582	ib.ptr[ib.length_dw++] =
1583		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1584
1585	/* write CS partial flush packet */
1586	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1587	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1588
1589	/* SGPR2 */
1590	/* write the register state for the compute dispatch */
1591	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1592		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1593		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1594		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1595	}
1596	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1597	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1598	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1599	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1600	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1601	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1602
1603	/* write dispatch packet */
1604	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1605	ib.ptr[ib.length_dw++] = 8; /* x */
1606	ib.ptr[ib.length_dw++] = 1; /* y */
1607	ib.ptr[ib.length_dw++] = 1; /* z */
1608	ib.ptr[ib.length_dw++] =
1609		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1610
1611	/* write CS partial flush packet */
1612	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1613	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1614
1615	/* shedule the ib on the ring */
1616	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1617	if (r) {
1618		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1619		goto fail;
1620	}
1621
1622	/* wait for the GPU to finish processing the IB */
1623	r = dma_fence_wait(f, false);
1624	if (r) {
1625		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1626		goto fail;
1627	}
1628
1629	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1630	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1631	WREG32(mmGB_EDC_MODE, tmp);
1632
1633	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1634	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1635	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1636
1637
1638	/* read back registers to clear the counters */
1639	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1640		RREG32(sec_ded_counter_registers[i]);
1641
1642fail:
 
1643	amdgpu_ib_free(adev, &ib, NULL);
1644	dma_fence_put(f);
1645
1646	return r;
1647}
1648
1649static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1650{
1651	u32 gb_addr_config;
1652	u32 mc_arb_ramcfg;
1653	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1654	u32 tmp;
1655	int ret;
1656
1657	switch (adev->asic_type) {
1658	case CHIP_TOPAZ:
1659		adev->gfx.config.max_shader_engines = 1;
1660		adev->gfx.config.max_tile_pipes = 2;
1661		adev->gfx.config.max_cu_per_sh = 6;
1662		adev->gfx.config.max_sh_per_se = 1;
1663		adev->gfx.config.max_backends_per_se = 2;
1664		adev->gfx.config.max_texture_channel_caches = 2;
1665		adev->gfx.config.max_gprs = 256;
1666		adev->gfx.config.max_gs_threads = 32;
1667		adev->gfx.config.max_hw_contexts = 8;
1668
1669		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1670		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1671		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1672		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1673		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1674		break;
1675	case CHIP_FIJI:
1676		adev->gfx.config.max_shader_engines = 4;
1677		adev->gfx.config.max_tile_pipes = 16;
1678		adev->gfx.config.max_cu_per_sh = 16;
1679		adev->gfx.config.max_sh_per_se = 1;
1680		adev->gfx.config.max_backends_per_se = 4;
1681		adev->gfx.config.max_texture_channel_caches = 16;
1682		adev->gfx.config.max_gprs = 256;
1683		adev->gfx.config.max_gs_threads = 32;
1684		adev->gfx.config.max_hw_contexts = 8;
1685
1686		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1687		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1688		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1689		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1690		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1691		break;
1692	case CHIP_POLARIS11:
1693	case CHIP_POLARIS12:
1694		ret = amdgpu_atombios_get_gfx_info(adev);
1695		if (ret)
1696			return ret;
1697		adev->gfx.config.max_gprs = 256;
1698		adev->gfx.config.max_gs_threads = 32;
1699		adev->gfx.config.max_hw_contexts = 8;
1700
1701		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1702		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1703		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1704		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1705		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1706		break;
1707	case CHIP_POLARIS10:
1708	case CHIP_VEGAM:
1709		ret = amdgpu_atombios_get_gfx_info(adev);
1710		if (ret)
1711			return ret;
1712		adev->gfx.config.max_gprs = 256;
1713		adev->gfx.config.max_gs_threads = 32;
1714		adev->gfx.config.max_hw_contexts = 8;
1715
1716		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1717		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1718		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1719		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1720		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1721		break;
1722	case CHIP_TONGA:
1723		adev->gfx.config.max_shader_engines = 4;
1724		adev->gfx.config.max_tile_pipes = 8;
1725		adev->gfx.config.max_cu_per_sh = 8;
1726		adev->gfx.config.max_sh_per_se = 1;
1727		adev->gfx.config.max_backends_per_se = 2;
1728		adev->gfx.config.max_texture_channel_caches = 8;
1729		adev->gfx.config.max_gprs = 256;
1730		adev->gfx.config.max_gs_threads = 32;
1731		adev->gfx.config.max_hw_contexts = 8;
1732
1733		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1734		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1735		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1736		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1737		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1738		break;
1739	case CHIP_CARRIZO:
1740		adev->gfx.config.max_shader_engines = 1;
1741		adev->gfx.config.max_tile_pipes = 2;
1742		adev->gfx.config.max_sh_per_se = 1;
1743		adev->gfx.config.max_backends_per_se = 2;
1744		adev->gfx.config.max_cu_per_sh = 8;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1745		adev->gfx.config.max_texture_channel_caches = 2;
1746		adev->gfx.config.max_gprs = 256;
1747		adev->gfx.config.max_gs_threads = 32;
1748		adev->gfx.config.max_hw_contexts = 8;
1749
1750		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1751		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1752		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1753		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1754		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1755		break;
1756	case CHIP_STONEY:
1757		adev->gfx.config.max_shader_engines = 1;
1758		adev->gfx.config.max_tile_pipes = 2;
1759		adev->gfx.config.max_sh_per_se = 1;
1760		adev->gfx.config.max_backends_per_se = 1;
1761		adev->gfx.config.max_cu_per_sh = 3;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1762		adev->gfx.config.max_texture_channel_caches = 2;
1763		adev->gfx.config.max_gprs = 256;
1764		adev->gfx.config.max_gs_threads = 16;
1765		adev->gfx.config.max_hw_contexts = 8;
1766
1767		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1768		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1769		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1770		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1771		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1772		break;
1773	default:
1774		adev->gfx.config.max_shader_engines = 2;
1775		adev->gfx.config.max_tile_pipes = 4;
1776		adev->gfx.config.max_cu_per_sh = 2;
1777		adev->gfx.config.max_sh_per_se = 1;
1778		adev->gfx.config.max_backends_per_se = 2;
1779		adev->gfx.config.max_texture_channel_caches = 4;
1780		adev->gfx.config.max_gprs = 256;
1781		adev->gfx.config.max_gs_threads = 32;
1782		adev->gfx.config.max_hw_contexts = 8;
1783
1784		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1785		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1786		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1787		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1788		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1789		break;
1790	}
1791
 
1792	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1793	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1794
1795	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1796				MC_ARB_RAMCFG, NOOFBANK);
1797	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1798				MC_ARB_RAMCFG, NOOFRANKS);
1799
1800	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1801	adev->gfx.config.mem_max_burst_length_bytes = 256;
1802	if (adev->flags & AMD_IS_APU) {
1803		/* Get memory bank mapping mode. */
1804		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1805		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1806		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1807
1808		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1809		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1810		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1811
1812		/* Validate settings in case only one DIMM installed. */
1813		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1814			dimm00_addr_map = 0;
1815		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1816			dimm01_addr_map = 0;
1817		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1818			dimm10_addr_map = 0;
1819		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1820			dimm11_addr_map = 0;
1821
1822		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1823		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1824		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1825			adev->gfx.config.mem_row_size_in_kb = 2;
1826		else
1827			adev->gfx.config.mem_row_size_in_kb = 1;
1828	} else {
1829		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1830		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1831		if (adev->gfx.config.mem_row_size_in_kb > 4)
1832			adev->gfx.config.mem_row_size_in_kb = 4;
1833	}
1834
1835	adev->gfx.config.shader_engine_tile_size = 32;
1836	adev->gfx.config.num_gpus = 1;
1837	adev->gfx.config.multi_gpu_tile_size = 64;
1838
1839	/* fix up row size */
1840	switch (adev->gfx.config.mem_row_size_in_kb) {
1841	case 1:
1842	default:
1843		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1844		break;
1845	case 2:
1846		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1847		break;
1848	case 4:
1849		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1850		break;
1851	}
1852	adev->gfx.config.gb_addr_config = gb_addr_config;
1853
1854	return 0;
1855}
1856
1857static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1858					int mec, int pipe, int queue)
1859{
1860	int r;
1861	unsigned irq_type;
1862	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1863	unsigned int hw_prio;
1864
1865	ring = &adev->gfx.compute_ring[ring_id];
1866
1867	/* mec0 is me1 */
1868	ring->me = mec + 1;
1869	ring->pipe = pipe;
1870	ring->queue = queue;
1871
1872	ring->ring_obj = NULL;
1873	ring->use_doorbell = true;
1874	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1875	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1876				+ (ring_id * GFX8_MEC_HPD_SIZE);
1877	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1878
1879	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1880		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1881		+ ring->pipe;
1882
1883	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1884			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1885	/* type-2 packets are deprecated on MEC, use type-3 instead */
1886	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1887			     hw_prio, NULL);
1888	if (r)
1889		return r;
1890
1891
1892	return 0;
1893}
1894
1895static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1896
1897static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
1898{
1899	int i, j, k, r, ring_id;
1900	int xcc_id = 0;
1901	struct amdgpu_ring *ring;
1902	struct amdgpu_device *adev = ip_block->adev;
1903
1904	switch (adev->asic_type) {
1905	case CHIP_TONGA:
1906	case CHIP_CARRIZO:
1907	case CHIP_FIJI:
1908	case CHIP_POLARIS10:
1909	case CHIP_POLARIS11:
1910	case CHIP_POLARIS12:
1911	case CHIP_VEGAM:
1912		adev->gfx.mec.num_mec = 2;
1913		break;
1914	case CHIP_TOPAZ:
1915	case CHIP_STONEY:
1916	default:
1917		adev->gfx.mec.num_mec = 1;
1918		break;
1919	}
1920
1921	adev->gfx.mec.num_pipe_per_mec = 4;
1922	adev->gfx.mec.num_queue_per_pipe = 8;
1923
1924	/* EOP Event */
1925	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1926	if (r)
1927		return r;
1928
1929	/* Privileged reg */
1930	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1931			      &adev->gfx.priv_reg_irq);
1932	if (r)
1933		return r;
1934
1935	/* Privileged inst */
1936	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1937			      &adev->gfx.priv_inst_irq);
1938	if (r)
1939		return r;
1940
1941	/* Add CP EDC/ECC irq  */
1942	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1943			      &adev->gfx.cp_ecc_error_irq);
1944	if (r)
1945		return r;
1946
1947	/* SQ interrupts. */
1948	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1949			      &adev->gfx.sq_irq);
1950	if (r) {
1951		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
1952		return r;
1953	}
1954
1955	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1956
1957	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1958
1959	r = gfx_v8_0_init_microcode(adev);
1960	if (r) {
1961		DRM_ERROR("Failed to load gfx firmware!\n");
1962		return r;
1963	}
1964
1965	r = adev->gfx.rlc.funcs->init(adev);
1966	if (r) {
1967		DRM_ERROR("Failed to init rlc BOs!\n");
1968		return r;
1969	}
1970
1971	r = gfx_v8_0_mec_init(adev);
1972	if (r) {
1973		DRM_ERROR("Failed to init MEC BOs!\n");
1974		return r;
1975	}
1976
1977	/* set up the gfx ring */
1978	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1979		ring = &adev->gfx.gfx_ring[i];
1980		ring->ring_obj = NULL;
1981		sprintf(ring->name, "gfx");
1982		/* no gfx doorbells on iceland */
1983		if (adev->asic_type != CHIP_TOPAZ) {
1984			ring->use_doorbell = true;
1985			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
1986		}
1987
1988		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
1989				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
1990				     AMDGPU_RING_PRIO_DEFAULT, NULL);
 
1991		if (r)
1992			return r;
1993	}
1994
 
 
 
1995
1996	/* set up the compute queues - allocate horizontally across pipes */
1997	ring_id = 0;
1998	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1999		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2000			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2001				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2002								     k, j))
2003					continue;
2004
2005				r = gfx_v8_0_compute_ring_init(adev,
2006								ring_id,
2007								i, k, j);
2008				if (r)
2009					return r;
2010
2011				ring_id++;
2012			}
2013		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2014	}
2015
2016	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
2017	if (r) {
2018		DRM_ERROR("Failed to init KIQ BOs!\n");
 
 
 
2019		return r;
2020	}
2021
2022	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
 
 
 
2023	if (r)
2024		return r;
2025
2026	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2027	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
 
 
2028	if (r)
2029		return r;
2030
2031	adev->gfx.ce_ram_size = 0x8000;
2032
2033	r = gfx_v8_0_gpu_early_init(adev);
2034	if (r)
2035		return r;
2036
2037	return 0;
2038}
2039
2040static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
2041{
2042	struct amdgpu_device *adev = ip_block->adev;
2043	int i;
 
 
 
 
 
2044
2045	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2046		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2047	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2048		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2049
2050	amdgpu_gfx_mqd_sw_fini(adev, 0);
2051	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2052	amdgpu_gfx_kiq_fini(adev, 0);
2053
2054	gfx_v8_0_mec_fini(adev);
2055	amdgpu_gfx_rlc_fini(adev);
2056	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2057				&adev->gfx.rlc.clear_state_gpu_addr,
2058				(void **)&adev->gfx.rlc.cs_ptr);
2059	if ((adev->asic_type == CHIP_CARRIZO) ||
2060	    (adev->asic_type == CHIP_STONEY)) {
2061		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2062				&adev->gfx.rlc.cp_table_gpu_addr,
2063				(void **)&adev->gfx.rlc.cp_table_ptr);
2064	}
2065	gfx_v8_0_free_microcode(adev);
2066
2067	return 0;
2068}
2069
2070static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2071{
2072	uint32_t *modearray, *mod2array;
2073	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2074	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2075	u32 reg_offset;
2076
2077	modearray = adev->gfx.config.tile_mode_array;
2078	mod2array = adev->gfx.config.macrotile_mode_array;
2079
2080	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2081		modearray[reg_offset] = 0;
2082
2083	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2084		mod2array[reg_offset] = 0;
2085
2086	switch (adev->asic_type) {
2087	case CHIP_TOPAZ:
2088		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089				PIPE_CONFIG(ADDR_SURF_P2) |
2090				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2091				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2092		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2093				PIPE_CONFIG(ADDR_SURF_P2) |
2094				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2095				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2096		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097				PIPE_CONFIG(ADDR_SURF_P2) |
2098				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2099				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2100		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2101				PIPE_CONFIG(ADDR_SURF_P2) |
2102				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2103				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2104		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2105				PIPE_CONFIG(ADDR_SURF_P2) |
2106				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2107				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2108		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109				PIPE_CONFIG(ADDR_SURF_P2) |
2110				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2111				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2112		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2113				PIPE_CONFIG(ADDR_SURF_P2) |
2114				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2115				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2116		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2117				PIPE_CONFIG(ADDR_SURF_P2));
2118		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2119				PIPE_CONFIG(ADDR_SURF_P2) |
2120				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2121				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2122		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123				 PIPE_CONFIG(ADDR_SURF_P2) |
2124				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2125				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2126		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2127				 PIPE_CONFIG(ADDR_SURF_P2) |
2128				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2129				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2130		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131				 PIPE_CONFIG(ADDR_SURF_P2) |
2132				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2133				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2134		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135				 PIPE_CONFIG(ADDR_SURF_P2) |
2136				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2137				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2139				 PIPE_CONFIG(ADDR_SURF_P2) |
2140				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2141				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2142		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2143				 PIPE_CONFIG(ADDR_SURF_P2) |
2144				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2145				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2146		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2147				 PIPE_CONFIG(ADDR_SURF_P2) |
2148				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2149				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2150		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2151				 PIPE_CONFIG(ADDR_SURF_P2) |
2152				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2153				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2154		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2155				 PIPE_CONFIG(ADDR_SURF_P2) |
2156				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2157				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2158		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2159				 PIPE_CONFIG(ADDR_SURF_P2) |
2160				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2161				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2162		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2163				 PIPE_CONFIG(ADDR_SURF_P2) |
2164				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2165				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2166		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2167				 PIPE_CONFIG(ADDR_SURF_P2) |
2168				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2169				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2170		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2171				 PIPE_CONFIG(ADDR_SURF_P2) |
2172				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2173				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2174		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2175				 PIPE_CONFIG(ADDR_SURF_P2) |
2176				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2177				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2178		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179				 PIPE_CONFIG(ADDR_SURF_P2) |
2180				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2181				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183				 PIPE_CONFIG(ADDR_SURF_P2) |
2184				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2185				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187				 PIPE_CONFIG(ADDR_SURF_P2) |
2188				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2189				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2190
2191		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2192				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2193				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2194				NUM_BANKS(ADDR_SURF_8_BANK));
2195		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2196				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2197				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198				NUM_BANKS(ADDR_SURF_8_BANK));
2199		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2200				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2201				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2202				NUM_BANKS(ADDR_SURF_8_BANK));
2203		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2205				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2206				NUM_BANKS(ADDR_SURF_8_BANK));
2207		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2209				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2210				NUM_BANKS(ADDR_SURF_8_BANK));
2211		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2212				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2213				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2214				NUM_BANKS(ADDR_SURF_8_BANK));
2215		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2216				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2217				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2218				NUM_BANKS(ADDR_SURF_8_BANK));
2219		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2220				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2221				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2222				NUM_BANKS(ADDR_SURF_16_BANK));
2223		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2224				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2225				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2226				NUM_BANKS(ADDR_SURF_16_BANK));
2227		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2228				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2229				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2230				 NUM_BANKS(ADDR_SURF_16_BANK));
2231		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2232				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2233				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234				 NUM_BANKS(ADDR_SURF_16_BANK));
2235		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2237				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2238				 NUM_BANKS(ADDR_SURF_16_BANK));
2239		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2240				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2241				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2242				 NUM_BANKS(ADDR_SURF_16_BANK));
2243		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246				 NUM_BANKS(ADDR_SURF_8_BANK));
2247
2248		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2249			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2250			    reg_offset != 23)
2251				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2252
2253		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2254			if (reg_offset != 7)
2255				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2256
2257		break;
2258	case CHIP_FIJI:
2259	case CHIP_VEGAM:
2260		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2262				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2263				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2264		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2265				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2266				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2267				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2268		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2270				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2271				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2272		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2274				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2275				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2276		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2277				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2278				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2279				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2280		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2281				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2282				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2283				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2284		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2285				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2286				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2287				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2288		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2289				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2290				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2291				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2292		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2293				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2294		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2295				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2297				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2301				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2302		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2303				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2305				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2306		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2308				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2309				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2310		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2317				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2319				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2321				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2325				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2328				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2329				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2331				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2333				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2334		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2335				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2337				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2338		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2339				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2341				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2342		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2343				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2345				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2346		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2347				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2349				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2350		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2351				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2352				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2353				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2354		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2355				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2357				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2358		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2359				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2361				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2362		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2363				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2365				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2366		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2369				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2370		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2373				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2375				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2377				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2378		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2379				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2380				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2381				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2382
2383		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2385				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386				NUM_BANKS(ADDR_SURF_8_BANK));
2387		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390				NUM_BANKS(ADDR_SURF_8_BANK));
2391		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2394				NUM_BANKS(ADDR_SURF_8_BANK));
2395		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2397				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2398				NUM_BANKS(ADDR_SURF_8_BANK));
2399		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2402				NUM_BANKS(ADDR_SURF_8_BANK));
2403		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2406				NUM_BANKS(ADDR_SURF_8_BANK));
2407		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2408				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2409				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2410				NUM_BANKS(ADDR_SURF_8_BANK));
2411		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2413				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2414				NUM_BANKS(ADDR_SURF_8_BANK));
2415		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2417				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2418				NUM_BANKS(ADDR_SURF_8_BANK));
2419		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2421				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2422				 NUM_BANKS(ADDR_SURF_8_BANK));
2423		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2425				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2426				 NUM_BANKS(ADDR_SURF_8_BANK));
2427		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2428				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2429				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2430				 NUM_BANKS(ADDR_SURF_8_BANK));
2431		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2434				 NUM_BANKS(ADDR_SURF_8_BANK));
2435		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2438				 NUM_BANKS(ADDR_SURF_4_BANK));
2439
2440		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2441			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2442
2443		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2444			if (reg_offset != 7)
2445				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2446
2447		break;
2448	case CHIP_TONGA:
2449		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2451				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2452				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2453		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2455				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2456				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2457		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2459				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2460				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2461		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2462				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2463				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2464				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2465		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2467				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2468				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2469		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2470				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2471				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2472				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2473		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2474				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2475				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2476				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2477		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2478				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2479				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2480				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2481		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2482				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2483		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2484				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2486				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2487		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2490				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2491		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2492				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2495		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2497				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2498				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2499		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2500				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2501				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2502				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2505				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2506				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2507		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2508				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2510				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2511		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2512				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2514				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2515		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2516				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2518				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2519		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2520				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2523		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2524				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2526				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2527		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2528				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2530				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2531		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2532				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2534				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2535		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2536				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2538				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2539		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2540				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2541				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2542				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2543		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2544				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2546				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2547		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2548				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2550				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2551		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2552				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2554				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2555		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2556				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2558				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2562				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2564				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2566				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2567		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2568				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2569				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2570				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2571
2572		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2574				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2575				NUM_BANKS(ADDR_SURF_16_BANK));
2576		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2578				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2579				NUM_BANKS(ADDR_SURF_16_BANK));
2580		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2582				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2583				NUM_BANKS(ADDR_SURF_16_BANK));
2584		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2586				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2587				NUM_BANKS(ADDR_SURF_16_BANK));
2588		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2590				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2591				NUM_BANKS(ADDR_SURF_16_BANK));
2592		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2594				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2595				NUM_BANKS(ADDR_SURF_16_BANK));
2596		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2599				NUM_BANKS(ADDR_SURF_16_BANK));
2600		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2602				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2603				NUM_BANKS(ADDR_SURF_16_BANK));
2604		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2606				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2607				NUM_BANKS(ADDR_SURF_16_BANK));
2608		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2610				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2611				 NUM_BANKS(ADDR_SURF_16_BANK));
2612		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2615				 NUM_BANKS(ADDR_SURF_16_BANK));
2616		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619				 NUM_BANKS(ADDR_SURF_8_BANK));
2620		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2622				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2623				 NUM_BANKS(ADDR_SURF_4_BANK));
2624		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2626				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2627				 NUM_BANKS(ADDR_SURF_4_BANK));
2628
2629		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2630			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2631
2632		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2633			if (reg_offset != 7)
2634				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2635
2636		break;
2637	case CHIP_POLARIS11:
2638	case CHIP_POLARIS12:
2639		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2642				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2643		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2646				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2647		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2650				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2651		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2654				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2655		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2658				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2659		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2660				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2661				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2662				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2663		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2664				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2665				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2666				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2669				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2670				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2671		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2672				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2673		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2676				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2677		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2680				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2681		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2684				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2685		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2688				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2689		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2690				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2692				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2695				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2696				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2698				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2699				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2700				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2702				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2704				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2705		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2708				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2709		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2710				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2712				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2713		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2714				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2716				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2717		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2718				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2720				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2721		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2722				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2724				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2725		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2726				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2728				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2729		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2730				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2732				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2733		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2734				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2736				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2737		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2738				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2740				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2741		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2742				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2744				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2745		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2746				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2749		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2752				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2753		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2754				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2756				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2757		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2758				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2760				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2761
2762		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2765				NUM_BANKS(ADDR_SURF_16_BANK));
2766
2767		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2769				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2770				NUM_BANKS(ADDR_SURF_16_BANK));
2771
2772		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2774				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2775				NUM_BANKS(ADDR_SURF_16_BANK));
2776
2777		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2779				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2780				NUM_BANKS(ADDR_SURF_16_BANK));
2781
2782		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2784				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2785				NUM_BANKS(ADDR_SURF_16_BANK));
2786
2787		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2788				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2789				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2790				NUM_BANKS(ADDR_SURF_16_BANK));
2791
2792		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2795				NUM_BANKS(ADDR_SURF_16_BANK));
2796
2797		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2798				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2799				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2800				NUM_BANKS(ADDR_SURF_16_BANK));
2801
2802		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2803				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2805				NUM_BANKS(ADDR_SURF_16_BANK));
2806
2807		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2809				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2810				NUM_BANKS(ADDR_SURF_16_BANK));
2811
2812		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2814				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2815				NUM_BANKS(ADDR_SURF_16_BANK));
2816
2817		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2820				NUM_BANKS(ADDR_SURF_16_BANK));
2821
2822		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2824				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2825				NUM_BANKS(ADDR_SURF_8_BANK));
2826
2827		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2829				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2830				NUM_BANKS(ADDR_SURF_4_BANK));
2831
2832		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2833			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2834
2835		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2836			if (reg_offset != 7)
2837				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2838
2839		break;
2840	case CHIP_POLARIS10:
2841		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2843				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2844				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2845		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2847				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2848				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2849		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2851				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2852				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2853		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2854				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2855				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2856				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2857		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2859				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2860				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2861		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2862				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2863				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2864				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2865		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2867				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2868				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2869		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2870				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2871				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2872				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2873		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2874				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2875		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2878				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2882				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2883		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2886				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2887		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2890				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2891		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2894				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2895		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2898				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2899		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2900				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2903		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2904				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2906				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2907		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2908				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2909				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2910				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2911		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2912				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2915		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2916				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2918				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2919		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2920				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2922				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2923		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2924				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2926				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2927		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2928				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2930				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2931		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2932				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2934				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2935		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2936				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2939		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2940				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2942				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2943		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2944				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2946				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2947		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2948				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2950				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2954				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2956				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2958				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2959		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2960				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2961				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2962				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2963
2964		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2966				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967				NUM_BANKS(ADDR_SURF_16_BANK));
2968
2969		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2970				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2971				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2972				NUM_BANKS(ADDR_SURF_16_BANK));
2973
2974		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2976				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977				NUM_BANKS(ADDR_SURF_16_BANK));
2978
2979		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982				NUM_BANKS(ADDR_SURF_16_BANK));
2983
2984		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2986				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987				NUM_BANKS(ADDR_SURF_16_BANK));
2988
2989		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2990				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2991				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2992				NUM_BANKS(ADDR_SURF_16_BANK));
2993
2994		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2995				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2996				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2997				NUM_BANKS(ADDR_SURF_16_BANK));
2998
2999		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002				NUM_BANKS(ADDR_SURF_16_BANK));
3003
3004		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3006				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007				NUM_BANKS(ADDR_SURF_16_BANK));
3008
3009		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3011				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3012				NUM_BANKS(ADDR_SURF_16_BANK));
3013
3014		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3015				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3016				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3017				NUM_BANKS(ADDR_SURF_16_BANK));
3018
3019		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3022				NUM_BANKS(ADDR_SURF_8_BANK));
3023
3024		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3027				NUM_BANKS(ADDR_SURF_4_BANK));
3028
3029		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3031				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3032				NUM_BANKS(ADDR_SURF_4_BANK));
3033
3034		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3035			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3036
3037		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3038			if (reg_offset != 7)
3039				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3040
3041		break;
3042	case CHIP_STONEY:
3043		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3044				PIPE_CONFIG(ADDR_SURF_P2) |
3045				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3046				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3047		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3048				PIPE_CONFIG(ADDR_SURF_P2) |
3049				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3050				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3051		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052				PIPE_CONFIG(ADDR_SURF_P2) |
3053				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3054				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3055		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3056				PIPE_CONFIG(ADDR_SURF_P2) |
3057				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3058				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3059		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3060				PIPE_CONFIG(ADDR_SURF_P2) |
3061				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3062				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3063		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3064				PIPE_CONFIG(ADDR_SURF_P2) |
3065				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3066				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3067		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3068				PIPE_CONFIG(ADDR_SURF_P2) |
3069				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3070				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3071		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3072				PIPE_CONFIG(ADDR_SURF_P2));
3073		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3074				PIPE_CONFIG(ADDR_SURF_P2) |
3075				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3076				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3077		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078				 PIPE_CONFIG(ADDR_SURF_P2) |
3079				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3080				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3081		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3082				 PIPE_CONFIG(ADDR_SURF_P2) |
3083				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3084				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3085		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3086				 PIPE_CONFIG(ADDR_SURF_P2) |
3087				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3088				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3089		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090				 PIPE_CONFIG(ADDR_SURF_P2) |
3091				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3092				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3093		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3094				 PIPE_CONFIG(ADDR_SURF_P2) |
3095				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3096				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3097		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3098				 PIPE_CONFIG(ADDR_SURF_P2) |
3099				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3100				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3101		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3102				 PIPE_CONFIG(ADDR_SURF_P2) |
3103				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3104				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3105		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3106				 PIPE_CONFIG(ADDR_SURF_P2) |
3107				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3108				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3109		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3110				 PIPE_CONFIG(ADDR_SURF_P2) |
3111				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3112				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3113		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3114				 PIPE_CONFIG(ADDR_SURF_P2) |
3115				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3116				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3117		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3118				 PIPE_CONFIG(ADDR_SURF_P2) |
3119				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3120				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3121		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3122				 PIPE_CONFIG(ADDR_SURF_P2) |
3123				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3124				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3125		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3126				 PIPE_CONFIG(ADDR_SURF_P2) |
3127				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3128				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3129		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3130				 PIPE_CONFIG(ADDR_SURF_P2) |
3131				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3132				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3133		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134				 PIPE_CONFIG(ADDR_SURF_P2) |
3135				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3136				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138				 PIPE_CONFIG(ADDR_SURF_P2) |
3139				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3140				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142				 PIPE_CONFIG(ADDR_SURF_P2) |
3143				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3144				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145
3146		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3147				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3148				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3149				NUM_BANKS(ADDR_SURF_8_BANK));
3150		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3152				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3153				NUM_BANKS(ADDR_SURF_8_BANK));
3154		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3155				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3156				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3157				NUM_BANKS(ADDR_SURF_8_BANK));
3158		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3159				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3160				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3161				NUM_BANKS(ADDR_SURF_8_BANK));
3162		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3164				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3165				NUM_BANKS(ADDR_SURF_8_BANK));
3166		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3167				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3168				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3169				NUM_BANKS(ADDR_SURF_8_BANK));
3170		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173				NUM_BANKS(ADDR_SURF_8_BANK));
3174		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3175				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3176				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3177				NUM_BANKS(ADDR_SURF_16_BANK));
3178		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3179				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3180				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3181				NUM_BANKS(ADDR_SURF_16_BANK));
3182		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3183				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3184				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3185				 NUM_BANKS(ADDR_SURF_16_BANK));
3186		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3187				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3188				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3189				 NUM_BANKS(ADDR_SURF_16_BANK));
3190		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3191				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3192				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3193				 NUM_BANKS(ADDR_SURF_16_BANK));
3194		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3195				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3196				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3197				 NUM_BANKS(ADDR_SURF_16_BANK));
3198		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3199				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3200				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3201				 NUM_BANKS(ADDR_SURF_8_BANK));
3202
3203		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3204			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3205			    reg_offset != 23)
3206				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3207
3208		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3209			if (reg_offset != 7)
3210				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3211
3212		break;
3213	default:
3214		dev_warn(adev->dev,
3215			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3216			 adev->asic_type);
3217		fallthrough;
3218
3219	case CHIP_CARRIZO:
3220		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221				PIPE_CONFIG(ADDR_SURF_P2) |
3222				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3223				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3224		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3225				PIPE_CONFIG(ADDR_SURF_P2) |
3226				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3227				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3228		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3229				PIPE_CONFIG(ADDR_SURF_P2) |
3230				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3231				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3232		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3233				PIPE_CONFIG(ADDR_SURF_P2) |
3234				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3235				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3236		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3237				PIPE_CONFIG(ADDR_SURF_P2) |
3238				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3239				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3240		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3241				PIPE_CONFIG(ADDR_SURF_P2) |
3242				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3243				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3244		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3245				PIPE_CONFIG(ADDR_SURF_P2) |
3246				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3247				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3248		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3249				PIPE_CONFIG(ADDR_SURF_P2));
3250		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3251				PIPE_CONFIG(ADDR_SURF_P2) |
3252				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3253				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3254		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255				 PIPE_CONFIG(ADDR_SURF_P2) |
3256				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3257				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259				 PIPE_CONFIG(ADDR_SURF_P2) |
3260				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3261				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3262		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3263				 PIPE_CONFIG(ADDR_SURF_P2) |
3264				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3266		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3267				 PIPE_CONFIG(ADDR_SURF_P2) |
3268				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3269				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3271				 PIPE_CONFIG(ADDR_SURF_P2) |
3272				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3273				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3274		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3275				 PIPE_CONFIG(ADDR_SURF_P2) |
3276				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3277				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3278		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3279				 PIPE_CONFIG(ADDR_SURF_P2) |
3280				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3281				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3282		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3283				 PIPE_CONFIG(ADDR_SURF_P2) |
3284				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3285				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3286		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3287				 PIPE_CONFIG(ADDR_SURF_P2) |
3288				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3289				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3290		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3291				 PIPE_CONFIG(ADDR_SURF_P2) |
3292				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3293				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3294		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3295				 PIPE_CONFIG(ADDR_SURF_P2) |
3296				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3297				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3298		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3299				 PIPE_CONFIG(ADDR_SURF_P2) |
3300				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3301				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3302		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3303				 PIPE_CONFIG(ADDR_SURF_P2) |
3304				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3305				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3306		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3307				 PIPE_CONFIG(ADDR_SURF_P2) |
3308				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3309				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3310		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3311				 PIPE_CONFIG(ADDR_SURF_P2) |
3312				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3313				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3314		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3315				 PIPE_CONFIG(ADDR_SURF_P2) |
3316				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3317				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3318		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3319				 PIPE_CONFIG(ADDR_SURF_P2) |
3320				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3321				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3322
3323		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3325				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3326				NUM_BANKS(ADDR_SURF_8_BANK));
3327		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3328				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3329				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3330				NUM_BANKS(ADDR_SURF_8_BANK));
3331		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3334				NUM_BANKS(ADDR_SURF_8_BANK));
3335		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3336				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3337				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3338				NUM_BANKS(ADDR_SURF_8_BANK));
3339		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3340				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3341				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3342				NUM_BANKS(ADDR_SURF_8_BANK));
3343		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3344				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3345				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3346				NUM_BANKS(ADDR_SURF_8_BANK));
3347		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3348				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3349				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3350				NUM_BANKS(ADDR_SURF_8_BANK));
3351		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3352				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3353				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3354				NUM_BANKS(ADDR_SURF_16_BANK));
3355		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3356				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3357				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358				NUM_BANKS(ADDR_SURF_16_BANK));
3359		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3360				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3361				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3362				 NUM_BANKS(ADDR_SURF_16_BANK));
3363		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3364				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3365				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3366				 NUM_BANKS(ADDR_SURF_16_BANK));
3367		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3368				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3369				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370				 NUM_BANKS(ADDR_SURF_16_BANK));
3371		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3372				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3373				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3374				 NUM_BANKS(ADDR_SURF_16_BANK));
3375		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3376				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3377				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3378				 NUM_BANKS(ADDR_SURF_8_BANK));
3379
3380		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3381			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3382			    reg_offset != 23)
3383				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3384
3385		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3386			if (reg_offset != 7)
3387				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3388
3389		break;
3390	}
3391}
3392
3393static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3394				  u32 se_num, u32 sh_num, u32 instance,
3395				  int xcc_id)
3396{
3397	u32 data;
3398
3399	if (instance == 0xffffffff)
3400		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3401	else
3402		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3403
3404	if (se_num == 0xffffffff)
3405		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3406	else
 
3407		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3408
3409	if (sh_num == 0xffffffff)
3410		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3411	else
3412		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3413
 
3414	WREG32(mmGRBM_GFX_INDEX, data);
3415}
3416
3417static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3418				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
3419{
3420	vi_srbm_select(adev, me, pipe, q, vm);
3421}
3422
3423static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3424{
3425	u32 data, mask;
3426
3427	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3428		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3429
3430	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
 
3431
3432	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3433					 adev->gfx.config.max_sh_per_se);
3434
3435	return (~data) & mask;
3436}
3437
3438static void
3439gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3440{
3441	switch (adev->asic_type) {
3442	case CHIP_FIJI:
3443	case CHIP_VEGAM:
3444		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3445			  RB_XSEL2(1) | PKR_MAP(2) |
3446			  PKR_XSEL(1) | PKR_YSEL(1) |
3447			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3448		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3449			   SE_PAIR_YSEL(2);
3450		break;
3451	case CHIP_TONGA:
3452	case CHIP_POLARIS10:
3453		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3454			  SE_XSEL(1) | SE_YSEL(1);
3455		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3456			   SE_PAIR_YSEL(2);
3457		break;
3458	case CHIP_TOPAZ:
3459	case CHIP_CARRIZO:
3460		*rconf |= RB_MAP_PKR0(2);
3461		*rconf1 |= 0x0;
3462		break;
3463	case CHIP_POLARIS11:
3464	case CHIP_POLARIS12:
3465		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3466			  SE_XSEL(1) | SE_YSEL(1);
3467		*rconf1 |= 0x0;
3468		break;
3469	case CHIP_STONEY:
3470		*rconf |= 0x0;
3471		*rconf1 |= 0x0;
3472		break;
3473	default:
3474		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3475		break;
3476	}
3477}
3478
3479static void
3480gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3481					u32 raster_config, u32 raster_config_1,
3482					unsigned rb_mask, unsigned num_rb)
3483{
3484	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3485	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3486	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3487	unsigned rb_per_se = num_rb / num_se;
3488	unsigned se_mask[4];
3489	unsigned se;
3490
3491	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3492	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3493	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3494	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3495
3496	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3497	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3498	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3499
3500	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3501			     (!se_mask[2] && !se_mask[3]))) {
3502		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3503
3504		if (!se_mask[0] && !se_mask[1]) {
3505			raster_config_1 |=
3506				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3507		} else {
3508			raster_config_1 |=
3509				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3510		}
3511	}
3512
3513	for (se = 0; se < num_se; se++) {
3514		unsigned raster_config_se = raster_config;
3515		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3516		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3517		int idx = (se / 2) * 2;
3518
3519		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3520			raster_config_se &= ~SE_MAP_MASK;
3521
3522			if (!se_mask[idx]) {
3523				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3524			} else {
3525				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3526			}
3527		}
3528
3529		pkr0_mask &= rb_mask;
3530		pkr1_mask &= rb_mask;
3531		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3532			raster_config_se &= ~PKR_MAP_MASK;
3533
3534			if (!pkr0_mask) {
3535				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3536			} else {
3537				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3538			}
3539		}
3540
3541		if (rb_per_se >= 2) {
3542			unsigned rb0_mask = 1 << (se * rb_per_se);
3543			unsigned rb1_mask = rb0_mask << 1;
3544
3545			rb0_mask &= rb_mask;
3546			rb1_mask &= rb_mask;
3547			if (!rb0_mask || !rb1_mask) {
3548				raster_config_se &= ~RB_MAP_PKR0_MASK;
3549
3550				if (!rb0_mask) {
3551					raster_config_se |=
3552						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3553				} else {
3554					raster_config_se |=
3555						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3556				}
3557			}
3558
3559			if (rb_per_se > 2) {
3560				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3561				rb1_mask = rb0_mask << 1;
3562				rb0_mask &= rb_mask;
3563				rb1_mask &= rb_mask;
3564				if (!rb0_mask || !rb1_mask) {
3565					raster_config_se &= ~RB_MAP_PKR1_MASK;
3566
3567					if (!rb0_mask) {
3568						raster_config_se |=
3569							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3570					} else {
3571						raster_config_se |=
3572							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3573					}
3574				}
3575			}
3576		}
3577
3578		/* GRBM_GFX_INDEX has a different offset on VI */
3579		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
3580		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3581		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3582	}
3583
3584	/* GRBM_GFX_INDEX has a different offset on VI */
3585	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3586}
3587
3588static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3589{
3590	int i, j;
3591	u32 data;
3592	u32 raster_config = 0, raster_config_1 = 0;
3593	u32 active_rbs = 0;
3594	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3595					adev->gfx.config.max_sh_per_se;
3596	unsigned num_rb_pipes;
3597
3598	mutex_lock(&adev->grbm_idx_mutex);
3599	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3600		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3601			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3602			data = gfx_v8_0_get_rb_active_bitmap(adev);
3603			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3604					       rb_bitmap_width_per_sh);
3605		}
3606	}
3607	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
3608
3609	adev->gfx.config.backend_enable_mask = active_rbs;
3610	adev->gfx.config.num_rbs = hweight32(active_rbs);
3611
3612	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3613			     adev->gfx.config.max_shader_engines, 16);
3614
3615	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3616
3617	if (!adev->gfx.config.backend_enable_mask ||
3618			adev->gfx.config.num_rbs >= num_rb_pipes) {
3619		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3620		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3621	} else {
3622		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3623							adev->gfx.config.backend_enable_mask,
3624							num_rb_pipes);
3625	}
3626
3627	/* cache the values for userspace */
3628	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3629		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3630			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3631			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3632				RREG32(mmCC_RB_BACKEND_DISABLE);
3633			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3634				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3635			adev->gfx.config.rb_config[i][j].raster_config =
3636				RREG32(mmPA_SC_RASTER_CONFIG);
3637			adev->gfx.config.rb_config[i][j].raster_config_1 =
3638				RREG32(mmPA_SC_RASTER_CONFIG_1);
3639		}
3640	}
3641	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3642	mutex_unlock(&adev->grbm_idx_mutex);
3643}
3644
3645#define DEFAULT_SH_MEM_BASES	(0x6000)
3646/**
3647 * gfx_v8_0_init_compute_vmid - gart enable
3648 *
3649 * @adev: amdgpu_device pointer
3650 *
3651 * Initialize compute vmid sh_mem registers
3652 *
3653 */
 
 
 
3654static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3655{
3656	int i;
3657	uint32_t sh_mem_config;
3658	uint32_t sh_mem_bases;
3659
3660	/*
3661	 * Configure apertures:
3662	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3663	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3664	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3665	 */
3666	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3667
3668	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3669			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3670			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3671			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3672			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3673			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3674
3675	mutex_lock(&adev->srbm_mutex);
3676	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3677		vi_srbm_select(adev, 0, 0, 0, i);
3678		/* CP and shaders */
3679		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3680		WREG32(mmSH_MEM_APE1_BASE, 1);
3681		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3682		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3683	}
3684	vi_srbm_select(adev, 0, 0, 0, 0);
3685	mutex_unlock(&adev->srbm_mutex);
3686
3687	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3688	   access. These should be enabled by FW for target VMIDs. */
3689	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3690		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3691		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3692		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3693		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3694	}
3695}
3696
3697static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3698{
3699	int vmid;
 
3700
3701	/*
3702	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3703	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3704	 * the driver can enable them for graphics. VMID0 should maintain
3705	 * access so that HWS firmware can save/restore entries.
3706	 */
3707	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3708		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3709		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3710		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3711		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3712	}
3713}
3714
3715static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3716{
3717	switch (adev->asic_type) {
3718	default:
3719		adev->gfx.config.double_offchip_lds_buf = 1;
3720		break;
3721	case CHIP_CARRIZO:
3722	case CHIP_STONEY:
3723		adev->gfx.config.double_offchip_lds_buf = 0;
3724		break;
3725	}
3726}
3727
3728static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3729{
3730	u32 tmp, sh_static_mem_cfg;
3731	int i;
3732
3733	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3734	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3735	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3736	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3737
3738	gfx_v8_0_tiling_mode_table_init(adev);
 
3739	gfx_v8_0_setup_rb(adev);
3740	gfx_v8_0_get_cu_info(adev);
3741	gfx_v8_0_config_init(adev);
3742
3743	/* XXX SH_MEM regs */
3744	/* where to put LDS, scratch, GPUVM in FSA64 space */
3745	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3746				   SWIZZLE_ENABLE, 1);
3747	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3748				   ELEMENT_SIZE, 1);
3749	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3750				   INDEX_STRIDE, 3);
3751	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3752
3753	mutex_lock(&adev->srbm_mutex);
3754	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3755		vi_srbm_select(adev, 0, 0, 0, i);
3756		/* CP and shaders */
3757		if (i == 0) {
3758			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3759			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3760			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3761					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3762			WREG32(mmSH_MEM_CONFIG, tmp);
3763			WREG32(mmSH_MEM_BASES, 0);
3764		} else {
3765			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3766			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3767			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3768					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3769			WREG32(mmSH_MEM_CONFIG, tmp);
3770			tmp = adev->gmc.shared_aperture_start >> 48;
3771			WREG32(mmSH_MEM_BASES, tmp);
3772		}
3773
3774		WREG32(mmSH_MEM_APE1_BASE, 1);
3775		WREG32(mmSH_MEM_APE1_LIMIT, 0);
 
3776	}
3777	vi_srbm_select(adev, 0, 0, 0, 0);
3778	mutex_unlock(&adev->srbm_mutex);
3779
3780	gfx_v8_0_init_compute_vmid(adev);
3781	gfx_v8_0_init_gds_vmid(adev);
3782
3783	mutex_lock(&adev->grbm_idx_mutex);
3784	/*
3785	 * making sure that the following register writes will be broadcasted
3786	 * to all the shaders
3787	 */
3788	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3789
3790	WREG32(mmPA_SC_FIFO_SIZE,
3791		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3792			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3793		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3794			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3795		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3796			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3797		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3798			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3799
3800	tmp = RREG32(mmSPI_ARB_PRIORITY);
3801	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3802	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3803	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3804	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3805	WREG32(mmSPI_ARB_PRIORITY, tmp);
3806
3807	mutex_unlock(&adev->grbm_idx_mutex);
3808
3809}
3810
3811static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3812{
3813	u32 i, j, k;
3814	u32 mask;
3815
3816	mutex_lock(&adev->grbm_idx_mutex);
3817	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3818		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3819			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3820			for (k = 0; k < adev->usec_timeout; k++) {
3821				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3822					break;
3823				udelay(1);
3824			}
3825			if (k == adev->usec_timeout) {
3826				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3827						      0xffffffff, 0xffffffff, 0);
3828				mutex_unlock(&adev->grbm_idx_mutex);
3829				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3830					 i, j);
3831				return;
3832			}
3833		}
3834	}
3835	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3836	mutex_unlock(&adev->grbm_idx_mutex);
3837
3838	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3839		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3840		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3841		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3842	for (k = 0; k < adev->usec_timeout; k++) {
3843		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3844			break;
3845		udelay(1);
3846	}
3847}
3848
3849static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3850					       bool enable)
3851{
3852	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3853
3854	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3855	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3856	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3857	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3858
3859	WREG32(mmCP_INT_CNTL_RING0, tmp);
3860}
3861
3862static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3863{
3864	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3865	/* csib */
3866	WREG32(mmRLC_CSIB_ADDR_HI,
3867			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3868	WREG32(mmRLC_CSIB_ADDR_LO,
3869			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3870	WREG32(mmRLC_CSIB_LENGTH,
3871			adev->gfx.rlc.clear_state_size);
3872}
3873
/*
 * gfx_v8_0_parse_ind_reg_list - index the indirect part of a register list
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: index of the first element to parse
 * @list_size: number of elements in @register_list_format
 * @unique_indices: output table of distinct index values found
 * @indices_count: in/out number of valid entries in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: output table of entry start positions
 * @offset_count: in/out number of valid entries in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is a sequence of entries, each terminated by 0xFFFFFFFF.  The
 * position where every entry begins is appended to @ind_start_offsets.
 * Inside an entry, records are consumed three elements at a time; the third
 * element of each record is looked up in (or appended to) @unique_indices
 * and then overwritten with its position in that table.
 *
 * BUG_ON() fires as soon as either output table becomes full.  A record that
 * is cut short by the end of the list is ignored instead of being read from
 * beyond @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* remember where this entry starts */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* end-of-entry marker: the next element opens a new entry */
			new_entry = true;
			continue;
		}

		/* skip to the third element of the record, which holds the index */
		ind_offset += 2;

		/*
		 * A truncated trailing record would otherwise be read and
		 * written past the end of the list; stop parsing instead.
		 */
		if (ind_offset >= list_size)
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* first occurrence: append it to the unique table */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its slot in the unique table */
		register_list_format[ind_offset] = indices;
	}
}
3923
3924static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3925{
3926	int i, temp, data;
3927	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3928	int indices_count = 0;
3929	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3930	int offset_count = 0;
3931
3932	int list_size;
3933	unsigned int *register_list_format =
3934		kmemdup(adev->gfx.rlc.register_list_format,
3935			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3936	if (!register_list_format)
3937		return -ENOMEM;
3938
3939	gfx_v8_0_parse_ind_reg_list(register_list_format,
3940				RLC_FormatDirectRegListLength,
3941				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3942				unique_indices,
3943				&indices_count,
3944				ARRAY_SIZE(unique_indices),
3945				indirect_start_offsets,
3946				&offset_count,
3947				ARRAY_SIZE(indirect_start_offsets));
3948
3949	/* save and restore list */
3950	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3951
3952	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3953	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3954		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3955
3956	/* indirect list */
3957	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3958	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3959		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3960
3961	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3962	list_size = list_size >> 1;
3963	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3964	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3965
3966	/* starting offsets starts */
3967	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3968		adev->gfx.rlc.starting_offsets_start);
3969	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3970		WREG32(mmRLC_GPM_SCRATCH_DATA,
3971				indirect_start_offsets[i]);
3972
3973	/* unique indices */
3974	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3975	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3976	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3977		if (unique_indices[i] != 0) {
3978			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3979			WREG32(data + i, unique_indices[i] >> 20);
3980		}
3981	}
3982	kfree(register_list_format);
3983
3984	return 0;
 
 
 
 
 
3985}
3986
3987static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3988{
3989	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3990}
3991
3992static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3993{
3994	uint32_t data;
3995
3996	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3997
3998	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3999	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4000	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4001	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4002	WREG32(mmRLC_PG_DELAY, data);
4003
4004	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4005	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4006
 
4007}
4008
4009static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4010						bool enable)
4011{
4012	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4013}
 
4014
4015static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4016						  bool enable)
4017{
4018	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4019}
4020
4021static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4022{
4023	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4024}
4025
4026static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4027{
4028	if ((adev->asic_type == CHIP_CARRIZO) ||
4029	    (adev->asic_type == CHIP_STONEY)) {
4030		gfx_v8_0_init_csb(adev);
4031		gfx_v8_0_init_save_restore_list(adev);
4032		gfx_v8_0_enable_save_restore_machine(adev);
4033		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4034		gfx_v8_0_init_power_gating(adev);
4035		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4036	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4037		   (adev->asic_type == CHIP_POLARIS12) ||
4038		   (adev->asic_type == CHIP_VEGAM)) {
4039		gfx_v8_0_init_csb(adev);
4040		gfx_v8_0_init_save_restore_list(adev);
4041		gfx_v8_0_enable_save_restore_machine(adev);
4042		gfx_v8_0_init_power_gating(adev);
4043	}
4044
 
4045}
4046
4047static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4048{
4049	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4050
4051	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4052	gfx_v8_0_wait_for_rlc_serdes(adev);
4053}
4054
4055static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4056{
4057	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4058	udelay(50);
4059
4060	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4061	udelay(50);
4062}
4063
4064static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4065{
4066	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4067
4068	/* carrizo do enable cp interrupt after cp inited */
4069	if (!(adev->flags & AMD_IS_APU))
4070		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4071
4072	udelay(50);
4073}
4074
4075static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4076{
4077	if (amdgpu_sriov_vf(adev)) {
4078		gfx_v8_0_init_csb(adev);
4079		return 0;
 
 
4080	}
4081
4082	adev->gfx.rlc.funcs->stop(adev);
4083	adev->gfx.rlc.funcs->reset(adev);
4084	gfx_v8_0_init_pg(adev);
4085	adev->gfx.rlc.funcs->start(adev);
4086
4087	return 0;
4088}
4089
4090static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4091{
 
4092	u32 tmp = RREG32(mmCP_ME_CNTL);
4093
4094	if (enable) {
4095		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4096		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4097		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4098	} else {
4099		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4100		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4101		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
 
 
4102	}
4103	WREG32(mmCP_ME_CNTL, tmp);
4104	udelay(50);
4105}
4106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4107static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4108{
4109	u32 count = 0;
4110	const struct cs_section_def *sect = NULL;
4111	const struct cs_extent_def *ext = NULL;
4112
4113	/* begin clear state */
4114	count += 2;
4115	/* context control state */
4116	count += 3;
4117
4118	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4119		for (ext = sect->section; ext->extent != NULL; ++ext) {
4120			if (sect->id == SECT_CONTEXT)
4121				count += 2 + ext->reg_count;
4122			else
4123				return 0;
4124		}
4125	}
4126	/* pa_sc_raster_config/pa_sc_raster_config1 */
4127	count += 4;
4128	/* end clear state */
4129	count += 2;
4130	/* clear state */
4131	count += 2;
4132
4133	return count;
4134}
4135
4136static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4137{
4138	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4139	const struct cs_section_def *sect = NULL;
4140	const struct cs_extent_def *ext = NULL;
4141	int r, i;
4142
4143	/* init the CP */
4144	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4145	WREG32(mmCP_ENDIAN_SWAP, 0);
4146	WREG32(mmCP_DEVICE_ID, 1);
4147
4148	gfx_v8_0_cp_gfx_enable(adev, true);
4149
4150	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4151	if (r) {
4152		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4153		return r;
4154	}
4155
4156	/* clear state buffer */
4157	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4158	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4159
4160	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4161	amdgpu_ring_write(ring, 0x80000000);
4162	amdgpu_ring_write(ring, 0x80000000);
4163
4164	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4165		for (ext = sect->section; ext->extent != NULL; ++ext) {
4166			if (sect->id == SECT_CONTEXT) {
4167				amdgpu_ring_write(ring,
4168				       PACKET3(PACKET3_SET_CONTEXT_REG,
4169					       ext->reg_count));
4170				amdgpu_ring_write(ring,
4171				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4172				for (i = 0; i < ext->reg_count; i++)
4173					amdgpu_ring_write(ring, ext->extent[i]);
4174			}
4175		}
4176	}
4177
4178	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4179	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4180	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4181	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4182
4183	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4184	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4185
4186	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4187	amdgpu_ring_write(ring, 0);
4188
4189	/* init the CE partitions */
4190	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4191	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4192	amdgpu_ring_write(ring, 0x8000);
4193	amdgpu_ring_write(ring, 0x8000);
4194
4195	amdgpu_ring_commit(ring);
4196
4197	return 0;
4198}
4199static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4200{
4201	u32 tmp;
4202	/* no gfx doorbells on iceland */
4203	if (adev->asic_type == CHIP_TOPAZ)
4204		return;
4205
4206	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4207
4208	if (ring->use_doorbell) {
4209		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4210				DOORBELL_OFFSET, ring->doorbell_index);
4211		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4212						DOORBELL_HIT, 0);
4213		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4214					    DOORBELL_EN, 1);
4215	} else {
4216		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4217	}
4218
4219	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4220
4221	if (adev->flags & AMD_IS_APU)
4222		return;
4223
4224	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4225					DOORBELL_RANGE_LOWER,
4226					adev->doorbell_index.gfx_ring0);
4227	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4228
4229	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4230		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4231}
4232
/*
 * gfx_v8_0_cp_gfx_resume - program and start gfx ring 0
 *
 * @adev: amdgpu_device pointer
 *
 * Writes the CP ring-buffer registers (control, write pointer, rptr
 * write-back address, wptr poll address, base address) for
 * adev->gfx.gfx_ring[0], configures its doorbell, clears the ring and
 * calls gfx_v8_0_cp_gfx_start().
 *
 * Always returns 0.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	/* RB_RPTR_WR_ENA stays set until CP_RB0_CNTL is rewritten below */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = ring->rptr_gpu_addr;
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	/* only the low 8 bits of the upper dword are written */
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* wait 1 ms, then restore CP_RB0_CNTL without RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* the base registers take the ring GPU address shifted right by 8 */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);

	return 0;
}
4285
4286static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4287{
 
 
4288	if (enable) {
4289		WREG32(mmCP_MEC_CNTL, 0);
4290	} else {
4291		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4292		adev->gfx.kiq[0].ring.sched.ready = false;
 
4293	}
4294	udelay(50);
4295}
4296
4297/* KIQ functions */
4298static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4299{
4300	uint32_t tmp;
4301	struct amdgpu_device *adev = ring->adev;
 
 
 
 
4302
4303	/* tell RLC which is KIQ queue */
4304	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4305	tmp &= 0xffffff00;
4306	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4307	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4308	tmp |= 0x80;
4309	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4310}
4311
4312static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4313{
4314	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4315	uint64_t queue_mask = 0;
4316	int r, i;
4317
4318	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4319		if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
4320			continue;
4321
4322		/* This situation may be hit in the future if a new HW
4323		 * generation exposes more than 64 queues. If so, the
4324		 * definition of queue_mask needs updating */
4325		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4326			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4327			break;
4328		}
4329
4330		queue_mask |= (1ull << i);
4331	}
 
4332
4333	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4334	if (r) {
4335		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4336		return r;
4337	}
4338	/* set resources */
4339	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4340	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4341	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4342	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4343	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4344	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4345	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4346	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4347	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4348		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4349		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4350		uint64_t wptr_addr = ring->wptr_gpu_addr;
4351
4352		/* map queues */
4353		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4354		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4355		amdgpu_ring_write(kiq_ring,
4356				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4357		amdgpu_ring_write(kiq_ring,
4358				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4359				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4360				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4361				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4362		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4363		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4364		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4365		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4366	}
4367
4368	amdgpu_ring_commit(kiq_ring);
4369
4370	return 0;
4371}
4372
4373static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4374{
4375	int i, r = 0;
4376
4377	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4378		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4379		for (i = 0; i < adev->usec_timeout; i++) {
4380			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4381				break;
4382			udelay(1);
4383		}
4384		if (i == adev->usec_timeout)
4385			r = -ETIMEDOUT;
4386	}
4387	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4388	WREG32(mmCP_HQD_PQ_RPTR, 0);
4389	WREG32(mmCP_HQD_PQ_WPTR, 0);
4390
4391	return r;
4392}
 
 
4393
4394static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4395{
4396	struct amdgpu_device *adev = ring->adev;
4397
4398	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4399		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4400			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4401			mqd->cp_hqd_queue_priority =
4402				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4403		}
4404	}
4405}
4406
/*
 * gfx_v8_0_mqd_init - fill in the memory queue descriptor for a ring
 *
 * @ring: ring whose mapped MQD (ring->mqd_ptr) is initialized
 *
 * Populates the vi_mqd structure from the ring's GPU addresses, size and
 * doorbell settings. Several fields start from the current value of the
 * matching CP_HQD_* / CP_MQD_* register, which is read here; no register
 * is written by this function. Callers in this file invoke it between
 * vi_srbm_select() calls made under srbm_mutex.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* GPU address of dynamic_cu_mask inside the enclosing vi_mqd_allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	/* the EOP base is stored as the GPU address shifted right by 8 */
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	/* provisional value: this field is assigned again further down */
	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* without a doorbell the control word stored below is 0 */
	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	/* final doorbell control value; replaces the provisional one above */
	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults */
	/* these fields are copied unchanged from the current register values */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
 
 
 
 
 
 
 
 
 
 
 
 
4553
/*
 * gfx_v8_0_mqd_commit - write an MQD image into the HQD registers
 *
 * @adev: amdgpu_device pointer
 * @mqd: MQD whose contents are written out
 *
 * The MQD is treated as an array of dwords starting at cp_mqd_base_addr_lo
 * and indexed by (register - mmCP_MQD_BASE_ADDR). Registers are written in
 * three ranges, with the range ending at mmCP_HQD_ACTIVE written last.
 * Callers in this file invoke it between vi_srbm_select() calls made
 * under srbm_mutex.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	/* remaining registers, from mmCP_HQD_EOP_EVENTS up to mmCP_HQD_ERROR */
	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4590
4591static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4592{
4593	struct amdgpu_device *adev = ring->adev;
4594	struct vi_mqd *mqd = ring->mqd_ptr;
4595
4596	gfx_v8_0_kiq_setting(ring);
4597
4598	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4599		/* reset MQD to a clean status */
4600		if (adev->gfx.kiq[0].mqd_backup)
4601			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
4602
4603		/* reset ring buffer */
4604		ring->wptr = 0;
4605		amdgpu_ring_clear_ring(ring);
4606		mutex_lock(&adev->srbm_mutex);
4607		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4608		gfx_v8_0_mqd_commit(adev, mqd);
4609		vi_srbm_select(adev, 0, 0, 0, 0);
4610		mutex_unlock(&adev->srbm_mutex);
4611	} else {
4612		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4613		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4614		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4615		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4616			amdgpu_ring_clear_ring(ring);
4617		mutex_lock(&adev->srbm_mutex);
4618		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4619		gfx_v8_0_mqd_init(ring);
4620		gfx_v8_0_mqd_commit(adev, mqd);
4621		vi_srbm_select(adev, 0, 0, 0, 0);
4622		mutex_unlock(&adev->srbm_mutex);
4623
4624		if (adev->gfx.kiq[0].mqd_backup)
4625			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
4626	}
4627
4628	return 0;
4629}
4630
4631static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4632{
4633	struct amdgpu_device *adev = ring->adev;
4634	struct vi_mqd *mqd = ring->mqd_ptr;
4635	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4636
4637	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4638		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4639		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4640		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4641		mutex_lock(&adev->srbm_mutex);
4642		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4643		gfx_v8_0_mqd_init(ring);
4644		vi_srbm_select(adev, 0, 0, 0, 0);
4645		mutex_unlock(&adev->srbm_mutex);
4646
4647		if (adev->gfx.mec.mqd_backup[mqd_idx])
4648			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4649	} else {
4650		/* restore MQD to a clean status */
4651		if (adev->gfx.mec.mqd_backup[mqd_idx])
4652			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4653		/* reset ring buffer */
4654		ring->wptr = 0;
4655		amdgpu_ring_clear_ring(ring);
4656	}
4657	return 0;
4658}
4659
4660static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4661{
4662	if (adev->asic_type > CHIP_TONGA) {
4663		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4664		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4665	}
4666	/* enable doorbells */
4667	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4668}
4669
4670static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4671{
4672	struct amdgpu_ring *ring;
4673	int r;
4674
4675	ring = &adev->gfx.kiq[0].ring;
4676
4677	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4678	if (unlikely(r != 0))
4679		return r;
4680
4681	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4682	if (unlikely(r != 0)) {
4683		amdgpu_bo_unreserve(ring->mqd_obj);
4684		return r;
4685	}
4686
4687	gfx_v8_0_kiq_init_queue(ring);
4688	amdgpu_bo_kunmap(ring->mqd_obj);
4689	ring->mqd_ptr = NULL;
4690	amdgpu_bo_unreserve(ring->mqd_obj);
4691	return 0;
4692}
4693
4694static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4695{
4696	struct amdgpu_ring *ring = NULL;
4697	int r = 0, i;
4698
4699	gfx_v8_0_cp_compute_enable(adev, true);
4700
4701	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4702		ring = &adev->gfx.compute_ring[i];
4703
4704		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4705		if (unlikely(r != 0))
4706			goto done;
4707		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4708		if (!r) {
4709			r = gfx_v8_0_kcq_init_queue(ring);
4710			amdgpu_bo_kunmap(ring->mqd_obj);
4711			ring->mqd_ptr = NULL;
4712		}
4713		amdgpu_bo_unreserve(ring->mqd_obj);
4714		if (r)
4715			goto done;
4716	}
4717
4718	gfx_v8_0_set_mec_doorbell_range(adev);
4719
4720	r = gfx_v8_0_kiq_kcq_enable(adev);
4721	if (r)
4722		goto done;
4723
4724done:
4725	return r;
4726}
4727
4728static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4729{
4730	int r, i;
4731	struct amdgpu_ring *ring;
4732
4733	/* collect all the ring_tests here, gfx, kiq, compute */
4734	ring = &adev->gfx.gfx_ring[0];
4735	r = amdgpu_ring_test_helper(ring);
4736	if (r)
4737		return r;
4738
4739	ring = &adev->gfx.kiq[0].ring;
4740	r = amdgpu_ring_test_helper(ring);
4741	if (r)
4742		return r;
4743
4744	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4745		ring = &adev->gfx.compute_ring[i];
4746		amdgpu_ring_test_helper(ring);
4747	}
4748
4749	return 0;
4750}
4751
4752static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4753{
4754	int r;
4755
4756	if (!(adev->flags & AMD_IS_APU))
4757		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4758
4759	r = gfx_v8_0_kiq_resume(adev);
4760	if (r)
4761		return r;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4762
4763	r = gfx_v8_0_cp_gfx_resume(adev);
4764	if (r)
4765		return r;
4766
4767	r = gfx_v8_0_kcq_resume(adev);
4768	if (r)
4769		return r;
4770
4771	r = gfx_v8_0_cp_test_all_rings(adev);
4772	if (r)
4773		return r;
4774
4775	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4776
4777	return 0;
4778}
4779
/*
 * gfx_v8_0_cp_enable - enable or disable both CP front ends
 *
 * @adev: amdgpu_device pointer
 * @enable: forwarded unchanged to the gfx and then the compute helper
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	/* gfx first, compute second */
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4785
4786static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
4787{
4788	int r;
4789	struct amdgpu_device *adev = ip_block->adev;
4790
4791	gfx_v8_0_init_golden_registers(adev);
4792	gfx_v8_0_constants_init(adev);
4793
4794	r = adev->gfx.rlc.funcs->resume(adev);
 
 
4795	if (r)
4796		return r;
4797
4798	r = gfx_v8_0_cp_resume(adev);
 
 
4799
4800	return r;
4801}
4802
4803static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4804{
4805	int r, i;
4806	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4807
4808	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4809	if (r)
4810		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 
 
4811
4812	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4813		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4814
4815		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4816		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4817						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4818						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4819						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4820						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4821		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4822		amdgpu_ring_write(kiq_ring, 0);
4823		amdgpu_ring_write(kiq_ring, 0);
4824		amdgpu_ring_write(kiq_ring, 0);
4825	}
4826	/* Submit unmap queue packet */
4827	amdgpu_ring_commit(kiq_ring);
4828	/*
4829	 * Ring test will do a basic scratch register change check. Just run
4830	 * this to ensure that unmap queues that is submitted before got
4831	 * processed successfully before returning.
4832	 */
4833	r = amdgpu_ring_test_helper(kiq_ring);
4834	if (r)
4835		DRM_ERROR("KCQ disable failed\n");
4836
4837	return r;
4838}
4839
4840static bool gfx_v8_0_is_idle(void *handle)
4841{
4842	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4843
4844	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4845		|| RREG32(mmGRBM_STATUS2) != 0x8)
4846		return false;
4847	else
4848		return true;
4849}
4850
4851static bool gfx_v8_0_rlc_is_idle(void *handle)
4852{
4853	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4854
4855	if (RREG32(mmGRBM_STATUS2) != 0x8)
4856		return false;
4857	else
4858		return true;
4859}
4860
4861static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4862{
4863	unsigned int i;
 
4864	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4865
4866	for (i = 0; i < adev->usec_timeout; i++) {
4867		if (gfx_v8_0_rlc_is_idle(handle))
 
 
 
4868			return 0;
4869
4870		udelay(1);
4871	}
4872	return -ETIMEDOUT;
4873}
4874
4875static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4876{
4877	unsigned int i;
4878	struct amdgpu_device *adev = ip_block->adev;
4879
4880	for (i = 0; i < adev->usec_timeout; i++) {
4881		if (gfx_v8_0_is_idle(adev))
4882			return 0;
4883
4884		udelay(1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4885	}
4886	return -ETIMEDOUT;
4887}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4888
4889static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
4890{
4891	struct amdgpu_device *adev = ip_block->adev;
4892
4893	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4894	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4895
4896	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4897
4898	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4899
4900	/* disable KCQ to avoid CPC touch memory not valid anymore */
4901	gfx_v8_0_kcq_disable(adev);
4902
4903	if (amdgpu_sriov_vf(adev)) {
4904		pr_debug("For SRIOV client, shouldn't do anything.\n");
4905		return 0;
4906	}
4907
4908	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4909	if (!gfx_v8_0_wait_for_idle(ip_block))
4910		gfx_v8_0_cp_enable(adev, false);
4911	else
4912		pr_err("cp is busy, skip halt cp\n");
4913	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4914		adev->gfx.rlc.funcs->stop(adev);
4915	else
4916		pr_err("rlc is busy, skip halt rlc\n");
4917	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4918
4919	return 0;
4920}
4921
/*
 * gfx_v8_0_suspend - suspend callback; identical to hardware teardown
 *
 * @ip_block: IP block being suspended
 *
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
	int ret = gfx_v8_0_hw_fini(ip_block);

	return ret;
}
4926
/*
 * gfx_v8_0_resume - resume callback; identical to hardware init
 *
 * @ip_block: IP block being resumed
 *
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
	int ret = gfx_v8_0_hw_init(ip_block);

	return ret;
}
4931
4932static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
4933{
4934	struct amdgpu_device *adev = ip_block->adev;
4935	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4936	u32 tmp;
 
4937
4938	/* GRBM_STATUS */
4939	tmp = RREG32(mmGRBM_STATUS);
4940	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4941		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4942		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4943		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4944		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4945		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4946		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4947		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4948						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4949		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4950						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
 
 
 
 
 
4951		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4952						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4953	}
4954
4955	/* GRBM_STATUS2 */
4956	tmp = RREG32(mmGRBM_STATUS2);
4957	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4958		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4959						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4960
4961	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4962	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4963	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4964		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4965						SOFT_RESET_CPF, 1);
4966		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4967						SOFT_RESET_CPC, 1);
4968		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4969						SOFT_RESET_CPG, 1);
4970		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4971						SOFT_RESET_GRBM, 1);
4972	}
4973
4974	/* SRBM_STATUS */
4975	tmp = RREG32(mmSRBM_STATUS);
4976	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4977		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4978						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4979	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4980		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4981						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4982
4983	if (grbm_soft_reset || srbm_soft_reset) {
4984		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4985		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4986		return true;
4987	} else {
4988		adev->gfx.grbm_soft_reset = 0;
4989		adev->gfx.srbm_soft_reset = 0;
4990		return false;
4991	}
4992}
4993
4994static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
4995{
4996	struct amdgpu_device *adev = ip_block->adev;
4997	u32 grbm_soft_reset = 0;
4998
4999	if ((!adev->gfx.grbm_soft_reset) &&
5000	    (!adev->gfx.srbm_soft_reset))
5001		return 0;
5002
5003	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5004
5005	/* stop the rlc */
5006	adev->gfx.rlc.funcs->stop(adev);
5007
5008	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5009	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5010		/* Disable GFX parsing/prefetching */
5011		gfx_v8_0_cp_gfx_enable(adev, false);
5012
5013	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5014	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5015	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5016	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5017		int i;
5018
5019		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5020			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5021
5022			mutex_lock(&adev->srbm_mutex);
5023			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5024			gfx_v8_0_deactivate_hqd(adev, 2);
5025			vi_srbm_select(adev, 0, 0, 0, 0);
5026			mutex_unlock(&adev->srbm_mutex);
5027		}
5028		/* Disable MEC parsing/prefetching */
5029		gfx_v8_0_cp_compute_enable(adev, false);
5030	}
5031
5032	return 0;
5033}
 
 
 
 
 
5034
/*
 * gfx_v8_0_soft_reset - pulse the recorded GRBM/SRBM soft-reset bits
 *
 * @ip_block: IP block being reset
 *
 * Does nothing when neither adev->gfx.grbm_soft_reset nor
 * adev->gfx.srbm_soft_reset is set. Otherwise sets GFX_STALL and GFX_CLEAR
 * in GMCON_DEBUG, asserts and then de-asserts the requested bits in
 * GRBM_SOFT_RESET and SRBM_SOFT_RESET (50 us apart, each write followed by
 * a read-back), and finally clears GFX_STALL/GFX_CLEAR again.
 *
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* set GFX_STALL and GFX_CLEAR for the duration of the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the requested GRBM reset bits and read the register back */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* de-assert the same bits, again followed by a read-back */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert / wait / de-assert pulse for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* clear GFX_STALL and GFX_CLEAR again */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5096
5097static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
5098{
5099	struct amdgpu_device *adev = ip_block->adev;
5100	u32 grbm_soft_reset = 0;
5101
5102	if ((!adev->gfx.grbm_soft_reset) &&
5103	    (!adev->gfx.srbm_soft_reset))
5104		return 0;
5105
5106	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5107
5108	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5109	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5110	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5111	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5112		int i;
5113
5114		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5115			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5116
5117			mutex_lock(&adev->srbm_mutex);
5118			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5119			gfx_v8_0_deactivate_hqd(adev, 2);
5120			vi_srbm_select(adev, 0, 0, 0, 0);
5121			mutex_unlock(&adev->srbm_mutex);
5122		}
5123		gfx_v8_0_kiq_resume(adev);
5124		gfx_v8_0_kcq_resume(adev);
5125	}
5126
5127	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5128	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5129		gfx_v8_0_cp_gfx_resume(adev);
5130
5131	gfx_v8_0_cp_test_all_rings(adev);
5132
5133	adev->gfx.rlc.funcs->start(adev);
5134
5135	return 0;
5136}
5137
5138/**
5139 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5140 *
5141 * @adev: amdgpu_device pointer
5142 *
5143 * Fetches a GPU clock counter snapshot.
5144 * Returns the 64 bit clock counter snapshot.
5145 */
5146static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5147{
5148	uint64_t clock;
5149
5150	mutex_lock(&adev->gfx.gpu_clock_mutex);
5151	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5152	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5153		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5154	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5155	return clock;
5156}
5157
5158static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5159					  uint32_t vmid,
5160					  uint32_t gds_base, uint32_t gds_size,
5161					  uint32_t gws_base, uint32_t gws_size,
5162					  uint32_t oa_base, uint32_t oa_size)
5163{
 
 
 
 
 
 
 
 
 
5164	/* GDS Base */
5165	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5166	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5167				WRITE_DATA_DST_SEL(0)));
5168	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5169	amdgpu_ring_write(ring, 0);
5170	amdgpu_ring_write(ring, gds_base);
5171
5172	/* GDS Size */
5173	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5174	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5175				WRITE_DATA_DST_SEL(0)));
5176	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5177	amdgpu_ring_write(ring, 0);
5178	amdgpu_ring_write(ring, gds_size);
5179
5180	/* GWS */
5181	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5182	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5183				WRITE_DATA_DST_SEL(0)));
5184	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5185	amdgpu_ring_write(ring, 0);
5186	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5187
5188	/* OA */
5189	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191				WRITE_DATA_DST_SEL(0)));
5192	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5193	amdgpu_ring_write(ring, 0);
5194	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5195}
5196
5197static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5198{
5199	WREG32(mmSQ_IND_INDEX,
5200		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5201		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5202		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5203		(SQ_IND_INDEX__FORCE_READ_MASK));
5204	return RREG32(mmSQ_IND_DATA);
5205}
5206
5207static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5208			   uint32_t wave, uint32_t thread,
5209			   uint32_t regno, uint32_t num, uint32_t *out)
5210{
5211	WREG32(mmSQ_IND_INDEX,
5212		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5213		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5214		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5215		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5216		(SQ_IND_INDEX__FORCE_READ_MASK) |
5217		(SQ_IND_INDEX__AUTO_INCR_MASK));
5218	while (num--)
5219		*(out++) = RREG32(mmSQ_IND_DATA);
5220}
5221
5222static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5223{
5224	/* type 0 wave data */
5225	dst[(*no_fields)++] = 0;
5226	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5227	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5228	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5229	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5230	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5231	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5232	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5233	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5234	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5235	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5236	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5237	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5238	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5239	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5240	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5241	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5242	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5243	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5244	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5245}
5246
5247static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
5248				     uint32_t wave, uint32_t start,
5249				     uint32_t size, uint32_t *dst)
5250{
5251	wave_read_regs(
5252		adev, simd, wave, 0,
5253		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5254}
5255
5256
5257static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5258	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5259	.select_se_sh = &gfx_v8_0_select_se_sh,
5260	.read_wave_data = &gfx_v8_0_read_wave_data,
5261	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5262	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5263};
5264
5265static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
5266{
5267	struct amdgpu_device *adev = ip_block->adev;
5268
5269	adev->gfx.xcc_mask = 1;
5270	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5271	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5272					  AMDGPU_MAX_COMPUTE_RINGS);
5273	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5274	gfx_v8_0_set_ring_funcs(adev);
5275	gfx_v8_0_set_irq_funcs(adev);
5276	gfx_v8_0_set_gds_init(adev);
5277	gfx_v8_0_set_rlc_funcs(adev);
5278
5279	return 0;
5280}
5281
5282static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
5283{
5284	struct amdgpu_device *adev = ip_block->adev;
5285	int r;
5286
5287	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5288	if (r)
5289		return r;
5290
5291	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5292	if (r)
5293		return r;
5294
5295	/* requires IBs so do in late init after IB pool is initialized */
5296	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5297	if (r)
5298		return r;
5299
5300	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5301	if (r) {
5302		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5303		return r;
5304	}
5305
5306	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5307	if (r) {
5308		DRM_ERROR(
5309			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5310			r);
5311		return r;
5312	}
5313
5314	return 0;
5315}
5316
5317static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5318						       bool enable)
5319{
5320	if ((adev->asic_type == CHIP_POLARIS11) ||
5321	    (adev->asic_type == CHIP_POLARIS12) ||
5322	    (adev->asic_type == CHIP_VEGAM))
5323		/* Send msg to SMU via Powerplay */
5324		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5325
5326	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5327}
5328
5329static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5330							bool enable)
5331{
5332	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5333}
5334
5335static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5336		bool enable)
5337{
5338	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5339}
5340
5341static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5342					  bool enable)
5343{
5344	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5345}
5346
5347static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5348						bool enable)
5349{
5350	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5351
5352	/* Read any GFX register to wake up GFX. */
5353	if (!enable)
5354		RREG32(mmDB_RENDER_CONTROL);
5355}
5356
5357static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5358					  bool enable)
5359{
5360	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5361		cz_enable_gfx_cg_power_gating(adev, true);
5362		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5363			cz_enable_gfx_pipeline_power_gating(adev, true);
5364	} else {
5365		cz_enable_gfx_cg_power_gating(adev, false);
5366		cz_enable_gfx_pipeline_power_gating(adev, false);
5367	}
5368}
5369
5370static int gfx_v8_0_set_powergating_state(void *handle,
5371					  enum amd_powergating_state state)
5372{
5373	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5374	bool enable = (state == AMD_PG_STATE_GATE);
5375
5376	if (amdgpu_sriov_vf(adev))
5377		return 0;
5378
5379	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5380				AMD_PG_SUPPORT_RLC_SMU_HS |
5381				AMD_PG_SUPPORT_CP |
5382				AMD_PG_SUPPORT_GFX_DMG))
5383		amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5384	switch (adev->asic_type) {
5385	case CHIP_CARRIZO:
5386	case CHIP_STONEY:
5387
5388		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5389			cz_enable_sck_slow_down_on_power_up(adev, true);
5390			cz_enable_sck_slow_down_on_power_down(adev, true);
5391		} else {
5392			cz_enable_sck_slow_down_on_power_up(adev, false);
5393			cz_enable_sck_slow_down_on_power_down(adev, false);
5394		}
5395		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5396			cz_enable_cp_power_gating(adev, true);
5397		else
5398			cz_enable_cp_power_gating(adev, false);
5399
5400		cz_update_gfx_cg_power_gating(adev, enable);
5401
5402		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5403			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5404		else
5405			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5406
5407		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5408			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5409		else
5410			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5411		break;
5412	case CHIP_POLARIS11:
5413	case CHIP_POLARIS12:
5414	case CHIP_VEGAM:
5415		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5416			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5417		else
5418			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5419
5420		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5421			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5422		else
5423			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5424
5425		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5426			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5427		else
5428			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5429		break;
5430	default:
5431		break;
5432	}
5433	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5434				AMD_PG_SUPPORT_RLC_SMU_HS |
5435				AMD_PG_SUPPORT_CP |
5436				AMD_PG_SUPPORT_GFX_DMG))
5437		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5438	return 0;
5439}
5440
5441static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5442{
5443	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5444	int data;
5445
5446	if (amdgpu_sriov_vf(adev))
5447		*flags = 0;
5448
5449	/* AMD_CG_SUPPORT_GFX_MGCG */
5450	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5451	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5452		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5453
5454	/* AMD_CG_SUPPORT_GFX_CGLG */
5455	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5456	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5457		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5458
5459	/* AMD_CG_SUPPORT_GFX_CGLS */
5460	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5461		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5462
5463	/* AMD_CG_SUPPORT_GFX_CGTS */
5464	data = RREG32(mmCGTS_SM_CTRL_REG);
5465	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5466		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5467
5468	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5469	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5470		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5471
5472	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5473	data = RREG32(mmRLC_MEM_SLP_CNTL);
5474	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5475		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5476
5477	/* AMD_CG_SUPPORT_GFX_CP_LS */
5478	data = RREG32(mmCP_MEM_SLP_CNTL);
5479	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5480		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5481}
5482
5483static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5484				     uint32_t reg_addr, uint32_t cmd)
5485{
5486	uint32_t data;
5487
5488	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5489
5490	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5491	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5492
5493	data = RREG32(mmRLC_SERDES_WR_CTRL);
5494	if (adev->asic_type == CHIP_STONEY)
5495		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5496			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5497			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5498			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5499			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5500			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5501			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5502			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5503			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5504	else
5505		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5506			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5507			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5508			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5509			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5510			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5511			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5512			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5513			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5514			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5515			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5516	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5517		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5518		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5519		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5520
5521	WREG32(mmRLC_SERDES_WR_CTRL, data);
5522}
5523
/* RLC safe mode message ids and RLC_GPR_REG2 REQ/MESSAGE field layout */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0
#define RLC_GPR_REG2__REQ_MASK		0x00000001
#define RLC_GPR_REG2__REQ__SHIFT	0
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK	0x0000001e
5530
5531static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5532{
5533	uint32_t rlc_setting;
5534
5535	rlc_setting = RREG32(mmRLC_CNTL);
5536	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5537		return false;
5538
5539	return true;
5540}
5541
5542static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5543{
5544	uint32_t data;
5545	unsigned i;
5546	data = RREG32(mmRLC_CNTL);
5547	data |= RLC_SAFE_MODE__CMD_MASK;
5548	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5549	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5550	WREG32(mmRLC_SAFE_MODE, data);
5551
5552	/* wait for RLC_SAFE_MODE */
5553	for (i = 0; i < adev->usec_timeout; i++) {
5554		if ((RREG32(mmRLC_GPM_STAT) &
5555		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5556		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5557		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5558		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5559			break;
5560		udelay(1);
5561	}
5562	for (i = 0; i < adev->usec_timeout; i++) {
5563		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5564			break;
5565		udelay(1);
5566	}
5567}
5568
5569static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5570{
5571	uint32_t data;
5572	unsigned i;
5573
5574	data = RREG32(mmRLC_CNTL);
5575	data |= RLC_SAFE_MODE__CMD_MASK;
5576	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5577	WREG32(mmRLC_SAFE_MODE, data);
5578
5579	for (i = 0; i < adev->usec_timeout; i++) {
5580		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5581			break;
5582		udelay(1);
5583	}
5584}
5585
5586static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5587{
5588	u32 data;
5589
5590	amdgpu_gfx_off_ctrl(adev, false);
5591
5592	if (amdgpu_sriov_is_pp_one_vf(adev))
5593		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5594	else
5595		data = RREG32(mmRLC_SPM_VMID);
5596
5597	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5598	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5599
5600	if (amdgpu_sriov_is_pp_one_vf(adev))
5601		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5602	else
5603		WREG32(mmRLC_SPM_VMID, data);
5604
5605	amdgpu_gfx_off_ctrl(adev, true);
5606}
5607
5608static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5609	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5610	.set_safe_mode = gfx_v8_0_set_safe_mode,
5611	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5612	.init = gfx_v8_0_rlc_init,
5613	.get_csb_size = gfx_v8_0_get_csb_size,
5614	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5615	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5616	.resume = gfx_v8_0_rlc_resume,
5617	.stop = gfx_v8_0_rlc_stop,
5618	.reset = gfx_v8_0_rlc_reset,
5619	.start = gfx_v8_0_rlc_start,
5620	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5621};
5622
5623static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5624						      bool enable)
5625{
5626	uint32_t temp, data;
5627
5628	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5629
5630	/* It is disabled by HW by default */
5631	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5632		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5633			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5634				/* 1 - RLC memory Light sleep */
5635				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
 
5636
5637			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5638				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5639		}
 
 
5640
5641		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5642		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5643		if (adev->flags & AMD_IS_APU)
5644			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5645				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5646				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5647		else
5648			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5649				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5650				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5651				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5652
5653		if (temp != data)
5654			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5655
5656		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5657		gfx_v8_0_wait_for_rlc_serdes(adev);
5658
5659		/* 5 - clear mgcg override */
5660		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5661
5662		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5663			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5664			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5665			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5666			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5667			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5668			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5669			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5670			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5671				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5672			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5673			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5674			if (temp != data)
5675				WREG32(mmCGTS_SM_CTRL_REG, data);
5676		}
5677		udelay(50);
5678
5679		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5680		gfx_v8_0_wait_for_rlc_serdes(adev);
5681	} else {
5682		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5683		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5684		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5685				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5686				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5687				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5688		if (temp != data)
5689			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5690
5691		/* 2 - disable MGLS in RLC */
5692		data = RREG32(mmRLC_MEM_SLP_CNTL);
5693		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5694			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5695			WREG32(mmRLC_MEM_SLP_CNTL, data);
5696		}
5697
5698		/* 3 - disable MGLS in CP */
5699		data = RREG32(mmCP_MEM_SLP_CNTL);
5700		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5701			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5702			WREG32(mmCP_MEM_SLP_CNTL, data);
5703		}
5704
5705		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5706		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5707		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5708				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5709		if (temp != data)
5710			WREG32(mmCGTS_SM_CTRL_REG, data);
5711
5712		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5713		gfx_v8_0_wait_for_rlc_serdes(adev);
5714
5715		/* 6 - set mgcg override */
5716		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5717
5718		udelay(50);
5719
5720		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5721		gfx_v8_0_wait_for_rlc_serdes(adev);
5722	}
5723
5724	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5725}
5726
5727static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5728						      bool enable)
5729{
5730	uint32_t temp, temp1, data, data1;
5731
5732	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5733
5734	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
 
 
 
 
5735
5736	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5737		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5738		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5739		if (temp1 != data1)
5740			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5741
5742		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5743		gfx_v8_0_wait_for_rlc_serdes(adev);
5744
5745		/* 2 - clear cgcg override */
5746		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5747
5748		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5749		gfx_v8_0_wait_for_rlc_serdes(adev);
5750
5751		/* 3 - write cmd to set CGLS */
5752		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5753
5754		/* 4 - enable cgcg */
5755		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5756
5757		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5758			/* enable cgls*/
5759			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5760
5761			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5762			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5763
5764			if (temp1 != data1)
5765				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5766		} else {
5767			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5768		}
5769
5770		if (temp != data)
5771			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5772
5773		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
5774		 * Cmp_busy/GFX_Idle interrupts
5775		 */
5776		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5777	} else {
5778		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5779		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5780
5781		/* TEST CGCG */
5782		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5783		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5784				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5785		if (temp1 != data1)
5786			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5787
5788		/* read gfx register to wake up cgcg */
5789		RREG32(mmCB_CGTT_SCLK_CTRL);
5790		RREG32(mmCB_CGTT_SCLK_CTRL);
5791		RREG32(mmCB_CGTT_SCLK_CTRL);
5792		RREG32(mmCB_CGTT_SCLK_CTRL);
5793
5794		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5795		gfx_v8_0_wait_for_rlc_serdes(adev);
5796
5797		/* write cmd to Set CGCG Override */
5798		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5799
5800		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5801		gfx_v8_0_wait_for_rlc_serdes(adev);
5802
5803		/* write cmd to Clear CGLS */
5804		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5805
5806		/* disable cgcg, cgls should be disabled too. */
5807		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5808			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5809		if (temp != data)
5810			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5811		/* enable interrupts again for PG */
5812		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5813	}
5814
5815	gfx_v8_0_wait_for_rlc_serdes(adev);
5816
5817	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5818}
5819static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5820					    bool enable)
5821{
5822	if (enable) {
5823		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5824		 * ===  MGCG + MGLS + TS(CG/LS) ===
5825		 */
5826		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5827		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5828	} else {
5829		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5830		 * ===  CGCG + CGLS ===
5831		 */
5832		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5833		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5834	}
5835	return 0;
5836}
5837
5838static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5839					  enum amd_clockgating_state state)
5840{
5841	uint32_t msg_id, pp_state = 0;
5842	uint32_t pp_support_state = 0;
5843
5844	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5845		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5846			pp_support_state = PP_STATE_SUPPORT_LS;
5847			pp_state = PP_STATE_LS;
5848		}
5849		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5850			pp_support_state |= PP_STATE_SUPPORT_CG;
5851			pp_state |= PP_STATE_CG;
5852		}
5853		if (state == AMD_CG_STATE_UNGATE)
5854			pp_state = 0;
5855
5856		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5857				PP_BLOCK_GFX_CG,
5858				pp_support_state,
5859				pp_state);
5860		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5861	}
5862
5863	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5864		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5865			pp_support_state = PP_STATE_SUPPORT_LS;
5866			pp_state = PP_STATE_LS;
5867		}
5868
5869		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5870			pp_support_state |= PP_STATE_SUPPORT_CG;
5871			pp_state |= PP_STATE_CG;
5872		}
5873
5874		if (state == AMD_CG_STATE_UNGATE)
5875			pp_state = 0;
5876
5877		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5878				PP_BLOCK_GFX_MG,
5879				pp_support_state,
5880				pp_state);
5881		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5882	}
5883
5884	return 0;
5885}
5886
5887static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5888					  enum amd_clockgating_state state)
5889{
5890
5891	uint32_t msg_id, pp_state = 0;
5892	uint32_t pp_support_state = 0;
5893
5894	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5895		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5896			pp_support_state = PP_STATE_SUPPORT_LS;
5897			pp_state = PP_STATE_LS;
5898		}
5899		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5900			pp_support_state |= PP_STATE_SUPPORT_CG;
5901			pp_state |= PP_STATE_CG;
5902		}
5903		if (state == AMD_CG_STATE_UNGATE)
5904			pp_state = 0;
5905
5906		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5907				PP_BLOCK_GFX_CG,
5908				pp_support_state,
5909				pp_state);
5910		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5911	}
5912
5913	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5914		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5915			pp_support_state = PP_STATE_SUPPORT_LS;
5916			pp_state = PP_STATE_LS;
5917		}
5918		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5919			pp_support_state |= PP_STATE_SUPPORT_CG;
5920			pp_state |= PP_STATE_CG;
5921		}
5922		if (state == AMD_CG_STATE_UNGATE)
5923			pp_state = 0;
5924
5925		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5926				PP_BLOCK_GFX_3D,
5927				pp_support_state,
5928				pp_state);
5929		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5930	}
5931
5932	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5933		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5934			pp_support_state = PP_STATE_SUPPORT_LS;
5935			pp_state = PP_STATE_LS;
5936		}
5937
5938		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5939			pp_support_state |= PP_STATE_SUPPORT_CG;
5940			pp_state |= PP_STATE_CG;
5941		}
5942
5943		if (state == AMD_CG_STATE_UNGATE)
5944			pp_state = 0;
5945
5946		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5947				PP_BLOCK_GFX_MG,
5948				pp_support_state,
5949				pp_state);
5950		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5951	}
5952
5953	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5954		pp_support_state = PP_STATE_SUPPORT_LS;
5955
5956		if (state == AMD_CG_STATE_UNGATE)
5957			pp_state = 0;
5958		else
5959			pp_state = PP_STATE_LS;
5960
5961		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5962				PP_BLOCK_GFX_RLC,
5963				pp_support_state,
5964				pp_state);
5965		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5966	}
5967
5968	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5969		pp_support_state = PP_STATE_SUPPORT_LS;
5970
5971		if (state == AMD_CG_STATE_UNGATE)
5972			pp_state = 0;
5973		else
5974			pp_state = PP_STATE_LS;
5975		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5976			PP_BLOCK_GFX_CP,
5977			pp_support_state,
5978			pp_state);
5979		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5980	}
5981
5982	return 0;
5983}
5984
5985static int gfx_v8_0_set_clockgating_state(void *handle,
5986					  enum amd_clockgating_state state)
5987{
5988	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5989
5990	if (amdgpu_sriov_vf(adev))
5991		return 0;
5992
5993	switch (adev->asic_type) {
5994	case CHIP_FIJI:
5995	case CHIP_CARRIZO:
5996	case CHIP_STONEY:
5997		gfx_v8_0_update_gfx_clock_gating(adev,
5998						 state == AMD_CG_STATE_GATE);
5999		break;
6000	case CHIP_TONGA:
6001		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6002		break;
6003	case CHIP_POLARIS10:
6004	case CHIP_POLARIS11:
6005	case CHIP_POLARIS12:
6006	case CHIP_VEGAM:
6007		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6008		break;
6009	default:
6010		break;
6011	}
6012	return 0;
6013}
6014
6015static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6016{
6017	return *ring->rptr_cpu_addr;
 
 
 
 
6018}
6019
6020static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6021{
6022	struct amdgpu_device *adev = ring->adev;
 
6023
6024	if (ring->use_doorbell)
6025		/* XXX check if swapping is necessary on BE */
6026		return *ring->wptr_cpu_addr;
6027	else
6028		return RREG32(mmCP_RB0_WPTR);
 
 
6029}
6030
6031static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6032{
6033	struct amdgpu_device *adev = ring->adev;
6034
6035	if (ring->use_doorbell) {
6036		/* XXX check if swapping is necessary on BE */
6037		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6038		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6039	} else {
6040		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6041		(void)RREG32(mmCP_RB0_WPTR);
6042	}
6043}
6044
6045static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6046{
6047	u32 ref_and_mask, reg_mem_engine;
6048
6049	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6050	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6051		switch (ring->me) {
6052		case 1:
6053			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6054			break;
6055		case 2:
6056			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6057			break;
6058		default:
6059			return;
6060		}
6061		reg_mem_engine = 0;
6062	} else {
6063		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6064		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6065	}
6066
6067	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6068	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6069				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6070				 reg_mem_engine));
6071	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6072	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6073	amdgpu_ring_write(ring, ref_and_mask);
6074	amdgpu_ring_write(ring, ref_and_mask);
6075	amdgpu_ring_write(ring, 0x20); /* poll interval */
6076}
6077
6078static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6079{
6080	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6081	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6082		EVENT_INDEX(4));
6083
6084	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6085	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6086		EVENT_INDEX(0));
 
6087}
6088
6089static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6090					struct amdgpu_job *job,
6091					struct amdgpu_ib *ib,
6092					uint32_t flags)
6093{
6094	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6095	u32 header, control = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6096
6097	if (ib->flags & AMDGPU_IB_FLAG_CE)
6098		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6099	else
6100		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6101
6102	control |= ib->length_dw | (vmid << 24);
6103
6104	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6105		control |= INDIRECT_BUFFER_PRE_ENB(1);
6106
6107		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6108			gfx_v8_0_ring_emit_de_meta(ring);
6109	}
6110
6111	amdgpu_ring_write(ring, header);
6112	amdgpu_ring_write(ring,
6113#ifdef __BIG_ENDIAN
6114			  (2 << 0) |
6115#endif
6116			  (ib->gpu_addr & 0xFFFFFFFC));
6117	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6118	amdgpu_ring_write(ring, control);
6119}
6120
6121static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6122					  struct amdgpu_job *job,
6123					  struct amdgpu_ib *ib,
6124					  uint32_t flags)
6125{
6126	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6127	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6128
6129	/* Currently, there is a high possibility to get wave ID mismatch
6130	 * between ME and GDS, leading to a hw deadlock, because ME generates
6131	 * different wave IDs than the GDS expects. This situation happens
6132	 * randomly when at least 5 compute pipes use GDS ordered append.
6133	 * The wave IDs generated by ME are also wrong after suspend/resume.
6134	 * Those are probably bugs somewhere else in the kernel driver.
6135	 *
6136	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6137	 * GDS to 0 for this ring (me/pipe).
6138	 */
6139	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6140		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6141		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6142		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6143	}
6144
6145	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6146	amdgpu_ring_write(ring,
6147#ifdef __BIG_ENDIAN
6148				(2 << 0) |
6149#endif
6150				(ib->gpu_addr & 0xFFFFFFFC));
6151	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6152	amdgpu_ring_write(ring, control);
6153}
6154
6155static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6156					 u64 seq, unsigned flags)
6157{
6158	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6159	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6160	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
6161
6162	/* Workaround for cache flush problems. First send a dummy EOP
6163	 * event down the pipe with seq one below.
6164	 */
6165	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6166	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6167				 EOP_TC_ACTION_EN |
6168				 EOP_TC_WB_ACTION_EN |
6169				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6170				 EVENT_INDEX(5)));
6171	amdgpu_ring_write(ring, addr & 0xfffffffc);
6172	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6173				DATA_SEL(1) | INT_SEL(0));
6174	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6175	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6176
6177	/* Then send the real EOP event down the pipe:
6178	 * EVENT_WRITE_EOP - flush caches, send int */
6179	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6180	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6181				 EOP_TC_ACTION_EN |
6182				 EOP_TC_WB_ACTION_EN |
6183				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6184				 EVENT_INDEX(5) |
6185				 (exec ? EOP_EXEC : 0)));
6186	amdgpu_ring_write(ring, addr & 0xfffffffc);
6187	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6188			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6189	amdgpu_ring_write(ring, lower_32_bits(seq));
6190	amdgpu_ring_write(ring, upper_32_bits(seq));
6191
6192}
6193
6194static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6195{
6196	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6197	uint32_t seq = ring->fence_drv.sync_seq;
6198	uint64_t addr = ring->fence_drv.gpu_addr;
6199
6200	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6201	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6202				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6203				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6204	amdgpu_ring_write(ring, addr & 0xfffffffc);
6205	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6206	amdgpu_ring_write(ring, seq);
6207	amdgpu_ring_write(ring, 0xffffffff);
6208	amdgpu_ring_write(ring, 4); /* poll interval */
 
 
 
 
 
 
 
 
6209}
6210
6211static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6212					unsigned vmid, uint64_t pd_addr)
6213{
6214	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6215
6216	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6217
6218	/* wait for the invalidate to complete */
6219	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6220	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6221				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6222				 WAIT_REG_MEM_ENGINE(0))); /* me */
6223	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6224	amdgpu_ring_write(ring, 0);
6225	amdgpu_ring_write(ring, 0); /* ref */
6226	amdgpu_ring_write(ring, 0); /* mask */
6227	amdgpu_ring_write(ring, 0x20); /* poll interval */
6228
6229	/* compute doesn't have PFP */
6230	if (usepfp) {
6231		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6232		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6233		amdgpu_ring_write(ring, 0x0);
 
 
 
 
6234	}
6235}
6236
6237static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6238{
6239	return *ring->wptr_cpu_addr;
 
 
 
 
 
6240}
6241
6242static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6243{
6244	struct amdgpu_device *adev = ring->adev;
6245
6246	/* XXX check if swapping is necessary on BE */
6247	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6248	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6249}
6250
6251static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6252					     u64 addr, u64 seq,
6253					     unsigned flags)
6254{
6255	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6256	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6257
6258	/* RELEASE_MEM - flush caches, send int */
6259	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6260	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6261				 EOP_TC_ACTION_EN |
6262				 EOP_TC_WB_ACTION_EN |
6263				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6264				 EVENT_INDEX(5)));
6265	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6266	amdgpu_ring_write(ring, addr & 0xfffffffc);
6267	amdgpu_ring_write(ring, upper_32_bits(addr));
6268	amdgpu_ring_write(ring, lower_32_bits(seq));
6269	amdgpu_ring_write(ring, upper_32_bits(seq));
6270}
6271
6272static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6273					 u64 seq, unsigned int flags)
6274{
6275	/* we only allocate 32bit for each seq wb address */
6276	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6277
6278	/* write fence seq to the "addr" */
6279	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6280	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6281				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6282	amdgpu_ring_write(ring, lower_32_bits(addr));
6283	amdgpu_ring_write(ring, upper_32_bits(addr));
6284	amdgpu_ring_write(ring, lower_32_bits(seq));
6285
6286	if (flags & AMDGPU_FENCE_FLAG_INT) {
6287		/* set register to trigger INT */
6288		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6289		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6290					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6291		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6292		amdgpu_ring_write(ring, 0);
6293		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6294	}
6295}
6296
6297static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6298{
6299	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6300	amdgpu_ring_write(ring, 0);
6301}
6302
6303static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6304{
6305	uint32_t dw2 = 0;
6306
6307	if (amdgpu_sriov_vf(ring->adev))
6308		gfx_v8_0_ring_emit_ce_meta(ring);
6309
6310	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6311	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6312		gfx_v8_0_ring_emit_vgt_flush(ring);
6313		/* set load_global_config & load_global_uconfig */
6314		dw2 |= 0x8001;
6315		/* set load_cs_sh_regs */
6316		dw2 |= 0x01000000;
6317		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6318		dw2 |= 0x10002;
6319
6320		/* set load_ce_ram if preamble presented */
6321		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6322			dw2 |= 0x10000000;
6323	} else {
6324		/* still load_ce_ram if this is the first time preamble presented
6325		 * although there is no context switch happens.
6326		 */
6327		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6328			dw2 |= 0x10000000;
6329	}
6330
6331	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6332	amdgpu_ring_write(ring, dw2);
6333	amdgpu_ring_write(ring, 0);
6334}
6335
6336static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
6337						  uint64_t addr)
6338{
6339	unsigned ret;
6340
6341	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6342	amdgpu_ring_write(ring, lower_32_bits(addr));
6343	amdgpu_ring_write(ring, upper_32_bits(addr));
6344	/* discard following DWs if *cond_exec_gpu_addr==0 */
6345	amdgpu_ring_write(ring, 0);
6346	ret = ring->wptr & ring->buf_mask;
6347	/* patch dummy value later */
6348	amdgpu_ring_write(ring, 0);
6349	return ret;
6350}
6351
6352static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6353				    uint32_t reg_val_offs)
6354{
6355	struct amdgpu_device *adev = ring->adev;
6356
6357	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6358	amdgpu_ring_write(ring, 0 |	/* src: register*/
6359				(5 << 8) |	/* dst: memory */
6360				(1 << 20));	/* write confirm */
6361	amdgpu_ring_write(ring, reg);
6362	amdgpu_ring_write(ring, 0);
6363	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6364				reg_val_offs * 4));
6365	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6366				reg_val_offs * 4));
6367}
6368
6369static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6370				  uint32_t val)
6371{
6372	uint32_t cmd;
6373
6374	switch (ring->funcs->type) {
6375	case AMDGPU_RING_TYPE_GFX:
6376		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6377		break;
6378	case AMDGPU_RING_TYPE_KIQ:
6379		cmd = 1 << 16; /* no inc addr */
 
 
 
 
6380		break;
6381	default:
6382		cmd = WR_CONFIRM;
6383		break;
6384	}
6385
6386	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6387	amdgpu_ring_write(ring, cmd);
6388	amdgpu_ring_write(ring, reg);
6389	amdgpu_ring_write(ring, 0);
6390	amdgpu_ring_write(ring, val);
6391}
6392
6393static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
6394				  int mem_space, int opt, uint32_t addr0,
6395				  uint32_t addr1, uint32_t ref, uint32_t mask,
6396				  uint32_t inv)
6397{
6398	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6399	amdgpu_ring_write(ring,
6400			  /* memory (1) or register (0) */
6401			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
6402			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
6403			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
6404			   WAIT_REG_MEM_ENGINE(eng_sel)));
6405
6406	if (mem_space)
6407		BUG_ON(addr0 & 0x3); /* Dword align */
6408	amdgpu_ring_write(ring, addr0);
6409	amdgpu_ring_write(ring, addr1);
6410	amdgpu_ring_write(ring, ref);
6411	amdgpu_ring_write(ring, mask);
6412	amdgpu_ring_write(ring, inv); /* poll interval */
6413}
6414
6415static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
6416					uint32_t val, uint32_t mask)
6417{
6418	gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
6419}
6420
6421static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6422{
6423	struct amdgpu_device *adev = ring->adev;
6424	uint32_t value = 0;
6425
6426	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6427	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6428	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6429	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6430	WREG32(mmSQ_CMD, value);
6431}
6432
6433static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6434						 enum amdgpu_interrupt_state state)
6435{
6436	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6437		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6438}
6439
6440static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6441						     int me, int pipe,
6442						     enum amdgpu_interrupt_state state)
6443{
6444	u32 mec_int_cntl, mec_int_cntl_reg;
6445
6446	/*
6447	 * amdgpu controls only the first MEC. That's why this function only
6448	 * handles the setting of interrupts for this specific MEC. All other
6449	 * pipes' interrupts are set by amdkfd.
6450	 */
6451
6452	if (me == 1) {
6453		switch (pipe) {
6454		case 0:
6455			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6456			break;
6457		case 1:
6458			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6459			break;
6460		case 2:
6461			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6462			break;
6463		case 3:
6464			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6465			break;
6466		default:
6467			DRM_DEBUG("invalid pipe %d\n", pipe);
6468			return;
6469		}
6470	} else {
6471		DRM_DEBUG("invalid me %d\n", me);
6472		return;
6473	}
6474
6475	switch (state) {
6476	case AMDGPU_IRQ_STATE_DISABLE:
6477		mec_int_cntl = RREG32(mec_int_cntl_reg);
6478		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
 
6479		WREG32(mec_int_cntl_reg, mec_int_cntl);
6480		break;
6481	case AMDGPU_IRQ_STATE_ENABLE:
6482		mec_int_cntl = RREG32(mec_int_cntl_reg);
6483		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
 
6484		WREG32(mec_int_cntl_reg, mec_int_cntl);
6485		break;
6486	default:
6487		break;
6488	}
6489}
6490
6491static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6492					     struct amdgpu_irq_src *source,
6493					     unsigned type,
6494					     enum amdgpu_interrupt_state state)
6495{
6496	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6497		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6498
6499	return 0;
6500}
6501
6502static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6503					      struct amdgpu_irq_src *source,
6504					      unsigned type,
6505					      enum amdgpu_interrupt_state state)
6506{
6507	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6508		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6509
6510	return 0;
6511}
6512
6513static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6514					    struct amdgpu_irq_src *src,
6515					    unsigned type,
6516					    enum amdgpu_interrupt_state state)
6517{
6518	switch (type) {
6519	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6520		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6521		break;
6522	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6523		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6524		break;
6525	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6526		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6527		break;
6528	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6529		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6530		break;
6531	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6532		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6533		break;
6534	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6535		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6536		break;
6537	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6538		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6539		break;
6540	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6541		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6542		break;
6543	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6544		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6545		break;
6546	default:
6547		break;
6548	}
6549	return 0;
6550}
6551
6552static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6553					 struct amdgpu_irq_src *source,
6554					 unsigned int type,
6555					 enum amdgpu_interrupt_state state)
6556{
6557	int enable_flag;
6558
6559	switch (state) {
6560	case AMDGPU_IRQ_STATE_DISABLE:
6561		enable_flag = 0;
6562		break;
6563
6564	case AMDGPU_IRQ_STATE_ENABLE:
6565		enable_flag = 1;
6566		break;
6567
6568	default:
6569		return -EINVAL;
6570	}
6571
6572	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6573	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6574	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6575	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6576	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6577	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6578		     enable_flag);
6579	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6580		     enable_flag);
6581	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6582		     enable_flag);
6583	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6584		     enable_flag);
6585	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6586		     enable_flag);
6587	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6588		     enable_flag);
6589	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6590		     enable_flag);
6591	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6592		     enable_flag);
6593
6594	return 0;
6595}
6596
6597static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6598				     struct amdgpu_irq_src *source,
6599				     unsigned int type,
6600				     enum amdgpu_interrupt_state state)
6601{
6602	int enable_flag;
6603
6604	switch (state) {
6605	case AMDGPU_IRQ_STATE_DISABLE:
6606		enable_flag = 1;
6607		break;
6608
6609	case AMDGPU_IRQ_STATE_ENABLE:
6610		enable_flag = 0;
6611		break;
6612
6613	default:
6614		return -EINVAL;
6615	}
6616
6617	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6618		     enable_flag);
6619
6620	return 0;
6621}
6622
6623static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6624			    struct amdgpu_irq_src *source,
6625			    struct amdgpu_iv_entry *entry)
6626{
6627	int i;
6628	u8 me_id, pipe_id, queue_id;
6629	struct amdgpu_ring *ring;
6630
6631	DRM_DEBUG("IH: CP EOP\n");
6632	me_id = (entry->ring_id & 0x0c) >> 2;
6633	pipe_id = (entry->ring_id & 0x03) >> 0;
6634	queue_id = (entry->ring_id & 0x70) >> 4;
6635
6636	switch (me_id) {
6637	case 0:
6638		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6639		break;
6640	case 1:
6641	case 2:
6642		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6643			ring = &adev->gfx.compute_ring[i];
6644			/* Per-queue interrupt is supported for MEC starting from VI.
6645			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6646			  */
6647			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6648				amdgpu_fence_process(ring);
6649		}
6650		break;
6651	}
6652	return 0;
6653}
6654
6655static void gfx_v8_0_fault(struct amdgpu_device *adev,
6656			   struct amdgpu_iv_entry *entry)
6657{
6658	u8 me_id, pipe_id, queue_id;
6659	struct amdgpu_ring *ring;
6660	int i;
6661
6662	me_id = (entry->ring_id & 0x0c) >> 2;
6663	pipe_id = (entry->ring_id & 0x03) >> 0;
6664	queue_id = (entry->ring_id & 0x70) >> 4;
6665
6666	switch (me_id) {
6667	case 0:
6668		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6669		break;
6670	case 1:
6671	case 2:
6672		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6673			ring = &adev->gfx.compute_ring[i];
6674			if (ring->me == me_id && ring->pipe == pipe_id &&
6675			    ring->queue == queue_id)
6676				drm_sched_fault(&ring->sched);
6677		}
6678		break;
6679	}
6680}
6681
/*
 * Privileged-register fault interrupt handler: log the error and hand the
 * IV entry to gfx_v8_0_fault().  @source is unused.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6690
/*
 * Privileged-instruction fault interrupt handler: log the error and hand the
 * IV entry to gfx_v8_0_fault().  @source is unused.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");

	gfx_v8_0_fault(adev, entry);

	return 0;
}
6699
/*
 * CP EDC/ECC error interrupt handler: only logs the event.  None of the
 * parameters are used.  Always returns 0.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	/* Terminate the message with a newline like the other log calls here */
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
6707
6708static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6709				  bool from_wq)
6710{
6711	u32 enc, se_id, sh_id, cu_id;
6712	char type[20];
6713	int sq_edc_source = -1;
6714
6715	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6716	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6717
6718	switch (enc) {
6719		case 0:
6720			DRM_INFO("SQ general purpose intr detected:"
6721					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
6722					"host_cmd_overflow %d, cmd_timestamp %d,"
6723					"reg_timestamp %d, thread_trace_buff_full %d,"
6724					"wlt %d, thread_trace %d.\n",
6725					se_id,
6726					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6727					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6728					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6729					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6730					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6731					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6732					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6733					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6734					);
6735			break;
6736		case 1:
6737		case 2:
6738
6739			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6740			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6741
6742			/*
6743			 * This function can be called either directly from ISR
6744			 * or from BH in which case we can access SQ_EDC_INFO
6745			 * instance
6746			 */
6747			if (from_wq) {
6748				mutex_lock(&adev->grbm_idx_mutex);
6749				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
6750
6751				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6752
6753				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6754				mutex_unlock(&adev->grbm_idx_mutex);
6755			}
6756
6757			if (enc == 1)
6758				sprintf(type, "instruction intr");
6759			else
6760				sprintf(type, "EDC/ECC error");
6761
6762			DRM_INFO(
6763				"SQ %s detected: "
6764					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6765					"trap %s, sq_ed_info.source %s.\n",
6766					type, se_id, sh_id, cu_id,
6767					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6768					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6769					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6770					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6771					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6772				);
6773			break;
6774		default:
6775			DRM_ERROR("SQ invalid encoding type\n.");
6776	}
6777}
6778
6779static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6780{
6781
6782	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6783	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6784
6785	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6786}
6787
6788static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6789			   struct amdgpu_irq_src *source,
6790			   struct amdgpu_iv_entry *entry)
6791{
6792	unsigned ih_data = entry->src_data[0];
6793
6794	/*
6795	 * Try to submit work so SQ_EDC_INFO can be accessed from
6796	 * BH. If previous work submission hasn't finished yet
6797	 * just print whatever info is possible directly from the ISR.
6798	 */
6799	if (work_pending(&adev->gfx.sq_work.work)) {
6800		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6801	} else {
6802		adev->gfx.sq_work.ih_data = ih_data;
6803		schedule_work(&adev->gfx.sq_work.work);
6804	}
6805
6806	return 0;
6807}
6808
6809static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6810{
6811	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6812	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6813			  PACKET3_TC_ACTION_ENA |
6814			  PACKET3_SH_KCACHE_ACTION_ENA |
6815			  PACKET3_SH_ICACHE_ACTION_ENA |
6816			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6817	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6818	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6819	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6820}
6821
6822static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6823{
6824	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6825	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6826			  PACKET3_TC_ACTION_ENA |
6827			  PACKET3_SH_KCACHE_ACTION_ENA |
6828			  PACKET3_SH_ICACHE_ACTION_ENA |
6829			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6830	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6831	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6832	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6833	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6834	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6835}
6836
6837
6838/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
6839#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
6840static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6841					uint32_t pipe, bool enable)
6842{
6843	uint32_t val;
6844	uint32_t wcl_cs_reg;
6845
6846	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6847
6848	switch (pipe) {
6849	case 0:
6850		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6851		break;
6852	case 1:
6853		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6854		break;
6855	case 2:
6856		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6857		break;
6858	case 3:
6859		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6860		break;
6861	default:
6862		DRM_DEBUG("invalid pipe %d\n", pipe);
6863		return;
6864	}
6865
6866	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6867
6868}
6869
6870#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
6871static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6872{
6873	struct amdgpu_device *adev = ring->adev;
6874	uint32_t val;
6875	int i;
6876
6877	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
6878	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
6879	 * around 25% of gpu resources.
6880	 */
6881	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6882	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6883
6884	/* Restrict waves for normal/low priority compute queues as well
6885	 * to get best QoS for high priority compute jobs.
6886	 *
6887	 * amdgpu controls only 1st ME(0-3 CS pipes).
6888	 */
6889	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6890		if (i != ring->pipe)
6891			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6892
6893	}
6894
6895}
6896
6897static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
6898{
6899	struct amdgpu_device *adev = ring->adev;
6900	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
6901	struct amdgpu_ring *kiq_ring = &kiq->ring;
6902	unsigned long flags;
6903	u32 tmp;
6904	int r;
6905
6906	if (amdgpu_sriov_vf(adev))
6907		return -EINVAL;
6908
6909	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
6910		return -EINVAL;
6911
6912	spin_lock_irqsave(&kiq->ring_lock, flags);
6913
6914	if (amdgpu_ring_alloc(kiq_ring, 5)) {
6915		spin_unlock_irqrestore(&kiq->ring_lock, flags);
6916		return -ENOMEM;
6917	}
6918
6919	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
6920	gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp);
6921	amdgpu_ring_commit(kiq_ring);
6922
6923	spin_unlock_irqrestore(&kiq->ring_lock, flags);
6924
6925	r = amdgpu_ring_test_ring(kiq_ring);
6926	if (r)
6927		return r;
6928
6929	if (amdgpu_ring_alloc(ring, 7 + 12 + 5))
6930		return -ENOMEM;
6931	gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr,
6932				     ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
6933	gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff);
6934	gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0);
6935
6936	return amdgpu_ring_test_ring(ring);
6937}
6938
6939static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6940	.name = "gfx_v8_0",
6941	.early_init = gfx_v8_0_early_init,
6942	.late_init = gfx_v8_0_late_init,
6943	.sw_init = gfx_v8_0_sw_init,
6944	.sw_fini = gfx_v8_0_sw_fini,
6945	.hw_init = gfx_v8_0_hw_init,
6946	.hw_fini = gfx_v8_0_hw_fini,
6947	.suspend = gfx_v8_0_suspend,
6948	.resume = gfx_v8_0_resume,
6949	.is_idle = gfx_v8_0_is_idle,
6950	.wait_for_idle = gfx_v8_0_wait_for_idle,
6951	.check_soft_reset = gfx_v8_0_check_soft_reset,
6952	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6953	.soft_reset = gfx_v8_0_soft_reset,
6954	.post_soft_reset = gfx_v8_0_post_soft_reset,
6955	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6956	.set_powergating_state = gfx_v8_0_set_powergating_state,
6957	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6958};
6959
6960static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6961	.type = AMDGPU_RING_TYPE_GFX,
6962	.align_mask = 0xff,
6963	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6964	.support_64bit_ptrs = false,
6965	.get_rptr = gfx_v8_0_ring_get_rptr,
6966	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6967	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6968	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
6969		5 +  /* COND_EXEC */
6970		7 +  /* PIPELINE_SYNC */
6971		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6972		12 +  /* FENCE for VM_FLUSH */
6973		20 + /* GDS switch */
6974		4 + /* double SWITCH_BUFFER,
6975		       the first COND_EXEC jump to the place just
6976			   prior to this double SWITCH_BUFFER  */
6977		5 + /* COND_EXEC */
6978		7 +	 /*	HDP_flush */
6979		4 +	 /*	VGT_flush */
6980		14 + /*	CE_META */
6981		31 + /*	DE_META */
6982		3 + /* CNTX_CTRL */
6983		5 + /* HDP_INVL */
6984		12 + 12 + /* FENCE x2 */
6985		2 + /* SWITCH_BUFFER */
6986		5, /* SURFACE_SYNC */
6987	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6988	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6989	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6990	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6991	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6992	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6993	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 
6994	.test_ring = gfx_v8_0_ring_test_ring,
6995	.test_ib = gfx_v8_0_ring_test_ib,
6996	.insert_nop = amdgpu_ring_insert_nop,
6997	.pad_ib = amdgpu_ring_generic_pad_ib,
6998	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6999	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7000	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
7001	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7002	.soft_recovery = gfx_v8_0_ring_soft_recovery,
7003	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
7004	.reset = gfx_v8_0_reset_kgq,
7005};
7006
7007static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7008	.type = AMDGPU_RING_TYPE_COMPUTE,
7009	.align_mask = 0xff,
7010	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7011	.support_64bit_ptrs = false,
7012	.get_rptr = gfx_v8_0_ring_get_rptr,
7013	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7014	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7015	.emit_frame_size =
7016		20 + /* gfx_v8_0_ring_emit_gds_switch */
7017		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7018		5 + /* hdp_invalidate */
7019		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7020		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7021		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7022		7 + /* gfx_v8_0_emit_mem_sync_compute */
7023		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7024		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7025	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
7026	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
7027	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
7028	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7029	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7030	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7031	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 
7032	.test_ring = gfx_v8_0_ring_test_ring,
7033	.test_ib = gfx_v8_0_ring_test_ib,
7034	.insert_nop = amdgpu_ring_insert_nop,
7035	.pad_ib = amdgpu_ring_generic_pad_ib,
7036	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7037	.soft_recovery = gfx_v8_0_ring_soft_recovery,
7038	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7039	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
7040};
7041
7042static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7043	.type = AMDGPU_RING_TYPE_KIQ,
7044	.align_mask = 0xff,
7045	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7046	.support_64bit_ptrs = false,
7047	.get_rptr = gfx_v8_0_ring_get_rptr,
7048	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7049	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7050	.emit_frame_size =
7051		20 + /* gfx_v8_0_ring_emit_gds_switch */
7052		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7053		5 + /* hdp_invalidate */
7054		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7055		17 + /* gfx_v8_0_ring_emit_vm_flush */
7056		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7057	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
7058	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7059	.test_ring = gfx_v8_0_ring_test_ring,
7060	.insert_nop = amdgpu_ring_insert_nop,
7061	.pad_ib = amdgpu_ring_generic_pad_ib,
7062	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7063	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7064};
7065
7066static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7067{
7068	int i;
7069
7070	adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7071
7072	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7073		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7074
7075	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7076		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7077}
7078
7079static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7080	.set = gfx_v8_0_set_eop_interrupt_state,
7081	.process = gfx_v8_0_eop_irq,
7082};
7083
7084static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7085	.set = gfx_v8_0_set_priv_reg_fault_state,
7086	.process = gfx_v8_0_priv_reg_irq,
7087};
7088
7089static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7090	.set = gfx_v8_0_set_priv_inst_fault_state,
7091	.process = gfx_v8_0_priv_inst_irq,
7092};
7093
7094static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7095	.set = gfx_v8_0_set_cp_ecc_int_state,
7096	.process = gfx_v8_0_cp_ecc_error_irq,
7097};
7098
7099static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7100	.set = gfx_v8_0_set_sq_int_state,
7101	.process = gfx_v8_0_sq_irq,
7102};
7103
7104static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7105{
7106	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7107	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7108
7109	adev->gfx.priv_reg_irq.num_types = 1;
7110	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7111
7112	adev->gfx.priv_inst_irq.num_types = 1;
7113	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7114
7115	adev->gfx.cp_ecc_error_irq.num_types = 1;
7116	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7117
7118	adev->gfx.sq_irq.num_types = 1;
7119	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7120}
7121
7122static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7123{
7124	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7125}
7126
7127static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7128{
7129	/* init asci gds info */
7130	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7131	adev->gds.gws_size = 64;
7132	adev->gds.oa_size = 16;
7133	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7134}
 
 
7135
7136static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7137						 u32 bitmap)
7138{
7139	u32 data;
7140
7141	if (!bitmap)
7142		return;
 
 
 
7143
7144	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7145	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7146
7147	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
 
 
7148}
7149
7150static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7151{
7152	u32 data, mask;
7153
7154	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7155		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7156
7157	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
7158
7159	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
 
 
7160}
7161
7162static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 
7163{
7164	int i, j, k, counter, active_cu_number = 0;
7165	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7166	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7167	unsigned disable_masks[4 * 2];
7168	u32 ao_cu_num;
7169
7170	memset(cu_info, 0, sizeof(*cu_info));
7171
7172	if (adev->flags & AMD_IS_APU)
7173		ao_cu_num = 2;
7174	else
7175		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7176
7177	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7178
7179	mutex_lock(&adev->grbm_idx_mutex);
7180	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7181		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7182			mask = 1;
7183			ao_bitmap = 0;
7184			counter = 0;
7185			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7186			if (i < 4 && j < 2)
7187				gfx_v8_0_set_user_cu_inactive_bitmap(
7188					adev, disable_masks[i * 2 + j]);
7189			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7190			cu_info->bitmap[0][i][j] = bitmap;
7191
7192			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7193				if (bitmap & mask) {
7194					if (counter < ao_cu_num)
7195						ao_bitmap |= mask;
7196					counter ++;
7197				}
7198				mask <<= 1;
7199			}
7200			active_cu_number += counter;
7201			if (i < 2 && j < 2)
7202				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7203			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7204		}
7205	}
7206	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7207	mutex_unlock(&adev->grbm_idx_mutex);
7208
7209	cu_info->number = active_cu_number;
7210	cu_info->ao_cu_mask = ao_cu_mask;
7211	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7212	cu_info->max_waves_per_simd = 10;
7213	cu_info->max_scratch_slots_per_cu = 32;
7214	cu_info->wave_front_size = 64;
7215	cu_info->lds_size = 64;
7216}
7217
7218const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7219{
7220	.type = AMD_IP_BLOCK_TYPE_GFX,
7221	.major = 8,
7222	.minor = 0,
7223	.rev = 0,
7224	.funcs = &gfx_v8_0_ip_funcs,
7225};
7226
7227const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7228{
7229	.type = AMD_IP_BLOCK_TYPE_GFX,
7230	.major = 8,
7231	.minor = 1,
7232	.rev = 0,
7233	.funcs = &gfx_v8_0_ip_funcs,
7234};
7235
7236static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7237{
7238	uint64_t ce_payload_addr;
7239	int cnt_ce;
7240	union {
7241		struct vi_ce_ib_state regular;
7242		struct vi_ce_ib_state_chained_ib chained;
7243	} ce_payload = {};
7244
7245	if (ring->adev->virt.chained_ib_support) {
7246		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7247			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7248		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7249	} else {
7250		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7251			offsetof(struct vi_gfx_meta_data, ce_payload);
7252		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7253	}
7254
7255	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7256	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7257				WRITE_DATA_DST_SEL(8) |
7258				WR_CONFIRM) |
7259				WRITE_DATA_CACHE_POLICY(0));
7260	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7261	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7262	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7263}
7264
7265static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7266{
7267	uint64_t de_payload_addr, gds_addr, csa_addr;
7268	int cnt_de;
7269	union {
7270		struct vi_de_ib_state regular;
7271		struct vi_de_ib_state_chained_ib chained;
7272	} de_payload = {};
7273
7274	csa_addr = amdgpu_csa_vaddr(ring->adev);
7275	gds_addr = csa_addr + 4096;
7276	if (ring->adev->virt.chained_ib_support) {
7277		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7278		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7279		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7280		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7281	} else {
7282		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7283		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7284		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7285		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7286	}
7287
7288	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7289	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7290				WRITE_DATA_DST_SEL(8) |
7291				WR_CONFIRM) |
7292				WRITE_DATA_CACHE_POLICY(0));
7293	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7294	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7295	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7296}