   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23#include <linux/kernel.h>
  24#include <linux/firmware.h>
  25#include <drm/drmP.h>
  26#include "amdgpu.h"
  27#include "amdgpu_gfx.h"
  28#include "soc15.h"
  29#include "soc15d.h"
  30
  31#include "gc/gc_9_0_offset.h"
  32#include "gc/gc_9_0_sh_mask.h"
  33#include "vega10_enum.h"
  34#include "hdp/hdp_4_0_offset.h"
  35
  36#include "soc15_common.h"
  37#include "clearstate_gfx9.h"
  38#include "v9_structs.h"
  39
  40#define GFX9_NUM_GFX_RINGS     1
  41#define GFX9_MEC_HPD_SIZE 2048
  42#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  43#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
  44#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
  45
  46#define mmPWR_MISC_CNTL_STATUS					0x0183
  47#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
  48#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
  49#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
  50#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
  51#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
  52
  53MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
  54MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
  55MODULE_FIRMWARE("amdgpu/vega10_me.bin");
  56MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
  57MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
  58MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
  59
  60MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
  61MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
  62MODULE_FIRMWARE("amdgpu/vega12_me.bin");
  63MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
  64MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
  65MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
  66
  67MODULE_FIRMWARE("amdgpu/raven_ce.bin");
  68MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
  69MODULE_FIRMWARE("amdgpu/raven_me.bin");
  70MODULE_FIRMWARE("amdgpu/raven_mec.bin");
  71MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
  72MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
  73
  74static const struct soc15_reg_golden golden_settings_gc_9_0[] =
  75{
  76	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
  77	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
  78	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
  79	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
  80	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
  81	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
  82	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
  83	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
  84	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
  85	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
  86	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
  87	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
  88	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
  89	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
  90	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
  91	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
  92	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
  93	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
  94	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
  95	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
  96	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
  97	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
  98	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
  99};
 100
 101static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 102{
 103	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
 104	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 105	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 106	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
 107	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
 108	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 109	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
 110};
 111
 112static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 113{
 114	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 115	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 116	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 117	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 118	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 119	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 120	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 121	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 122	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 123	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 124	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 125	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 126	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 127	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 128	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 129	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 130	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 131	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 132	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 133	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
 134	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 135};
 136
 137static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
 138{
 139	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 140	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
 141	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
 142	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
 143	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
 144	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 145	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
 146};
 147
 148static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 149{
 150	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
 151	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 152};
 153
 154static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
 155{
 156	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 157	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 158	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 159	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 160	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 161	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 162	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 163	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 164	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 165	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 166	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 167	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 168	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 169	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 170	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 171	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 172};
 173
 174static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 175{
 176	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
 177	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 178	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 179	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
 180	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
 181	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 182	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
 183	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 184	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
 185	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
 186};
 187
 188#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 189#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 190#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
 191
 192static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 193static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 194static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
 195static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 196static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 197                                 struct amdgpu_cu_info *cu_info);
 198static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 199static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
 200static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 201
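/*
 * Program the golden register settings: the common GC 9.x sequence plus
 * the Vega10/Vega12/Raven specific values.
 */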
 202static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 203{
 204	switch (adev->asic_type) {
 205	case CHIP_VEGA10:
 206		soc15_program_register_sequence(adev,
 207						 golden_settings_gc_9_0,
 208						 ARRAY_SIZE(golden_settings_gc_9_0));
 209		soc15_program_register_sequence(adev,
 210						 golden_settings_gc_9_0_vg10,
 211						 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 212		break;
 213	case CHIP_VEGA12:
 214		soc15_program_register_sequence(adev,
 215						golden_settings_gc_9_2_1,
 216						ARRAY_SIZE(golden_settings_gc_9_2_1));
 217		soc15_program_register_sequence(adev,
 218						golden_settings_gc_9_2_1_vg12,
 219						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 220		break;
 221	case CHIP_RAVEN:
 222		soc15_program_register_sequence(adev,
 223						 golden_settings_gc_9_1,
 224						 ARRAY_SIZE(golden_settings_gc_9_1));
 225		soc15_program_register_sequence(adev,
 226						 golden_settings_gc_9_1_rv1,
 227						 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
 228		break;
 229	default:
 230		break;
 231	}
 232
 233	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
 234					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
 235}
 236
 237static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
 238{
 239	adev->gfx.scratch.num_reg = 8;
 240	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
 241	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 242}
 243
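/*
 * Emit a WRITE_DATA packet that writes @val to register @reg on the
 * selected engine, optionally requesting write confirmation (@wc).
 */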
 244static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
 245				       bool wc, uint32_t reg, uint32_t val)
 246{
 247	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 248	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
 249				WRITE_DATA_DST_SEL(0) |
 250				(wc ? WR_CONFIRM : 0));
 251	amdgpu_ring_write(ring, reg);
 252	amdgpu_ring_write(ring, 0);
 253	amdgpu_ring_write(ring, val);
 254}
 255
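/*
 * Emit a WAIT_REG_MEM packet that polls a register or memory location
 * (@mem_space) until (value & @mask) == @ref, checking every @inv
 * poll intervals.
 */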
 256static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
 257				  int mem_space, int opt, uint32_t addr0,
 258				  uint32_t addr1, uint32_t ref, uint32_t mask,
 259				  uint32_t inv)
 260{
 261	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 262	amdgpu_ring_write(ring,
 263				 /* memory (1) or register (0) */
 264				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
 265				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
 266				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
 267				 WAIT_REG_MEM_ENGINE(eng_sel)));
 268
 269	if (mem_space)
 270		BUG_ON(addr0 & 0x3); /* Dword align */
 271	amdgpu_ring_write(ring, addr0);
 272	amdgpu_ring_write(ring, addr1);
 273	amdgpu_ring_write(ring, ref);
 274	amdgpu_ring_write(ring, mask);
 275	amdgpu_ring_write(ring, inv); /* poll interval */
 276}
 277
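/*
 * Basic ring test: write 0xDEADBEEF to a scratch register through the
 * ring and poll until the value appears or the timeout expires.
 */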
 278static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 279{
 280	struct amdgpu_device *adev = ring->adev;
 281	uint32_t scratch;
 282	uint32_t tmp = 0;
 283	unsigned i;
 284	int r;
 285
 286	r = amdgpu_gfx_scratch_get(adev, &scratch);
 287	if (r) {
 288		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 289		return r;
 290	}
 291	WREG32(scratch, 0xCAFEDEAD);
 292	r = amdgpu_ring_alloc(ring, 3);
 293	if (r) {
 294		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 295			  ring->idx, r);
 296		amdgpu_gfx_scratch_free(adev, scratch);
 297		return r;
 298	}
 299	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 300	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 301	amdgpu_ring_write(ring, 0xDEADBEEF);
 302	amdgpu_ring_commit(ring);
 303
 304	for (i = 0; i < adev->usec_timeout; i++) {
 305		tmp = RREG32(scratch);
 306		if (tmp == 0xDEADBEEF)
 307			break;
 308		DRM_UDELAY(1);
 309	}
 310	if (i < adev->usec_timeout) {
 311		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
 312			 ring->idx, i);
 313	} else {
 314		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 315			  ring->idx, scratch, tmp);
 316		r = -EINVAL;
 317	}
 318	amdgpu_gfx_scratch_free(adev, scratch);
 319	return r;
 320}
 321
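/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot and wait on its fence to confirm CP execution.
 */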
 322static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 323{
 324	struct amdgpu_device *adev = ring->adev;
 325	struct amdgpu_ib ib;
 326	struct dma_fence *f = NULL;
 327
 328	unsigned index;
 329	uint64_t gpu_addr;
 330	uint32_t tmp;
 331	long r;
 332
 333	r = amdgpu_device_wb_get(adev, &index);
 334	if (r) {
 335		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 336		return r;
 337	}
 338
 339	gpu_addr = adev->wb.gpu_addr + (index * 4);
 340	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 341	memset(&ib, 0, sizeof(ib));
 342	r = amdgpu_ib_get(adev, NULL, 16, &ib);
 343	if (r) {
 344		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 345		goto err1;
 346	}
 347	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 348	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 349	ib.ptr[2] = lower_32_bits(gpu_addr);
 350	ib.ptr[3] = upper_32_bits(gpu_addr);
 351	ib.ptr[4] = 0xDEADBEEF;
 352	ib.length_dw = 5;
 353
 354	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 355	if (r)
 356		goto err2;
 357
 358	r = dma_fence_wait_timeout(f, false, timeout);
  359	if (r == 0) {
  360		DRM_ERROR("amdgpu: IB test timed out.\n");
  361		r = -ETIMEDOUT;
  362		goto err2;
  363	} else if (r < 0) {
  364		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
  365		goto err2;
  366	}
  367
  368	tmp = adev->wb.wb[index];
  369	if (tmp == 0xDEADBEEF) {
  370		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
  371		r = 0;
  372	} else {
  373		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
  374		r = -EINVAL;
  375	}
 376
 377err2:
 378	amdgpu_ib_free(adev, &ib, NULL);
 379	dma_fence_put(f);
 380err1:
 381	amdgpu_device_wb_free(adev, index);
 382	return r;
 383}
 384
 385
 386static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
 387{
 388	release_firmware(adev->gfx.pfp_fw);
 389	adev->gfx.pfp_fw = NULL;
 390	release_firmware(adev->gfx.me_fw);
 391	adev->gfx.me_fw = NULL;
 392	release_firmware(adev->gfx.ce_fw);
 393	adev->gfx.ce_fw = NULL;
 394	release_firmware(adev->gfx.rlc_fw);
 395	adev->gfx.rlc_fw = NULL;
 396	release_firmware(adev->gfx.mec_fw);
 397	adev->gfx.mec_fw = NULL;
 398	release_firmware(adev->gfx.mec2_fw);
 399	adev->gfx.mec2_fw = NULL;
 400
 401	kfree(adev->gfx.rlc.register_list_format);
 402}
 403
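/*
 * Request and validate the PFP, ME, CE, RLC, MEC and (optional) MEC2
 * microcode, and register it with the PSP loader when front-door loading
 * is used.
 */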
 404static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 405{
 406	const char *chip_name;
 407	char fw_name[30];
 408	int err;
 409	struct amdgpu_firmware_info *info = NULL;
 410	const struct common_firmware_header *header = NULL;
 411	const struct gfx_firmware_header_v1_0 *cp_hdr;
 412	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 413	unsigned int *tmp = NULL;
 414	unsigned int i = 0;
 415
 416	DRM_DEBUG("\n");
 417
 418	switch (adev->asic_type) {
 419	case CHIP_VEGA10:
 420		chip_name = "vega10";
 421		break;
 422	case CHIP_VEGA12:
 423		chip_name = "vega12";
 424		break;
 425	case CHIP_RAVEN:
 426		chip_name = "raven";
 427		break;
 428	default:
 429		BUG();
 430	}
 431
 432	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 433	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 434	if (err)
 435		goto out;
 436	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 437	if (err)
 438		goto out;
 439	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 440	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 441	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 442
 443	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 444	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 445	if (err)
 446		goto out;
 447	err = amdgpu_ucode_validate(adev->gfx.me_fw);
 448	if (err)
 449		goto out;
 450	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 451	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 452	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 453
 454	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 455	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 456	if (err)
 457		goto out;
 458	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 459	if (err)
 460		goto out;
 461	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 462	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 463	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 464
 465	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 466	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 467	if (err)
 468		goto out;
 469	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 470	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 471	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 472	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 473	adev->gfx.rlc.save_and_restore_offset =
 474			le32_to_cpu(rlc_hdr->save_and_restore_offset);
 475	adev->gfx.rlc.clear_state_descriptor_offset =
 476			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
 477	adev->gfx.rlc.avail_scratch_ram_locations =
 478			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
 479	adev->gfx.rlc.reg_restore_list_size =
 480			le32_to_cpu(rlc_hdr->reg_restore_list_size);
 481	adev->gfx.rlc.reg_list_format_start =
 482			le32_to_cpu(rlc_hdr->reg_list_format_start);
 483	adev->gfx.rlc.reg_list_format_separate_start =
 484			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
 485	adev->gfx.rlc.starting_offsets_start =
 486			le32_to_cpu(rlc_hdr->starting_offsets_start);
 487	adev->gfx.rlc.reg_list_format_size_bytes =
 488			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
 489	adev->gfx.rlc.reg_list_size_bytes =
 490			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
 491	adev->gfx.rlc.register_list_format =
 492			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
 493				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
 494	if (!adev->gfx.rlc.register_list_format) {
 495		err = -ENOMEM;
 496		goto out;
 497	}
 498
 499	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 500			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
 501	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
 502		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
 503
 504	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
 505
 506	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 507			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
 508	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
 509		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 510
 511	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 512	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 513	if (err)
 514		goto out;
 515	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
 516	if (err)
 517		goto out;
 518	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 519	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 520	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 521
 522
 523	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
 524	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 525	if (!err) {
 526		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
 527		if (err)
 528			goto out;
  529		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
  530			adev->gfx.mec2_fw->data;
  531		adev->gfx.mec2_fw_version =
  532			le32_to_cpu(cp_hdr->header.ucode_version);
  533		adev->gfx.mec2_feature_version =
  534			le32_to_cpu(cp_hdr->ucode_feature_version);
 535	} else {
 536		err = 0;
 537		adev->gfx.mec2_fw = NULL;
 538	}
 539
 540	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 541		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
 542		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
 543		info->fw = adev->gfx.pfp_fw;
 544		header = (const struct common_firmware_header *)info->fw->data;
 545		adev->firmware.fw_size +=
 546			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 547
 548		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
 549		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
 550		info->fw = adev->gfx.me_fw;
 551		header = (const struct common_firmware_header *)info->fw->data;
 552		adev->firmware.fw_size +=
 553			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 554
 555		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
 556		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
 557		info->fw = adev->gfx.ce_fw;
 558		header = (const struct common_firmware_header *)info->fw->data;
 559		adev->firmware.fw_size +=
 560			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 561
 562		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
 563		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
 564		info->fw = adev->gfx.rlc_fw;
 565		header = (const struct common_firmware_header *)info->fw->data;
 566		adev->firmware.fw_size +=
 567			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 568
 569		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
 570		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
 571		info->fw = adev->gfx.mec_fw;
 572		header = (const struct common_firmware_header *)info->fw->data;
 573		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
 574		adev->firmware.fw_size +=
 575			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 576
 577		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
 578		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
 579		info->fw = adev->gfx.mec_fw;
 580		adev->firmware.fw_size +=
 581			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 582
 583		if (adev->gfx.mec2_fw) {
 584			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
 585			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
 586			info->fw = adev->gfx.mec2_fw;
 587			header = (const struct common_firmware_header *)info->fw->data;
 588			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
 589			adev->firmware.fw_size +=
 590				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 591			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
 592			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
 593			info->fw = adev->gfx.mec2_fw;
 594			adev->firmware.fw_size +=
 595				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 596		}
 597
 598	}
 599
 600out:
 601	if (err) {
 602		dev_err(adev->dev,
 603			"gfx9: Failed to load firmware \"%s\"\n",
 604			fw_name);
 605		release_firmware(adev->gfx.pfp_fw);
 606		adev->gfx.pfp_fw = NULL;
 607		release_firmware(adev->gfx.me_fw);
 608		adev->gfx.me_fw = NULL;
 609		release_firmware(adev->gfx.ce_fw);
 610		adev->gfx.ce_fw = NULL;
 611		release_firmware(adev->gfx.rlc_fw);
 612		adev->gfx.rlc_fw = NULL;
 613		release_firmware(adev->gfx.mec_fw);
 614		adev->gfx.mec_fw = NULL;
 615		release_firmware(adev->gfx.mec2_fw);
 616		adev->gfx.mec2_fw = NULL;
 617	}
 618	return err;
 619}
 620
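/* Return the size, in dwords, of the clear-state buffer built from gfx9_cs_data. */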
 621static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
 622{
 623	u32 count = 0;
 624	const struct cs_section_def *sect = NULL;
 625	const struct cs_extent_def *ext = NULL;
 626
 627	/* begin clear state */
 628	count += 2;
 629	/* context control state */
 630	count += 3;
 631
 632	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
 633		for (ext = sect->section; ext->extent != NULL; ++ext) {
 634			if (sect->id == SECT_CONTEXT)
 635				count += 2 + ext->reg_count;
 636			else
 637				return 0;
 638		}
 639	}
 640
 641	/* end clear state */
 642	count += 2;
 643	/* clear state */
 644	count += 2;
 645
 646	return count;
 647}
 648
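/*
 * Fill @buffer with the clear-state indirect buffer: preamble, context
 * control, the SECT_CONTEXT register extents and a CLEAR_STATE packet.
 */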
 649static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
 650				    volatile u32 *buffer)
 651{
 652	u32 count = 0, i;
 653	const struct cs_section_def *sect = NULL;
 654	const struct cs_extent_def *ext = NULL;
 655
 656	if (adev->gfx.rlc.cs_data == NULL)
 657		return;
 658	if (buffer == NULL)
 659		return;
 660
 661	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
 662	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
 663
 664	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
 665	buffer[count++] = cpu_to_le32(0x80000000);
 666	buffer[count++] = cpu_to_le32(0x80000000);
 667
 668	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
 669		for (ext = sect->section; ext->extent != NULL; ++ext) {
 670			if (sect->id == SECT_CONTEXT) {
 671				buffer[count++] =
 672					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
 673				buffer[count++] = cpu_to_le32(ext->reg_index -
 674						PACKET3_SET_CONTEXT_REG_START);
 675				for (i = 0; i < ext->reg_count; i++)
 676					buffer[count++] = cpu_to_le32(ext->extent[i]);
 677			} else {
 678				return;
 679			}
 680		}
 681	}
 682
 683	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
 684	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
 685
 686	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
 687	buffer[count++] = cpu_to_le32(0);
 688}
 689
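/* Program the RLC load-balancing (LBPW) thresholds, counters and CU masks. */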
 690static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
 691{
 692	uint32_t data;
 693
 694	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
 695	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
 696	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
 697	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
 698	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
 699
 700	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
 701	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
 702
 703	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
 704	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
 705
 706	mutex_lock(&adev->grbm_idx_mutex);
  707	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
 708	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 709	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 710
 711	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
 712	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
 713	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
 714	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
 715	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
 716
 717	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
 718	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
 719	data &= 0x0000FFFF;
 720	data |= 0x00C00000;
 721	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
 722
 723	/* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */
 724	WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF);
 725
  726	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
 727	 * but used for RLC_LB_CNTL configuration */
 728	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
 729	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
 730	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
 731	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
 732	mutex_unlock(&adev->grbm_idx_mutex);
 733}
 734
 735static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
 736{
 737	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
 738}
 739
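/*
 * Copy the CP jump tables from the CE, PFP, ME, MEC and MEC2 microcode
 * images into the RLC cp_table buffer (Raven only).
 */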
 740static void rv_init_cp_jump_table(struct amdgpu_device *adev)
 741{
 742	const __le32 *fw_data;
 743	volatile u32 *dst_ptr;
 744	int me, i, max_me = 5;
 745	u32 bo_offset = 0;
 746	u32 table_offset, table_size;
 747
 748	/* write the cp table buffer */
 749	dst_ptr = adev->gfx.rlc.cp_table_ptr;
 750	for (me = 0; me < max_me; me++) {
 751		if (me == 0) {
 752			const struct gfx_firmware_header_v1_0 *hdr =
 753				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 754			fw_data = (const __le32 *)
 755				(adev->gfx.ce_fw->data +
 756				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 757			table_offset = le32_to_cpu(hdr->jt_offset);
 758			table_size = le32_to_cpu(hdr->jt_size);
 759		} else if (me == 1) {
 760			const struct gfx_firmware_header_v1_0 *hdr =
 761				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 762			fw_data = (const __le32 *)
 763				(adev->gfx.pfp_fw->data +
 764				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 765			table_offset = le32_to_cpu(hdr->jt_offset);
 766			table_size = le32_to_cpu(hdr->jt_size);
 767		} else if (me == 2) {
 768			const struct gfx_firmware_header_v1_0 *hdr =
 769				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 770			fw_data = (const __le32 *)
 771				(adev->gfx.me_fw->data +
 772				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 773			table_offset = le32_to_cpu(hdr->jt_offset);
 774			table_size = le32_to_cpu(hdr->jt_size);
 775		} else if (me == 3) {
 776			const struct gfx_firmware_header_v1_0 *hdr =
 777				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 778			fw_data = (const __le32 *)
 779				(adev->gfx.mec_fw->data +
 780				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 781			table_offset = le32_to_cpu(hdr->jt_offset);
 782			table_size = le32_to_cpu(hdr->jt_size);
 783		} else  if (me == 4) {
 784			const struct gfx_firmware_header_v1_0 *hdr =
 785				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
 786			fw_data = (const __le32 *)
 787				(adev->gfx.mec2_fw->data +
 788				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 789			table_offset = le32_to_cpu(hdr->jt_offset);
 790			table_size = le32_to_cpu(hdr->jt_size);
 791		}
 792
 793		for (i = 0; i < table_size; i ++) {
 794			dst_ptr[bo_offset + i] =
 795				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
 796		}
 797
 798		bo_offset += table_size;
 799	}
 800}
 801
 802static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
 803{
 804	/* clear state block */
 805	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
 806			&adev->gfx.rlc.clear_state_gpu_addr,
 807			(void **)&adev->gfx.rlc.cs_ptr);
 808
 809	/* jump table block */
 810	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
 811			&adev->gfx.rlc.cp_table_gpu_addr,
 812			(void **)&adev->gfx.rlc.cp_table_ptr);
 813}
 814
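/*
 * Allocate and fill the RLC clear-state buffer; on Raven also set up the
 * CP jump table and the LBPW registers.
 */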
 815static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
 816{
 817	volatile u32 *dst_ptr;
 818	u32 dws;
 819	const struct cs_section_def *cs_data;
 820	int r;
 821
 822	adev->gfx.rlc.cs_data = gfx9_cs_data;
 823
 824	cs_data = adev->gfx.rlc.cs_data;
 825
 826	if (cs_data) {
 827		/* clear state block */
 828		adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
 829		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
 830					      AMDGPU_GEM_DOMAIN_VRAM,
 831					      &adev->gfx.rlc.clear_state_obj,
 832					      &adev->gfx.rlc.clear_state_gpu_addr,
 833					      (void **)&adev->gfx.rlc.cs_ptr);
 834		if (r) {
 835			dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
 836				r);
 837			gfx_v9_0_rlc_fini(adev);
 838			return r;
 839		}
 840		/* set up the cs buffer */
 841		dst_ptr = adev->gfx.rlc.cs_ptr;
 842		gfx_v9_0_get_csb_buffer(adev, dst_ptr);
 843		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
 844		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
 845	}
 846
 847	if (adev->asic_type == CHIP_RAVEN) {
 848		/* TODO: double check the cp_table_size for RV */
 849		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
 850		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
 851					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 852					      &adev->gfx.rlc.cp_table_obj,
 853					      &adev->gfx.rlc.cp_table_gpu_addr,
 854					      (void **)&adev->gfx.rlc.cp_table_ptr);
 855		if (r) {
 856			dev_err(adev->dev,
 857				"(%d) failed to create cp table bo\n", r);
 858			gfx_v9_0_rlc_fini(adev);
 859			return r;
 860		}
 861
 862		rv_init_cp_jump_table(adev);
 863		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
 864		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
 865
 866		gfx_v9_0_init_lbpw(adev);
 867	}
 868
 869	return 0;
 870}
 871
 872static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 873{
 874	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
 875	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
 876}
 877
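/*
 * Allocate the MEC HPD EOP buffer for the acquired compute queues and a
 * GTT buffer holding a copy of the MEC microcode.
 */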
 878static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 879{
 880	int r;
 881	u32 *hpd;
 882	const __le32 *fw_data;
 883	unsigned fw_size;
 884	u32 *fw;
 885	size_t mec_hpd_size;
 886
 887	const struct gfx_firmware_header_v1_0 *mec_hdr;
 888
 889	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 890
 891	/* take ownership of the relevant compute queues */
 892	amdgpu_gfx_compute_queue_acquire(adev);
 893	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
 894
 895	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
 896				      AMDGPU_GEM_DOMAIN_GTT,
 897				      &adev->gfx.mec.hpd_eop_obj,
 898				      &adev->gfx.mec.hpd_eop_gpu_addr,
 899				      (void **)&hpd);
 900	if (r) {
  901		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
 902		gfx_v9_0_mec_fini(adev);
 903		return r;
 904	}
 905
 906	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
 907
 908	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 909	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 910
 911	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 912
 913	fw_data = (const __le32 *)
 914		(adev->gfx.mec_fw->data +
 915		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
 916	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
 917
 918	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
 919				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 920				      &adev->gfx.mec.mec_fw_obj,
 921				      &adev->gfx.mec.mec_fw_gpu_addr,
 922				      (void **)&fw);
 923	if (r) {
 924		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
 925		gfx_v9_0_mec_fini(adev);
 926		return r;
 927	}
 928
 929	memcpy(fw, fw_data, fw_size);
 930
 931	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
 932	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 933
 934	return 0;
 935}
 936
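/* Read one SQ indirect register for the given SIMD/wave via SQ_IND_INDEX/DATA. */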
 937static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 938{
 939	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 940		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 941		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 942		(address << SQ_IND_INDEX__INDEX__SHIFT) |
 943		(SQ_IND_INDEX__FORCE_READ_MASK));
 944	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 945}
 946
 947static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
 948			   uint32_t wave, uint32_t thread,
 949			   uint32_t regno, uint32_t num, uint32_t *out)
 950{
 951	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 952		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 953		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 954		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
 955		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
 956		(SQ_IND_INDEX__FORCE_READ_MASK) |
 957		(SQ_IND_INDEX__AUTO_INCR_MASK));
 958	while (num--)
 959		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 960}
 961
 962static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
 963{
 964	/* type 1 wave data */
 965	dst[(*no_fields)++] = 1;
 966	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
 967	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
 968	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
 969	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
 970	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
 971	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
 972	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
 973	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
 974	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
 975	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
 976	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
 977	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
 978	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
 979	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
 980}
 981
 982static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 983				     uint32_t wave, uint32_t start,
 984				     uint32_t size, uint32_t *dst)
 985{
 986	wave_read_regs(
 987		adev, simd, wave, 0,
 988		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 989}
 990
 991static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
 992				     uint32_t wave, uint32_t thread,
 993				     uint32_t start, uint32_t size,
 994				     uint32_t *dst)
 995{
 996	wave_read_regs(
 997		adev, simd, wave, thread,
 998		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
 999}
1000
1001static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1002				  u32 me, u32 pipe, u32 q)
1003{
1004	soc15_grbm_select(adev, me, pipe, q, 0);
1005}
1006
1007static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1008	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1009	.select_se_sh = &gfx_v9_0_select_se_sh,
1010	.read_wave_data = &gfx_v9_0_read_wave_data,
1011	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1012	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1013	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1014};
1015
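/*
 * Set the gfx callbacks and derive the gfx config (FIFO sizes and the
 * GB_ADDR_CONFIG fields) for the current ASIC.
 */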
1016static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1017{
1018	u32 gb_addr_config;
1019
1020	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1021
1022	switch (adev->asic_type) {
1023	case CHIP_VEGA10:
1024		adev->gfx.config.max_hw_contexts = 8;
1025		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1026		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1027		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1028		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1029		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1030		break;
1031	case CHIP_VEGA12:
1032		adev->gfx.config.max_hw_contexts = 8;
1033		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1034		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1035		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1036		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1037		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1038		DRM_INFO("fix gfx.config for vega12\n");
1039		break;
1040	case CHIP_RAVEN:
1041		adev->gfx.config.max_hw_contexts = 8;
1042		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1043		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1044		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1045		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1046		gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1047		break;
1048	default:
1049		BUG();
1050		break;
1051	}
1052
1053	adev->gfx.config.gb_addr_config = gb_addr_config;
1054
1055	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1056			REG_GET_FIELD(
1057					adev->gfx.config.gb_addr_config,
1058					GB_ADDR_CONFIG,
1059					NUM_PIPES);
1060
1061	adev->gfx.config.max_tile_pipes =
1062		adev->gfx.config.gb_addr_config_fields.num_pipes;
1063
1064	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1065			REG_GET_FIELD(
1066					adev->gfx.config.gb_addr_config,
1067					GB_ADDR_CONFIG,
1068					NUM_BANKS);
1069	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1070			REG_GET_FIELD(
1071					adev->gfx.config.gb_addr_config,
1072					GB_ADDR_CONFIG,
1073					MAX_COMPRESSED_FRAGS);
1074	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1075			REG_GET_FIELD(
1076					adev->gfx.config.gb_addr_config,
1077					GB_ADDR_CONFIG,
1078					NUM_RB_PER_SE);
1079	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1080			REG_GET_FIELD(
1081					adev->gfx.config.gb_addr_config,
1082					GB_ADDR_CONFIG,
1083					NUM_SHADER_ENGINES);
1084	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1085			REG_GET_FIELD(
1086					adev->gfx.config.gb_addr_config,
1087					GB_ADDR_CONFIG,
1088					PIPE_INTERLEAVE_SIZE));
1089}
1090
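/*
 * Create an NGG buffer sized at @size_se bytes per shader engine;
 * a @size_se of 0 falls back to @default_size_se.
 */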
1091static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1092				   struct amdgpu_ngg_buf *ngg_buf,
1093				   int size_se,
1094				   int default_size_se)
1095{
1096	int r;
1097
1098	if (size_se < 0) {
1099		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1100		return -EINVAL;
1101	}
1102	size_se = size_se ? size_se : default_size_se;
1103
1104	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1105	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1106				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1107				    &ngg_buf->bo,
1108				    &ngg_buf->gpu_addr,
1109				    NULL);
1110	if (r) {
1111		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1112		return r;
1113	}
1114	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1115
1116	return r;
1117}
1118
1119static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1120{
1121	int i;
1122
1123	for (i = 0; i < NGG_BUF_MAX; i++)
1124		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1125				      &adev->gfx.ngg.buf[i].gpu_addr,
1126				      NULL);
1127
1128	memset(&adev->gfx.ngg.buf[0], 0,
1129			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1130
1131	adev->gfx.ngg.init = false;
1132
1133	return 0;
1134}
1135
1136static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1137{
1138	int r;
1139
1140	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1141		return 0;
1142
1143	/* GDS reserve memory: 64 bytes alignment */
1144	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1145	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
1146	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
1147	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1148	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1149
1150	/* Primitive Buffer */
1151	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1152				    amdgpu_prim_buf_per_se,
1153				    64 * 1024);
1154	if (r) {
1155		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1156		goto err;
1157	}
1158
1159	/* Position Buffer */
1160	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1161				    amdgpu_pos_buf_per_se,
1162				    256 * 1024);
1163	if (r) {
1164		dev_err(adev->dev, "Failed to create Position Buffer\n");
1165		goto err;
1166	}
1167
1168	/* Control Sideband */
1169	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1170				    amdgpu_cntl_sb_buf_per_se,
1171				    256);
1172	if (r) {
1173		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1174		goto err;
1175	}
1176
1177	/* Parameter Cache, not created by default */
1178	if (amdgpu_param_buf_per_se <= 0)
1179		goto out;
1180
1181	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1182				    amdgpu_param_buf_per_se,
1183				    512 * 1024);
1184	if (r) {
1185		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1186		goto err;
1187	}
1188
1189out:
1190	adev->gfx.ngg.init = true;
1191	return 0;
1192err:
1193	gfx_v9_0_ngg_fini(adev);
1194	return r;
1195}
1196
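/*
 * Program the WD NGG buffer sizes and base addresses, then clear the
 * reserved GDS memory through the gfx ring.
 */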
1197static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1198{
1199	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1200	int r;
1201	u32 data, base;
1202
1203	if (!amdgpu_ngg)
1204		return 0;
1205
1206	/* Program buffer size */
1207	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1208			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1209	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1210			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1211	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1212
1213	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1214			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1215	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1216			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1217	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1218
1219	/* Program buffer base address */
1220	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1221	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1222	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1223
1224	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1225	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1226	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1227
1228	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1229	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1230	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1231
1232	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1233	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1234	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1235
1236	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1237	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1238	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1239
1240	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1241	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1242	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1243
1244	/* Clear GDS reserved memory */
1245	r = amdgpu_ring_alloc(ring, 17);
1246	if (r) {
1247		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
1248			  ring->idx, r);
1249		return r;
1250	}
1251
1252	gfx_v9_0_write_data_to_reg(ring, 0, false,
1253				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1254			           (adev->gds.mem.total_size +
1255				    adev->gfx.ngg.gds_reserve_size) >>
1256				   AMDGPU_GDS_SHIFT);
1257
1258	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1259	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1260				PACKET3_DMA_DATA_DST_SEL(1) |
1261				PACKET3_DMA_DATA_SRC_SEL(2)));
1262	amdgpu_ring_write(ring, 0);
1263	amdgpu_ring_write(ring, 0);
1264	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1265	amdgpu_ring_write(ring, 0);
1266	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1267				adev->gfx.ngg.gds_reserve_size);
1268
1269	gfx_v9_0_write_data_to_reg(ring, 0, false,
1270				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1271
1272	amdgpu_ring_commit(ring);
1273
1274	return 0;
1275}
1276
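/*
 * Set up one compute ring: me/pipe/queue mapping, doorbell, EOP buffer
 * address and the matching EOP interrupt source.
 */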
1277static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1278				      int mec, int pipe, int queue)
1279{
1280	int r;
1281	unsigned irq_type;
1282	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
 1283
1286	/* mec0 is me1 */
1287	ring->me = mec + 1;
1288	ring->pipe = pipe;
1289	ring->queue = queue;
1290
1291	ring->ring_obj = NULL;
1292	ring->use_doorbell = true;
1293	ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
1294	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1295				+ (ring_id * GFX9_MEC_HPD_SIZE);
1296	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1297
1298	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1299		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1300		+ ring->pipe;
1301
1302	/* type-2 packets are deprecated on MEC, use type-3 instead */
1303	r = amdgpu_ring_init(adev, ring, 1024,
1304			     &adev->gfx.eop_irq, irq_type);
1305	if (r)
1306		return r;
1307
1308
1309	return 0;
1310}
1311
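/*
 * sw_init: register interrupt sources, load microcode, create the RLC,
 * MEC and KIQ objects, the gfx/compute rings and the GDS/GWS/OA buffers.
 */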
1312static int gfx_v9_0_sw_init(void *handle)
1313{
1314	int i, j, k, r, ring_id;
1315	struct amdgpu_ring *ring;
1316	struct amdgpu_kiq *kiq;
1317	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1318
1319	switch (adev->asic_type) {
1320	case CHIP_VEGA10:
1321	case CHIP_VEGA12:
1322	case CHIP_RAVEN:
1323		adev->gfx.mec.num_mec = 2;
1324		break;
1325	default:
1326		adev->gfx.mec.num_mec = 1;
1327		break;
1328	}
1329
1330	adev->gfx.mec.num_pipe_per_mec = 4;
1331	adev->gfx.mec.num_queue_per_pipe = 8;
1332
1333	/* KIQ event */
1334	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
1335	if (r)
1336		return r;
1337
1338	/* EOP Event */
1339	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
1340	if (r)
1341		return r;
1342
1343	/* Privileged reg */
1344	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184,
1345			      &adev->gfx.priv_reg_irq);
1346	if (r)
1347		return r;
1348
1349	/* Privileged inst */
1350	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185,
1351			      &adev->gfx.priv_inst_irq);
1352	if (r)
1353		return r;
1354
1355	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1356
1357	gfx_v9_0_scratch_init(adev);
1358
1359	r = gfx_v9_0_init_microcode(adev);
1360	if (r) {
1361		DRM_ERROR("Failed to load gfx firmware!\n");
1362		return r;
1363	}
1364
1365	r = gfx_v9_0_rlc_init(adev);
1366	if (r) {
1367		DRM_ERROR("Failed to init rlc BOs!\n");
1368		return r;
1369	}
1370
1371	r = gfx_v9_0_mec_init(adev);
1372	if (r) {
1373		DRM_ERROR("Failed to init MEC BOs!\n");
1374		return r;
1375	}
1376
1377	/* set up the gfx ring */
1378	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1379		ring = &adev->gfx.gfx_ring[i];
1380		ring->ring_obj = NULL;
1381		if (!i)
1382			sprintf(ring->name, "gfx");
1383		else
1384			sprintf(ring->name, "gfx_%d", i);
1385		ring->use_doorbell = true;
1386		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
1387		r = amdgpu_ring_init(adev, ring, 1024,
1388				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1389		if (r)
1390			return r;
1391	}
1392
1393	/* set up the compute queues - allocate horizontally across pipes */
1394	ring_id = 0;
1395	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1396		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1397			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1398				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1399					continue;
1400
1401				r = gfx_v9_0_compute_ring_init(adev,
1402							       ring_id,
1403							       i, k, j);
1404				if (r)
1405					return r;
1406
1407				ring_id++;
1408			}
1409		}
1410	}
1411
1412	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1413	if (r) {
1414		DRM_ERROR("Failed to init KIQ BOs!\n");
1415		return r;
1416	}
1417
1418	kiq = &adev->gfx.kiq;
1419	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1420	if (r)
1421		return r;
1422
 1423	/* create MQD for all compute queues as well as KIQ for SRIOV case */
1424	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1425	if (r)
1426		return r;
1427
1428	/* reserve GDS, GWS and OA resource for gfx */
1429	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
1430				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
1431				    &adev->gds.gds_gfx_bo, NULL, NULL);
1432	if (r)
1433		return r;
1434
1435	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
1436				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
1437				    &adev->gds.gws_gfx_bo, NULL, NULL);
1438	if (r)
1439		return r;
1440
1441	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
1442				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
1443				    &adev->gds.oa_gfx_bo, NULL, NULL);
1444	if (r)
1445		return r;
1446
1447	adev->gfx.ce_ram_size = 0x8000;
1448
1449	gfx_v9_0_gpu_early_init(adev);
1450
1451	r = gfx_v9_0_ngg_init(adev);
1452	if (r)
1453		return r;
1454
1455	return 0;
1456}
1457
1458
1459static int gfx_v9_0_sw_fini(void *handle)
1460{
1461	int i;
1462	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1463
1464	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1465	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1466	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1467
1468	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1469		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1470	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1471		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1472
1473	amdgpu_gfx_compute_mqd_sw_fini(adev);
1474	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1475	amdgpu_gfx_kiq_fini(adev);
1476
1477	gfx_v9_0_mec_fini(adev);
1478	gfx_v9_0_ngg_fini(adev);
1479	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
1480				&adev->gfx.rlc.clear_state_gpu_addr,
1481				(void **)&adev->gfx.rlc.cs_ptr);
1482	if (adev->asic_type == CHIP_RAVEN) {
1483		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1484				&adev->gfx.rlc.cp_table_gpu_addr,
1485				(void **)&adev->gfx.rlc.cp_table_ptr);
1486	}
1487	gfx_v9_0_free_microcode(adev);
1488
1489	return 0;
1490}
1491
1492
1493static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1494{
1495	/* TODO */
1496}
1497
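/*
 * Select an SE/SH/instance via GRBM_GFX_INDEX; a value of 0xffffffff
 * enables broadcast mode for that field.
 */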
1498static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1499{
1500	u32 data;
1501
1502	if (instance == 0xffffffff)
1503		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1504	else
1505		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1506
1507	if (se_num == 0xffffffff)
1508		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1509	else
1510		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1511
1512	if (sh_num == 0xffffffff)
1513		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1514	else
1515		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1516
1517	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
1518}
1519
1520static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1521{
1522	u32 data, mask;
1523
1524	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1525	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1526
1527	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1528	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1529
1530	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1531					 adev->gfx.config.max_sh_per_se);
1532
1533	return (~data) & mask;
1534}
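/*
 * The returned bitmap is the complement of the DISABLE bits read above,
 * limited by the generated mask to the number of render backends per SH
 * (max_backends_per_se / max_sh_per_se).
 */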
1535
1536static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1537{
1538	int i, j;
1539	u32 data;
1540	u32 active_rbs = 0;
1541	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1542					adev->gfx.config.max_sh_per_se;
1543
1544	mutex_lock(&adev->grbm_idx_mutex);
1545	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1546		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1547			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1548			data = gfx_v9_0_get_rb_active_bitmap(adev);
1549			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1550					       rb_bitmap_width_per_sh);
1551		}
1552	}
1553	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1554	mutex_unlock(&adev->grbm_idx_mutex);
1555
1556	adev->gfx.config.backend_enable_mask = active_rbs;
1557	adev->gfx.config.num_rbs = hweight32(active_rbs);
1558}
1559
1560#define DEFAULT_SH_MEM_BASES	(0x6000)
1561#define FIRST_COMPUTE_VMID	(8)
1562#define LAST_COMPUTE_VMID	(16)
1563static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1564{
1565	int i;
1566	uint32_t sh_mem_config;
1567	uint32_t sh_mem_bases;
1568
1569	/*
1570	 * Configure apertures:
1571	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1572	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1573	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1574	 */
1575	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1576
1577	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1578			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1579			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1580
1581	mutex_lock(&adev->srbm_mutex);
1582	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1583		soc15_grbm_select(adev, 0, 0, 0, i);
1584		/* CP and shaders */
1585		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1586		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1587	}
1588	soc15_grbm_select(adev, 0, 0, 0, 0);
1589	mutex_unlock(&adev->srbm_mutex);
1590}
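/*
 * A sketch of the encoding assumed here: SH_MEM_BASES holds what appears to
 * be two 16-bit aperture-base fields (private and shared), each carrying
 * bits [63:48] of the corresponding aperture, so DEFAULT_SH_MEM_BASES
 * (0x6000) in both halves yields the 0x60000000'00000000 bases listed above;
 * compare the "shared_aperture_start >> 48" write in gfx_v9_0_gpu_init()
 * below.
 */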
1591
1592static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
1593{
1594	u32 tmp;
1595	int i;
1596
1597	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1598
1599	gfx_v9_0_tiling_mode_table_init(adev);
1600
1601	gfx_v9_0_setup_rb(adev);
1602	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1603
1604	/* XXX SH_MEM regs */
1605	/* where to put LDS, scratch, GPUVM in FSA64 space */
1606	mutex_lock(&adev->srbm_mutex);
1607	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1608		soc15_grbm_select(adev, 0, 0, 0, i);
1609		/* CP and shaders */
1610		if (i == 0) {
1611			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1612					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1613			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1614			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
1615		} else {
1616			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1617					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1618			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1619			tmp = adev->gmc.shared_aperture_start >> 48;
1620			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1621		}
1622	}
1623	soc15_grbm_select(adev, 0, 0, 0, 0);
1624
1625	mutex_unlock(&adev->srbm_mutex);
1626
1627	gfx_v9_0_init_compute_vmid(adev);
1628
1629	mutex_lock(&adev->grbm_idx_mutex);
1630	/*
1631	 * make sure that the following register writes are broadcast
1632	 * to all the shaders
1633	 */
1634	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1635
1636	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
1637		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
1638			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1639		   (adev->gfx.config.sc_prim_fifo_size_backend <<
1640			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1641		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
1642			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1643		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1644			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1645	mutex_unlock(&adev->grbm_idx_mutex);
1646
1647}
1648
1649static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1650{
1651	u32 i, j, k;
1652	u32 mask;
1653
1654	mutex_lock(&adev->grbm_idx_mutex);
1655	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1656		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1657			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1658			for (k = 0; k < adev->usec_timeout; k++) {
1659				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1660					break;
1661				udelay(1);
1662			}
1663			if (k == adev->usec_timeout) {
1664				gfx_v9_0_select_se_sh(adev, 0xffffffff,
1665						      0xffffffff, 0xffffffff);
1666				mutex_unlock(&adev->grbm_idx_mutex);
1667				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
1668					 i, j);
1669				return;
1670			}
1671		}
1672	}
1673	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1674	mutex_unlock(&adev->grbm_idx_mutex);
1675
1676	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1677		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1678		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1679		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
1680	for (k = 0; k < adev->usec_timeout; k++) {
1681		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
1682			break;
1683		udelay(1);
1684	}
1685}
1686
1687static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1688					       bool enable)
1689{
1690	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
1691
1692	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
1693	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
1694	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
1695	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
1696
1697	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
1698}
1699
1700static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
1701{
1702	/* csib */
1703	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
1704			adev->gfx.rlc.clear_state_gpu_addr >> 32);
1705	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
1706			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1707	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
1708			adev->gfx.rlc.clear_state_size);
1709}
1710
1711static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
1712				int indirect_offset,
1713				int list_size,
1714				int *unique_indirect_regs,
1715				int *unique_indirect_reg_count,
1716				int max_indirect_reg_count,
1717				int *indirect_start_offsets,
1718				int *indirect_start_offsets_count,
1719				int max_indirect_start_offsets_count)
1720{
1721	int idx;
1722	bool new_entry = true;
1723
1724	for (; indirect_offset < list_size; indirect_offset++) {
1725
1726		if (new_entry) {
1727			new_entry = false;
1728			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
1729			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
1730			BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
1731		}
1732
1733		if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
1734			new_entry = true;
1735			continue;
1736		}
1737
1738		indirect_offset += 2;
1739
1740		/* look for the matching index */
1741		for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
1742			if (unique_indirect_regs[idx] ==
1743				register_list_format[indirect_offset])
1744				break;
1745		}
1746
1747		if (idx >= *unique_indirect_reg_count) {
1748			unique_indirect_regs[*unique_indirect_reg_count] =
1749				register_list_format[indirect_offset];
1750			idx = *unique_indirect_reg_count;
1751			*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
1752			BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
1753		}
1754
1755		register_list_format[indirect_offset] = idx;
1756	}
1757}
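/*
 * Format assumed by the parser above: register_list_format is a sequence of
 * entries terminated by 0xFFFFFFFF markers; within an entry, each step
 * consumes three words, the third of which names an indirect register and is
 * replaced in place with its index into unique_indirect_regs[].
 */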
1758
1759static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1760{
1761	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1762	int unique_indirect_reg_count = 0;
1763
1764	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1765	int indirect_start_offsets_count = 0;
1766
1767	int list_size = 0;
1768	int i = 0;
1769	u32 tmp = 0;
1770
1771	u32 *register_list_format =
1772		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
1773	if (!register_list_format)
1774		return -ENOMEM;
1775	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
1776		adev->gfx.rlc.reg_list_format_size_bytes);
1777
1778	/* setup unique_indirect_regs array and indirect_start_offsets array */
1779	gfx_v9_0_parse_ind_reg_list(register_list_format,
1780				GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
1781				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
1782				unique_indirect_regs,
1783				&unique_indirect_reg_count,
1784				ARRAY_SIZE(unique_indirect_regs),
1785				indirect_start_offsets,
1786				&indirect_start_offsets_count,
1787				ARRAY_SIZE(indirect_start_offsets));
1788
1789	/* enable auto inc in case it is disabled */
1790	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
1791	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1792	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
1793
1794	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
1795	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
1796		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
1797	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
1798		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1799			adev->gfx.rlc.register_restore[i]);
1800
1801	/* load direct register */
1802	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
1803	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
1804		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1805			adev->gfx.rlc.register_restore[i]);
1806
1807	/* load indirect register */
1808	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1809		adev->gfx.rlc.reg_list_format_start);
1810	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
1811		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1812			register_list_format[i]);
1813
1814	/* set save/restore list size */
1815	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
1816	list_size = list_size >> 1;
1817	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1818		adev->gfx.rlc.reg_restore_list_size);
1819	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
1820
1821	/* write the starting offsets to RLC scratch ram */
1822	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1823		adev->gfx.rlc.starting_offsets_start);
1824	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
1825		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1826			indirect_start_offsets[i]);
1827
1828	/* load unique indirect regs */
1829	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
1830		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
1831			unique_indirect_regs[i] & 0x3FFFF);
1832		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
1833			unique_indirect_regs[i] >> 20);
1834	}
1835
1836	kfree(register_list_format);
1837	return 0;
1838}
1839
1840static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
1841{
1842	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
1843}
1844
1845static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
1846					     bool enable)
1847{
1848	uint32_t data = 0;
1849	uint32_t default_data = 0;
1850
1851	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
1852	if (enable) {
1853		/* enable GFXIP control over CGPG */
1854		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
1855		if (default_data != data)
1856			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1857
1858		/* update status */
1859		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
1860		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
1861		if (default_data != data)
1862			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1863	} else {
1864		/* restore GFXIP control over CGPG */
1865		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
1866		if (default_data != data)
1867			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1868	}
1869}
1870
1871static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
1872{
1873	uint32_t data = 0;
1874
1875	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
1876			      AMD_PG_SUPPORT_GFX_SMG |
1877			      AMD_PG_SUPPORT_GFX_DMG)) {
1878		/* init IDLE_POLL_COUNT = 60 */
1879		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
1880		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
1881		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
1882		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
1883
1884		/* init RLC PG Delay */
1885		data = 0;
1886		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
1887		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
1888		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
1889		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
1890		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
1891
1892		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
1893		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
1894		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
1895		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
1896
1897		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
1898		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
1899		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
1900		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
1901
1902		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
1903		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
1904
1905		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
1906		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
1907		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
1908
1909		pwr_10_0_gfxip_control_over_cgpg(adev, true);
1910	}
1911}
1912
1913static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
1914						bool enable)
1915{
1916	uint32_t data = 0;
1917	uint32_t default_data = 0;
1918
1919	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1920	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1921			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
1922			     enable ? 1 : 0);
1923	if (default_data != data)
1924		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1925}
1926
1927static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
1928						bool enable)
1929{
1930	uint32_t data = 0;
1931	uint32_t default_data = 0;
1932
1933	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1934	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1935			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
1936			     enable ? 1 : 0);
1937	if (default_data != data)
1938		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1939}
1940
1941static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
1942					bool enable)
1943{
1944	uint32_t data = 0;
1945	uint32_t default_data = 0;
1946
1947	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1948	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1949			     CP_PG_DISABLE,
1950			     enable ? 0 : 1);
1951	if (default_data != data)
1952		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1953}
1954
1955static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
1956						bool enable)
1957{
1958	uint32_t data, default_data;
1959
1960	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1961	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1962			     GFX_POWER_GATING_ENABLE,
1963			     enable ? 1 : 0);
1964	if (default_data != data)
1965		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1966}
1967
1968static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
1969						bool enable)
1970{
1971	uint32_t data, default_data;
1972
1973	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1974	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1975			     GFX_PIPELINE_PG_ENABLE,
1976			     enable ? 1 : 0);
1977	if (default_data != data)
1978		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1979
1980	if (!enable)
1981		/* read any GFX register to wake up GFX */
1982		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
1983}
1984
1985static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
1986						       bool enable)
1987{
1988	uint32_t data, default_data;
1989
1990	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1991	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1992			     STATIC_PER_CU_PG_ENABLE,
1993			     enable ? 1 : 0);
1994	if (default_data != data)
1995		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1996}
1997
1998static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
1999						bool enable)
2000{
2001	uint32_t data, default_data;
2002
2003	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2004	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2005			     DYN_PER_CU_PG_ENABLE,
2006			     enable ? 1 : 0);
2007	if (default_data != data)
2008		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2009}
2010
2011static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2012{
2013	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2014			      AMD_PG_SUPPORT_GFX_SMG |
2015			      AMD_PG_SUPPORT_GFX_DMG |
2016			      AMD_PG_SUPPORT_CP |
2017			      AMD_PG_SUPPORT_GDS |
2018			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2019		gfx_v9_0_init_csb(adev);
2020		gfx_v9_0_init_rlc_save_restore_list(adev);
2021		gfx_v9_0_enable_save_restore_machine(adev);
2022
2023		if (adev->asic_type == CHIP_RAVEN) {
2024			WREG32(mmRLC_JUMP_TABLE_RESTORE,
2025				adev->gfx.rlc.cp_table_gpu_addr >> 8);
2026			gfx_v9_0_init_gfx_power_gating(adev);
2027
2028			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
2029				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
2030				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
2031			} else {
2032				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
2033				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
2034			}
2035
2036			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
2037				gfx_v9_0_enable_cp_power_gating(adev, true);
2038			else
2039				gfx_v9_0_enable_cp_power_gating(adev, false);
2040		}
2041	}
2042}
2043
2044void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2045{
2046	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2047	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2048	gfx_v9_0_wait_for_rlc_serdes(adev);
2049}
2050
2051static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2052{
2053	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2054	udelay(50);
2055	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2056	udelay(50);
2057}
2058
2059static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2060{
2061#ifdef AMDGPU_RLC_DEBUG_RETRY
2062	u32 rlc_ucode_ver;
2063#endif
2064
2065	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2066
2067	/* on APUs the CP interrupt is enabled only after the CP has been initialized */
2068	if (!(adev->flags & AMD_IS_APU))
2069		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2070
2071	udelay(50);
2072
2073#ifdef AMDGPU_RLC_DEBUG_RETRY
2074	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2075	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2076	if (rlc_ucode_ver == 0x108) {
2077		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2078				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2079		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2080		 * default is 0x9C4 to create a 100us interval */
2081		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2082		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2083		 * to disable the page fault retry interrupts, default is
2084		 * 0x100 (256) */
2085		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2086	}
2087#endif
2088}
2089
2090static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2091{
2092	const struct rlc_firmware_header_v2_0 *hdr;
2093	const __le32 *fw_data;
2094	unsigned i, fw_size;
2095
2096	if (!adev->gfx.rlc_fw)
2097		return -EINVAL;
2098
2099	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2100	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2101
2102	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2103			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2104	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2105
2106	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2107			RLCG_UCODE_LOADING_START_ADDRESS);
2108	for (i = 0; i < fw_size; i++)
2109		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2110	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2111
2112	return 0;
2113}
2114
2115static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2116{
2117	int r;
2118
2119	if (amdgpu_sriov_vf(adev)) {
2120		gfx_v9_0_init_csb(adev);
2121		return 0;
2122	}
2123
2124	gfx_v9_0_rlc_stop(adev);
2125
2126	/* disable CG */
2127	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2128
2129	/* disable PG */
2130	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
2131
2132	gfx_v9_0_rlc_reset(adev);
2133
2134	gfx_v9_0_init_pg(adev);
2135
2136	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2137		/* legacy rlc firmware loading */
2138		r = gfx_v9_0_rlc_load_microcode(adev);
2139		if (r)
2140			return r;
2141	}
2142
2143	if (adev->asic_type == CHIP_RAVEN) {
2144		if (amdgpu_lbpw != 0)
2145			gfx_v9_0_enable_lbpw(adev, true);
2146		else
2147			gfx_v9_0_enable_lbpw(adev, false);
2148	}
2149
2150	gfx_v9_0_rlc_start(adev);
2151
2152	return 0;
2153}
2154
2155static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2156{
2157	int i;
2158	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2159
2160	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2161	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2162	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2163	if (!enable) {
2164		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2165			adev->gfx.gfx_ring[i].ready = false;
2166	}
2167	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
2168	udelay(50);
2169}
2170
2171static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2172{
2173	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2174	const struct gfx_firmware_header_v1_0 *ce_hdr;
2175	const struct gfx_firmware_header_v1_0 *me_hdr;
2176	const __le32 *fw_data;
2177	unsigned i, fw_size;
2178
2179	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2180		return -EINVAL;
2181
2182	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2183		adev->gfx.pfp_fw->data;
2184	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2185		adev->gfx.ce_fw->data;
2186	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2187		adev->gfx.me_fw->data;
2188
2189	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2190	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2191	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2192
2193	gfx_v9_0_cp_gfx_enable(adev, false);
2194
2195	/* PFP */
2196	fw_data = (const __le32 *)
2197		(adev->gfx.pfp_fw->data +
2198		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2199	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2200	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2201	for (i = 0; i < fw_size; i++)
2202		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2203	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2204
2205	/* CE */
2206	fw_data = (const __le32 *)
2207		(adev->gfx.ce_fw->data +
2208		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2209	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2210	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2211	for (i = 0; i < fw_size; i++)
2212		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2213	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2214
2215	/* ME */
2216	fw_data = (const __le32 *)
2217		(adev->gfx.me_fw->data +
2218		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2219	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2220	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2221	for (i = 0; i < fw_size; i++)
2222		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2223	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2224
2225	return 0;
2226}
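/*
 * The PFP, CE and ME loaders above follow the same pattern as the RLC and
 * MEC loaders: point the UCODE_ADDR/RAM_WADDR register at the load offset,
 * stream the firmware words through the DATA register, then write the
 * firmware version back to the address register as the final step.
 */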
2227
2228static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2229{
2230	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2231	const struct cs_section_def *sect = NULL;
2232	const struct cs_extent_def *ext = NULL;
2233	int r, i, tmp;
2234
2235	/* init the CP */
2236	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2237	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2238
2239	gfx_v9_0_cp_gfx_enable(adev, true);
2240
2241	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2242	if (r) {
2243		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2244		return r;
2245	}
2246
2247	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2248	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2249
2250	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2251	amdgpu_ring_write(ring, 0x80000000);
2252	amdgpu_ring_write(ring, 0x80000000);
2253
2254	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2255		for (ext = sect->section; ext->extent != NULL; ++ext) {
2256			if (sect->id == SECT_CONTEXT) {
2257				amdgpu_ring_write(ring,
2258				       PACKET3(PACKET3_SET_CONTEXT_REG,
2259					       ext->reg_count));
2260				amdgpu_ring_write(ring,
2261				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2262				for (i = 0; i < ext->reg_count; i++)
2263					amdgpu_ring_write(ring, ext->extent[i]);
2264			}
2265		}
2266	}
2267
2268	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2269	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2270
2271	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2272	amdgpu_ring_write(ring, 0);
2273
2274	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2275	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2276	amdgpu_ring_write(ring, 0x8000);
2277	amdgpu_ring_write(ring, 0x8000);
2278
2279	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2280	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2281		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2282	amdgpu_ring_write(ring, tmp);
2283	amdgpu_ring_write(ring, 0);
2284
2285	amdgpu_ring_commit(ring);
2286
2287	return 0;
2288}
2289
2290static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2291{
2292	struct amdgpu_ring *ring;
2293	u32 tmp;
2294	u32 rb_bufsz;
2295	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2296
2297	/* Set the write pointer delay */
2298	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2299
2300	/* set the RB to use vmid 0 */
2301	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2302
2303	/* Set ring buffer size */
2304	ring = &adev->gfx.gfx_ring[0];
2305	rb_bufsz = order_base_2(ring->ring_size / 8);
2306	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2307	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2308#ifdef __BIG_ENDIAN
2309	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2310#endif
2311	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2312
2313	/* Initialize the ring buffer's write pointers */
2314	ring->wptr = 0;
2315	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2316	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2317
2318	/* set the wb address whether it's enabled or not */
2319	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2320	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2321	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2322
2323	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2324	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2325	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2326
2327	mdelay(1);
2328	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2329
2330	rb_addr = ring->gpu_addr >> 8;
2331	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2332	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2333
2334	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2335	if (ring->use_doorbell) {
2336		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2337				    DOORBELL_OFFSET, ring->doorbell_index);
2338		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2339				    DOORBELL_EN, 1);
2340	} else {
2341		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2342	}
2343	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2344
2345	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2346			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2347	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2348
2349	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2350		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2351
2352
2353	/* start the ring */
2354	gfx_v9_0_cp_gfx_start(adev);
2355	ring->ready = true;
2356
2357	return 0;
2358}
2359
2360static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2361{
2362	int i;
2363
2364	if (enable) {
2365		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
2366	} else {
2367		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
2368			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2369		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2370			adev->gfx.compute_ring[i].ready = false;
2371		adev->gfx.kiq.ring.ready = false;
2372	}
2373	udelay(50);
2374}
2375
2376static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2377{
2378	const struct gfx_firmware_header_v1_0 *mec_hdr;
2379	const __le32 *fw_data;
2380	unsigned i;
2381	u32 tmp;
2382
2383	if (!adev->gfx.mec_fw)
2384		return -EINVAL;
2385
2386	gfx_v9_0_cp_compute_enable(adev, false);
2387
2388	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2389	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2390
2391	fw_data = (const __le32 *)
2392		(adev->gfx.mec_fw->data +
2393		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2394	tmp = 0;
2395	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2396	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2397	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2398
2399	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2400		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2401	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2402		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2403
2404	/* MEC1 */
2405	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2406			 mec_hdr->jt_offset);
2407	for (i = 0; i < mec_hdr->jt_size; i++)
2408		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2409			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2410
2411	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2412			adev->gfx.mec_fw_version);
2413	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run microcode different from MEC1's */
2414
2415	return 0;
2416}
2417
2418/* KIQ functions */
2419static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2420{
2421	uint32_t tmp;
2422	struct amdgpu_device *adev = ring->adev;
2423
2424	/* tell the RLC which queue is the KIQ */
2425	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2426	tmp &= 0xffffff00;
2427	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2428	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2429	tmp |= 0x80;
2430	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2431}
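/*
 * The KIQ slot is programmed with two writes: the first places the
 * me/pipe/queue encoding in the low byte of RLC_CP_SCHEDULERS, the second
 * sets bit 7 on top of it (presumably the enable/valid bit for that entry).
 */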
2432
2433static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2434{
2435	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2436	uint32_t scratch, tmp = 0;
2437	uint64_t queue_mask = 0;
2438	int r, i;
2439
2440	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2441		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2442			continue;
2443
2444		/* This situation may be hit in the future if a new HW
2445		 * generation exposes more than 64 queues. If so, the
2446		 * definition of queue_mask needs updating */
2447		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2448			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2449			break;
2450		}
2451
2452		queue_mask |= (1ull << i);
2453	}
2454
2455	r = amdgpu_gfx_scratch_get(adev, &scratch);
2456	if (r) {
2457		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
2458		return r;
2459	}
2460	WREG32(scratch, 0xCAFEDEAD);
2461
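	/*
	 * Ring space estimate for the allocation below: SET_RESOURCES is
	 * 8 dwords, each MAP_QUEUES packet is 7 dwords per compute ring, and
	 * the final SET_UCONFIG_REG write to the scratch register is 3 dwords,
	 * hence (7 * num_compute_rings) + 11.
	 */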
2462	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
2463	if (r) {
2464		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2465		amdgpu_gfx_scratch_free(adev, scratch);
2466		return r;
2467	}
2468
2469	/* set resources */
2470	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2471	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2472			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2473	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2474	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2475	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2476	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2477	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2478	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2479	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2480		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2481		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2482		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2483
2484		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2485		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
2486		amdgpu_ring_write(kiq_ring,
2487				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2488				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2489				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2490				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2491				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2492				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2493				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2494				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2495				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2496		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2497		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2498		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2499		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2500		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2501	}
2502	/* write to scratch for completion */
2503	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2504	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2505	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
2506	amdgpu_ring_commit(kiq_ring);
2507
2508	for (i = 0; i < adev->usec_timeout; i++) {
2509		tmp = RREG32(scratch);
2510		if (tmp == 0xDEADBEEF)
2511			break;
2512		DRM_UDELAY(1);
2513	}
2514	if (i >= adev->usec_timeout) {
2515		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
2516			  scratch, tmp);
2517		r = -EINVAL;
2518	}
2519	amdgpu_gfx_scratch_free(adev, scratch);
2520
2521	return r;
2522}
2523
2524static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2525{
2526	struct amdgpu_device *adev = ring->adev;
2527	struct v9_mqd *mqd = ring->mqd_ptr;
2528	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2529	uint32_t tmp;
2530
2531	mqd->header = 0xC0310800;
2532	mqd->compute_pipelinestat_enable = 0x00000001;
2533	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2534	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2535	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2536	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2537	mqd->compute_misc_reserved = 0x00000003;
2538
2539	mqd->dynamic_cu_mask_addr_lo =
2540		lower_32_bits(ring->mqd_gpu_addr
2541			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2542	mqd->dynamic_cu_mask_addr_hi =
2543		upper_32_bits(ring->mqd_gpu_addr
2544			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2545
2546	eop_base_addr = ring->eop_gpu_addr >> 8;
2547	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2548	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2549
2550	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2551	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2552	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2553			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2554
2555	mqd->cp_hqd_eop_control = tmp;
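	/*
	 * The EOP_SIZE programmed above, worked through: GFX9_MEC_HPD_SIZE is
	 * 2048 bytes, i.e. 512 dwords, so order_base_2(512) - 1 = 8 and
	 * 2^(8+1) = 512 dwords matches the EOP buffer size.
	 */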
2556
2557	/* enable doorbell? */
2558	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2559
2560	if (ring->use_doorbell) {
2561		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2562				    DOORBELL_OFFSET, ring->doorbell_index);
2563		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2564				    DOORBELL_EN, 1);
2565		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2566				    DOORBELL_SOURCE, 0);
2567		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2568				    DOORBELL_HIT, 0);
2569	} else {
2570		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2571					 DOORBELL_EN, 0);
2572	}
2573
2574	mqd->cp_hqd_pq_doorbell_control = tmp;
2575
2576	/* disable the queue if it's active */
2577	ring->wptr = 0;
2578	mqd->cp_hqd_dequeue_request = 0;
2579	mqd->cp_hqd_pq_rptr = 0;
2580	mqd->cp_hqd_pq_wptr_lo = 0;
2581	mqd->cp_hqd_pq_wptr_hi = 0;
2582
2583	/* set the pointer to the MQD */
2584	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2585	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2586
2587	/* set MQD vmid to 0 */
2588	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2589	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2590	mqd->cp_mqd_control = tmp;
2591
2592	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2593	hqd_gpu_addr = ring->gpu_addr >> 8;
2594	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2595	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2596
2597	/* set up the HQD, this is similar to CP_RB0_CNTL */
2598	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2599	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2600			    (order_base_2(ring->ring_size / 4) - 1));
2601	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2602			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2603#ifdef __BIG_ENDIAN
2604	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2605#endif
2606	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2607	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2608	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2609	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2610	mqd->cp_hqd_pq_control = tmp;
2611
2612	/* set the wb address whether it's enabled or not */
2613	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2614	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2615	mqd->cp_hqd_pq_rptr_report_addr_hi =
2616		upper_32_bits(wb_gpu_addr) & 0xffff;
2617
2618	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2619	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2620	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2621	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2622
2623	tmp = 0;
2624	/* enable the doorbell if requested */
2625	if (ring->use_doorbell) {
2626		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2627		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2628				DOORBELL_OFFSET, ring->doorbell_index);
2629
2630		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2631					 DOORBELL_EN, 1);
2632		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2633					 DOORBELL_SOURCE, 0);
2634		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2635					 DOORBELL_HIT, 0);
2636	}
2637
2638	mqd->cp_hqd_pq_doorbell_control = tmp;
2639
2640	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2641	ring->wptr = 0;
2642	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2643
2644	/* set the vmid for the queue */
2645	mqd->cp_hqd_vmid = 0;
2646
2647	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2648	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2649	mqd->cp_hqd_persistent_state = tmp;
2650
2651	/* set MIN_IB_AVAIL_SIZE */
2652	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2653	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2654	mqd->cp_hqd_ib_control = tmp;
2655
2656	/* activate the queue */
2657	mqd->cp_hqd_active = 1;
2658
2659	return 0;
2660}
2661
2662static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2663{
2664	struct amdgpu_device *adev = ring->adev;
2665	struct v9_mqd *mqd = ring->mqd_ptr;
2666	int j;
2667
2668	/* disable wptr polling */
2669	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2670
2671	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2672	       mqd->cp_hqd_eop_base_addr_lo);
2673	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2674	       mqd->cp_hqd_eop_base_addr_hi);
2675
2676	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2677	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
2678	       mqd->cp_hqd_eop_control);
2679
2680	/* enable doorbell? */
2681	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2682	       mqd->cp_hqd_pq_doorbell_control);
2683
2684	/* disable the queue if it's active */
2685	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2686		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2687		for (j = 0; j < adev->usec_timeout; j++) {
2688			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2689				break;
2690			udelay(1);
2691		}
2692		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2693		       mqd->cp_hqd_dequeue_request);
2694		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
2695		       mqd->cp_hqd_pq_rptr);
2696		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
2697		       mqd->cp_hqd_pq_wptr_lo);
2698		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
2699		       mqd->cp_hqd_pq_wptr_hi);
2700	}
2701
2702	/* set the pointer to the MQD */
2703	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
2704	       mqd->cp_mqd_base_addr_lo);
2705	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
2706	       mqd->cp_mqd_base_addr_hi);
2707
2708	/* set MQD vmid to 0 */
2709	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
2710	       mqd->cp_mqd_control);
2711
2712	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2713	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
2714	       mqd->cp_hqd_pq_base_lo);
2715	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
2716	       mqd->cp_hqd_pq_base_hi);
2717
2718	/* set up the HQD, this is similar to CP_RB0_CNTL */
2719	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
2720	       mqd->cp_hqd_pq_control);
2721
2722	/* set the wb address whether it's enabled or not */
2723	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
2724				mqd->cp_hqd_pq_rptr_report_addr_lo);
2725	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2726				mqd->cp_hqd_pq_rptr_report_addr_hi);
2727
2728	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2729	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
2730	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
2731	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
2732	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
2733
2734	/* enable the doorbell if requested */
2735	if (ring->use_doorbell) {
2736		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
2737					(AMDGPU_DOORBELL64_KIQ * 2) << 2);
2738		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
2739					(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
2740	}
2741
2742	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2743	       mqd->cp_hqd_pq_doorbell_control);
2744
2745	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2746	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
2747	       mqd->cp_hqd_pq_wptr_lo);
2748	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
2749	       mqd->cp_hqd_pq_wptr_hi);
2750
2751	/* set the vmid for the queue */
2752	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
2753
2754	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
2755	       mqd->cp_hqd_persistent_state);
2756
2757	/* activate the queue */
2758	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
2759	       mqd->cp_hqd_active);
2760
2761	if (ring->use_doorbell)
2762		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
2763
2764	return 0;
2765}
2766
2767static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
2768{
2769	struct amdgpu_device *adev = ring->adev;
2770	int j;
2771
2772	/* disable the queue if it's active */
2773	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2774
2775		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2776
2777		for (j = 0; j < adev->usec_timeout; j++) {
2778			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2779				break;
2780			udelay(1);
2781		}
2782
2783		if (j == adev->usec_timeout) {
2784			DRM_DEBUG("KIQ dequeue request failed.\n");
2785
2786			/* Manual disable if dequeue request times out */
2787			WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
2788		}
2789
2790		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2791		      0);
2792	}
2793
2794	WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
2795	WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
2796	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
2797	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
2798	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
2799	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
2800	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
2801	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
2802
2803	return 0;
2804}
2805
2806static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2807{
2808	struct amdgpu_device *adev = ring->adev;
2809	struct v9_mqd *mqd = ring->mqd_ptr;
2810	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
2811
2812	gfx_v9_0_kiq_setting(ring);
2813
2814	if (adev->in_gpu_reset) { /* for GPU_RESET case */
2815		/* reset MQD to a clean status */
2816		if (adev->gfx.mec.mqd_backup[mqd_idx])
2817			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2818
2819		/* reset ring buffer */
2820		ring->wptr = 0;
2821		amdgpu_ring_clear_ring(ring);
2822
2823		mutex_lock(&adev->srbm_mutex);
2824		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2825		gfx_v9_0_kiq_init_register(ring);
2826		soc15_grbm_select(adev, 0, 0, 0, 0);
2827		mutex_unlock(&adev->srbm_mutex);
2828	} else {
2829		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2830		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2831		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2832		mutex_lock(&adev->srbm_mutex);
2833		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2834		gfx_v9_0_mqd_init(ring);
2835		gfx_v9_0_kiq_init_register(ring);
2836		soc15_grbm_select(adev, 0, 0, 0, 0);
2837		mutex_unlock(&adev->srbm_mutex);
2838
2839		if (adev->gfx.mec.mqd_backup[mqd_idx])
2840			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2841	}
2842
2843	return 0;
2844}
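/*
 * The KIQ path above and the KCQ path below follow the same pattern: on
 * first init the freshly built MQD is copied into mqd_backup[], while on a
 * GPU reset the saved copy is restored and only the ring buffer is cleared
 * instead of reprogramming the MQD from scratch.
 */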
2845
2846static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
2847{
2848	struct amdgpu_device *adev = ring->adev;
2849	struct v9_mqd *mqd = ring->mqd_ptr;
2850	int mqd_idx = ring - &adev->gfx.compute_ring[0];
2851
2852	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
2853		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2854		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2855		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2856		mutex_lock(&adev->srbm_mutex);
2857		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2858		gfx_v9_0_mqd_init(ring);
2859		soc15_grbm_select(adev, 0, 0, 0, 0);
2860		mutex_unlock(&adev->srbm_mutex);
2861
2862		if (adev->gfx.mec.mqd_backup[mqd_idx])
2863			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2864	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
2865		/* reset MQD to a clean status */
2866		if (adev->gfx.mec.mqd_backup[mqd_idx])
2867			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2868
2869		/* reset ring buffer */
2870		ring->wptr = 0;
2871		amdgpu_ring_clear_ring(ring);
2872	} else {
2873		amdgpu_ring_clear_ring(ring);
2874	}
2875
2876	return 0;
2877}
2878
2879static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
2880{
2881	struct amdgpu_ring *ring = NULL;
2882	int r = 0, i;
2883
2884	gfx_v9_0_cp_compute_enable(adev, true);
2885
2886	ring = &adev->gfx.kiq.ring;
2887
2888	r = amdgpu_bo_reserve(ring->mqd_obj, false);
2889	if (unlikely(r != 0))
2890		goto done;
2891
2892	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2893	if (!r) {
2894		r = gfx_v9_0_kiq_init_queue(ring);
2895		amdgpu_bo_kunmap(ring->mqd_obj);
2896		ring->mqd_ptr = NULL;
2897	}
2898	amdgpu_bo_unreserve(ring->mqd_obj);
2899	if (r)
2900		goto done;
2901
2902	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2903		ring = &adev->gfx.compute_ring[i];
2904
2905		r = amdgpu_bo_reserve(ring->mqd_obj, false);
2906		if (unlikely(r != 0))
2907			goto done;
2908		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2909		if (!r) {
2910			r = gfx_v9_0_kcq_init_queue(ring);
2911			amdgpu_bo_kunmap(ring->mqd_obj);
2912			ring->mqd_ptr = NULL;
2913		}
2914		amdgpu_bo_unreserve(ring->mqd_obj);
2915		if (r)
2916			goto done;
2917	}
2918
2919	r = gfx_v9_0_kiq_kcq_enable(adev);
2920done:
2921	return r;
2922}
2923
2924static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
2925{
2926	int r, i;
2927	struct amdgpu_ring *ring;
2928
2929	if (!(adev->flags & AMD_IS_APU))
2930		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2931
2932	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2933		/* legacy firmware loading */
2934		r = gfx_v9_0_cp_gfx_load_microcode(adev);
2935		if (r)
2936			return r;
2937
2938		r = gfx_v9_0_cp_compute_load_microcode(adev);
2939		if (r)
2940			return r;
2941	}
2942
2943	r = gfx_v9_0_cp_gfx_resume(adev);
2944	if (r)
2945		return r;
2946
2947	r = gfx_v9_0_kiq_resume(adev);
2948	if (r)
2949		return r;
2950
2951	ring = &adev->gfx.gfx_ring[0];
2952	r = amdgpu_ring_test_ring(ring);
2953	if (r) {
2954		ring->ready = false;
2955		return r;
2956	}
2957
2958	ring = &adev->gfx.kiq.ring;
2959	ring->ready = true;
2960	r = amdgpu_ring_test_ring(ring);
2961	if (r)
2962		ring->ready = false;
2963
2964	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2965		ring = &adev->gfx.compute_ring[i];
2966
2967		ring->ready = true;
2968		r = amdgpu_ring_test_ring(ring);
2969		if (r)
2970			ring->ready = false;
2971	}
2972
2973	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2974
2975	return 0;
2976}
2977
2978static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
2979{
2980	gfx_v9_0_cp_gfx_enable(adev, enable);
2981	gfx_v9_0_cp_compute_enable(adev, enable);
2982}
2983
2984static int gfx_v9_0_hw_init(void *handle)
2985{
2986	int r;
2987	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2988
2989	gfx_v9_0_init_golden_registers(adev);
2990
2991	gfx_v9_0_gpu_init(adev);
2992
2993	r = gfx_v9_0_rlc_resume(adev);
2994	if (r)
2995		return r;
2996
2997	r = gfx_v9_0_cp_resume(adev);
2998	if (r)
2999		return r;
3000
3001	r = gfx_v9_0_ngg_en(adev);
3002	if (r)
3003		return r;
3004
3005	return r;
3006}
3007
3008static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
3009{
3010	struct amdgpu_device *adev = kiq_ring->adev;
3011	uint32_t scratch, tmp = 0;
3012	int r, i;
3013
3014	r = amdgpu_gfx_scratch_get(adev, &scratch);
3015	if (r) {
3016		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
3017		return r;
3018	}
3019	WREG32(scratch, 0xCAFEDEAD);
3020
3021	r = amdgpu_ring_alloc(kiq_ring, 10);
3022	if (r) {
3023		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3024		amdgpu_gfx_scratch_free(adev, scratch);
3025		return r;
3026	}
3027
3028	/* unmap queues */
3029	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3030	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3031						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3032						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3033						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3034						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3035	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3036	amdgpu_ring_write(kiq_ring, 0);
3037	amdgpu_ring_write(kiq_ring, 0);
3038	amdgpu_ring_write(kiq_ring, 0);
3039	/* write to scratch for completion */
3040	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3041	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
3042	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
3043	amdgpu_ring_commit(kiq_ring);
3044
3045	for (i = 0; i < adev->usec_timeout; i++) {
3046		tmp = RREG32(scratch);
3047		if (tmp == 0xDEADBEEF)
3048			break;
3049		DRM_UDELAY(1);
3050	}
3051	if (i >= adev->usec_timeout) {
3052		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
3053		r = -EINVAL;
3054	}
3055	amdgpu_gfx_scratch_free(adev, scratch);
3056	return r;
3057}
3058
3059static int gfx_v9_0_hw_fini(void *handle)
3060{
3061	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3062	int i;
3063
3064	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3065	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3066
3067	/* disable the KCQs so the CPC does not touch memory that is no longer valid */
3068	for (i = 0; i < adev->gfx.num_compute_rings; i++)
3069		gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
3070
3071	if (amdgpu_sriov_vf(adev)) {
3072		gfx_v9_0_cp_gfx_enable(adev, false);
3073		/* polling must be disabled for SRIOV once hw_fini completes,
3074		 * otherwise the CPC engine may keep fetching the WB address,
3075		 * which becomes invalid after sw_fini and triggers DMAR read
3076		 * errors on the hypervisor side.
3077		 */
3078		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3079		return 0;
3080	}
3081
3082	/* Use the deinitialization sequence from CAIL when unbinding the device
3083	 * from the driver, otherwise the KIQ hangs when binding it back.
3084	 */
3085	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
3086		mutex_lock(&adev->srbm_mutex);
3087		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3088				adev->gfx.kiq.ring.pipe,
3089				adev->gfx.kiq.ring.queue, 0);
3090		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3091		soc15_grbm_select(adev, 0, 0, 0, 0);
3092		mutex_unlock(&adev->srbm_mutex);
3093	}
3094
3095	gfx_v9_0_cp_enable(adev, false);
3096	gfx_v9_0_rlc_stop(adev);
3097
3098	return 0;
3099}
3100
3101static int gfx_v9_0_suspend(void *handle)
3102{
3103	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3104
3105	adev->gfx.in_suspend = true;
3106	return gfx_v9_0_hw_fini(adev);
3107}
3108
3109static int gfx_v9_0_resume(void *handle)
3110{
3111	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3112	int r;
3113
3114	r = gfx_v9_0_hw_init(adev);
3115	adev->gfx.in_suspend = false;
3116	return r;
3117}
3118
3119static bool gfx_v9_0_is_idle(void *handle)
3120{
3121	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3122
3123	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3124				GRBM_STATUS, GUI_ACTIVE))
3125		return false;
3126	else
3127		return true;
3128}
3129
3130static int gfx_v9_0_wait_for_idle(void *handle)
3131{
3132	unsigned i;
3133	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3134
3135	for (i = 0; i < adev->usec_timeout; i++) {
3136		if (gfx_v9_0_is_idle(handle))
3137			return 0;
3138		udelay(1);
3139	}
3140	return -ETIMEDOUT;
3141}
3142
3143static int gfx_v9_0_soft_reset(void *handle)
3144{
3145	u32 grbm_soft_reset = 0;
3146	u32 tmp;
3147	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3148
3149	/* GRBM_STATUS */
3150	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3151	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3152		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3153		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3154		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3155		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3156		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3157		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3158						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3159		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3160						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3161	}
3162
3163	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3164		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3165						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3166	}
3167
3168	/* GRBM_STATUS2 */
3169	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3170	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3171		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3172						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3173
3174
3175	if (grbm_soft_reset) {
3176		/* stop the rlc */
3177		gfx_v9_0_rlc_stop(adev);
3178
3179		/* Disable GFX parsing/prefetching */
3180		gfx_v9_0_cp_gfx_enable(adev, false);
3181
3182		/* Disable MEC parsing/prefetching */
3183		gfx_v9_0_cp_compute_enable(adev, false);
3184
3185		if (grbm_soft_reset) {
3186			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3187			tmp |= grbm_soft_reset;
3188			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3189			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3190			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3191
3192			udelay(50);
3193
3194			tmp &= ~grbm_soft_reset;
3195			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3196			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3197		}
3198
3199		/* Wait a little for things to settle down */
3200		udelay(50);
3201	}
3202	return 0;
3203}
3204
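/*
 * Read the free-running GPU clock as a 64-bit value: the capture write
 * latches the counter so the LSB/MSB pair reads back as one coherent
 * sample, and gpu_clock_mutex serializes concurrent readers.
 */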
3205static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3206{
3207	uint64_t clock;
3208
3209	mutex_lock(&adev->gfx.gpu_clock_mutex);
3210	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3211	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3212		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3213	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3214	return clock;
3215}
3216
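/*
 * Emit the per-VMID GDS switch: scale the GDS/GWS/OA allocations by their
 * shifts and write them through gfx_v9_0_write_data_to_reg() into the
 * VMID-indexed GDS_VMID*_BASE/SIZE, GDS_GWS_VMID* and GDS_OA_VMID* registers.
 */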
3217static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3218					  uint32_t vmid,
3219					  uint32_t gds_base, uint32_t gds_size,
3220					  uint32_t gws_base, uint32_t gws_size,
3221					  uint32_t oa_base, uint32_t oa_size)
3222{
3223	struct amdgpu_device *adev = ring->adev;
3224
3225	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
3226	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
3227
3228	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
3229	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
3230
3231	oa_base = oa_base >> AMDGPU_OA_SHIFT;
3232	oa_size = oa_size >> AMDGPU_OA_SHIFT;
3233
3234	/* GDS Base */
3235	gfx_v9_0_write_data_to_reg(ring, 0, false,
3236				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3237				   gds_base);
3238
3239	/* GDS Size */
3240	gfx_v9_0_write_data_to_reg(ring, 0, false,
3241				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3242				   gds_size);
3243
3244	/* GWS */
3245	gfx_v9_0_write_data_to_reg(ring, 0, false,
3246				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3247				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3248
3249	/* OA */
3250	gfx_v9_0_write_data_to_reg(ring, 0, false,
3251				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3252				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3253}
3254
3255static int gfx_v9_0_early_init(void *handle)
3256{
3257	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3258
3259	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3260	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3261	gfx_v9_0_set_ring_funcs(adev);
3262	gfx_v9_0_set_irq_funcs(adev);
3263	gfx_v9_0_set_gds_init(adev);
3264	gfx_v9_0_set_rlc_funcs(adev);
3265
3266	return 0;
3267}
3268
3269static int gfx_v9_0_late_init(void *handle)
3270{
3271	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3272	int r;
3273
3274	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3275	if (r)
3276		return r;
3277
3278	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3279	if (r)
3280		return r;
3281
3282	return 0;
3283}
3284
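/*
 * Request RLC safe mode: if the RLC is running and any GFX clockgating
 * feature is enabled, write the CMD/MESSAGE handshake to RLC_SAFE_MODE and
 * poll until the RLC acknowledges by clearing the CMD field.
 */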
3285static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
3286{
3287	uint32_t rlc_setting, data;
3288	unsigned i;
3289
3290	if (adev->gfx.rlc.in_safe_mode)
3291		return;
3292
3293	/* if RLC is not enabled, do nothing */
3294	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3295	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3296		return;
3297
3298	if (adev->cg_flags &
3299	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
3300	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
3301		data = RLC_SAFE_MODE__CMD_MASK;
3302		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3303		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3304
3305		/* wait for RLC_SAFE_MODE */
3306		for (i = 0; i < adev->usec_timeout; i++) {
3307			if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3308				break;
3309			udelay(1);
3310		}
3311		adev->gfx.rlc.in_safe_mode = true;
3312	}
3313}
3314
3315static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
3316{
3317	uint32_t rlc_setting, data;
3318
3319	if (!adev->gfx.rlc.in_safe_mode)
3320		return;
3321
3322	/* if RLC is not enabled, do nothing */
3323	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3324	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3325		return;
3326
3327	if (adev->cg_flags &
3328	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
3329		/*
3330		 * Try to exit safe mode only if it is already in safe
3331		 * mode.
3332		 */
3333		data = RLC_SAFE_MODE__CMD_MASK;
3334		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3335		adev->gfx.rlc.in_safe_mode = false;
3336	}
3337}
3338
3339static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3340						bool enable)
3341{
3342	/* TODO: double check if we need to perform under safe mode */
3343	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
3344
3345	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3346		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3347		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3348			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3349	} else {
3350		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3351		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3352	}
3353
3354	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
3355}
3356
3357static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3358						bool enable)
3359{
3360	/* TODO: double check if we need to perform under safe mode */
3361	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
3362
3363	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3364		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3365	else
3366		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3367
3368	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3369		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3370	else
3371		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3372
3373	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
3374}
3375
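/*
 * Medium grain clockgating (MGCG): enabling clears the relevant override
 * bits in RLC_CGTT_MGCG_OVERRIDE and, when MGLS is supported, also turns on
 * RLC and CP memory light sleep; disabling restores the overrides and
 * clears both light sleep enables.
 */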
3376static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3377						      bool enable)
3378{
3379	uint32_t data, def;
3380
3381	/* It is disabled by HW by default */
3382	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3383		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
3384		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3385		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
3386			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3387			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3388			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3389
3390		/* only for Vega10 & Raven1 */
3391		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
3392
3393		if (def != data)
3394			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3395
3396		/* MGLS is a global flag to control all MGLS in GFX */
3397		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3398			/* 2 - RLC memory Light sleep */
3399			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
3400				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3401				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3402				if (def != data)
3403					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
3404			}
3405			/* 3 - CP memory Light sleep */
3406			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
3407				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3408				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3409				if (def != data)
3410					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
3411			}
3412		}
3413	} else {
3414		/* 1 - MGCG_OVERRIDE */
3415		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3416		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
3417			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3418			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3419			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3420			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3421		if (def != data)
3422			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3423
3424		/* 2 - disable MGLS in RLC */
3425		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3426		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
3427			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3428			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
3429		}
3430
3431		/* 3 - disable MGLS in CP */
3432		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3433		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
3434			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3435			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
3436		}
3437	}
3438}
3439
3440static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
3441					   bool enable)
3442{
3443	uint32_t data, def;
3444
3445	adev->gfx.rlc.funcs->enter_safe_mode(adev);
3446
3447	/* Enable 3D CGCG/CGLS */
3448	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
3449		/* write cmd to clear cgcg/cgls ov */
3450		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3451		/* unset CGCG override */
3452		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3453		/* update CGCG and CGLS override bits */
3454		if (def != data)
3455			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3456		/* enable 3Dcgcg FSM(0x0020003f) */
3457		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3458		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3459			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
3460		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3461			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3462				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3463		if (def != data)
3464			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
3465
3466		/* set IDLE_POLL_COUNT(0x00900100) */
3467		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
3468		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3469			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3470		if (def != data)
3471			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
3472	} else {
3473		/* Disable CGCG/CGLS */
3474		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3475		/* disable cgcg, cgls should be disabled */
3476		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
3477			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
3478		/* disable cgcg and cgls in FSM */
3479		if (def != data)
3480			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
3481	}
3482
3483	adev->gfx.rlc.funcs->exit_safe_mode(adev);
3484}
3485
3486static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3487						      bool enable)
3488{
3489	uint32_t def, data;
3490
3491	adev->gfx.rlc.funcs->enter_safe_mode(adev);
3492
3493	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3494		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3495		/* unset CGCG override */
3496		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3497		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3498			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3499		else
3500			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3501		/* update CGCG and CGLS override bits */
3502		if (def != data)
3503			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3504
3505		/* enable cgcg FSM(0x0020003F) */
3506		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3507		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3508			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3509		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3510			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3511				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3512		if (def != data)
3513			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
3514
3515		/* set IDLE_POLL_COUNT(0x00900100) */
3516		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
3517		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3518			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3519		if (def != data)
3520			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
3521	} else {
3522		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3523		/* reset CGCG/CGLS bits */
3524		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
3525		/* disable cgcg and cgls in FSM */
3526		if (def != data)
3527			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
3528	}
3529
3530	adev->gfx.rlc.funcs->exit_safe_mode(adev);
3531}
3532
3533static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
3534					    bool enable)
3535{
3536	if (enable) {
3537		/* CGCG/CGLS should be enabled after MGCG/MGLS
3538		 * ===  MGCG + MGLS ===
3539		 */
3540		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
3541		/* ===  CGCG /CGLS for GFX 3D Only === */
3542		gfx_v9_0_update_3d_clock_gating(adev, enable);
3543		/* ===  CGCG + CGLS === */
3544		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
3545	} else {
3546		/* CGCG/CGLS should be disabled before MGCG/MGLS
3547		 * ===  CGCG + CGLS ===
3548		 */
3549		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
3550		/* ===  CGCG /CGLS for GFX 3D Only === */
3551		gfx_v9_0_update_3d_clock_gating(adev, enable);
3552		/* ===  MGCG + MGLS === */
3553		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
3554	}
3555	return 0;
3556}
3557
3558static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
3559	.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
3560	.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
3561};
3562
3563static int gfx_v9_0_set_powergating_state(void *handle,
3564					  enum amd_powergating_state state)
3565{
3566	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3567	bool enable = (state == AMD_PG_STATE_GATE);
3568
3569	switch (adev->asic_type) {
3570	case CHIP_RAVEN:
3571		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3572			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
3573			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
3574		} else {
3575			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
3576			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
3577		}
3578
3579		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3580			gfx_v9_0_enable_cp_power_gating(adev, true);
3581		else
3582			gfx_v9_0_enable_cp_power_gating(adev, false);
3583
3584		/* update gfx cgpg state */
3585		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
3586
3587		/* update mgcg state */
3588		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
3589		break;
3590	default:
3591		break;
3592	}
3593
3594	return 0;
3595}
3596
3597static int gfx_v9_0_set_clockgating_state(void *handle,
3598					  enum amd_clockgating_state state)
3599{
3600	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3601
3602	if (amdgpu_sriov_vf(adev))
3603		return 0;
3604
3605	switch (adev->asic_type) {
3606	case CHIP_VEGA10:
3607	case CHIP_VEGA12:
3608	case CHIP_RAVEN:
3609		gfx_v9_0_update_gfx_clock_gating(adev,
3610						 state == AMD_CG_STATE_GATE);
3611		break;
3612	default:
3613		break;
3614	}
3615	return 0;
3616}
3617
3618static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
3619{
3620	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3621	int data;
3622
3623	if (amdgpu_sriov_vf(adev))
3624		*flags = 0;
3625
3626	/* AMD_CG_SUPPORT_GFX_MGCG */
3627	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3628	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
3629		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
3630
3631	/* AMD_CG_SUPPORT_GFX_CGCG */
3632	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3633	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
3634		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
3635
3636	/* AMD_CG_SUPPORT_GFX_CGLS */
3637	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
3638		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
3639
3640	/* AMD_CG_SUPPORT_GFX_RLC_LS */
3641	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3642	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
3643		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
3644
3645	/* AMD_CG_SUPPORT_GFX_CP_LS */
3646	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3647	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
3648		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
3649
3650	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
3651	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3652	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
3653		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
3654
3655	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
3656	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
3657		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
3658}
3659
3660static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
3661{
3662	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
3663}
3664
3665static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
3666{
3667	struct amdgpu_device *adev = ring->adev;
3668	u64 wptr;
3669
3670	/* XXX check if swapping is necessary on BE */
3671	if (ring->use_doorbell) {
3672		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
3673	} else {
3674		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
3675		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
3676	}
3677
3678	return wptr;
3679}
3680
3681static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3682{
3683	struct amdgpu_device *adev = ring->adev;
3684
3685	if (ring->use_doorbell) {
3686		/* XXX check if swapping is necessary on BE */
3687		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
3688		WDOORBELL64(ring->doorbell_index, ring->wptr);
3689	} else {
3690		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3691		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3692	}
3693}
3694
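/*
 * Emit an HDP flush on the ring: select the NBIO ref/mask for this engine
 * (CP0 for gfx, CP2/CP6 shifted by pipe for compute) and issue a
 * wait_reg_mem sequence against the HDP flush request/done register pair.
 */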
3695static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
3696{
3697	struct amdgpu_device *adev = ring->adev;
3698	u32 ref_and_mask, reg_mem_engine;
3699	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
3700
3701	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3702		switch (ring->me) {
3703		case 1:
3704			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
3705			break;
3706		case 2:
3707			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
3708			break;
3709		default:
3710			return;
3711		}
3712		reg_mem_engine = 0;
3713	} else {
3714		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
3715		reg_mem_engine = 1; /* pfp */
3716	}
3717
3718	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
3719			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
3720			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
3721			      ref_and_mask, ref_and_mask, 0x20);
3722}
3723
3724static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
3725                                      struct amdgpu_ib *ib,
3726                                      unsigned vmid, bool ctx_switch)
3727{
3728	u32 header, control = 0;
3729
3730	if (ib->flags & AMDGPU_IB_FLAG_CE)
3731		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3732	else
3733		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3734
3735	control |= ib->length_dw | (vmid << 24);
3736
3737	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
3738		control |= INDIRECT_BUFFER_PRE_ENB(1);
3739
3740		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
3741			gfx_v9_0_ring_emit_de_meta(ring);
3742	}
3743
3744	amdgpu_ring_write(ring, header);
3745	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3746	amdgpu_ring_write(ring,
3747#ifdef __BIG_ENDIAN
3748		(2 << 0) |
3749#endif
3750		lower_32_bits(ib->gpu_addr));
3751	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
3752	amdgpu_ring_write(ring, control);
3753}
3754
3755static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
3756                                          struct amdgpu_ib *ib,
3757                                          unsigned vmid, bool ctx_switch)
3758{
3759	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
3760
3761	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
3762	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3763	amdgpu_ring_write(ring,
3764#ifdef __BIG_ENDIAN
3765				(2 << 0) |
3766#endif
3767				lower_32_bits(ib->gpu_addr));
3768	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
3769	amdgpu_ring_write(ring, control);
3770}
3771
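/*
 * Gfx/compute fence: RELEASE_MEM flushes the TC/TCL1 caches on the
 * CACHE_FLUSH_AND_INV_TS event, then writes the fence sequence (32 or 64
 * bit depending on flags) to the given address and optionally raises an
 * interrupt.
 */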
3772static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
3773				     u64 seq, unsigned flags)
3774{
3775	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
3776	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
3777
3778	/* RELEASE_MEM - flush caches, send int */
3779	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
3780	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
3781				 EOP_TC_ACTION_EN |
3782				 EOP_TC_WB_ACTION_EN |
3783				 EOP_TC_MD_ACTION_EN |
3784				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3785				 EVENT_INDEX(5)));
3786	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
3787
3788	/*
3789	 * the address should be Qword aligned for a 64bit write, or Dword
3790	 * aligned if only the low 32bit data is sent (data high is discarded)
3791	 */
3792	if (write64bit)
3793		BUG_ON(addr & 0x7);
3794	else
3795		BUG_ON(addr & 0x3);
3796	amdgpu_ring_write(ring, lower_32_bits(addr));
3797	amdgpu_ring_write(ring, upper_32_bits(addr));
3798	amdgpu_ring_write(ring, lower_32_bits(seq));
3799	amdgpu_ring_write(ring, upper_32_bits(seq));
3800	amdgpu_ring_write(ring, 0);
3801}
3802
3803static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3804{
3805	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3806	uint32_t seq = ring->fence_drv.sync_seq;
3807	uint64_t addr = ring->fence_drv.gpu_addr;
3808
3809	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
3810			      lower_32_bits(addr), upper_32_bits(addr),
3811			      seq, 0xffffffff, 4);
3812}
3813
3814static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3815					unsigned vmid, uint64_t pd_addr)
3816{
3817	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3818
3819	/* compute doesn't have PFP */
3820	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
3821		/* sync PFP to ME, otherwise we might get invalid PFP reads */
3822		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3823		amdgpu_ring_write(ring, 0x0);
3824	}
3825}
3826
3827static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3828{
3829	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
3830}
3831
3832static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3833{
3834	u64 wptr;
3835
3836	/* XXX check if swapping is necessary on BE */
3837	if (ring->use_doorbell)
3838		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
3839	else
3840		BUG();
3841	return wptr;
3842}
3843
3844static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
3845					   bool acquire)
3846{
3847	struct amdgpu_device *adev = ring->adev;
3848	int pipe_num, tmp, reg;
3849	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
3850
3851	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
3852
3853	/* first me only has 2 entries, GFX and HP3D */
3854	if (ring->me > 0)
3855		pipe_num -= 2;
3856
3857	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
3858	tmp = RREG32(reg);
3859	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
3860	WREG32(reg, tmp);
3861}
3862
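/*
 * Track pipe reservations in pipe_reserve_bitmap: when nothing is reserved
 * every gfx and compute pipe gets the full SPI_WCL_PIPE_PERCENT quota,
 * otherwise pipes without a reservation are throttled to the minimum.
 */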
3863static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
3864					    struct amdgpu_ring *ring,
3865					    bool acquire)
3866{
3867	int i, pipe;
3868	bool reserve;
3869	struct amdgpu_ring *iring;
3870
3871	mutex_lock(&adev->gfx.pipe_reserve_mutex);
3872	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
3873	if (acquire)
3874		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3875	else
3876		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3877
3878	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
3879		/* Clear all reservations - everyone reacquires all resources */
3880		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
3881			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
3882						       true);
3883
3884		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
3885			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
3886						       true);
3887	} else {
3888		/* Lower all pipes without a current reservation */
3889		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
3890			iring = &adev->gfx.gfx_ring[i];
3891			pipe = amdgpu_gfx_queue_to_bit(adev,
3892						       iring->me,
3893						       iring->pipe,
3894						       0);
3895			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3896			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3897		}
3898
3899		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
3900			iring = &adev->gfx.compute_ring[i];
3901			pipe = amdgpu_gfx_queue_to_bit(adev,
3902						       iring->me,
3903						       iring->pipe,
3904						       0);
3905			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3906			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3907		}
3908	}
3909
3910	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
3911}
3912
3913static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
3914				      struct amdgpu_ring *ring,
3915				      bool acquire)
3916{
3917	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
3918	uint32_t queue_priority = acquire ? 0xf : 0x0;
3919
3920	mutex_lock(&adev->srbm_mutex);
3921	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3922
3923	WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
3924	WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
3925
3926	soc15_grbm_select(adev, 0, 0, 0, 0);
3927	mutex_unlock(&adev->srbm_mutex);
3928}
3929
3930static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
3931					       enum drm_sched_priority priority)
3932{
3933	struct amdgpu_device *adev = ring->adev;
3934	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
3935
3936	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
3937		return;
3938
3939	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
3940	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
3941}
3942
3943static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3944{
3945	struct amdgpu_device *adev = ring->adev;
3946
3947	/* XXX check if swapping is necessary on BE */
3948	if (ring->use_doorbell) {
3949		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
3950		WDOORBELL64(ring->doorbell_index, ring->wptr);
3951	} else {
3952		BUG(); /* only DOORBELL method supported on gfx9 now */
3953	}
3954}
3955
3956static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
3957					 u64 seq, unsigned int flags)
3958{
3959	struct amdgpu_device *adev = ring->adev;
3960
3961	/* we only allocate 32bit for each seq wb address */
3962	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
3963
3964	/* write fence seq to the "addr" */
3965	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3966	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3967				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
3968	amdgpu_ring_write(ring, lower_32_bits(addr));
3969	amdgpu_ring_write(ring, upper_32_bits(addr));
3970	amdgpu_ring_write(ring, lower_32_bits(seq));
3971
3972	if (flags & AMDGPU_FENCE_FLAG_INT) {
3973		/* set register to trigger INT */
3974		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3975		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3976					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
3977		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
3978		amdgpu_ring_write(ring, 0);
3979		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
3980	}
3981}
3982
3983static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
3984{
3985	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3986	amdgpu_ring_write(ring, 0);
3987}
3988
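/*
 * Write a zeroed CE metadata block to the ce_payload slot of the context
 * save area (CSA); emitted on the gfx ring under SR-IOV ahead of the
 * CONTEXT_CONTROL packet in gfx_v9_ring_emit_cntxcntl().
 */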
3989static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
3990{
3991	struct v9_ce_ib_state ce_payload = {0};
3992	uint64_t csa_addr;
3993	int cnt;
3994
3995	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
3996	csa_addr = amdgpu_csa_vaddr(ring->adev);
3997
3998	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
3999	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4000				 WRITE_DATA_DST_SEL(8) |
4001				 WR_CONFIRM) |
4002				 WRITE_DATA_CACHE_POLICY(0));
4003	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4004	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4005	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4006}
4007
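/*
 * Write a DE metadata block to the de_payload slot of the CSA with the GDS
 * backup address pointing at CSA + 4KiB; emitted ahead of preemptible DE
 * IBs under SR-IOV (see gfx_v9_0_ring_emit_ib_gfx()).
 */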
4008static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4009{
4010	struct v9_de_ib_state de_payload = {0};
4011	uint64_t csa_addr, gds_addr;
4012	int cnt;
4013
4014	csa_addr = amdgpu_csa_vaddr(ring->adev);
4015	gds_addr = csa_addr + 4096;
4016	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4017	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4018
4019	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4020	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4021	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4022				 WRITE_DATA_DST_SEL(8) |
4023				 WR_CONFIRM) |
4024				 WRITE_DATA_CACHE_POLICY(0));
4025	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4026	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4027	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4028}
4029
4030static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4031{
4032	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4033	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4034}
4035
4036static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4037{
4038	uint32_t dw2 = 0;
4039
4040	if (amdgpu_sriov_vf(ring->adev))
4041		gfx_v9_0_ring_emit_ce_meta(ring);
4042
4043	gfx_v9_0_ring_emit_tmz(ring, true);
4044
4045	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4046	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4047		/* set load_global_config & load_global_uconfig */
4048		dw2 |= 0x8001;
4049		/* set load_cs_sh_regs */
4050		dw2 |= 0x01000000;
4051		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4052		dw2 |= 0x10002;
4053
4054		/* set load_ce_ram if preamble presented */
4055		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4056			dw2 |= 0x10000000;
4057	} else {
4058		/* still load_ce_ram if this is the first time the preamble is
4059		 * presented, even though no context switch happens.
4060		 */
4061		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4062			dw2 |= 0x10000000;
4063	}
4064
4065	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4066	amdgpu_ring_write(ring, dw2);
4067	amdgpu_ring_write(ring, 0);
4068}
4069
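/*
 * COND_EXEC framing: init emits the packet with a dummy dword count and
 * returns its ring offset; patch_cond_exec later rewrites that count with
 * the number of dwords to skip when *cond_exe_gpu_addr reads zero.
 */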
4070static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4071{
4072	unsigned ret;
4073	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4074	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4075	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4076	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4077	ret = ring->wptr & ring->buf_mask;
4078	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4079	return ret;
4080}
4081
4082static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4083{
4084	unsigned cur;
4085	BUG_ON(offset > ring->buf_mask);
4086	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4087
4088	cur = (ring->wptr & ring->buf_mask) - 1;
4089	if (likely(cur > offset))
4090		ring->ring[offset] = cur - offset;
4091	else
4092		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4093}
4094
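/*
 * KIQ register read: COPY_DATA copies the register value, with write
 * confirm, into the writeback buffer slot at adev->virt.reg_val_offs.
 */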
4095static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4096{
4097	struct amdgpu_device *adev = ring->adev;
4098
4099	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4100	amdgpu_ring_write(ring, 0 |	/* src: register*/
4101				(5 << 8) |	/* dst: memory */
4102				(1 << 20));	/* write confirm */
4103	amdgpu_ring_write(ring, reg);
4104	amdgpu_ring_write(ring, 0);
4105	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4106				adev->virt.reg_val_offs * 4));
4107	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4108				adev->virt.reg_val_offs * 4));
4109}
4110
4111static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4112				    uint32_t val)
4113{
4114	uint32_t cmd = 0;
4115
4116	switch (ring->funcs->type) {
4117	case AMDGPU_RING_TYPE_GFX:
4118		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4119		break;
4120	case AMDGPU_RING_TYPE_KIQ:
4121		cmd = (1 << 16); /* no inc addr */
4122		break;
4123	default:
4124		cmd = WR_CONFIRM;
4125		break;
4126	}
4127	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4128	amdgpu_ring_write(ring, cmd);
4129	amdgpu_ring_write(ring, reg);
4130	amdgpu_ring_write(ring, 0);
4131	amdgpu_ring_write(ring, val);
4132}
4133
4134static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4135					uint32_t val, uint32_t mask)
4136{
4137	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4138}
4139
4140static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4141						 enum amdgpu_interrupt_state state)
4142{
4143	switch (state) {
4144	case AMDGPU_IRQ_STATE_DISABLE:
4145	case AMDGPU_IRQ_STATE_ENABLE:
4146		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4147			       TIME_STAMP_INT_ENABLE,
4148			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4149		break;
4150	default:
4151		break;
4152	}
4153}
4154
4155static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4156						     int me, int pipe,
4157						     enum amdgpu_interrupt_state state)
4158{
4159	u32 mec_int_cntl, mec_int_cntl_reg;
4160
4161	/*
4162	 * amdgpu controls only the first MEC. That's why this function only
4163	 * handles the setting of interrupts for this specific MEC. All other
4164	 * pipes' interrupts are set by amdkfd.
4165	 */
4166
4167	if (me == 1) {
4168		switch (pipe) {
4169		case 0:
4170			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4171			break;
4172		case 1:
4173			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4174			break;
4175		case 2:
4176			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4177			break;
4178		case 3:
4179			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4180			break;
4181		default:
4182			DRM_DEBUG("invalid pipe %d\n", pipe);
4183			return;
4184		}
4185	} else {
4186		DRM_DEBUG("invalid me %d\n", me);
4187		return;
4188	}
4189
4190	switch (state) {
4191	case AMDGPU_IRQ_STATE_DISABLE:
4192		mec_int_cntl = RREG32(mec_int_cntl_reg);
4193		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4194					     TIME_STAMP_INT_ENABLE, 0);
4195		WREG32(mec_int_cntl_reg, mec_int_cntl);
4196		break;
4197	case AMDGPU_IRQ_STATE_ENABLE:
4198		mec_int_cntl = RREG32(mec_int_cntl_reg);
4199		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4200					     TIME_STAMP_INT_ENABLE, 1);
4201		WREG32(mec_int_cntl_reg, mec_int_cntl);
4202		break;
4203	default:
4204		break;
4205	}
4206}
4207
4208static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4209					     struct amdgpu_irq_src *source,
4210					     unsigned type,
4211					     enum amdgpu_interrupt_state state)
4212{
4213	switch (state) {
4214	case AMDGPU_IRQ_STATE_DISABLE:
4215	case AMDGPU_IRQ_STATE_ENABLE:
4216		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4217			       PRIV_REG_INT_ENABLE,
4218			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4219		break;
4220	default:
4221		break;
4222	}
4223
4224	return 0;
4225}
4226
4227static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4228					      struct amdgpu_irq_src *source,
4229					      unsigned type,
4230					      enum amdgpu_interrupt_state state)
4231{
4232	switch (state) {
4233	case AMDGPU_IRQ_STATE_DISABLE:
4234	case AMDGPU_IRQ_STATE_ENABLE:
4235		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4236			       PRIV_INSTR_INT_ENABLE,
4237			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
4238	default:
4239		break;
4240	}
4241
4242	return 0;
4243}
4244
4245static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4246					    struct amdgpu_irq_src *src,
4247					    unsigned type,
4248					    enum amdgpu_interrupt_state state)
4249{
4250	switch (type) {
4251	case AMDGPU_CP_IRQ_GFX_EOP:
4252		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4253		break;
4254	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4255		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4256		break;
4257	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4258		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4259		break;
4260	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4261		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4262		break;
4263	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4264		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4265		break;
4266	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4267		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4268		break;
4269	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4270		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4271		break;
4272	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4273		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4274		break;
4275	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4276		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4277		break;
4278	default:
4279		break;
4280	}
4281	return 0;
4282}
4283
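/*
 * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and run
 * fence processing on the matching gfx or compute ring.
 */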
4284static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
4285			    struct amdgpu_irq_src *source,
4286			    struct amdgpu_iv_entry *entry)
4287{
4288	int i;
4289	u8 me_id, pipe_id, queue_id;
4290	struct amdgpu_ring *ring;
4291
4292	DRM_DEBUG("IH: CP EOP\n");
4293	me_id = (entry->ring_id & 0x0c) >> 2;
4294	pipe_id = (entry->ring_id & 0x03) >> 0;
4295	queue_id = (entry->ring_id & 0x70) >> 4;
4296
4297	switch (me_id) {
4298	case 0:
4299		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4300		break;
4301	case 1:
4302	case 2:
4303		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4304			ring = &adev->gfx.compute_ring[i];
4305			/* Per-queue interrupt is supported for MEC starting from VI.
4306			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
4307			 */
4308			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4309				amdgpu_fence_process(ring);
4310		}
4311		break;
4312	}
4313	return 0;
4314}
4315
4316static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
4317				 struct amdgpu_irq_src *source,
4318				 struct amdgpu_iv_entry *entry)
4319{
4320	DRM_ERROR("Illegal register access in command stream\n");
4321	schedule_work(&adev->reset_work);
4322	return 0;
4323}
4324
4325static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
4326				  struct amdgpu_irq_src *source,
4327				  struct amdgpu_iv_entry *entry)
4328{
4329	DRM_ERROR("Illegal instruction in command stream\n");
4330	schedule_work(&adev->reset_work);
4331	return 0;
4332}
4333
4334static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
4335					    struct amdgpu_irq_src *src,
4336					    unsigned int type,
4337					    enum amdgpu_interrupt_state state)
4338{
4339	uint32_t tmp, target;
4340	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4341
4342	if (ring->me == 1)
4343		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4344	else
4345		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
4346	target += ring->pipe;
4347
4348	switch (type) {
4349	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
4350		if (state == AMDGPU_IRQ_STATE_DISABLE) {
4351			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
4352			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
4353						 GENERIC2_INT_ENABLE, 0);
4354			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
4355
4356			tmp = RREG32(target);
4357			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
4358						 GENERIC2_INT_ENABLE, 0);
4359			WREG32(target, tmp);
4360		} else {
4361			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
4362			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
4363						 GENERIC2_INT_ENABLE, 1);
4364			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
4365
4366			tmp = RREG32(target);
4367			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
4368						 GENERIC2_INT_ENABLE, 1);
4369			WREG32(target, tmp);
4370		}
4371		break;
4372	default:
4373		BUG(); /* kiq only support GENERIC2_INT now */
4374		break;
4375	}
4376	return 0;
4377}
4378
4379static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
4380			    struct amdgpu_irq_src *source,
4381			    struct amdgpu_iv_entry *entry)
4382{
4383	u8 me_id, pipe_id, queue_id;
4384	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
4385
4386	me_id = (entry->ring_id & 0x0c) >> 2;
4387	pipe_id = (entry->ring_id & 0x03) >> 0;
4388	queue_id = (entry->ring_id & 0x70) >> 4;
4389	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
4390		   me_id, pipe_id, queue_id);
4391
4392	amdgpu_fence_process(ring);
4393	return 0;
4394}
4395
4396static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
4397	.name = "gfx_v9_0",
4398	.early_init = gfx_v9_0_early_init,
4399	.late_init = gfx_v9_0_late_init,
4400	.sw_init = gfx_v9_0_sw_init,
4401	.sw_fini = gfx_v9_0_sw_fini,
4402	.hw_init = gfx_v9_0_hw_init,
4403	.hw_fini = gfx_v9_0_hw_fini,
4404	.suspend = gfx_v9_0_suspend,
4405	.resume = gfx_v9_0_resume,
4406	.is_idle = gfx_v9_0_is_idle,
4407	.wait_for_idle = gfx_v9_0_wait_for_idle,
4408	.soft_reset = gfx_v9_0_soft_reset,
4409	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
4410	.set_powergating_state = gfx_v9_0_set_powergating_state,
4411	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
4412};
4413
4414static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
4415	.type = AMDGPU_RING_TYPE_GFX,
4416	.align_mask = 0xff,
4417	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
4418	.support_64bit_ptrs = true,
4419	.vmhub = AMDGPU_GFXHUB,
4420	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
4421	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
4422	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
4423	.emit_frame_size = /* totally 242 maximum if 16 IBs */
4424		5 +  /* COND_EXEC */
4425		7 +  /* PIPELINE_SYNC */
4426		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4427		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4428		2 + /* VM_FLUSH */
4429		8 +  /* FENCE for VM_FLUSH */
4430		20 + /* GDS switch */
4431		4 + /* double SWITCH_BUFFER,
4432		       the first COND_EXEC jumps to the place just
4433		       prior to this double SWITCH_BUFFER  */
4434		5 + /* COND_EXEC */
4435		7 +	 /*	HDP_flush */
4436		4 +	 /*	VGT_flush */
4437		14 + /*	CE_META */
4438		31 + /*	DE_META */
4439		3 + /* CNTX_CTRL */
4440		5 + /* HDP_INVL */
4441		8 + 8 + /* FENCE x2 */
4442		2, /* SWITCH_BUFFER */
4443	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
4444	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
4445	.emit_fence = gfx_v9_0_ring_emit_fence,
4446	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
4447	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4448	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4449	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4450	.test_ring = gfx_v9_0_ring_test_ring,
4451	.test_ib = gfx_v9_0_ring_test_ib,
4452	.insert_nop = amdgpu_ring_insert_nop,
4453	.pad_ib = amdgpu_ring_generic_pad_ib,
4454	.emit_switch_buffer = gfx_v9_ring_emit_sb,
4455	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
4456	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
4457	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
4458	.emit_tmz = gfx_v9_0_ring_emit_tmz,
4459	.emit_wreg = gfx_v9_0_ring_emit_wreg,
4460	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4461};
4462
4463static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4464	.type = AMDGPU_RING_TYPE_COMPUTE,
4465	.align_mask = 0xff,
4466	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
4467	.support_64bit_ptrs = true,
4468	.vmhub = AMDGPU_GFXHUB,
4469	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
4470	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
4471	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
4472	.emit_frame_size =
4473		20 + /* gfx_v9_0_ring_emit_gds_switch */
4474		7 + /* gfx_v9_0_ring_emit_hdp_flush */
4475		5 + /* hdp invalidate */
4476		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4477		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4478		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4479		2 + /* gfx_v9_0_ring_emit_vm_flush */
4480		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
4481	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
4482	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
4483	.emit_fence = gfx_v9_0_ring_emit_fence,
4484	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
4485	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4486	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4487	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4488	.test_ring = gfx_v9_0_ring_test_ring,
4489	.test_ib = gfx_v9_0_ring_test_ib,
4490	.insert_nop = amdgpu_ring_insert_nop,
4491	.pad_ib = amdgpu_ring_generic_pad_ib,
4492	.set_priority = gfx_v9_0_ring_set_priority_compute,
4493	.emit_wreg = gfx_v9_0_ring_emit_wreg,
4494	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4495};
4496
4497static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
4498	.type = AMDGPU_RING_TYPE_KIQ,
4499	.align_mask = 0xff,
4500	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
4501	.support_64bit_ptrs = true,
4502	.vmhub = AMDGPU_GFXHUB,
4503	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
4504	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
4505	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
4506	.emit_frame_size =
4507		20 + /* gfx_v9_0_ring_emit_gds_switch */
4508		7 + /* gfx_v9_0_ring_emit_hdp_flush */
4509		5 + /* hdp invalidate */
4510		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4511		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4512		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4513		2 + /* gfx_v9_0_ring_emit_vm_flush */
4514		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
4515	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
4516	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
4517	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
4518	.test_ring = gfx_v9_0_ring_test_ring,
4519	.test_ib = gfx_v9_0_ring_test_ib,
4520	.insert_nop = amdgpu_ring_insert_nop,
4521	.pad_ib = amdgpu_ring_generic_pad_ib,
4522	.emit_rreg = gfx_v9_0_ring_emit_rreg,
4523	.emit_wreg = gfx_v9_0_ring_emit_wreg,
4524	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
4525};
4526
4527static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
4528{
4529	int i;
4530
4531	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
4532
4533	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4534		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
4535
4536	for (i = 0; i < adev->gfx.num_compute_rings; i++)
4537		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
4538}
4539
4540static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
4541	.set = gfx_v9_0_kiq_set_interrupt_state,
4542	.process = gfx_v9_0_kiq_irq,
4543};
4544
4545static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
4546	.set = gfx_v9_0_set_eop_interrupt_state,
4547	.process = gfx_v9_0_eop_irq,
4548};
4549
4550static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
4551	.set = gfx_v9_0_set_priv_reg_fault_state,
4552	.process = gfx_v9_0_priv_reg_irq,
4553};
4554
4555static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
4556	.set = gfx_v9_0_set_priv_inst_fault_state,
4557	.process = gfx_v9_0_priv_inst_irq,
4558};
4559
4560static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
4561{
4562	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
4563	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
4564
4565	adev->gfx.priv_reg_irq.num_types = 1;
4566	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
4567
4568	adev->gfx.priv_inst_irq.num_types = 1;
4569	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
4570
4571	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
4572	adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
4573}
4574
4575static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
4576{
4577	switch (adev->asic_type) {
4578	case CHIP_VEGA10:
4579	case CHIP_VEGA12:
4580	case CHIP_RAVEN:
4581		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
4582		break;
4583	default:
4584		break;
4585	}
4586}
4587
4588static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
4589{
4590	/* init asic gds info */
4591	adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
4592	adev->gds.gws.total_size = 64;
4593	adev->gds.oa.total_size = 16;
4594
4595	if (adev->gds.mem.total_size == 64 * 1024) {
4596		adev->gds.mem.gfx_partition_size = 4096;
4597		adev->gds.mem.cs_partition_size = 4096;
4598
4599		adev->gds.gws.gfx_partition_size = 4;
4600		adev->gds.gws.cs_partition_size = 4;
4601
4602		adev->gds.oa.gfx_partition_size = 4;
4603		adev->gds.oa.cs_partition_size = 1;
4604	} else {
4605		adev->gds.mem.gfx_partition_size = 1024;
4606		adev->gds.mem.cs_partition_size = 1024;
4607
4608		adev->gds.gws.gfx_partition_size = 16;
4609		adev->gds.gws.cs_partition_size = 16;
4610
4611		adev->gds.oa.gfx_partition_size = 4;
4612		adev->gds.oa.cs_partition_size = 4;
4613	}
4614}
4615
4616static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
4617						 u32 bitmap)
4618{
4619	u32 data;
4620
4621	if (!bitmap)
4622		return;
4623
4624	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4625	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4626
4627	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
4628}
4629
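/*
 * Combine the hardware and user-programmed inactive-CU masks for the
 * currently selected SE/SH and return the complement, limited to
 * max_cu_per_sh, as the active CU bitmap.
 */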
4630static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
4631{
4632	u32 data, mask;
4633
4634	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
4635	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
4636
4637	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4638	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4639
4640	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
4641
4642	return (~data) & mask;
4643}
4644
4645static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
4646				 struct amdgpu_cu_info *cu_info)
4647{
4648	int i, j, k, counter, active_cu_number = 0;
4649	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4650	unsigned disable_masks[4 * 2];
4651
4652	if (!adev || !cu_info)
4653		return -EINVAL;
4654
4655	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
4656
4657	mutex_lock(&adev->grbm_idx_mutex);
4658	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4659		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4660			mask = 1;
4661			ao_bitmap = 0;
4662			counter = 0;
4663			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
4664			if (i < 4 && j < 2)
4665				gfx_v9_0_set_user_cu_inactive_bitmap(
4666					adev, disable_masks[i * 2 + j]);
4667			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
4668			cu_info->bitmap[i][j] = bitmap;
4669
4670			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
4671				if (bitmap & mask) {
4672					if (counter < adev->gfx.config.max_cu_per_sh)
4673						ao_bitmap |= mask;
4674					counter++;
4675				}
4676				mask <<= 1;
4677			}
4678			active_cu_number += counter;
4679			if (i < 2 && j < 2)
4680				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
4681			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
4682		}
4683	}
4684	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
4685	mutex_unlock(&adev->grbm_idx_mutex);
4686
4687	cu_info->number = active_cu_number;
4688	cu_info->ao_cu_mask = ao_cu_mask;
4689
4690	return 0;
4691}
4692
4693const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
4694{
4695	.type = AMD_IP_BLOCK_TYPE_GFX,
4696	.major = 9,
4697	.minor = 0,
4698	.rev = 0,
4699	.funcs = &gfx_v9_0_ip_funcs,
4700};