   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23#include <linux/kernel.h>
  24#include <linux/firmware.h>
  25#include <drm/drmP.h>
  26#include "amdgpu.h"
  27#include "amdgpu_gfx.h"
  28#include "soc15.h"
  29#include "soc15d.h"
  30
  31#include "gc/gc_9_0_offset.h"
  32#include "gc/gc_9_0_sh_mask.h"
  33#include "vega10_enum.h"
  34#include "hdp/hdp_4_0_offset.h"
  35
  36#include "soc15_common.h"
  37#include "clearstate_gfx9.h"
  38#include "v9_structs.h"
  39
  40#define GFX9_NUM_GFX_RINGS     1
  41#define GFX9_MEC_HPD_SIZE 2048
  42#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  43#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
  44#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
  45
  46#define mmPWR_MISC_CNTL_STATUS					0x0183
  47#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
  48#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
  49#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
  50#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
  51#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
  52
  53MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
  54MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
  55MODULE_FIRMWARE("amdgpu/vega10_me.bin");
  56MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
  57MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
  58MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
  59
  60MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
  61MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
  62MODULE_FIRMWARE("amdgpu/vega12_me.bin");
  63MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
  64MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
  65MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
  66
  67MODULE_FIRMWARE("amdgpu/raven_ce.bin");
  68MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
  69MODULE_FIRMWARE("amdgpu/raven_me.bin");
  70MODULE_FIRMWARE("amdgpu/raven_mec.bin");
  71MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
  72MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
  73
  74static const struct soc15_reg_golden golden_settings_gc_9_0[] =
  75{
  76	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
  77	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
  78	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
  79	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
  80	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
  81	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
  82	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
  83	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
  84	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
  85	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
  86	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
  87	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
  88	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
  89	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
  90	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
  91	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
  92	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
  93	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
  94	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
  95	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
  96	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
  97	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
  98	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
  99};
 100
 101static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 102{
 103	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
 104	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 105	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 106	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
 107	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
 108	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 109	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
 110};
 111
 112static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 113{
 114	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 115	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 116	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 117	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 118	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 119	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 120	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 121	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 122	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 123	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 124	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 125	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 126	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 127	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 128	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 129	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 130	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 131	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 132	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 133	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
 134	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 135};
 136
 137static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
 138{
 139	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 140	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
 141	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
 142	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
 143	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
 144	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 145	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
 146};
 147
 148static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 149{
 150	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
 151	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 152};
 153
 154static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
 155{
 156	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 157	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 158	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 159	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 160	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 161	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 162	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 163	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 164	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 165	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 166	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 167	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 168	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 169	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 170	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 171	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 172};
 173
 174static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 175{
 176	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
 177	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 178	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 179	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
 180	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
 181	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 182	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
 183	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 184	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
 185	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
 186};
 187
 188#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 189#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 190#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
 191
 192static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 193static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 194static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
 195static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 196static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 197                                 struct amdgpu_cu_info *cu_info);
 198static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 199static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
 200static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
 201
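/* Program the per-ASIC "golden" register settings, then the list common to all GC 9.x parts. */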
 202static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 203{
 204	switch (adev->asic_type) {
 205	case CHIP_VEGA10:
 206		soc15_program_register_sequence(adev,
 207						 golden_settings_gc_9_0,
 208						 ARRAY_SIZE(golden_settings_gc_9_0));
 209		soc15_program_register_sequence(adev,
 210						 golden_settings_gc_9_0_vg10,
 211						 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 212		break;
 213	case CHIP_VEGA12:
 214		soc15_program_register_sequence(adev,
 215						golden_settings_gc_9_2_1,
 216						ARRAY_SIZE(golden_settings_gc_9_2_1));
 217		soc15_program_register_sequence(adev,
 218						golden_settings_gc_9_2_1_vg12,
 219						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 220		break;
 221	case CHIP_RAVEN:
 222		soc15_program_register_sequence(adev,
 223						 golden_settings_gc_9_1,
 224						 ARRAY_SIZE(golden_settings_gc_9_1));
 225		soc15_program_register_sequence(adev,
 226						 golden_settings_gc_9_1_rv1,
 227						 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
 228		break;
 229	default:
 230		break;
 231	}
 232
 233	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
 234					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
 235}
 236
 237static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
 238{
 239	adev->gfx.scratch.num_reg = 8;
 240	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
 241	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
 242}
 243
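/* Emit a WRITE_DATA packet that writes @val to the register @reg, optionally requesting write confirmation (@wc). */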
 244static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
 245				       bool wc, uint32_t reg, uint32_t val)
 246{
 247	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 248	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
 249				WRITE_DATA_DST_SEL(0) |
 250				(wc ? WR_CONFIRM : 0));
 251	amdgpu_ring_write(ring, reg);
 252	amdgpu_ring_write(ring, 0);
 253	amdgpu_ring_write(ring, val);
 254}
 255
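/* Emit a WAIT_REG_MEM packet that polls a register or memory location until (value & mask) == ref. */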
 256static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
 257				  int mem_space, int opt, uint32_t addr0,
 258				  uint32_t addr1, uint32_t ref, uint32_t mask,
 259				  uint32_t inv)
 260{
 261	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 262	amdgpu_ring_write(ring,
 263				 /* memory (1) or register (0) */
 264				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
 265				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
 266				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
 267				 WAIT_REG_MEM_ENGINE(eng_sel)));
 268
 269	if (mem_space)
 270		BUG_ON(addr0 & 0x3); /* Dword align */
 271	amdgpu_ring_write(ring, addr0);
 272	amdgpu_ring_write(ring, addr1);
 273	amdgpu_ring_write(ring, ref);
 274	amdgpu_ring_write(ring, mask);
 275	amdgpu_ring_write(ring, inv); /* poll interval */
 276}
 277
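/* Basic ring test: write 0xDEADBEEF to a scratch register through the ring and poll until it reads back or the timeout expires. */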
 278static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 279{
 280	struct amdgpu_device *adev = ring->adev;
 281	uint32_t scratch;
 282	uint32_t tmp = 0;
 283	unsigned i;
 284	int r;
 285
 286	r = amdgpu_gfx_scratch_get(adev, &scratch);
 287	if (r) {
 288		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
 289		return r;
 290	}
 291	WREG32(scratch, 0xCAFEDEAD);
 292	r = amdgpu_ring_alloc(ring, 3);
 293	if (r) {
 294		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 295			  ring->idx, r);
 296		amdgpu_gfx_scratch_free(adev, scratch);
 297		return r;
 298	}
 299	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 300	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 301	amdgpu_ring_write(ring, 0xDEADBEEF);
 302	amdgpu_ring_commit(ring);
 303
 304	for (i = 0; i < adev->usec_timeout; i++) {
 305		tmp = RREG32(scratch);
 306		if (tmp == 0xDEADBEEF)
 307			break;
 308		DRM_UDELAY(1);
 309	}
 310	if (i < adev->usec_timeout) {
 311		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
 312			 ring->idx, i);
 313	} else {
 314		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
 315			  ring->idx, scratch, tmp);
 316		r = -EINVAL;
 317	}
 318	amdgpu_gfx_scratch_free(adev, scratch);
 319	return r;
 320}
 321
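/* IB test: submit a small indirect buffer that writes 0xDEADBEEF to a writeback slot and verify the value once the fence signals. */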
 322static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 323{
 324	struct amdgpu_device *adev = ring->adev;
 325	struct amdgpu_ib ib;
 326	struct dma_fence *f = NULL;
 327
 328	unsigned index;
 329	uint64_t gpu_addr;
 330	uint32_t tmp;
 331	long r;
 332
 333	r = amdgpu_device_wb_get(adev, &index);
 334	if (r) {
 335		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 336		return r;
 337	}
 338
 339	gpu_addr = adev->wb.gpu_addr + (index * 4);
 340	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
 341	memset(&ib, 0, sizeof(ib));
 342	r = amdgpu_ib_get(adev, NULL, 16, &ib);
 343	if (r) {
 344		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 345		goto err1;
 346	}
 347	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
 348	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
 349	ib.ptr[2] = lower_32_bits(gpu_addr);
 350	ib.ptr[3] = upper_32_bits(gpu_addr);
 351	ib.ptr[4] = 0xDEADBEEF;
 352	ib.length_dw = 5;
 353
 354	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 355	if (r)
 356		goto err2;
 357
 358	r = dma_fence_wait_timeout(f, false, timeout);
 359	if (r == 0) {
 360			DRM_ERROR("amdgpu: IB test timed out.\n");
 361			r = -ETIMEDOUT;
 362			goto err2;
 363	} else if (r < 0) {
 364			DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 365			goto err2;
 366	}
 367
 368	tmp = adev->wb.wb[index];
 369	if (tmp == 0xDEADBEEF) {
 370			DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
 371			r = 0;
 372	} else {
 373			DRM_ERROR("ib test on ring %d failed\n", ring->idx);
 374			r = -EINVAL;
 375	}
 376
 377err2:
 378	amdgpu_ib_free(adev, &ib, NULL);
 379	dma_fence_put(f);
 380err1:
 381	amdgpu_device_wb_free(adev, index);
 382	return r;
 383}
 384
 385
 386static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
 387{
 388	release_firmware(adev->gfx.pfp_fw);
 389	adev->gfx.pfp_fw = NULL;
 390	release_firmware(adev->gfx.me_fw);
 391	adev->gfx.me_fw = NULL;
 392	release_firmware(adev->gfx.ce_fw);
 393	adev->gfx.ce_fw = NULL;
 394	release_firmware(adev->gfx.rlc_fw);
 395	adev->gfx.rlc_fw = NULL;
 396	release_firmware(adev->gfx.mec_fw);
 397	adev->gfx.mec_fw = NULL;
 398	release_firmware(adev->gfx.mec2_fw);
 399	adev->gfx.mec2_fw = NULL;
 400
 401	kfree(adev->gfx.rlc.register_list_format);
 402}
 403
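/* Request and validate the PFP, ME, CE, RLC, MEC and (optional) MEC2 firmware images for the detected ASIC, and register them for PSP-based loading when that path is used. */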
 404static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 405{
 406	const char *chip_name;
 407	char fw_name[30];
 408	int err;
 409	struct amdgpu_firmware_info *info = NULL;
 410	const struct common_firmware_header *header = NULL;
 411	const struct gfx_firmware_header_v1_0 *cp_hdr;
 412	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 413	unsigned int *tmp = NULL;
 414	unsigned int i = 0;
 415
 416	DRM_DEBUG("\n");
 417
 418	switch (adev->asic_type) {
 419	case CHIP_VEGA10:
 420		chip_name = "vega10";
 421		break;
 422	case CHIP_VEGA12:
 423		chip_name = "vega12";
 424		break;
 425	case CHIP_RAVEN:
 426		chip_name = "raven";
 427		break;
 428	default:
 429		BUG();
 430	}
 431
 432	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
 433	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
 434	if (err)
 435		goto out;
 436	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
 437	if (err)
 438		goto out;
 439	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 440	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 441	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 442
 443	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
 444	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
 445	if (err)
 446		goto out;
 447	err = amdgpu_ucode_validate(adev->gfx.me_fw);
 448	if (err)
 449		goto out;
 450	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 451	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 452	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 453
 454	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
 455	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
 456	if (err)
 457		goto out;
 458	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
 459	if (err)
 460		goto out;
 461	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 462	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 463	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 464
 465	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 466	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 467	if (err)
 468		goto out;
 469	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 470	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
 471	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 472	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 473	adev->gfx.rlc.save_and_restore_offset =
 474			le32_to_cpu(rlc_hdr->save_and_restore_offset);
 475	adev->gfx.rlc.clear_state_descriptor_offset =
 476			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
 477	adev->gfx.rlc.avail_scratch_ram_locations =
 478			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
 479	adev->gfx.rlc.reg_restore_list_size =
 480			le32_to_cpu(rlc_hdr->reg_restore_list_size);
 481	adev->gfx.rlc.reg_list_format_start =
 482			le32_to_cpu(rlc_hdr->reg_list_format_start);
 483	adev->gfx.rlc.reg_list_format_separate_start =
 484			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
 485	adev->gfx.rlc.starting_offsets_start =
 486			le32_to_cpu(rlc_hdr->starting_offsets_start);
 487	adev->gfx.rlc.reg_list_format_size_bytes =
 488			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
 489	adev->gfx.rlc.reg_list_size_bytes =
 490			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
 491	adev->gfx.rlc.register_list_format =
 492			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
 493				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
 494	if (!adev->gfx.rlc.register_list_format) {
 495		err = -ENOMEM;
 496		goto out;
 497	}
 498
 499	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 500			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
 501	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
 502		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
 503
 504	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
 505
 506	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
 507			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
 508	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
 509		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 510
 511	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 512	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 513	if (err)
 514		goto out;
 515	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
 516	if (err)
 517		goto out;
 518	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 519	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 520	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 521
 522
 523	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
 524	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 525	if (!err) {
 526		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
 527		if (err)
 528			goto out;
 529		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
 530		adev->gfx.mec2_fw->data;
 531		adev->gfx.mec2_fw_version =
 532		le32_to_cpu(cp_hdr->header.ucode_version);
 533		adev->gfx.mec2_feature_version =
 534		le32_to_cpu(cp_hdr->ucode_feature_version);
 535	} else {
 536		err = 0;
 537		adev->gfx.mec2_fw = NULL;
 538	}
 539
 540	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
 541		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
 542		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
 543		info->fw = adev->gfx.pfp_fw;
 544		header = (const struct common_firmware_header *)info->fw->data;
 545		adev->firmware.fw_size +=
 546			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 547
 548		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
 549		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
 550		info->fw = adev->gfx.me_fw;
 551		header = (const struct common_firmware_header *)info->fw->data;
 552		adev->firmware.fw_size +=
 553			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 554
 555		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
 556		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
 557		info->fw = adev->gfx.ce_fw;
 558		header = (const struct common_firmware_header *)info->fw->data;
 559		adev->firmware.fw_size +=
 560			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 561
 562		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
 563		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
 564		info->fw = adev->gfx.rlc_fw;
 565		header = (const struct common_firmware_header *)info->fw->data;
 566		adev->firmware.fw_size +=
 567			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 568
 569		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
 570		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
 571		info->fw = adev->gfx.mec_fw;
 572		header = (const struct common_firmware_header *)info->fw->data;
 573		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
 574		adev->firmware.fw_size +=
 575			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 576
 577		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
 578		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
 579		info->fw = adev->gfx.mec_fw;
 580		adev->firmware.fw_size +=
 581			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 582
 583		if (adev->gfx.mec2_fw) {
 584			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
 585			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
 586			info->fw = adev->gfx.mec2_fw;
 587			header = (const struct common_firmware_header *)info->fw->data;
 588			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
 589			adev->firmware.fw_size +=
 590				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 591			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
 592			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
 593			info->fw = adev->gfx.mec2_fw;
 594			adev->firmware.fw_size +=
 595				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
 596		}
 597
 598	}
 599
 600out:
 601	if (err) {
 602		dev_err(adev->dev,
 603			"gfx9: Failed to load firmware \"%s\"\n",
 604			fw_name);
 605		release_firmware(adev->gfx.pfp_fw);
 606		adev->gfx.pfp_fw = NULL;
 607		release_firmware(adev->gfx.me_fw);
 608		adev->gfx.me_fw = NULL;
 609		release_firmware(adev->gfx.ce_fw);
 610		adev->gfx.ce_fw = NULL;
 611		release_firmware(adev->gfx.rlc_fw);
 612		adev->gfx.rlc_fw = NULL;
 613		release_firmware(adev->gfx.mec_fw);
 614		adev->gfx.mec_fw = NULL;
 615		release_firmware(adev->gfx.mec2_fw);
 616		adev->gfx.mec2_fw = NULL;
 617	}
 618	return err;
 619}
 620
 621static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
 622{
 623	u32 count = 0;
 624	const struct cs_section_def *sect = NULL;
 625	const struct cs_extent_def *ext = NULL;
 626
 627	/* begin clear state */
 628	count += 2;
 629	/* context control state */
 630	count += 3;
 631
 632	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
 633		for (ext = sect->section; ext->extent != NULL; ++ext) {
 634			if (sect->id == SECT_CONTEXT)
 635				count += 2 + ext->reg_count;
 636			else
 637				return 0;
 638		}
 639	}
 640
 641	/* end clear state */
 642	count += 2;
 643	/* clear state */
 644	count += 2;
 645
 646	return count;
 647}
 648
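/* Fill @buffer with the clear-state sequence: preamble begin, context control, every SECT_CONTEXT register extent, preamble end and a CLEAR_STATE packet. */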
 649static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
 650				    volatile u32 *buffer)
 651{
 652	u32 count = 0, i;
 653	const struct cs_section_def *sect = NULL;
 654	const struct cs_extent_def *ext = NULL;
 655
 656	if (adev->gfx.rlc.cs_data == NULL)
 657		return;
 658	if (buffer == NULL)
 659		return;
 660
 661	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
 662	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
 663
 664	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
 665	buffer[count++] = cpu_to_le32(0x80000000);
 666	buffer[count++] = cpu_to_le32(0x80000000);
 667
 668	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
 669		for (ext = sect->section; ext->extent != NULL; ++ext) {
 670			if (sect->id == SECT_CONTEXT) {
 671				buffer[count++] =
 672					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
 673				buffer[count++] = cpu_to_le32(ext->reg_index -
 674						PACKET3_SET_CONTEXT_REG_START);
 675				for (i = 0; i < ext->reg_count; i++)
 676					buffer[count++] = cpu_to_le32(ext->extent[i]);
 677			} else {
 678				return;
 679			}
 680		}
 681	}
 682
 683	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
 684	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
 685
 686	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
 687	buffer[count++] = cpu_to_le32(0);
 688}
 689
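/* Program the RLC load-balancing (LBPW) thresholds, counters and CU masks, broadcasting to all SEs/SHs while doing so. */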
 690static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
 691{
 692	uint32_t data;
 693
 694	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
 695	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
 696	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
 697	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
 698	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
 699
 700	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
 701	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
 702
 703	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
 704	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
 705
 706	mutex_lock(&adev->grbm_idx_mutex);
  707	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
 708	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 709	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 710
 711	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
 712	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
 713	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
 714	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
 715	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
 716
 717	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
 718	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
 719	data &= 0x0000FFFF;
 720	data |= 0x00C00000;
 721	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
 722
 723	/* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */
 724	WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF);
 725
  726	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
  727	 * but is used for RLC_LB_CNTL configuration */
 728	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
 729	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
 730	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
 731	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
 732	mutex_unlock(&adev->grbm_idx_mutex);
 733}
 734
 735static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
 736{
 737	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
 738}
 739
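/* Copy the CP jump tables from the CE, PFP, ME, MEC and MEC2 firmware images into the RLC cp_table buffer. */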
 740static void rv_init_cp_jump_table(struct amdgpu_device *adev)
 741{
 742	const __le32 *fw_data;
 743	volatile u32 *dst_ptr;
 744	int me, i, max_me = 5;
 745	u32 bo_offset = 0;
 746	u32 table_offset, table_size;
 747
 748	/* write the cp table buffer */
 749	dst_ptr = adev->gfx.rlc.cp_table_ptr;
 750	for (me = 0; me < max_me; me++) {
 751		if (me == 0) {
 752			const struct gfx_firmware_header_v1_0 *hdr =
 753				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
 754			fw_data = (const __le32 *)
 755				(adev->gfx.ce_fw->data +
 756				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 757			table_offset = le32_to_cpu(hdr->jt_offset);
 758			table_size = le32_to_cpu(hdr->jt_size);
 759		} else if (me == 1) {
 760			const struct gfx_firmware_header_v1_0 *hdr =
 761				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
 762			fw_data = (const __le32 *)
 763				(adev->gfx.pfp_fw->data +
 764				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 765			table_offset = le32_to_cpu(hdr->jt_offset);
 766			table_size = le32_to_cpu(hdr->jt_size);
 767		} else if (me == 2) {
 768			const struct gfx_firmware_header_v1_0 *hdr =
 769				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
 770			fw_data = (const __le32 *)
 771				(adev->gfx.me_fw->data +
 772				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 773			table_offset = le32_to_cpu(hdr->jt_offset);
 774			table_size = le32_to_cpu(hdr->jt_size);
 775		} else if (me == 3) {
 776			const struct gfx_firmware_header_v1_0 *hdr =
 777				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 778			fw_data = (const __le32 *)
 779				(adev->gfx.mec_fw->data +
 780				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 781			table_offset = le32_to_cpu(hdr->jt_offset);
 782			table_size = le32_to_cpu(hdr->jt_size);
 783		} else  if (me == 4) {
 784			const struct gfx_firmware_header_v1_0 *hdr =
 785				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
 786			fw_data = (const __le32 *)
 787				(adev->gfx.mec2_fw->data +
 788				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 789			table_offset = le32_to_cpu(hdr->jt_offset);
 790			table_size = le32_to_cpu(hdr->jt_size);
 791		}
 792
 793		for (i = 0; i < table_size; i ++) {
 794			dst_ptr[bo_offset + i] =
 795				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
 796		}
 797
 798		bo_offset += table_size;
 799	}
 800}
 801
 802static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev)
 803{
 804	/* clear state block */
 805	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
 806			&adev->gfx.rlc.clear_state_gpu_addr,
 807			(void **)&adev->gfx.rlc.cs_ptr);
 808
 809	/* jump table block */
 810	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
 811			&adev->gfx.rlc.cp_table_gpu_addr,
 812			(void **)&adev->gfx.rlc.cp_table_ptr);
 813}
 814
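/* Allocate and fill the RLC clear-state buffer; on Raven also create the CP table buffer, fill its jump table and set up LBPW. */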
 815static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
 816{
 817	volatile u32 *dst_ptr;
 818	u32 dws;
 819	const struct cs_section_def *cs_data;
 820	int r;
 821
 822	adev->gfx.rlc.cs_data = gfx9_cs_data;
 823
 824	cs_data = adev->gfx.rlc.cs_data;
 825
 826	if (cs_data) {
 827		/* clear state block */
 828		adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev);
 829		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
 830					      AMDGPU_GEM_DOMAIN_VRAM,
 831					      &adev->gfx.rlc.clear_state_obj,
 832					      &adev->gfx.rlc.clear_state_gpu_addr,
 833					      (void **)&adev->gfx.rlc.cs_ptr);
 834		if (r) {
 835			dev_err(adev->dev, "(%d) failed to create rlc csb bo\n",
 836				r);
 837			gfx_v9_0_rlc_fini(adev);
 838			return r;
 839		}
 840		/* set up the cs buffer */
 841		dst_ptr = adev->gfx.rlc.cs_ptr;
 842		gfx_v9_0_get_csb_buffer(adev, dst_ptr);
 843		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
 844		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
 845	}
 846
 847	if (adev->asic_type == CHIP_RAVEN) {
 848		/* TODO: double check the cp_table_size for RV */
 849		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
 850		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
 851					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
 852					      &adev->gfx.rlc.cp_table_obj,
 853					      &adev->gfx.rlc.cp_table_gpu_addr,
 854					      (void **)&adev->gfx.rlc.cp_table_ptr);
 855		if (r) {
 856			dev_err(adev->dev,
 857				"(%d) failed to create cp table bo\n", r);
 858			gfx_v9_0_rlc_fini(adev);
 859			return r;
 860		}
 861
 862		rv_init_cp_jump_table(adev);
 863		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
 864		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
 865
 866		gfx_v9_0_init_lbpw(adev);
 867	}
 868
 869	return 0;
 870}
 871
 872static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 873{
 874	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
 875	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
 876}
 877
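/* Take ownership of the compute queues, allocate and clear the MEC HPD/EOP buffer, and copy the MEC firmware into a GTT buffer object. */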
 878static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 879{
 880	int r;
 881	u32 *hpd;
 882	const __le32 *fw_data;
 883	unsigned fw_size;
 884	u32 *fw;
 885	size_t mec_hpd_size;
 886
 887	const struct gfx_firmware_header_v1_0 *mec_hdr;
 888
 889	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 890
 891	/* take ownership of the relevant compute queues */
 892	amdgpu_gfx_compute_queue_acquire(adev);
 893	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
 894
 895	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
 896				      AMDGPU_GEM_DOMAIN_GTT,
 897				      &adev->gfx.mec.hpd_eop_obj,
 898				      &adev->gfx.mec.hpd_eop_gpu_addr,
 899				      (void **)&hpd);
 900	if (r) {
  901		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
 902		gfx_v9_0_mec_fini(adev);
 903		return r;
 904	}
 905
 906	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
 907
 908	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 909	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 910
 911	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
 912
 913	fw_data = (const __le32 *)
 914		(adev->gfx.mec_fw->data +
 915		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
 916	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
 917
 918	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
 919				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
 920				      &adev->gfx.mec.mec_fw_obj,
 921				      &adev->gfx.mec.mec_fw_gpu_addr,
 922				      (void **)&fw);
 923	if (r) {
 924		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
 925		gfx_v9_0_mec_fini(adev);
 926		return r;
 927	}
 928
 929	memcpy(fw, fw_data, fw_size);
 930
 931	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
 932	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 933
 934	return 0;
 935}
 936
 937static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
 938{
 939	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 940		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 941		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 942		(address << SQ_IND_INDEX__INDEX__SHIFT) |
 943		(SQ_IND_INDEX__FORCE_READ_MASK));
 944	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 945}
 946
 947static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
 948			   uint32_t wave, uint32_t thread,
 949			   uint32_t regno, uint32_t num, uint32_t *out)
 950{
 951	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
 952		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 953		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 954		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
 955		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
 956		(SQ_IND_INDEX__FORCE_READ_MASK) |
 957		(SQ_IND_INDEX__AUTO_INCR_MASK));
 958	while (num--)
 959		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
 960}
 961
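/* Read the status registers of a single wave through the SQ indirect register interface (type 1 wave data). */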
 962static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
 963{
 964	/* type 1 wave data */
 965	dst[(*no_fields)++] = 1;
 966	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
 967	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
 968	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
 969	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
 970	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
 971	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
 972	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
 973	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
 974	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
 975	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
 976	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
 977	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
 978	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
 979	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
 980}
 981
 982static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 983				     uint32_t wave, uint32_t start,
 984				     uint32_t size, uint32_t *dst)
 985{
 986	wave_read_regs(
 987		adev, simd, wave, 0,
 988		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 989}
 990
 991static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
 992				     uint32_t wave, uint32_t thread,
 993				     uint32_t start, uint32_t size,
 994				     uint32_t *dst)
 995{
 996	wave_read_regs(
 997		adev, simd, wave, thread,
 998		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
 999}
1000
1001static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1002				  u32 me, u32 pipe, u32 q)
1003{
1004	soc15_grbm_select(adev, me, pipe, q, 0);
1005}
1006
1007static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1008	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1009	.select_se_sh = &gfx_v9_0_select_se_sh,
1010	.read_wave_data = &gfx_v9_0_read_wave_data,
1011	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1012	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1013	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1014};
1015
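/* Set the per-ASIC gfx configuration defaults and decode GB_ADDR_CONFIG into its individual gb_addr_config_fields. */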
1016static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1017{
1018	u32 gb_addr_config;
1019
1020	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1021
1022	switch (adev->asic_type) {
1023	case CHIP_VEGA10:
1024		adev->gfx.config.max_hw_contexts = 8;
1025		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1026		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1027		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1028		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1029		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1030		break;
1031	case CHIP_VEGA12:
1032		adev->gfx.config.max_hw_contexts = 8;
1033		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1034		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1035		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1036		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1037		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1038		DRM_INFO("fix gfx.config for vega12\n");
1039		break;
1040	case CHIP_RAVEN:
1041		adev->gfx.config.max_hw_contexts = 8;
1042		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1043		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1044		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1045		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1046		gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1047		break;
1048	default:
1049		BUG();
1050		break;
1051	}
1052
1053	adev->gfx.config.gb_addr_config = gb_addr_config;
1054
1055	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1056			REG_GET_FIELD(
1057					adev->gfx.config.gb_addr_config,
1058					GB_ADDR_CONFIG,
1059					NUM_PIPES);
1060
1061	adev->gfx.config.max_tile_pipes =
1062		adev->gfx.config.gb_addr_config_fields.num_pipes;
1063
1064	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1065			REG_GET_FIELD(
1066					adev->gfx.config.gb_addr_config,
1067					GB_ADDR_CONFIG,
1068					NUM_BANKS);
1069	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1070			REG_GET_FIELD(
1071					adev->gfx.config.gb_addr_config,
1072					GB_ADDR_CONFIG,
1073					MAX_COMPRESSED_FRAGS);
1074	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1075			REG_GET_FIELD(
1076					adev->gfx.config.gb_addr_config,
1077					GB_ADDR_CONFIG,
1078					NUM_RB_PER_SE);
1079	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1080			REG_GET_FIELD(
1081					adev->gfx.config.gb_addr_config,
1082					GB_ADDR_CONFIG,
1083					NUM_SHADER_ENGINES);
1084	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1085			REG_GET_FIELD(
1086					adev->gfx.config.gb_addr_config,
1087					GB_ADDR_CONFIG,
1088					PIPE_INTERLEAVE_SIZE));
1089}
1090
1091static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1092				   struct amdgpu_ngg_buf *ngg_buf,
1093				   int size_se,
1094				   int default_size_se)
1095{
1096	int r;
1097
1098	if (size_se < 0) {
1099		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1100		return -EINVAL;
1101	}
1102	size_se = size_se ? size_se : default_size_se;
1103
1104	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1105	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1106				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1107				    &ngg_buf->bo,
1108				    &ngg_buf->gpu_addr,
1109				    NULL);
1110	if (r) {
1111		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1112		return r;
1113	}
1114	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1115
1116	return r;
1117}
1118
1119static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1120{
1121	int i;
1122
1123	for (i = 0; i < NGG_BUF_MAX; i++)
1124		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1125				      &adev->gfx.ngg.buf[i].gpu_addr,
1126				      NULL);
1127
1128	memset(&adev->gfx.ngg.buf[0], 0,
1129			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1130
1131	adev->gfx.ngg.init = false;
1132
1133	return 0;
1134}
1135
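/* Reserve GDS space for NGG and create the primitive, position, control-sideband and (optional) parameter-cache buffers. */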
1136static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1137{
1138	int r;
1139
1140	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1141		return 0;
1142
1143	/* GDS reserve memory: 64 bytes alignment */
1144	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1145	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
1146	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
1147	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1148	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1149
1150	/* Primitive Buffer */
1151	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1152				    amdgpu_prim_buf_per_se,
1153				    64 * 1024);
1154	if (r) {
1155		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1156		goto err;
1157	}
1158
1159	/* Position Buffer */
1160	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1161				    amdgpu_pos_buf_per_se,
1162				    256 * 1024);
1163	if (r) {
1164		dev_err(adev->dev, "Failed to create Position Buffer\n");
1165		goto err;
1166	}
1167
1168	/* Control Sideband */
1169	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1170				    amdgpu_cntl_sb_buf_per_se,
1171				    256);
1172	if (r) {
1173		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1174		goto err;
1175	}
1176
1177	/* Parameter Cache, not created by default */
1178	if (amdgpu_param_buf_per_se <= 0)
1179		goto out;
1180
1181	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1182				    amdgpu_param_buf_per_se,
1183				    512 * 1024);
1184	if (r) {
1185		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1186		goto err;
1187	}
1188
1189out:
1190	adev->gfx.ngg.init = true;
1191	return 0;
1192err:
1193	gfx_v9_0_ngg_fini(adev);
1194	return r;
1195}
1196
1197static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1198{
1199	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1200	int r;
1201	u32 data, base;
1202
1203	if (!amdgpu_ngg)
1204		return 0;
1205
1206	/* Program buffer size */
1207	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1208			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1209	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1210			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1211	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1212
1213	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1214			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1215	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1216			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1217	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1218
1219	/* Program buffer base address */
1220	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1221	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1222	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1223
1224	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1225	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1226	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1227
1228	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1229	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1230	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1231
1232	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1233	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1234	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1235
1236	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1237	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1238	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1239
1240	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1241	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1242	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1243
1244	/* Clear GDS reserved memory */
1245	r = amdgpu_ring_alloc(ring, 17);
1246	if (r) {
1247		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
1248			  ring->idx, r);
1249		return r;
1250	}
1251
1252	gfx_v9_0_write_data_to_reg(ring, 0, false,
1253				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1254			           (adev->gds.mem.total_size +
1255				    adev->gfx.ngg.gds_reserve_size) >>
1256				   AMDGPU_GDS_SHIFT);
1257
1258	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1259	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1260				PACKET3_DMA_DATA_DST_SEL(1) |
1261				PACKET3_DMA_DATA_SRC_SEL(2)));
1262	amdgpu_ring_write(ring, 0);
1263	amdgpu_ring_write(ring, 0);
1264	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1265	amdgpu_ring_write(ring, 0);
1266	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1267				adev->gfx.ngg.gds_reserve_size);
1268
1269	gfx_v9_0_write_data_to_reg(ring, 0, false,
1270				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1271
1272	amdgpu_ring_commit(ring);
1273
1274	return 0;
1275}
1276
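/* Set up one compute ring: derive me/pipe/queue, assign its doorbell and EOP address, and attach it to the matching EOP interrupt source. */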
1277static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1278				      int mec, int pipe, int queue)
1279{
1280	int r;
1281	unsigned irq_type;
1282	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1283
1284	ring = &adev->gfx.compute_ring[ring_id];
1285
1286	/* mec0 is me1 */
1287	ring->me = mec + 1;
1288	ring->pipe = pipe;
1289	ring->queue = queue;
1290
1291	ring->ring_obj = NULL;
1292	ring->use_doorbell = true;
1293	ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1;
1294	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1295				+ (ring_id * GFX9_MEC_HPD_SIZE);
1296	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1297
1298	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1299		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1300		+ ring->pipe;
1301
1302	/* type-2 packets are deprecated on MEC, use type-3 instead */
1303	r = amdgpu_ring_init(adev, ring, 1024,
1304			     &adev->gfx.eop_irq, irq_type);
1305	if (r)
1306		return r;
1307
1308
1309	return 0;
1310}
1311
1312static int gfx_v9_0_sw_init(void *handle)
1313{
1314	int i, j, k, r, ring_id;
1315	struct amdgpu_ring *ring;
1316	struct amdgpu_kiq *kiq;
1317	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1318
1319	switch (adev->asic_type) {
1320	case CHIP_VEGA10:
1321	case CHIP_VEGA12:
1322	case CHIP_RAVEN:
1323		adev->gfx.mec.num_mec = 2;
1324		break;
1325	default:
1326		adev->gfx.mec.num_mec = 1;
1327		break;
1328	}
1329
1330	adev->gfx.mec.num_pipe_per_mec = 4;
1331	adev->gfx.mec.num_queue_per_pipe = 8;
1332
1333	/* KIQ event */
1334	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
1335	if (r)
1336		return r;
1337
1338	/* EOP Event */
1339	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
1340	if (r)
1341		return r;
1342
1343	/* Privileged reg */
1344	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184,
1345			      &adev->gfx.priv_reg_irq);
1346	if (r)
1347		return r;
1348
1349	/* Privileged inst */
1350	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185,
1351			      &adev->gfx.priv_inst_irq);
1352	if (r)
1353		return r;
1354
1355	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1356
1357	gfx_v9_0_scratch_init(adev);
1358
1359	r = gfx_v9_0_init_microcode(adev);
1360	if (r) {
1361		DRM_ERROR("Failed to load gfx firmware!\n");
1362		return r;
1363	}
1364
1365	r = gfx_v9_0_rlc_init(adev);
1366	if (r) {
1367		DRM_ERROR("Failed to init rlc BOs!\n");
1368		return r;
1369	}
1370
1371	r = gfx_v9_0_mec_init(adev);
1372	if (r) {
1373		DRM_ERROR("Failed to init MEC BOs!\n");
1374		return r;
1375	}
1376
1377	/* set up the gfx ring */
1378	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1379		ring = &adev->gfx.gfx_ring[i];
1380		ring->ring_obj = NULL;
1381		if (!i)
1382			sprintf(ring->name, "gfx");
1383		else
1384			sprintf(ring->name, "gfx_%d", i);
1385		ring->use_doorbell = true;
1386		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
1387		r = amdgpu_ring_init(adev, ring, 1024,
1388				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1389		if (r)
1390			return r;
1391	}
1392
1393	/* set up the compute queues - allocate horizontally across pipes */
1394	ring_id = 0;
1395	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1396		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1397			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1398				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1399					continue;
1400
1401				r = gfx_v9_0_compute_ring_init(adev,
1402							       ring_id,
1403							       i, k, j);
1404				if (r)
1405					return r;
1406
1407				ring_id++;
1408			}
1409		}
1410	}
1411
1412	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1413	if (r) {
1414		DRM_ERROR("Failed to init KIQ BOs!\n");
1415		return r;
1416	}
1417
1418	kiq = &adev->gfx.kiq;
1419	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1420	if (r)
1421		return r;
1422
 1423	/* create MQD for all compute queues as well as KIQ for SRIOV case */
1424	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1425	if (r)
1426		return r;
1427
1428	/* reserve GDS, GWS and OA resource for gfx */
1429	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
1430				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
1431				    &adev->gds.gds_gfx_bo, NULL, NULL);
1432	if (r)
1433		return r;
1434
1435	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
1436				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
1437				    &adev->gds.gws_gfx_bo, NULL, NULL);
1438	if (r)
1439		return r;
1440
1441	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
1442				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
1443				    &adev->gds.oa_gfx_bo, NULL, NULL);
1444	if (r)
1445		return r;
1446
1447	adev->gfx.ce_ram_size = 0x8000;
1448
1449	gfx_v9_0_gpu_early_init(adev);
1450
1451	r = gfx_v9_0_ngg_init(adev);
1452	if (r)
1453		return r;
1454
1455	return 0;
1456}
1457
1458
1459static int gfx_v9_0_sw_fini(void *handle)
1460{
1461	int i;
1462	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1463
1464	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1465	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1466	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1467
1468	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1469		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1470	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1471		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1472
1473	amdgpu_gfx_compute_mqd_sw_fini(adev);
1474	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1475	amdgpu_gfx_kiq_fini(adev);
1476
1477	gfx_v9_0_mec_fini(adev);
1478	gfx_v9_0_ngg_fini(adev);
1479	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
1480				&adev->gfx.rlc.clear_state_gpu_addr,
1481				(void **)&adev->gfx.rlc.cs_ptr);
1482	if (adev->asic_type == CHIP_RAVEN) {
1483		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1484				&adev->gfx.rlc.cp_table_gpu_addr,
1485				(void **)&adev->gfx.rlc.cp_table_ptr);
1486	}
1487	gfx_v9_0_free_microcode(adev);
1488
1489	return 0;
1490}
1491
1492
1493static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1494{
1495	/* TODO */
1496}
1497
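/*
 * Program GRBM_GFX_INDEX so that subsequent GRBM-indexed register accesses
 * target the given shader engine / shader array / instance.  Passing
 * 0xffffffff for any argument selects the corresponding broadcast mode.
 */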
1498static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1499{
1500	u32 data;
1501
1502	if (instance == 0xffffffff)
1503		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1504	else
1505		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1506
1507	if (se_num == 0xffffffff)
1508		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1509	else
1510		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1511
1512	if (sh_num == 0xffffffff)
1513		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1514	else
1515		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1516
1517	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
1518}
1519
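/*
 * Return the bitmap of render backends that are active for the currently
 * selected SE/SH, based on the CC and GC_USER backend-disable registers.
 */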
1520static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1521{
1522	u32 data, mask;
1523
1524	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1525	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1526
1527	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1528	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1529
1530	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1531					 adev->gfx.config.max_sh_per_se);
1532
1533	return (~data) & mask;
1534}
1535
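/*
 * Query each SE/SH for its active render backends and cache the combined
 * bitmap and count in adev->gfx.config.
 */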
1536static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1537{
1538	int i, j;
1539	u32 data;
1540	u32 active_rbs = 0;
1541	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1542					adev->gfx.config.max_sh_per_se;
1543
1544	mutex_lock(&adev->grbm_idx_mutex);
1545	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1546		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1547			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1548			data = gfx_v9_0_get_rb_active_bitmap(adev);
1549			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1550					       rb_bitmap_width_per_sh);
1551		}
1552	}
1553	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1554	mutex_unlock(&adev->grbm_idx_mutex);
1555
1556	adev->gfx.config.backend_enable_mask = active_rbs;
1557	adev->gfx.config.num_rbs = hweight32(active_rbs);
1558}
1559
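/*
 * VMIDs 8-15 are used for compute queues; give each of them the private
 * SH_MEM aperture bases configured below.
 */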
1560#define DEFAULT_SH_MEM_BASES	(0x6000)
1561#define FIRST_COMPUTE_VMID	(8)
1562#define LAST_COMPUTE_VMID	(16)
1563static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1564{
1565	int i;
1566	uint32_t sh_mem_config;
1567	uint32_t sh_mem_bases;
1568
1569	/*
1570	 * Configure apertures:
1571	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1572	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1573	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1574	 */
1575	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1576
1577	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1578			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1579			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1580
1581	mutex_lock(&adev->srbm_mutex);
1582	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1583		soc15_grbm_select(adev, 0, 0, 0, i);
1584		/* CP and shaders */
1585		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1586		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1587	}
1588	soc15_grbm_select(adev, 0, 0, 0, 0);
1589	mutex_unlock(&adev->srbm_mutex);
1590}
1591
1592static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
1593{
1594	u32 tmp;
1595	int i;
1596
1597	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1598
1599	gfx_v9_0_tiling_mode_table_init(adev);
1600
1601	gfx_v9_0_setup_rb(adev);
1602	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1603
1604	/* XXX SH_MEM regs */
1605	/* where to put LDS, scratch, GPUVM in FSA64 space */
1606	mutex_lock(&adev->srbm_mutex);
1607	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1608		soc15_grbm_select(adev, 0, 0, 0, i);
1609		/* CP and shaders */
1610		if (i == 0) {
1611			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1612					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1613			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1614			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
1615		} else {
1616			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1617					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1618			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
1619			tmp = adev->gmc.shared_aperture_start >> 48;
1620			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
1621		}
1622	}
1623	soc15_grbm_select(adev, 0, 0, 0, 0);
1624
1625	mutex_unlock(&adev->srbm_mutex);
1626
1627	gfx_v9_0_init_compute_vmid(adev);
1628
1629	mutex_lock(&adev->grbm_idx_mutex);
1630	/*
1631	 * make sure that the following register writes will be broadcast
1632	 * to all the shaders
1633	 */
1634	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1635
1636	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
1637		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
1638			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1639		   (adev->gfx.config.sc_prim_fifo_size_backend <<
1640			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1641		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
1642			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1643		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1644			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1645	mutex_unlock(&adev->grbm_idx_mutex);
1646
1647}
1648
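/*
 * Poll the RLC serdes busy status for every SE/SH, and then the non-CU
 * masters, until they report idle or adev->usec_timeout expires.
 */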
1649static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1650{
1651	u32 i, j, k;
1652	u32 mask;
1653
1654	mutex_lock(&adev->grbm_idx_mutex);
1655	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1656		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1657			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1658			for (k = 0; k < adev->usec_timeout; k++) {
1659				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1660					break;
1661				udelay(1);
1662			}
1663			if (k == adev->usec_timeout) {
1664				gfx_v9_0_select_se_sh(adev, 0xffffffff,
1665						      0xffffffff, 0xffffffff);
1666				mutex_unlock(&adev->grbm_idx_mutex);
1667				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
1668					 i, j);
1669				return;
1670			}
1671		}
1672	}
1673	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1674	mutex_unlock(&adev->grbm_idx_mutex);
1675
1676	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1677		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1678		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1679		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
1680	for (k = 0; k < adev->usec_timeout; k++) {
1681		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
1682			break;
1683		udelay(1);
1684	}
1685}
1686
1687static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1688					       bool enable)
1689{
1690	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
1691
1692	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
1693	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
1694	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
1695	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
1696
1697	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
1698}
1699
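/*
 * Point the RLC at the clear state indirect buffer (CSIB) by programming
 * its GPU address and size.
 */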
1700static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
1701{
1702	/* csib */
1703	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
1704			adev->gfx.rlc.clear_state_gpu_addr >> 32);
1705	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
1706			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1707	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
1708			adev->gfx.rlc.clear_state_size);
1709}
1710
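/*
 * Walk the RLC register_list_format blob: record the offset where each
 * entry starts in indirect_start_offsets[] and replace every indirect
 * register address with its index into unique_indirect_regs[].
 */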
1711static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
1712				int indirect_offset,
1713				int list_size,
1714				int *unique_indirect_regs,
1715				int *unique_indirect_reg_count,
1716				int max_indirect_reg_count,
1717				int *indirect_start_offsets,
1718				int *indirect_start_offsets_count,
1719				int max_indirect_start_offsets_count)
1720{
1721	int idx;
1722	bool new_entry = true;
1723
1724	for (; indirect_offset < list_size; indirect_offset++) {
1725
1726		if (new_entry) {
1727			new_entry = false;
1728			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
1729			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
1730			BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
1731		}
1732
1733		if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
1734			new_entry = true;
1735			continue;
1736		}
1737
1738		indirect_offset += 2;
1739
1740		/* look for the matching index */
1741		for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
1742			if (unique_indirect_regs[idx] ==
1743				register_list_format[indirect_offset])
1744				break;
1745		}
1746
1747		if (idx >= *unique_indirect_reg_count) {
1748			unique_indirect_regs[*unique_indirect_reg_count] =
1749				register_list_format[indirect_offset];
1750			idx = *unique_indirect_reg_count;
1751			*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
1752			BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
1753		}
1754
1755		register_list_format[indirect_offset] = idx;
1756	}
1757}
1758
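/*
 * Upload the RLC save/restore lists: the register_restore table goes into
 * SRM ARAM, the re-indexed format list, list size and starting offsets go
 * into RLC GPM scratch, and the unique indirect registers are written to
 * the RLC_SRM_INDEX_CNTL address/data pairs.
 */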
1759static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
1760{
1761	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1762	int unique_indirect_reg_count = 0;
1763
1764	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
1765	int indirect_start_offsets_count = 0;
1766
1767	int list_size = 0;
1768	int i = 0;
1769	u32 tmp = 0;
1770
1771	u32 *register_list_format =
1772		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
1773	if (!register_list_format)
1774		return -ENOMEM;
1775	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
1776		adev->gfx.rlc.reg_list_format_size_bytes);
1777
1778	/* setup unique_indirect_regs array and indirect_start_offsets array */
1779	gfx_v9_0_parse_ind_reg_list(register_list_format,
1780				GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
1781				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
1782				unique_indirect_regs,
1783				&unique_indirect_reg_count,
1784				ARRAY_SIZE(unique_indirect_regs),
1785				indirect_start_offsets,
1786				&indirect_start_offsets_count,
1787				ARRAY_SIZE(indirect_start_offsets));
1788
1789	/* enable auto inc in case it is disabled */
1790	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
1791	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1792	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
1793
1794	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
1795	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
1796		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
1797	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
1798		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1799			adev->gfx.rlc.register_restore[i]);
1800
1801	/* load direct register */
1802	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
1803	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
1804		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
1805			adev->gfx.rlc.register_restore[i]);
1806
1807	/* load indirect register */
1808	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1809		adev->gfx.rlc.reg_list_format_start);
1810	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
1811		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1812			register_list_format[i]);
1813
1814	/* set save/restore list size */
1815	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
1816	list_size = list_size >> 1;
1817	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1818		adev->gfx.rlc.reg_restore_list_size);
1819	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
1820
1821	/* write the starting offsets to RLC scratch ram */
1822	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
1823		adev->gfx.rlc.starting_offsets_start);
1824	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
1825		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
1826			indirect_start_offsets[i]);
1827
1828	/* load unique indirect regs*/
1829	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
1830		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
1831			unique_indirect_regs[i] & 0x3FFFF);
1832		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
1833			unique_indirect_regs[i] >> 20);
1834	}
1835
1836	kfree(register_list_format);
1837	return 0;
1838}
1839
1840static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
1841{
1842	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
1843}
1844
1845static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
1846					     bool enable)
1847{
1848	uint32_t data = 0;
1849	uint32_t default_data = 0;
1850
1851	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
1852	if (enable) {
1853		/* enable GFXIP control over CGPG */
1854		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
1855		if (default_data != data)
1856			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1857
1858		/* update status */
1859		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
1860		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
1861		if (default_data != data)
1862			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1863	} else {
1864		/* restore GFXIP control over CGPG */
1865		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
1866		if (default_data != data)
1867			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
1868	}
1869}
1870
1871static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
1872{
1873	uint32_t data = 0;
1874
1875	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
1876			      AMD_PG_SUPPORT_GFX_SMG |
1877			      AMD_PG_SUPPORT_GFX_DMG)) {
1878		/* init IDLE_POLL_COUNT = 60 */
1879		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
1880		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
1881		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
1882		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
1883
1884		/* init RLC PG Delay */
1885		data = 0;
1886		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
1887		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
1888		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
1889		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
1890		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
1891
1892		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
1893		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
1894		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
1895		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
1896
1897		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
1898		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
1899		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
1900		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
1901
1902		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
1903		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
1904
1905		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
1906		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
1907		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
1908
1909		pwr_10_0_gfxip_control_over_cgpg(adev, true);
1910	}
1911}
1912
1913static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
1914						bool enable)
1915{
1916	uint32_t data = 0;
1917	uint32_t default_data = 0;
1918
1919	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1920	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1921			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
1922			     enable ? 1 : 0);
1923	if (default_data != data)
1924		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1925}
1926
1927static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
1928						bool enable)
1929{
1930	uint32_t data = 0;
1931	uint32_t default_data = 0;
1932
1933	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1934	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1935			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
1936			     enable ? 1 : 0);
1937	if (default_data != data)
1938		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1939}
1940
1941static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
1942					bool enable)
1943{
1944	uint32_t data = 0;
1945	uint32_t default_data = 0;
1946
1947	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1948	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1949			     CP_PG_DISABLE,
1950			     enable ? 0 : 1);
1951	if (default_data != data)
1952		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1953}
1954
1955static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
1956						bool enable)
1957{
1958	uint32_t data, default_data;
1959
1960	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1961	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1962			     GFX_POWER_GATING_ENABLE,
1963			     enable ? 1 : 0);
1964	if (default_data != data)
1965		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1966}
1967
1968static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
1969						bool enable)
1970{
1971	uint32_t data, default_data;
1972
1973	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1974	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1975			     GFX_PIPELINE_PG_ENABLE,
1976			     enable ? 1 : 0);
1977	if (default_data != data)
1978		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1979
1980	if (!enable)
1981		/* read any GFX register to wake up GFX */
1982		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
1983}
1984
1985static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
1986						       bool enable)
1987{
1988	uint32_t data, default_data;
1989
1990	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
1991	data = REG_SET_FIELD(data, RLC_PG_CNTL,
1992			     STATIC_PER_CU_PG_ENABLE,
1993			     enable ? 1 : 0);
1994	if (default_data != data)
1995		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
1996}
1997
1998static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
1999						bool enable)
2000{
2001	uint32_t data, default_data;
2002
2003	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2004	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2005			     DYN_PER_CU_PG_ENABLE,
2006			     enable ? 1 : 0);
2007	if (default_data != data)
2008		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2009}
2010
2011static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2012{
2013	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2014			      AMD_PG_SUPPORT_GFX_SMG |
2015			      AMD_PG_SUPPORT_GFX_DMG |
2016			      AMD_PG_SUPPORT_CP |
2017			      AMD_PG_SUPPORT_GDS |
2018			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2019		gfx_v9_0_init_csb(adev);
2020		gfx_v9_0_init_rlc_save_restore_list(adev);
2021		gfx_v9_0_enable_save_restore_machine(adev);
2022
2023		if (adev->asic_type == CHIP_RAVEN) {
2024			WREG32(mmRLC_JUMP_TABLE_RESTORE,
2025				adev->gfx.rlc.cp_table_gpu_addr >> 8);
2026			gfx_v9_0_init_gfx_power_gating(adev);
2027
2028			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
2029				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
2030				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
2031			} else {
2032				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
2033				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
2034			}
2035
2036			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
2037				gfx_v9_0_enable_cp_power_gating(adev, true);
2038			else
2039				gfx_v9_0_enable_cp_power_gating(adev, false);
2040		}
2041	}
2042}
2043
2044void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2045{
2046	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2047	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2048	gfx_v9_0_wait_for_rlc_serdes(adev);
2049}
2050
2051static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2052{
2053	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2054	udelay(50);
2055	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2056	udelay(50);
2057}
2058
2059static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2060{
2061#ifdef AMDGPU_RLC_DEBUG_RETRY
2062	u32 rlc_ucode_ver;
2063#endif
2064
2065	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2066
2067	/* on APUs the CP interrupt is only enabled after the CP has been initialized */
2068	if (!(adev->flags & AMD_IS_APU))
2069		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2070
2071	udelay(50);
2072
2073#ifdef AMDGPU_RLC_DEBUG_RETRY
2074	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2075	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2076	if (rlc_ucode_ver == 0x108) {
2077		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2078				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2079		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2080		 * default is 0x9C4 to create a 100us interval */
2081		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2082		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2083		 * to disable the page fault retry interrupts, default is
2084		 * 0x100 (256) */
2085		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2086	}
2087#endif
2088}
2089
2090static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2091{
2092	const struct rlc_firmware_header_v2_0 *hdr;
2093	const __le32 *fw_data;
2094	unsigned i, fw_size;
2095
2096	if (!adev->gfx.rlc_fw)
2097		return -EINVAL;
2098
2099	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2100	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2101
2102	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2103			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2104	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2105
2106	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2107			RLCG_UCODE_LOADING_START_ADDRESS);
2108	for (i = 0; i < fw_size; i++)
2109		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2110	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2111
2112	return 0;
2113}
2114
2115static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2116{
2117	int r;
2118
2119	if (amdgpu_sriov_vf(adev)) {
2120		gfx_v9_0_init_csb(adev);
2121		return 0;
2122	}
2123
2124	gfx_v9_0_rlc_stop(adev);
2125
2126	/* disable CG */
2127	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2128
2129	/* disable PG */
2130	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
2131
2132	gfx_v9_0_rlc_reset(adev);
2133
2134	gfx_v9_0_init_pg(adev);
2135
2136	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2137		/* legacy rlc firmware loading */
2138		r = gfx_v9_0_rlc_load_microcode(adev);
2139		if (r)
2140			return r;
2141	}
2142
2143	if (adev->asic_type == CHIP_RAVEN) {
2144		if (amdgpu_lbpw != 0)
2145			gfx_v9_0_enable_lbpw(adev, true);
2146		else
2147			gfx_v9_0_enable_lbpw(adev, false);
2148	}
2149
2150	gfx_v9_0_rlc_start(adev);
2151
2152	return 0;
2153}
2154
2155static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2156{
2157	int i;
2158	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2159
2160	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2161	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2162	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2163	if (!enable) {
2164		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2165			adev->gfx.gfx_ring[i].ready = false;
2166	}
2167	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
2168	udelay(50);
2169}
2170
2171static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2172{
2173	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2174	const struct gfx_firmware_header_v1_0 *ce_hdr;
2175	const struct gfx_firmware_header_v1_0 *me_hdr;
2176	const __le32 *fw_data;
2177	unsigned i, fw_size;
2178
2179	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2180		return -EINVAL;
2181
2182	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2183		adev->gfx.pfp_fw->data;
2184	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2185		adev->gfx.ce_fw->data;
2186	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2187		adev->gfx.me_fw->data;
2188
2189	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2190	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2191	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2192
2193	gfx_v9_0_cp_gfx_enable(adev, false);
2194
2195	/* PFP */
2196	fw_data = (const __le32 *)
2197		(adev->gfx.pfp_fw->data +
2198		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2199	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2200	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2201	for (i = 0; i < fw_size; i++)
2202		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2203	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2204
2205	/* CE */
2206	fw_data = (const __le32 *)
2207		(adev->gfx.ce_fw->data +
2208		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2209	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2210	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2211	for (i = 0; i < fw_size; i++)
2212		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2213	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2214
2215	/* ME */
2216	fw_data = (const __le32 *)
2217		(adev->gfx.me_fw->data +
2218		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2219	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2220	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2221	for (i = 0; i < fw_size; i++)
2222		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2223	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2224
2225	return 0;
2226}
2227
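/*
 * Prime the gfx ring with the clear state sequence: PREAMBLE/CLEAR_STATE
 * packets plus the SECT_CONTEXT register extents from gfx9_cs_data.
 */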
2228static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2229{
2230	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2231	const struct cs_section_def *sect = NULL;
2232	const struct cs_extent_def *ext = NULL;
2233	int r, i, tmp;
2234
2235	/* init the CP */
2236	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2237	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2238
2239	gfx_v9_0_cp_gfx_enable(adev, true);
2240
2241	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2242	if (r) {
2243		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2244		return r;
2245	}
2246
2247	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2248	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2249
2250	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2251	amdgpu_ring_write(ring, 0x80000000);
2252	amdgpu_ring_write(ring, 0x80000000);
2253
2254	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2255		for (ext = sect->section; ext->extent != NULL; ++ext) {
2256			if (sect->id == SECT_CONTEXT) {
2257				amdgpu_ring_write(ring,
2258				       PACKET3(PACKET3_SET_CONTEXT_REG,
2259					       ext->reg_count));
2260				amdgpu_ring_write(ring,
2261				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2262				for (i = 0; i < ext->reg_count; i++)
2263					amdgpu_ring_write(ring, ext->extent[i]);
2264			}
2265		}
2266	}
2267
2268	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2269	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2270
2271	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2272	amdgpu_ring_write(ring, 0);
2273
2274	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2275	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2276	amdgpu_ring_write(ring, 0x8000);
2277	amdgpu_ring_write(ring, 0x8000);
2278
2279	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2280	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2281		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2282	amdgpu_ring_write(ring, tmp);
2283	amdgpu_ring_write(ring, 0);
2284
2285	amdgpu_ring_commit(ring);
2286
2287	return 0;
2288}
2289
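/*
 * Program the CP_RB0 ring buffer: size, rptr/wptr write-back addresses,
 * base address and (optionally) the doorbell, then start the ring.
 */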
2290static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2291{
2292	struct amdgpu_ring *ring;
2293	u32 tmp;
2294	u32 rb_bufsz;
2295	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2296
2297	/* Set the write pointer delay */
2298	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2299
2300	/* set the RB to use vmid 0 */
2301	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2302
2303	/* Set ring buffer size */
2304	ring = &adev->gfx.gfx_ring[0];
2305	rb_bufsz = order_base_2(ring->ring_size / 8);
2306	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2307	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2308#ifdef __BIG_ENDIAN
2309	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2310#endif
2311	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2312
2313	/* Initialize the ring buffer's write pointers */
2314	ring->wptr = 0;
2315	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2316	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2317
2318	/* set the wb address whether it's enabled or not */
2319	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2320	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2321	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2322
2323	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2324	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2325	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2326
2327	mdelay(1);
2328	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2329
2330	rb_addr = ring->gpu_addr >> 8;
2331	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2332	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2333
2334	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2335	if (ring->use_doorbell) {
2336		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2337				    DOORBELL_OFFSET, ring->doorbell_index);
2338		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2339				    DOORBELL_EN, 1);
2340	} else {
2341		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2342	}
2343	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2344
2345	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2346			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2347	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2348
2349	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2350		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2351
2352
2353	/* start the ring */
2354	gfx_v9_0_cp_gfx_start(adev);
2355	ring->ready = true;
2356
2357	return 0;
2358}
2359
2360static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2361{
2362	int i;
2363
2364	if (enable) {
2365		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
2366	} else {
2367		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
2368			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2369		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2370			adev->gfx.compute_ring[i].ready = false;
2371		adev->gfx.kiq.ring.ready = false;
2372	}
2373	udelay(50);
2374}
2375
2376static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2377{
2378	const struct gfx_firmware_header_v1_0 *mec_hdr;
2379	const __le32 *fw_data;
2380	unsigned i;
2381	u32 tmp;
2382
2383	if (!adev->gfx.mec_fw)
2384		return -EINVAL;
2385
2386	gfx_v9_0_cp_compute_enable(adev, false);
2387
2388	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2389	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2390
2391	fw_data = (const __le32 *)
2392		(adev->gfx.mec_fw->data +
2393		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2394	tmp = 0;
2395	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2396	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2397	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2398
2399	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2400		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2401	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2402		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2403
2404	/* MEC1 */
2405	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2406			 mec_hdr->jt_offset);
2407	for (i = 0; i < mec_hdr->jt_size; i++)
2408		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2409			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2410
2411	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2412			adev->gfx.mec_fw_version);
2413	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2414
2415	return 0;
2416}
2417
2418/* KIQ functions */
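/*
 * Tell the RLC (via RLC_CP_SCHEDULERS) which me/pipe/queue hosts the KIQ,
 * then set bit 0x80 in a second write.
 */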
2419static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2420{
2421	uint32_t tmp;
2422	struct amdgpu_device *adev = ring->adev;
2423
2424	/* tell RLC which queue is the KIQ */
2425	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2426	tmp &= 0xffffff00;
2427	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2428	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2429	tmp |= 0x80;
2430	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2431}
2432
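/*
 * Bring up all user compute queues through the KIQ: a SET_RESOURCES packet
 * with the queue bitmap, one MAP_QUEUES packet per compute ring, and a
 * scratch register write that is polled to confirm completion.
 */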
2433static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2434{
2435	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2436	uint32_t scratch, tmp = 0;
2437	uint64_t queue_mask = 0;
2438	int r, i;
2439
2440	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2441		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2442			continue;
2443
2444		/* This situation may be hit in the future if a new HW
2445		 * generation exposes more than 64 queues. If so, the
2446		 * definition of queue_mask needs updating */
2447		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2448			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2449			break;
2450		}
2451
2452		queue_mask |= (1ull << i);
2453	}
2454
2455	r = amdgpu_gfx_scratch_get(adev, &scratch);
2456	if (r) {
2457		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
2458		return r;
2459	}
2460	WREG32(scratch, 0xCAFEDEAD);
2461
2462	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
2463	if (r) {
2464		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2465		amdgpu_gfx_scratch_free(adev, scratch);
2466		return r;
2467	}
2468
2469	/* set resources */
2470	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2471	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2472			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2473	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2474	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2475	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2476	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2477	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2478	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2479	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2480		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2481		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2482		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2483
2484		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2485		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2486		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2487				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2488				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2489				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2490				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2491				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2492				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2493				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2494				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2495				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2496		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2497		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2498		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2499		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2500		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2501	}
2502	/* write to scratch for completion */
2503	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2504	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
2505	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
2506	amdgpu_ring_commit(kiq_ring);
2507
2508	for (i = 0; i < adev->usec_timeout; i++) {
2509		tmp = RREG32(scratch);
2510		if (tmp == 0xDEADBEEF)
2511			break;
2512		DRM_UDELAY(1);
2513	}
2514	if (i >= adev->usec_timeout) {
2515		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
2516			  scratch, tmp);
2517		r = -EINVAL;
2518	}
2519	amdgpu_gfx_scratch_free(adev, scratch);
2520
2521	return r;
2522}
2523
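/*
 * Fill the v9 MQD from the ring state (EOP buffer, MQD/HQD base, queue
 * size, doorbell and write-back addresses); the hardware queue is later
 * initialized from this structure.
 */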
2524static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2525{
2526	struct amdgpu_device *adev = ring->adev;
2527	struct v9_mqd *mqd = ring->mqd_ptr;
2528	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2529	uint32_t tmp;
2530
2531	mqd->header = 0xC0310800;
2532	mqd->compute_pipelinestat_enable = 0x00000001;
2533	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2534	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2535	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2536	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2537	mqd->compute_misc_reserved = 0x00000003;
2538
2539	mqd->dynamic_cu_mask_addr_lo =
2540		lower_32_bits(ring->mqd_gpu_addr
2541			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2542	mqd->dynamic_cu_mask_addr_hi =
2543		upper_32_bits(ring->mqd_gpu_addr
2544			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2545
2546	eop_base_addr = ring->eop_gpu_addr >> 8;
2547	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2548	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2549
2550	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2551	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2552	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2553			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2554
2555	mqd->cp_hqd_eop_control = tmp;
2556
2557	/* enable doorbell? */
2558	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2559
2560	if (ring->use_doorbell) {
2561		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2562				    DOORBELL_OFFSET, ring->doorbell_index);
2563		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2564				    DOORBELL_EN, 1);
2565		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2566				    DOORBELL_SOURCE, 0);
2567		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2568				    DOORBELL_HIT, 0);
2569	} else {
2570		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2571					 DOORBELL_EN, 0);
2572	}
2573
2574	mqd->cp_hqd_pq_doorbell_control = tmp;
2575
2576	/* disable the queue if it's active */
2577	ring->wptr = 0;
2578	mqd->cp_hqd_dequeue_request = 0;
2579	mqd->cp_hqd_pq_rptr = 0;
2580	mqd->cp_hqd_pq_wptr_lo = 0;
2581	mqd->cp_hqd_pq_wptr_hi = 0;
2582
2583	/* set the pointer to the MQD */
2584	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2585	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2586
2587	/* set MQD vmid to 0 */
2588	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2589	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2590	mqd->cp_mqd_control = tmp;
2591
2592	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2593	hqd_gpu_addr = ring->gpu_addr >> 8;
2594	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2595	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2596
2597	/* set up the HQD, this is similar to CP_RB0_CNTL */
2598	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2599	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2600			    (order_base_2(ring->ring_size / 4) - 1));
2601	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2602			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2603#ifdef __BIG_ENDIAN
2604	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2605#endif
2606	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2607	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2608	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2609	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2610	mqd->cp_hqd_pq_control = tmp;
2611
2612	/* set the wb address whether it's enabled or not */
2613	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2614	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2615	mqd->cp_hqd_pq_rptr_report_addr_hi =
2616		upper_32_bits(wb_gpu_addr) & 0xffff;
2617
2618	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2619	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2620	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2621	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2622
2623	tmp = 0;
2624	/* enable the doorbell if requested */
2625	if (ring->use_doorbell) {
2626		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2627		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2628				DOORBELL_OFFSET, ring->doorbell_index);
2629
2630		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2631					 DOORBELL_EN, 1);
2632		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2633					 DOORBELL_SOURCE, 0);
2634		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2635					 DOORBELL_HIT, 0);
2636	}
2637
2638	mqd->cp_hqd_pq_doorbell_control = tmp;
2639
2640	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2641	ring->wptr = 0;
2642	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2643
2644	/* set the vmid for the queue */
2645	mqd->cp_hqd_vmid = 0;
2646
2647	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2648	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2649	mqd->cp_hqd_persistent_state = tmp;
2650
2651	/* set MIN_IB_AVAIL_SIZE */
2652	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2653	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2654	mqd->cp_hqd_ib_control = tmp;
2655
2656	/* activate the queue */
2657	mqd->cp_hqd_active = 1;
2658
2659	return 0;
2660}
2661
2662static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2663{
2664	struct amdgpu_device *adev = ring->adev;
2665	struct v9_mqd *mqd = ring->mqd_ptr;
2666	int j;
2667
2668	/* disable wptr polling */
2669	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2670
2671	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2672	       mqd->cp_hqd_eop_base_addr_lo);
2673	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2674	       mqd->cp_hqd_eop_base_addr_hi);
2675
2676	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2677	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
2678	       mqd->cp_hqd_eop_control);
2679
2680	/* enable doorbell? */
2681	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2682	       mqd->cp_hqd_pq_doorbell_control);
2683
2684	/* disable the queue if it's active */
2685	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2686		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2687		for (j = 0; j < adev->usec_timeout; j++) {
2688			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2689				break;
2690			udelay(1);
2691		}
2692		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2693		       mqd->cp_hqd_dequeue_request);
2694		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
2695		       mqd->cp_hqd_pq_rptr);
2696		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
2697		       mqd->cp_hqd_pq_wptr_lo);
2698		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
2699		       mqd->cp_hqd_pq_wptr_hi);
2700	}
2701
2702	/* set the pointer to the MQD */
2703	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
2704	       mqd->cp_mqd_base_addr_lo);
2705	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
2706	       mqd->cp_mqd_base_addr_hi);
2707
2708	/* set MQD vmid to 0 */
2709	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
2710	       mqd->cp_mqd_control);
2711
2712	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2713	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
2714	       mqd->cp_hqd_pq_base_lo);
2715	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
2716	       mqd->cp_hqd_pq_base_hi);
2717
2718	/* set up the HQD, this is similar to CP_RB0_CNTL */
2719	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
2720	       mqd->cp_hqd_pq_control);
2721
2722	/* set the wb address whether it's enabled or not */
2723	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
2724				mqd->cp_hqd_pq_rptr_report_addr_lo);
2725	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2726				mqd->cp_hqd_pq_rptr_report_addr_hi);
2727
2728	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2729	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
2730	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
2731	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
2732	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
2733
2734	/* enable the doorbell if requested */
2735	if (ring->use_doorbell) {
2736		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
2737					(AMDGPU_DOORBELL64_KIQ *2) << 2);
2738		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
2739					(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
2740	}
2741
2742	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
2743	       mqd->cp_hqd_pq_doorbell_control);
2744
2745	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2746	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
2747	       mqd->cp_hqd_pq_wptr_lo);
2748	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
2749	       mqd->cp_hqd_pq_wptr_hi);
2750
2751	/* set the vmid for the queue */
2752	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
2753
2754	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
2755	       mqd->cp_hqd_persistent_state);
2756
2757	/* activate the queue */
2758	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
2759	       mqd->cp_hqd_active);
2760
2761	if (ring->use_doorbell)
2762		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
2763
2764	return 0;
2765}
2766
2767static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
2768{
2769	struct amdgpu_device *adev = ring->adev;
2770	int j;
2771
2772	/* disable the queue if it's active */
2773	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
2774
2775		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
2776
2777		for (j = 0; j < adev->usec_timeout; j++) {
2778			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
2779				break;
2780			udelay(1);
2781		}
2782
2783		if (j == adev->usec_timeout) {
2784			DRM_DEBUG("KIQ dequeue request failed.\n");
2785
2786			/* Manual disable if dequeue request times out */
2787			WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
2788		}
2789
2790		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
2791		      0);
2792	}
2793
2794	WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
2795	WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
2796	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
2797	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
2798	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
2799	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
2800	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
2801	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
2802
2803	return 0;
2804}
2805
2806static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2807{
2808	struct amdgpu_device *adev = ring->adev;
2809	struct v9_mqd *mqd = ring->mqd_ptr;
2810	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
2811
2812	gfx_v9_0_kiq_setting(ring);
2813
2814	if (adev->in_gpu_reset) { /* for GPU_RESET case */
2815		/* reset MQD to a clean status */
2816		if (adev->gfx.mec.mqd_backup[mqd_idx])
2817			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2818
2819		/* reset ring buffer */
2820		ring->wptr = 0;
2821		amdgpu_ring_clear_ring(ring);
2822
2823		mutex_lock(&adev->srbm_mutex);
2824		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2825		gfx_v9_0_kiq_init_register(ring);
2826		soc15_grbm_select(adev, 0, 0, 0, 0);
2827		mutex_unlock(&adev->srbm_mutex);
2828	} else {
2829		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2830		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2831		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2832		mutex_lock(&adev->srbm_mutex);
2833		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2834		gfx_v9_0_mqd_init(ring);
2835		gfx_v9_0_kiq_init_register(ring);
2836		soc15_grbm_select(adev, 0, 0, 0, 0);
2837		mutex_unlock(&adev->srbm_mutex);
2838
2839		if (adev->gfx.mec.mqd_backup[mqd_idx])
2840			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2841	}
2842
2843	return 0;
2844}
2845
2846static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
2847{
2848	struct amdgpu_device *adev = ring->adev;
2849	struct v9_mqd *mqd = ring->mqd_ptr;
2850	int mqd_idx = ring - &adev->gfx.compute_ring[0];
2851
2852	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
2853		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
2854		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
2855		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
2856		mutex_lock(&adev->srbm_mutex);
2857		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2858		gfx_v9_0_mqd_init(ring);
2859		soc15_grbm_select(adev, 0, 0, 0, 0);
2860		mutex_unlock(&adev->srbm_mutex);
2861
2862		if (adev->gfx.mec.mqd_backup[mqd_idx])
2863			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
2864	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
2865		/* reset MQD to a clean status */
2866		if (adev->gfx.mec.mqd_backup[mqd_idx])
2867			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
2868
2869		/* reset ring buffer */
2870		ring->wptr = 0;
2871		amdgpu_ring_clear_ring(ring);
2872	} else {
2873		amdgpu_ring_clear_ring(ring);
2874	}
2875
2876	return 0;
2877}
2878
2879static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
2880{
2881	struct amdgpu_ring *ring = NULL;
2882	int r = 0, i;
2883
2884	gfx_v9_0_cp_compute_enable(adev, true);
2885
2886	ring = &adev->gfx.kiq.ring;
2887
2888	r = amdgpu_bo_reserve(ring->mqd_obj, false);
2889	if (unlikely(r != 0))
2890		goto done;
2891
2892	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2893	if (!r) {
2894		r = gfx_v9_0_kiq_init_queue(ring);
2895		amdgpu_bo_kunmap(ring->mqd_obj);
2896		ring->mqd_ptr = NULL;
2897	}
2898	amdgpu_bo_unreserve(ring->mqd_obj);
2899	if (r)
2900		goto done;
2901
2902	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2903		ring = &adev->gfx.compute_ring[i];
2904
2905		r = amdgpu_bo_reserve(ring->mqd_obj, false);
2906		if (unlikely(r != 0))
2907			goto done;
2908		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2909		if (!r) {
2910			r = gfx_v9_0_kcq_init_queue(ring);
2911			amdgpu_bo_kunmap(ring->mqd_obj);
2912			ring->mqd_ptr = NULL;
2913		}
2914		amdgpu_bo_unreserve(ring->mqd_obj);
2915		if (r)
2916			goto done;
2917	}
2918
2919	r = gfx_v9_0_kiq_kcq_enable(adev);
2920done:
2921	return r;
2922}
2923
2924static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
2925{
2926	int r, i;
2927	struct amdgpu_ring *ring;
2928
2929	if (!(adev->flags & AMD_IS_APU))
2930		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2931
2932	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2933		/* legacy firmware loading */
2934		r = gfx_v9_0_cp_gfx_load_microcode(adev);
2935		if (r)
2936			return r;
2937
2938		r = gfx_v9_0_cp_compute_load_microcode(adev);
2939		if (r)
2940			return r;
2941	}
2942
2943	r = gfx_v9_0_cp_gfx_resume(adev);
2944	if (r)
2945		return r;
2946
2947	r = gfx_v9_0_kiq_resume(adev);
2948	if (r)
2949		return r;
2950
2951	ring = &adev->gfx.gfx_ring[0];
2952	r = amdgpu_ring_test_ring(ring);
2953	if (r) {
2954		ring->ready = false;
2955		return r;
2956	}
2957
2958	ring = &adev->gfx.kiq.ring;
2959	ring->ready = true;
2960	r = amdgpu_ring_test_ring(ring);
2961	if (r)
2962		ring->ready = false;
2963
2964	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2965		ring = &adev->gfx.compute_ring[i];
2966
2967		ring->ready = true;
2968		r = amdgpu_ring_test_ring(ring);
2969		if (r)
2970			ring->ready = false;
2971	}
2972
2973	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2974
2975	return 0;
2976}
2977
2978static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
2979{
2980	gfx_v9_0_cp_gfx_enable(adev, enable);
2981	gfx_v9_0_cp_compute_enable(adev, enable);
2982}
2983
2984static int gfx_v9_0_hw_init(void *handle)
2985{
2986	int r;
2987	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2988
2989	gfx_v9_0_init_golden_registers(adev);
2990
2991	gfx_v9_0_gpu_init(adev);
2992
2993	r = gfx_v9_0_rlc_resume(adev);
2994	if (r)
2995		return r;
2996
2997	r = gfx_v9_0_cp_resume(adev);
2998	if (r)
2999		return r;
3000
3001	r = gfx_v9_0_ngg_en(adev);
3002	if (r)
3003		return r;
3004
3005	return r;
3006}
3007
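/*
 * Ask the KIQ to unmap (reset) a single compute queue, then poll a scratch
 * register to confirm the request completed.
 */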
3008static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
3009{
3010	struct amdgpu_device *adev = kiq_ring->adev;
3011	uint32_t scratch, tmp = 0;
3012	int r, i;
3013
3014	r = amdgpu_gfx_scratch_get(adev, &scratch);
3015	if (r) {
3016		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
3017		return r;
3018	}
3019	WREG32(scratch, 0xCAFEDEAD);
3020
3021	r = amdgpu_ring_alloc(kiq_ring, 10);
3022	if (r) {
3023		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3024		amdgpu_gfx_scratch_free(adev, scratch);
3025		return r;
3026	}
3027
3028	/* unmap queues */
3029	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3030	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3031						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3032						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3033						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3034						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3035	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3036	amdgpu_ring_write(kiq_ring, 0);
3037	amdgpu_ring_write(kiq_ring, 0);
3038	amdgpu_ring_write(kiq_ring, 0);
3039	/* write to scratch for completion */
3040	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3041	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
3042	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
3043	amdgpu_ring_commit(kiq_ring);
3044
3045	for (i = 0; i < adev->usec_timeout; i++) {
3046		tmp = RREG32(scratch);
3047		if (tmp == 0xDEADBEEF)
3048			break;
3049		DRM_UDELAY(1);
3050	}
3051	if (i >= adev->usec_timeout) {
3052		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
3053		r = -EINVAL;
3054	}
3055	amdgpu_gfx_scratch_free(adev, scratch);
3056	return r;
3057}
3058
3059static int gfx_v9_0_hw_fini(void *handle)
3060{
3061	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3062	int i;
3063
 
 
3064	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3065	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3066
3067	/* disable KCQs so the CPC does not touch memory that is no longer valid */
3068	for (i = 0; i < adev->gfx.num_compute_rings; i++)
3069		gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
 
3070
3071	if (amdgpu_sriov_vf(adev)) {
3072		gfx_v9_0_cp_gfx_enable(adev, false);
3073		/* must disable polling for SRIOV once hw is finished, otherwise
3074		 * the CPC engine may keep fetching the WB address, which is
3075		 * already invalid after sw fini, and trigger a DMAR read error
3076		 * on the hypervisor side.
3077		 */
3078		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3079		return 0;
3080	}
3081
3082	/* Use the deinitialize sequence from CAIL when unbinding the device
3083	 * from the driver, otherwise KIQ hangs when binding it back.
3084	 */
3085	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
3086		mutex_lock(&adev->srbm_mutex);
3087		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3088				adev->gfx.kiq.ring.pipe,
3089				adev->gfx.kiq.ring.queue, 0);
3090		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3091		soc15_grbm_select(adev, 0, 0, 0, 0);
3092		mutex_unlock(&adev->srbm_mutex);
3093	}
3094
3095	gfx_v9_0_cp_enable(adev, false);
3096	gfx_v9_0_rlc_stop(adev);
3097
 
 
 
 
 
 
 
 
3098	return 0;
3099}
3100
3101static int gfx_v9_0_suspend(void *handle)
3102{
3103	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3104
3105	adev->gfx.in_suspend = true;
3106	return gfx_v9_0_hw_fini(adev);
3107}
3108
3109static int gfx_v9_0_resume(void *handle)
3110{
3111	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3112	int r;
3113
3114	r = gfx_v9_0_hw_init(adev);
3115	adev->gfx.in_suspend = false;
3116	return r;
3117}
3118
3119static bool gfx_v9_0_is_idle(void *handle)
3120{
3121	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3122
3123	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3124				GRBM_STATUS, GUI_ACTIVE))
3125		return false;
3126	else
3127		return true;
3128}
3129
3130static int gfx_v9_0_wait_for_idle(void *handle)
3131{
3132	unsigned i;
3133	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3134
3135	for (i = 0; i < adev->usec_timeout; i++) {
3136		if (gfx_v9_0_is_idle(handle))
3137			return 0;
3138		udelay(1);
3139	}
3140	return -ETIMEDOUT;
3141}
3142
3143static int gfx_v9_0_soft_reset(void *handle)
3144{
3145	u32 grbm_soft_reset = 0;
3146	u32 tmp;
3147	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3148
3149	/* GRBM_STATUS */
3150	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3151	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3152		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3153		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3154		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3155		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3156		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3157		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3158						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3159		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3160						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3161	}
3162
3163	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3164		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3165						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3166	}
3167
3168	/* GRBM_STATUS2 */
3169	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3170	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3171		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3172						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3173
3174
3175	if (grbm_soft_reset) {
3176		/* stop the rlc */
3177		gfx_v9_0_rlc_stop(adev);
3178
3179		/* Disable GFX parsing/prefetching */
3180		gfx_v9_0_cp_gfx_enable(adev, false);
 
3181
3182		/* Disable MEC parsing/prefetching */
3183		gfx_v9_0_cp_compute_enable(adev, false);
3184
3185		if (grbm_soft_reset) {
3186			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3187			tmp |= grbm_soft_reset;
3188			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3189			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3190			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3191
3192			udelay(50);
3193
3194			tmp &= ~grbm_soft_reset;
3195			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3196			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3197		}
3198
3199		/* Wait a little for things to settle down */
3200		udelay(50);
3201	}
3202	return 0;
3203}
3204
3205static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3206{
3207	uint64_t clock;
3208
3209	mutex_lock(&adev->gfx.gpu_clock_mutex);
3210	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3211	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3212		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3213	mutex_unlock(&adev->gfx.gpu_clock_mutex);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3214	return clock;
3215}
3216
3217static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3218					  uint32_t vmid,
3219					  uint32_t gds_base, uint32_t gds_size,
3220					  uint32_t gws_base, uint32_t gws_size,
3221					  uint32_t oa_base, uint32_t oa_size)
3222{
3223	struct amdgpu_device *adev = ring->adev;
3224
3225	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
3226	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
3227
3228	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
3229	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
3230
3231	oa_base = oa_base >> AMDGPU_OA_SHIFT;
3232	oa_size = oa_size >> AMDGPU_OA_SHIFT;
3233
3234	/* GDS Base */
3235	gfx_v9_0_write_data_to_reg(ring, 0, false,
3236				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3237				   gds_base);
3238
3239	/* GDS Size */
3240	gfx_v9_0_write_data_to_reg(ring, 0, false,
3241				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3242				   gds_size);
3243
3244	/* GWS */
3245	gfx_v9_0_write_data_to_reg(ring, 0, false,
3246				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3247				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3248
3249	/* OA */
3250	gfx_v9_0_write_data_to_reg(ring, 0, false,
3251				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3252				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3253}
3254
3255static int gfx_v9_0_early_init(void *handle)
3256{
3257	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3258
3259	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3260	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 
 
 
 
 
 
 
 
 
3261	gfx_v9_0_set_ring_funcs(adev);
3262	gfx_v9_0_set_irq_funcs(adev);
3263	gfx_v9_0_set_gds_init(adev);
3264	gfx_v9_0_set_rlc_funcs(adev);
3265
3266	return 0;
3267}
3268
3269static int gfx_v9_0_late_init(void *handle)
3270{
3271	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3272	int r;
3273
3274	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3275	if (r)
3276		return r;
3277
3278	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3279	if (r)
3280		return r;
3281
 
 
 
 
 
 
 
 
 
 
 
3282	return 0;
3283}
3284
3285static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
3286{
3287	uint32_t rlc_setting, data;
3288	unsigned i;
3289
3290	if (adev->gfx.rlc.in_safe_mode)
3291		return;
3292
3293	/* if RLC is not enabled, do nothing */
3294	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3295	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3296		return;
3297
3298	if (adev->cg_flags &
3299	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
3300	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
3301		data = RLC_SAFE_MODE__CMD_MASK;
3302		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3303		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3304
3305		/* wait for RLC_SAFE_MODE */
3306		for (i = 0; i < adev->usec_timeout; i++) {
3307			if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3308				break;
3309			udelay(1);
3310		}
3311		adev->gfx.rlc.in_safe_mode = true;
3312	}
3313}
3314
3315static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
3316{
3317	uint32_t rlc_setting, data;
3318
3319	if (!adev->gfx.rlc.in_safe_mode)
3320		return;
3321
3322	/* if RLC is not enabled, do nothing */
3323	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3324	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3325		return;
3326
3327	if (adev->cg_flags &
3328	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
3329		/*
3330		 * Try to exit safe mode only if it is already in safe
3331		 * mode.
3332		 */
3333		data = RLC_SAFE_MODE__CMD_MASK;
3334		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3335		adev->gfx.rlc.in_safe_mode = false;
3336	}
3337}
3338
 
 
 
 
 
 
 
 
3339static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3340						bool enable)
3341{
3342	/* TODO: double check if we need to perform under safe mode */
3343	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
3344
3345	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3346		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3347		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3348			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3349	} else {
3350		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3351		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
 
3352	}
3353
3354	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
3355}
3356
3357static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3358						bool enable)
3359{
3360	/* TODO: double check if we need to perform under safe mode */
3361	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
3362
3363	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3364		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3365	else
3366		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3367
3368	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3369		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3370	else
3371		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3372
3373	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
3374}
3375
3376static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3377						      bool enable)
3378{
3379	uint32_t data, def;
3380
 
 
3381	/* It is disabled by HW by default */
3382	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3383		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
3384		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3385		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
3386			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
 
 
 
3387			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3388			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
3389
3390		/* only for Vega10 & Raven1 */
3391		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
3392
3393		if (def != data)
3394			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3395
3396		/* MGLS is a global flag to control all MGLS in GFX */
3397		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
3398			/* 2 - RLC memory Light sleep */
3399			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
3400				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3401				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3402				if (def != data)
3403					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
3404			}
3405			/* 3 - CP memory Light sleep */
3406			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
3407				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3408				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3409				if (def != data)
3410					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
3411			}
3412		}
3413	} else {
3414		/* 1 - MGCG_OVERRIDE */
3415		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3416		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
3417			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
 
 
 
3418			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3419			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
3420			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
 
3421		if (def != data)
3422			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3423
3424		/* 2 - disable MGLS in RLC */
3425		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3426		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
3427			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
3428			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
3429		}
3430
3431		/* 3 - disable MGLS in CP */
3432		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3433		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
3434			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
3435			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
3436		}
3437	}
 
 
3438}
3439
3440static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
3441					   bool enable)
3442{
3443	uint32_t data, def;
3444
3445	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 
 
3446
3447	/* Enable 3D CGCG/CGLS */
3448	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
3449		/* write cmd to clear cgcg/cgls ov */
3450		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3451		/* unset CGCG override */
3452		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3453		/* update CGCG and CGLS override bits */
3454		if (def != data)
3455			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3456		/* enable 3Dcgcg FSM(0x0020003f) */
 
3457		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3458		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3459			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
 
 
 
 
 
3460		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3461			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3462				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
3463		if (def != data)
3464			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
3465
3466		/* set IDLE_POLL_COUNT(0x00900100) */
3467		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
3468		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3469			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3470		if (def != data)
3471			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
3472	} else {
3473		/* Disable CGCG/CGLS */
3474		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3475		/* disable cgcg, cgls should be disabled */
3476		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
3477			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
3478		/* disable cgcg and cgls in FSM */
3479		if (def != data)
3480			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
3481	}
3482
3483	adev->gfx.rlc.funcs->exit_safe_mode(adev);
3484}
3485
3486static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3487						      bool enable)
3488{
3489	uint32_t def, data;
3490
3491	adev->gfx.rlc.funcs->enter_safe_mode(adev);
3492
3493	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
3494		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3495		/* unset CGCG override */
3496		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3497		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3498			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3499		else
3500			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3501		/* update CGCG and CGLS override bits */
3502		if (def != data)
3503			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
3504
3505		/* enable cgcg FSM(0x0020003F) */
3506		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3507		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3508			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
 
 
 
 
 
3509		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3510			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3511				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3512		if (def != data)
3513			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
3514
3515		/* set IDLE_POLL_COUNT(0x00900100) */
3516		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
3517		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3518			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3519		if (def != data)
3520			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
3521	} else {
3522		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3523		/* reset CGCG/CGLS bits */
3524		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
3525		/* disable cgcg and cgls in FSM */
3526		if (def != data)
3527			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
3528	}
3529
3530	adev->gfx.rlc.funcs->exit_safe_mode(adev);
3531}
3532
3533static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
3534					    bool enable)
3535{
3536	if (enable) {
3537		/* CGCG/CGLS should be enabled after MGCG/MGLS
3538		 * ===  MGCG + MGLS ===
3539		 */
3540		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
3541		/* ===  CGCG /CGLS for GFX 3D Only === */
3542		gfx_v9_0_update_3d_clock_gating(adev, enable);
3543		/* ===  CGCG + CGLS === */
3544		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
3545	} else {
3546		/* CGCG/CGLS should be disabled before MGCG/MGLS
3547		 * ===  CGCG + CGLS ===
3548		 */
3549		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
3550		/* ===  CGCG /CGLS for GFX 3D Only === */
3551		gfx_v9_0_update_3d_clock_gating(adev, enable);
3552		/* ===  MGCG + MGLS === */
3553		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
3554	}
3555	return 0;
3556}
3557
3558static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
3559	.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
3560	.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
 
 
 
 
 
 
 
 
 
 
 
3561};
3562
3563static int gfx_v9_0_set_powergating_state(void *handle,
3564					  enum amd_powergating_state state)
3565{
3566	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3567	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
 
 
 
 
 
 
 
3568
3569	switch (adev->asic_type) {
3570	case CHIP_RAVEN:
3571		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3572			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
3573			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
3574		} else {
3575			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
3576			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
3577		}
3578
3579		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3580			gfx_v9_0_enable_cp_power_gating(adev, true);
3581		else
3582			gfx_v9_0_enable_cp_power_gating(adev, false);
3583
3584		/* update gfx cgpg state */
3585		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
3586
3587		/* update mgcg state */
3588		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
 
 
 
 
 
 
3589		break;
3590	default:
3591		break;
3592	}
3593
3594	return 0;
3595}
3596
3597static int gfx_v9_0_set_clockgating_state(void *handle,
3598					  enum amd_clockgating_state state)
3599{
3600	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3601
3602	if (amdgpu_sriov_vf(adev))
3603		return 0;
3604
3605	switch (adev->asic_type) {
3606	case CHIP_VEGA10:
3607	case CHIP_VEGA12:
3608	case CHIP_RAVEN:
 
 
 
 
 
3609		gfx_v9_0_update_gfx_clock_gating(adev,
3610						 state == AMD_CG_STATE_GATE ? true : false);
3611		break;
3612	default:
3613		break;
3614	}
3615	return 0;
3616}
3617
3618static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
3619{
3620	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3621	int data;
3622
3623	if (amdgpu_sriov_vf(adev))
3624		*flags = 0;
3625
3626	/* AMD_CG_SUPPORT_GFX_MGCG */
3627	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3628	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
3629		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
3630
3631	/* AMD_CG_SUPPORT_GFX_CGCG */
3632	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
3633	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
3634		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
3635
3636	/* AMD_CG_SUPPORT_GFX_CGLS */
3637	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
3638		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
3639
3640	/* AMD_CG_SUPPORT_GFX_RLC_LS */
3641	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
3642	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
3643		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
3644
3645	/* AMD_CG_SUPPORT_GFX_CP_LS */
3646	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
3647	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
3648		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
3649
3650	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
3651	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
3652	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
3653		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
3654
3655	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
3656	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
3657		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
 
 
3658}
3659
3660static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
3661{
3662	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
3663}
3664
3665static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
3666{
3667	struct amdgpu_device *adev = ring->adev;
3668	u64 wptr;
3669
3670	/* XXX check if swapping is necessary on BE */
3671	if (ring->use_doorbell) {
3672		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
3673	} else {
3674		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
3675		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
3676	}
3677
3678	return wptr;
3679}
3680
3681static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3682{
3683	struct amdgpu_device *adev = ring->adev;
3684
3685	if (ring->use_doorbell) {
3686		/* XXX check if swapping is necessary on BE */
3687		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
3688		WDOORBELL64(ring->doorbell_index, ring->wptr);
3689	} else {
3690		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3691		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3692	}
3693}
3694
3695static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
3696{
3697	struct amdgpu_device *adev = ring->adev;
3698	u32 ref_and_mask, reg_mem_engine;
3699	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
3700
3701	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3702		switch (ring->me) {
3703		case 1:
3704			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
3705			break;
3706		case 2:
3707			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
3708			break;
3709		default:
3710			return;
3711		}
3712		reg_mem_engine = 0;
3713	} else {
3714		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
3715		reg_mem_engine = 1; /* pfp */
3716	}
3717
3718	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
3719			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
3720			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
3721			      ref_and_mask, ref_and_mask, 0x20);
3722}
3723
3724static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
3725                                      struct amdgpu_ib *ib,
3726                                      unsigned vmid, bool ctx_switch)
 
3727{
 
3728	u32 header, control = 0;
3729
3730	if (ib->flags & AMDGPU_IB_FLAG_CE)
3731		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3732	else
3733		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3734
3735	control |= ib->length_dw | (vmid << 24);
3736
3737	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
3738		control |= INDIRECT_BUFFER_PRE_ENB(1);
3739
3740		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
3741			gfx_v9_0_ring_emit_de_meta(ring);
 
 
 
 
 
 
 
3742	}
3743
3744	amdgpu_ring_write(ring, header);
3745	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3746	amdgpu_ring_write(ring,
3747#ifdef __BIG_ENDIAN
3748		(2 << 0) |
3749#endif
3750		lower_32_bits(ib->gpu_addr));
3751	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 
3752	amdgpu_ring_write(ring, control);
3753}
3754
3755static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
3756					  struct amdgpu_ib *ib,
3757					  unsigned vmid, bool ctx_switch)
3758{
3759	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
3760
3761	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
3762	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3763	amdgpu_ring_write(ring,
3764#ifdef __BIG_ENDIAN
3765			  (2 << 0) |
3766#endif
3767			  lower_32_bits(ib->gpu_addr));
3768	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
3769	amdgpu_ring_write(ring, control);
3770}
3771
3772static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
3773				     u64 seq, unsigned flags)
3774{
3775	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
3776	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
 
 
3777
3778	/* RELEASE_MEM - flush caches, send int */
3779	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
3780	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
3781				 EOP_TC_ACTION_EN |
3782				 EOP_TC_WB_ACTION_EN |
3783				 EOP_TC_MD_ACTION_EN |
3784				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3785				 EVENT_INDEX(5)));
 
 
 
 
 
 
 
3786	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
3787
3788	/*
3789	 * the address should be Qword aligned for a 64-bit write, Dword
3790	 * aligned if only the low 32 bits are sent (data high is discarded)
3791	 */
3792	if (write64bit)
3793		BUG_ON(addr & 0x7);
3794	else
3795		BUG_ON(addr & 0x3);
3796	amdgpu_ring_write(ring, lower_32_bits(addr));
3797	amdgpu_ring_write(ring, upper_32_bits(addr));
3798	amdgpu_ring_write(ring, lower_32_bits(seq));
3799	amdgpu_ring_write(ring, upper_32_bits(seq));
3800	amdgpu_ring_write(ring, 0);
3801}
3802
3803static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3804{
3805	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3806	uint32_t seq = ring->fence_drv.sync_seq;
3807	uint64_t addr = ring->fence_drv.gpu_addr;
3808
3809	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
3810			      lower_32_bits(addr), upper_32_bits(addr),
3811			      seq, 0xffffffff, 4);
3812}
3813
3814static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3815					unsigned vmid, uint64_t pd_addr)
3816{
3817	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3818
3819	/* compute doesn't have PFP */
3820	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
3821		/* sync PFP to ME, otherwise we might get invalid PFP reads */
3822		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3823		amdgpu_ring_write(ring, 0x0);
3824	}
3825}
3826
3827static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3828{
3829	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
3830}
3831
3832static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3833{
3834	u64 wptr;
3835
3836	/* XXX check if swapping is necessary on BE */
3837	if (ring->use_doorbell)
3838		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
3839	else
3840		BUG();
3841	return wptr;
3842}
3843
3844static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
3845					   bool acquire)
3846{
3847	struct amdgpu_device *adev = ring->adev;
3848	int pipe_num, tmp, reg;
3849	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
3850
3851	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
3852
3853	/* first me only has 2 entries, GFX and HP3D */
3854	if (ring->me > 0)
3855		pipe_num -= 2;
3856
3857	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
3858	tmp = RREG32(reg);
3859	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
3860	WREG32(reg, tmp);
3861}
3862
3863static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
3864					    struct amdgpu_ring *ring,
3865					    bool acquire)
3866{
3867	int i, pipe;
3868	bool reserve;
3869	struct amdgpu_ring *iring;
3870
3871	mutex_lock(&adev->gfx.pipe_reserve_mutex);
3872	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
3873	if (acquire)
3874		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3875	else
3876		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3877
3878	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
3879		/* Clear all reservations - everyone reacquires all resources */
3880		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
3881			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
3882						       true);
3883
3884		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
3885			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
3886						       true);
3887	} else {
3888		/* Lower all pipes without a current reservation */
3889		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
3890			iring = &adev->gfx.gfx_ring[i];
3891			pipe = amdgpu_gfx_queue_to_bit(adev,
3892						       iring->me,
3893						       iring->pipe,
3894						       0);
3895			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3896			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3897		}
3898
3899		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
3900			iring = &adev->gfx.compute_ring[i];
3901			pipe = amdgpu_gfx_queue_to_bit(adev,
3902						       iring->me,
3903						       iring->pipe,
3904						       0);
3905			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
3906			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
3907		}
3908	}
3909
3910	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
3911}
3912
3913static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
3914				      struct amdgpu_ring *ring,
3915				      bool acquire)
3916{
3917	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
3918	uint32_t queue_priority = acquire ? 0xf : 0x0;
3919
3920	mutex_lock(&adev->srbm_mutex);
3921	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3922
3923	WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
3924	WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
3925
3926	soc15_grbm_select(adev, 0, 0, 0, 0);
3927	mutex_unlock(&adev->srbm_mutex);
3928}
3929
3930static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
3931					       enum drm_sched_priority priority)
3932{
3933	struct amdgpu_device *adev = ring->adev;
3934	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
3935
3936	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
3937		return;
3938
3939	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
3940	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
3941}
3942
3943static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3944{
3945	struct amdgpu_device *adev = ring->adev;
3946
3947	/* XXX check if swapping is necessary on BE */
3948	if (ring->use_doorbell) {
3949		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
3950		WDOORBELL64(ring->doorbell_index, ring->wptr);
3951	} else {
3952		BUG(); /* only DOORBELL method supported on gfx9 now */
3953	}
3954}
3955
3956static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
3957					 u64 seq, unsigned int flags)
3958{
3959	struct amdgpu_device *adev = ring->adev;
3960
3961	/* we only allocate 32bit for each seq wb address */
3962	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
3963
3964	/* write fence seq to the "addr" */
3965	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3966	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3967				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
3968	amdgpu_ring_write(ring, lower_32_bits(addr));
3969	amdgpu_ring_write(ring, upper_32_bits(addr));
3970	amdgpu_ring_write(ring, lower_32_bits(seq));
3971
3972	if (flags & AMDGPU_FENCE_FLAG_INT) {
3973		/* set register to trigger INT */
3974		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3975		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3976					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
3977		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
3978		amdgpu_ring_write(ring, 0);
3979		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
3980	}
3981}
3982
3983static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
3984{
3985	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3986	amdgpu_ring_write(ring, 0);
3987}
3988
3989static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
3990{
 
3991	struct v9_ce_ib_state ce_payload = {0};
3992	uint64_t csa_addr;
 
3993	int cnt;
3994
3995	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
3996	csa_addr = amdgpu_csa_vaddr(ring->adev);
 
 
 
 
 
 
 
 
 
 
 
 
 
3997
3998	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
3999	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4000				 WRITE_DATA_DST_SEL(8) |
4001				 WR_CONFIRM) |
4002				 WRITE_DATA_CACHE_POLICY(0));
4003	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4004	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4005	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4006}
4007
4008static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4009{
 
4010	struct v9_de_ib_state de_payload = {0};
4011	uint64_t csa_addr, gds_addr;
 
4012	int cnt;
4013
4014	csa_addr = amdgpu_csa_vaddr(ring->adev);
4015	gds_addr = csa_addr + 4096;
4016	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4017	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4018
4019	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4020	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4021	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4022				 WRITE_DATA_DST_SEL(8) |
4023				 WR_CONFIRM) |
4024				 WRITE_DATA_CACHE_POLICY(0));
4025	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4026	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4027	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
 
 
 
 
 
 
 
4028}
4029
4030static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
 
4031{
 
 
4032	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4033	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4034}
4035
4036static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4037{
4038	uint32_t dw2 = 0;
4039
4040	if (amdgpu_sriov_vf(ring->adev))
4041		gfx_v9_0_ring_emit_ce_meta(ring);
4042
4043	gfx_v9_0_ring_emit_tmz(ring, true);
4044
4045	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4046	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4047		/* set load_global_config & load_global_uconfig */
4048		dw2 |= 0x8001;
4049		/* set load_cs_sh_regs */
4050		dw2 |= 0x01000000;
4051		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4052		dw2 |= 0x10002;
4053
4054		/* set load_ce_ram if preamble presented */
4055		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4056			dw2 |= 0x10000000;
4057	} else {
4058		/* still load_ce_ram if this is the first time the preamble is
4059		 * presented, even though no context switch happens.
4060		 */
4061		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4062			dw2 |= 0x10000000;
4063	}
4064
4065	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4066	amdgpu_ring_write(ring, dw2);
4067	amdgpu_ring_write(ring, 0);
4068}
4069
4070static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4071{
4072	unsigned ret;
4073	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4074	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4075	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4076	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4077	ret = ring->wptr & ring->buf_mask;
4078	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4079	return ret;
4080}
4081
4082static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4083{
4084	unsigned cur;
4085	BUG_ON(offset > ring->buf_mask);
4086	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4087
4088	cur = (ring->wptr & ring->buf_mask) - 1;
4089	if (likely(cur > offset))
4090		ring->ring[offset] = cur - offset;
4091	else
4092		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4093}
4094
4095static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 
4096{
4097	struct amdgpu_device *adev = ring->adev;
4098
4099	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4100	amdgpu_ring_write(ring, 0 |	/* src: register*/
4101				(5 << 8) |	/* dst: memory */
4102				(1 << 20));	/* write confirm */
4103	amdgpu_ring_write(ring, reg);
4104	amdgpu_ring_write(ring, 0);
4105	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4106				adev->virt.reg_val_offs * 4));
4107	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4108				adev->virt.reg_val_offs * 4));
4109}
4110
4111static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4112				    uint32_t val)
4113{
4114	uint32_t cmd = 0;
4115
4116	switch (ring->funcs->type) {
4117	case AMDGPU_RING_TYPE_GFX:
4118		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4119		break;
4120	case AMDGPU_RING_TYPE_KIQ:
4121		cmd = (1 << 16); /* no inc addr */
4122		break;
4123	default:
4124		cmd = WR_CONFIRM;
4125		break;
4126	}
4127	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4128	amdgpu_ring_write(ring, cmd);
4129	amdgpu_ring_write(ring, reg);
4130	amdgpu_ring_write(ring, 0);
4131	amdgpu_ring_write(ring, val);
4132}
4133
4134static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4135					uint32_t val, uint32_t mask)
4136{
4137	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4138}
4139
4140static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4141						 enum amdgpu_interrupt_state state)
4142{
4143	switch (state) {
4144	case AMDGPU_IRQ_STATE_DISABLE:
4145	case AMDGPU_IRQ_STATE_ENABLE:
4146		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4147			       TIME_STAMP_INT_ENABLE,
4148			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4149		break;
4150	default:
4151		break;
4152	}
4153}
4154
4155static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4156						     int me, int pipe,
4157						     enum amdgpu_interrupt_state state)
4158{
4159	u32 mec_int_cntl, mec_int_cntl_reg;
4160
4161	/*
4162	 * amdgpu controls only the first MEC. That's why this function only
4163	 * handles the setting of interrupts for this specific MEC. All other
4164	 * pipes' interrupts are set by amdkfd.
4165	 */
4166
4167	if (me == 1) {
4168		switch (pipe) {
4169		case 0:
4170			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4171			break;
4172		case 1:
4173			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4174			break;
4175		case 2:
4176			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4177			break;
4178		case 3:
4179			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4180			break;
4181		default:
4182			DRM_DEBUG("invalid pipe %d\n", pipe);
4183			return;
4184		}
4185	} else {
4186		DRM_DEBUG("invalid me %d\n", me);
4187		return;
4188	}
4189
4190	switch (state) {
4191	case AMDGPU_IRQ_STATE_DISABLE:
4192		mec_int_cntl = RREG32(mec_int_cntl_reg);
4193		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4194					     TIME_STAMP_INT_ENABLE, 0);
4195		WREG32(mec_int_cntl_reg, mec_int_cntl);
4196		break;
4197	case AMDGPU_IRQ_STATE_ENABLE:
4198		mec_int_cntl = RREG32(mec_int_cntl_reg);
4199		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4200					     TIME_STAMP_INT_ENABLE, 1);
4201		WREG32(mec_int_cntl_reg, mec_int_cntl);
4202		break;
4203	default:
4204		break;
4205	}
4206}
4207
4208static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4209					     struct amdgpu_irq_src *source,
4210					     unsigned type,
4211					     enum amdgpu_interrupt_state state)
4212{
4213	switch (state) {
4214	case AMDGPU_IRQ_STATE_DISABLE:
4215	case AMDGPU_IRQ_STATE_ENABLE:
4216		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4217			       PRIV_REG_INT_ENABLE,
4218			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4219		break;
4220	default:
4221		break;
4222	}
4223
4224	return 0;
4225}
4226
4227static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4228					      struct amdgpu_irq_src *source,
4229					      unsigned type,
4230					      enum amdgpu_interrupt_state state)
4231{
4232	switch (state) {
4233	case AMDGPU_IRQ_STATE_DISABLE:
4234	case AMDGPU_IRQ_STATE_ENABLE:
4235		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4236			       PRIV_INSTR_INT_ENABLE,
4237			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
 
4238	default:
4239		break;
4240	}
4241
4242	return 0;
4243}
4244
4245static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4246					    struct amdgpu_irq_src *src,
4247					    unsigned type,
4248					    enum amdgpu_interrupt_state state)
4249{
4250	switch (type) {
4251	case AMDGPU_CP_IRQ_GFX_EOP:
4252		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4253		break;
4254	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4255		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4256		break;
4257	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4258		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4259		break;
4260	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4261		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4262		break;
4263	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4264		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4265		break;
4266	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4267		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4268		break;
4269	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4270		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4271		break;
4272	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4273		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4274		break;
4275	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4276		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4277		break;
4278	default:
4279		break;
4280	}
4281	return 0;
4282}
4283
4284static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
4285			    struct amdgpu_irq_src *source,
4286			    struct amdgpu_iv_entry *entry)
4287{
4288	int i;
4289	u8 me_id, pipe_id, queue_id;
4290	struct amdgpu_ring *ring;
4291
4292	DRM_DEBUG("IH: CP EOP\n");
4293	me_id = (entry->ring_id & 0x0c) >> 2;
4294	pipe_id = (entry->ring_id & 0x03) >> 0;
4295	queue_id = (entry->ring_id & 0x70) >> 4;
4296
4297	switch (me_id) {
4298	case 0:
4299		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
 
 
 
 
 
4300		break;
4301	case 1:
4302	case 2:
4303		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4304			ring = &adev->gfx.compute_ring[i];
4305			/* Per-queue interrupt is supported for MEC starting from VI.
4306			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
4307			 */
4308			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4309				amdgpu_fence_process(ring);
4310		}
4311		break;
4312	}
4313	return 0;
4314}
4315
4316static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
4317				 struct amdgpu_irq_src *source,
4318				 struct amdgpu_iv_entry *entry)
4319{
4320	DRM_ERROR("Illegal register access in command stream\n");
4321	schedule_work(&adev->reset_work);
4322	return 0;
4323}
4324
4325static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
4326				  struct amdgpu_irq_src *source,
4327				  struct amdgpu_iv_entry *entry)
4328{
4329	DRM_ERROR("Illegal instruction in command stream\n");
4330	schedule_work(&adev->reset_work);
4331	return 0;
4332}
4333
4334static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
4335					    struct amdgpu_irq_src *src,
4336					    unsigned int type,
4337					    enum amdgpu_interrupt_state state)
4338{
4339	uint32_t tmp, target;
4340	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
4341
4342	if (ring->me == 1)
4343		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4344	else
4345		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
4346	target += ring->pipe;
4347
4348	switch (type) {
4349	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
4350		if (state == AMDGPU_IRQ_STATE_DISABLE) {
4351			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
4352			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
4353						 GENERIC2_INT_ENABLE, 0);
4354			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
4355
4356			tmp = RREG32(target);
4357			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
4358						 GENERIC2_INT_ENABLE, 0);
4359			WREG32(target, tmp);
4360		} else {
4361			tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
4362			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
4363						 GENERIC2_INT_ENABLE, 1);
4364			WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
4365
4366			tmp = RREG32(target);
4367			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
4368						 GENERIC2_INT_ENABLE, 1);
4369			WREG32(target, tmp);
4370		}
4371		break;
4372	default:
4373		BUG(); /* kiq only supports GENERIC2_INT now */
4374		break;
4375	}
 
 
 
 
 
 
4376	return 0;
4377}
4378
4379static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
4380			    struct amdgpu_irq_src *source,
4381			    struct amdgpu_iv_entry *entry)
4382{
4383	u8 me_id, pipe_id, queue_id;
4384	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
 
 
 
 
 
 
4385
4386	me_id = (entry->ring_id & 0x0c) >> 2;
4387	pipe_id = (entry->ring_id & 0x03) >> 0;
4388	queue_id = (entry->ring_id & 0x70) >> 4;
4389	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
4390		   me_id, pipe_id, queue_id);
4391
4392	amdgpu_fence_process(ring);
4393	return 0;
4394}
4395
4396static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
4397	.name = "gfx_v9_0",
4398	.early_init = gfx_v9_0_early_init,
4399	.late_init = gfx_v9_0_late_init,
4400	.sw_init = gfx_v9_0_sw_init,
4401	.sw_fini = gfx_v9_0_sw_fini,
4402	.hw_init = gfx_v9_0_hw_init,
4403	.hw_fini = gfx_v9_0_hw_fini,
4404	.suspend = gfx_v9_0_suspend,
4405	.resume = gfx_v9_0_resume,
4406	.is_idle = gfx_v9_0_is_idle,
4407	.wait_for_idle = gfx_v9_0_wait_for_idle,
4408	.soft_reset = gfx_v9_0_soft_reset,
4409	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
4410	.set_powergating_state = gfx_v9_0_set_powergating_state,
4411	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
4412};
4413
4414static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
4415	.type = AMDGPU_RING_TYPE_GFX,
4416	.align_mask = 0xff,
4417	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
4418	.support_64bit_ptrs = true,
4419	.vmhub = AMDGPU_GFXHUB,
4420	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
4421	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
4422	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
4423	.emit_frame_size = /* totally 242 maximum if 16 IBs */
4424		5 +  /* COND_EXEC */
4425		7 +  /* PIPELINE_SYNC */
4426		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4427		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4428		2 + /* VM_FLUSH */
4429		8 +  /* FENCE for VM_FLUSH */
4430		20 + /* GDS switch */
4431		4 + /* double SWITCH_BUFFER,
4432		       the first COND_EXEC jump to the place just
4433			   prior to this double SWITCH_BUFFER  */
4434		5 + /* COND_EXEC */
4435		7 +	 /*	HDP_flush */
4436		4 +	 /*	VGT_flush */
4437		14 + /*	CE_META */
4438		31 + /*	DE_META */
4439		3 + /* CNTX_CTRL */
4440		5 + /* HDP_INVL */
4441		8 + 8 + /* FENCE x2 */
4442		2, /* SWITCH_BUFFER */
 
4443	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
4444	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
4445	.emit_fence = gfx_v9_0_ring_emit_fence,
4446	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
4447	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4448	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4449	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4450	.test_ring = gfx_v9_0_ring_test_ring,
4451	.test_ib = gfx_v9_0_ring_test_ib,
4452	.insert_nop = amdgpu_ring_insert_nop,
4453	.pad_ib = amdgpu_ring_generic_pad_ib,
4454	.emit_switch_buffer = gfx_v9_ring_emit_sb,
4455	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
4456	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
4457	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
4458	.emit_tmz = gfx_v9_0_ring_emit_tmz,
4459	.emit_wreg = gfx_v9_0_ring_emit_wreg,
4460	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 
 
 
 
 
 
4461};
4462
4463static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
4464	.type = AMDGPU_RING_TYPE_COMPUTE,
4465	.align_mask = 0xff,
4466	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
4467	.support_64bit_ptrs = true,
4468	.vmhub = AMDGPU_GFXHUB,
4469	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
4470	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
4471	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
4472	.emit_frame_size =
4473		20 + /* gfx_v9_0_ring_emit_gds_switch */
4474		7 + /* gfx_v9_0_ring_emit_hdp_flush */
4475		5 + /* hdp invalidate */
4476		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4477		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4478		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4479		2 + /* gfx_v9_0_ring_emit_vm_flush */
4480		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
4481	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 
 
 
4482	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
4483	.emit_fence = gfx_v9_0_ring_emit_fence,
4484	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
4485	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
4486	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
4487	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
4488	.test_ring = gfx_v9_0_ring_test_ring,
4489	.test_ib = gfx_v9_0_ring_test_ib,
4490	.insert_nop = amdgpu_ring_insert_nop,
4491	.pad_ib = amdgpu_ring_generic_pad_ib,
4492	.set_priority = gfx_v9_0_ring_set_priority_compute,
4493	.emit_wreg = gfx_v9_0_ring_emit_wreg,
4494	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 
 
 
4495};
4496
4497static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
4498	.type = AMDGPU_RING_TYPE_KIQ,
4499	.align_mask = 0xff,
4500	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
4501	.support_64bit_ptrs = true,
4502	.vmhub = AMDGPU_GFXHUB,
4503	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
4504	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
4505	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
4506	.emit_frame_size =
4507		20 + /* gfx_v9_0_ring_emit_gds_switch */
4508		7 + /* gfx_v9_0_ring_emit_hdp_flush */
4509		5 + /* hdp invalidate */
4510		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
4511		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
4512		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4513		2 + /* gfx_v9_0_ring_emit_vm_flush */
4514		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
4515	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
4516	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
4517	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
4518	.test_ring = gfx_v9_0_ring_test_ring,
4519	.test_ib = gfx_v9_0_ring_test_ib,
4520	.insert_nop = amdgpu_ring_insert_nop,
4521	.pad_ib = amdgpu_ring_generic_pad_ib,
4522	.emit_rreg = gfx_v9_0_ring_emit_rreg,
4523	.emit_wreg = gfx_v9_0_ring_emit_wreg,
4524	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 
4525};
4526
4527static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
4528{
4529	int i;
4530
4531	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
4532
4533	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4534		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
4535
 
 
 
 
 
4536	for (i = 0; i < adev->gfx.num_compute_rings; i++)
4537		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
4538}
4539
4540static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
4541	.set = gfx_v9_0_kiq_set_interrupt_state,
4542	.process = gfx_v9_0_kiq_irq,
4543};
4544
4545static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
4546	.set = gfx_v9_0_set_eop_interrupt_state,
4547	.process = gfx_v9_0_eop_irq,
4548};
4549
4550static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
4551	.set = gfx_v9_0_set_priv_reg_fault_state,
4552	.process = gfx_v9_0_priv_reg_irq,
4553};
4554
4555static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
4556	.set = gfx_v9_0_set_priv_inst_fault_state,
4557	.process = gfx_v9_0_priv_inst_irq,
4558};
4559
 
 
 
 
 
 
4560static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
4561{
4562	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
4563	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
4564
4565	adev->gfx.priv_reg_irq.num_types = 1;
4566	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
4567
4568	adev->gfx.priv_inst_irq.num_types = 1;
4569	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
4570
4571	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
4572	adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
4573}
4574
4575static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
4576{
4577	switch (adev->asic_type) {
4578	case CHIP_VEGA10:
4579	case CHIP_VEGA12:
4580	case CHIP_RAVEN:
 
 
 
 
 
4581		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
4582		break;
4583	default:
4584		break;
4585	}
4586}
4587
4588static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
4589{
4590	/* init asic gds info */
4591	adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
4592	adev->gds.gws.total_size = 64;
4593	adev->gds.oa.total_size = 16;
4594
4595	if (adev->gds.mem.total_size == 64 * 1024) {
4596		adev->gds.mem.gfx_partition_size = 4096;
4597		adev->gds.mem.cs_partition_size = 4096;
4598
4599		adev->gds.gws.gfx_partition_size = 4;
4600		adev->gds.gws.cs_partition_size = 4;
4601
4602		adev->gds.oa.gfx_partition_size = 4;
4603		adev->gds.oa.cs_partition_size = 1;
4604	} else {
4605		adev->gds.mem.gfx_partition_size = 1024;
4606		adev->gds.mem.cs_partition_size = 1024;
4607
4608		adev->gds.gws.gfx_partition_size = 16;
4609		adev->gds.gws.cs_partition_size = 16;
4610
4611		adev->gds.oa.gfx_partition_size = 4;
4612		adev->gds.oa.cs_partition_size = 4;
4613	}
4614}
4615
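/* Program the user-requested inactive-CU bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected shader array.
 */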
4616static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
4617						 u32 bitmap)
4618{
4619	u32 data;
4620
4621	if (!bitmap)
4622		return;
4623
4624	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4625	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4626
4627	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
4628}
4629
4630static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
4631{
4632	u32 data, mask;
4633
4634	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
4635	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
4636
4637	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
4638	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
4639
4640	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
4641
4642	return (~data) & mask;
4643}
4644
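/* Walk every shader engine and shader array, apply any user CU disable
 * masks, and build the active and always-on CU bitmaps reported via cu_info.
 */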
4645static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
4646				 struct amdgpu_cu_info *cu_info)
4647{
4648	int i, j, k, counter, active_cu_number = 0;
4649	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4650	unsigned disable_masks[4 * 2];
4651
4652	if (!adev || !cu_info)
4653		return -EINVAL;
4654
4655	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
4656
4657	mutex_lock(&adev->grbm_idx_mutex);
4658	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4659		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4660			mask = 1;
4661			ao_bitmap = 0;
4662			counter = 0;
4663			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
4664			if (i < 4 && j < 2)
4665				gfx_v9_0_set_user_cu_inactive_bitmap(
4666					adev, disable_masks[i * 2 + j]);
4667			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
4668			cu_info->bitmap[i][j] = bitmap;
4669
4670			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
4671				if (bitmap & mask) {
4672					if (counter < adev->gfx.config.max_cu_per_sh)
4673						ao_bitmap |= mask;
4674					counter++;
4675				}
4676				mask <<= 1;
4677			}
4678			active_cu_number += counter;
4679			if (i < 2 && j < 2)
4680				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
4681			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
4682		}
4683	}
4684	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
4685	mutex_unlock(&adev->grbm_idx_mutex);
4686
4687	cu_info->number = active_cu_number;
4688	cu_info->ao_cu_mask = ao_cu_mask;
4689
4690	return 0;
4691}
4692
4693const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
4694{
4695	.type = AMD_IP_BLOCK_TYPE_GFX,
4696	.major = 9,
4697	.minor = 0,
4698	.rev = 0,
4699	.funcs = &gfx_v9_0_ip_funcs,
4700};
v6.8
   1/*
   2 * Copyright 2016 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/delay.h>
  25#include <linux/kernel.h>
  26#include <linux/firmware.h>
  27#include <linux/module.h>
  28#include <linux/pci.h>
  29
  30#include "amdgpu.h"
  31#include "amdgpu_gfx.h"
  32#include "soc15.h"
  33#include "soc15d.h"
  34#include "amdgpu_atomfirmware.h"
  35#include "amdgpu_pm.h"
  36
  37#include "gc/gc_9_0_offset.h"
  38#include "gc/gc_9_0_sh_mask.h"
  39
  40#include "vega10_enum.h"
  41
  42#include "soc15_common.h"
  43#include "clearstate_gfx9.h"
  44#include "v9_structs.h"
  45
  46#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
  47
  48#include "amdgpu_ras.h"
  49
  50#include "amdgpu_ring_mux.h"
  51#include "gfx_v9_4.h"
  52#include "gfx_v9_0.h"
  53#include "gfx_v9_4_2.h"
  54
  55#include "asic_reg/pwr/pwr_10_0_offset.h"
  56#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
  57#include "asic_reg/gc/gc_9_0_default.h"
  58
  59#define GFX9_NUM_GFX_RINGS     1
  60#define GFX9_NUM_SW_GFX_RINGS  2
  61#define GFX9_MEC_HPD_SIZE 4096
  62#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
  63#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
  64
  65#define mmGCEA_PROBE_MAP                        0x070c
  66#define mmGCEA_PROBE_MAP_BASE_IDX               0
  67
  68MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
  69MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
  70MODULE_FIRMWARE("amdgpu/vega10_me.bin");
  71MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
  72MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
  73MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
  74
  75MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
  76MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
  77MODULE_FIRMWARE("amdgpu/vega12_me.bin");
  78MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
  79MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
  80MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
  81
  82MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
  83MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
  84MODULE_FIRMWARE("amdgpu/vega20_me.bin");
  85MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
  86MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
  87MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
  88
  89MODULE_FIRMWARE("amdgpu/raven_ce.bin");
  90MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
  91MODULE_FIRMWARE("amdgpu/raven_me.bin");
  92MODULE_FIRMWARE("amdgpu/raven_mec.bin");
  93MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
  94MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
  95
  96MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
  97MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
  98MODULE_FIRMWARE("amdgpu/picasso_me.bin");
  99MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
 100MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
 101MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
 102MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
 103
 104MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
 105MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
 106MODULE_FIRMWARE("amdgpu/raven2_me.bin");
 107MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
 108MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
 109MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
 110MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
 111
 112MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
 113MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
 114
 115MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
 116MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
 117MODULE_FIRMWARE("amdgpu/renoir_me.bin");
 118MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
 119MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
 120
 121MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
 122MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
 123MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
 124MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
 125MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
 126MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
 127
 128MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
 129MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
 130MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
 131MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
 132MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
 133
 134#define mmTCP_CHAN_STEER_0_ARCT								0x0b03
 135#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
 136#define mmTCP_CHAN_STEER_1_ARCT								0x0b04
 137#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
 138#define mmTCP_CHAN_STEER_2_ARCT								0x0b09
 139#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
 140#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
 141#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
 142#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
 143#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
 144#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
 145#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
 146
 147#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
 148#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
 149#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
 150#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
 151
 152enum ta_ras_gfx_subblock {
 153	/*CPC*/
 154	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
 155	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
 156	TA_RAS_BLOCK__GFX_CPC_UCODE,
 157	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
 158	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
 159	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
 160	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
 161	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
 162	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 163	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
 164	/* CPF*/
 165	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 166	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
 167	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
 168	TA_RAS_BLOCK__GFX_CPF_TAG,
 169	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
 170	/* CPG*/
 171	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 172	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
 173	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
 174	TA_RAS_BLOCK__GFX_CPG_TAG,
 175	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
 176	/* GDS*/
 177	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 178	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
 179	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
 180	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
 181	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
 182	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 183	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
 184	/* SPI*/
 185	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
 186	/* SQ*/
 187	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 188	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
 189	TA_RAS_BLOCK__GFX_SQ_LDS_D,
 190	TA_RAS_BLOCK__GFX_SQ_LDS_I,
 191	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
 192	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
 193	/* SQC (3 ranges)*/
 194	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 195	/* SQC range 0*/
 196	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
 197	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
 198		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
 199	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
 200	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
 201	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
 202	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
 203	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
 204	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 205	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
 206		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
 207	/* SQC range 1*/
 208	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 209	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
 210		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
 211	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
 212	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
 213	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
 214	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
 215	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
 216	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
 217	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
 218	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 219	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
 220		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
 221	/* SQC range 2*/
 222	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 223	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
 224		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
 225	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
 226	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
 227	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
 228	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
 229	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
 230	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
 231	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
 232	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 233	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
 234		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
 235	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
 236	/* TA*/
 237	TA_RAS_BLOCK__GFX_TA_INDEX_START,
 238	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
 239	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
 240	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
 241	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
 242	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 243	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
 244	/* TCA*/
 245	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 246	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
 247	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 248	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
 249	/* TCC (5 sub-ranges)*/
 250	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 251	/* TCC range 0*/
 252	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
 253	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
 254	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
 255	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
 256	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
 257	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
 258	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
 259	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
 260	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 261	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
 262	/* TCC range 1*/
 263	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 264	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
 265	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 266	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
 267		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
 268	/* TCC range 2*/
 269	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 270	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
 271	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
 272	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
 273	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
 274	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
 275	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
 276	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
 277	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 278	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
 279		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
 280	/* TCC range 3*/
 281	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 282	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
 283	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 284	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
 285		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
 286	/* TCC range 4*/
 287	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 288	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
 289		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
 290	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 291	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
 292		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
 293	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
 294	/* TCI*/
 295	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
 296	/* TCP*/
 297	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 298	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
 299	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
 300	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
 301	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
 302	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
 303	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
 304	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 305	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
 306	/* TD*/
 307	TA_RAS_BLOCK__GFX_TD_INDEX_START,
 308	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
 309	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
 310	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 311	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
 312	/* EA (3 sub-ranges)*/
 313	TA_RAS_BLOCK__GFX_EA_INDEX_START,
 314	/* EA range 0*/
 315	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
 316	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
 317	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
 318	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
 319	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
 320	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
 321	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
 322	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
 323	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 324	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
 325	/* EA range 1*/
 326	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 327	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
 328	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
 329	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
 330	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
 331	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
 332	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
 333	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 334	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
 335	/* EA range 2*/
 336	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 337	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
 338	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
 339	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
 340	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 341	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
 342	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
 343	/* UTC VM L2 bank*/
 344	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
 345	/* UTC VM walker*/
 346	TA_RAS_BLOCK__UTC_VML2_WALKER,
 347	/* UTC ATC L2 2MB cache*/
 348	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
 349	/* UTC ATC L2 4KB cache*/
 350	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
 351	TA_RAS_BLOCK__GFX_MAX
 352};
 353
 354struct ras_gfx_subblock {
 355	unsigned char *name;
 356	int ta_subblock;
 357	int hw_supported_error_type;
 358	int sw_supported_error_type;
 359};
 360
 361#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
 362	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
 363		#subblock,                                                     \
 364		TA_RAS_BLOCK__##subblock,                                      \
 365		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
 366		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
 367	}
 368
 369static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
 370	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
 371	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
 372	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 373	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 374	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
 375	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 376	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 377	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 378	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
 379	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
 380	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
 381	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
 382	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
 383	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
 384	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 385	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
 386	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
 387			     0),
 388	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
 389			     0),
 390	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
 391	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 392	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 393	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
 394	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
 395	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
 396	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
 397	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 398			     0, 0),
 399	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 400			     0),
 401	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 402			     0, 0),
 403	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
 404			     0),
 405	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
 406			     0, 0),
 407	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
 408			     0),
 409	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 410			     1),
 411	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 412			     0, 0, 0),
 413	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 414			     0),
 415	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 416			     0),
 417	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 418			     0),
 419	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 420			     0),
 421	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 422			     0),
 423	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 424			     0, 0),
 425	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 426			     0),
 427	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
 428			     0),
 429	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
 430			     0, 0, 0),
 431	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 432			     0),
 433	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 434			     0),
 435	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
 436			     0),
 437	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
 438			     0),
 439	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
 440			     0),
 441	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
 442			     0, 0),
 443	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
 444			     0),
 445	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
 446	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 447	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 448	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 449	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 450	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
 451	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 452	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
 453	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
 454			     1),
 455	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
 456			     1),
 457	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
 458			     1),
 459	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
 460			     0),
 461	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
 462			     0),
 463	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 464	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
 465	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
 466	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
 467	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
 468	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
 469	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 470	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
 471	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
 472	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 473	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
 474	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
 475			     0),
 476	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 477	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
 478			     0),
 479	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
 480			     0, 0),
 481	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
 482			     0),
 483	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 484	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
 485	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
 486	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 487	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
 488	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
 489	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
 490	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
 491	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
 492	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
 493	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
 494	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
 495	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 496	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 497	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 498	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 499	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 500	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 501	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
 502	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 503	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 504	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 505	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 506	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 507	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 508	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
 509	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 510	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 511	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 512	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
 513	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
 514	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
 515	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
 516	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
 517};
 518
 519static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 520{
 521	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 522	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
 523	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 524	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 525	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 526	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 527	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 528	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 529	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 530	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
 531	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
 532	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 533	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 534	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 535	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 536	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 537	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
 538	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
 539	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
 540	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 541};
 542
 543static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 544{
 545	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
 546	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 547	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 548	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 549	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 550	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 551	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
 552	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 553	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
 554	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 555	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 556	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 557	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 558	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 559	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 560	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
 561	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
 562	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 563};
 564
 565static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
 566{
 567	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
 568	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 569	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 570	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
 571	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
 572	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
 573	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
 574	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
 575	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
 576	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
 577	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
 578};
 579
 580static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 581{
 582	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 583	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
 584	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
 585	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 586	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 587	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 588	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 589	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 590	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 591	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 592	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
 593	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
 594	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
 595	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
 596	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 597	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 598	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 599	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 600	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 601	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
 602	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
 603	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
 604	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
 605	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 606};
 607
 608static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
 609{
 610	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
 611	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
 612	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
 613	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
 614	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
 615	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 616	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
 617};
 618
 619static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
 620{
 621	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
 622	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 623	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 624	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
 625	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
 626	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
 627	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
 628	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
 629	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 630	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 631	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
 632	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
 633	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
 634	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 635	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
 636	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 637	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
 638	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 639	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
 640};
 641
 642static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
 643{
 644	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 645	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
 646	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
 647	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
 648	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
 649	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 650	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 651	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 652	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 653	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 654	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
 655	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
 656};
 657
 658static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
 659{
 660	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
 661	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
 662	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
 663};
 664
 665static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
 666{
 667	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 668	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
 669	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 670	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 671	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 672	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
 673	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
 674	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
 675	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
 676	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 677	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 678	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 679	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 680	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 681	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
 682	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 683};
 684
 685static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
 686{
 687	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
 688	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
 689	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
 690	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
 691	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
 692	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
 693	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
 694	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
 695	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
 696	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
 697	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
 698	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
 699	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
 700};
 701
 702static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
 703{
 704	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
 705	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
 706	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
 707	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
 708	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
 709	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
 710	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
 711	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
 712	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
 713	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
 714	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
 715};
 716
 717static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
 718	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
 719	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
 720};
 721
 722static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
 723{
 724	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 725	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 726	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 727	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 728	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 729	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 730	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 731	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
 732};
 733
 734static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
 735{
 736	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 737	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 738	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 739	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 740	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 741	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 742	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 743	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
 744};
 745
 746#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 747#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 748#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
 749#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
 750
 751static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
 752static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
 753static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
 754static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 755static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 756				struct amdgpu_cu_info *cu_info);
 757static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 758static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
 759static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 760static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 761					  void *ras_error_status);
 762static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
 763				     void *inject_if, uint32_t instance_mask);
 764static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
 765static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
 766					      unsigned int vmid);
 767
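/* KIQ PM4 helpers: each function below emits a single packet on the KIQ
 * ring (SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES, QUERY_STATUS,
 * INVALIDATE_TLBS).
 */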
 768static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
 769				uint64_t queue_mask)
 770{
 771	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
 772	amdgpu_ring_write(kiq_ring,
 773		PACKET3_SET_RESOURCES_VMID_MASK(0) |
  774		/* vmid_mask:0 queue_type:0 (KIQ) */
 775		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
 776	amdgpu_ring_write(kiq_ring,
 777			lower_32_bits(queue_mask));	/* queue mask lo */
 778	amdgpu_ring_write(kiq_ring,
 779			upper_32_bits(queue_mask));	/* queue mask hi */
 780	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
 781	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
 782	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
 783	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
 784}
 785
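/* Emit a MAP_QUEUES packet so the KIQ maps the given ring's MQD and
 * doorbell onto a hardware queue slot.
 */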
 786static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
 787				 struct amdgpu_ring *ring)
 788{
 789	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
 790	uint64_t wptr_addr = ring->wptr_gpu_addr;
 791	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 792
 793	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
 794	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
 795	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 796			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
 797			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
 798			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
 799			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
 800			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
 801			 /*queue_type: normal compute queue */
 802			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
 803			 /* alloc format: all_on_one_pipe */
 804			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
 805			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
 806			 /* num_queues: must be 1 */
 807			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
 808	amdgpu_ring_write(kiq_ring,
 809			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
 810	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
 811	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
 812	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
 813	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
 814}
 815
 816static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
 817				   struct amdgpu_ring *ring,
 818				   enum amdgpu_unmap_queues_action action,
 819				   u64 gpu_addr, u64 seq)
 820{
 821	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 822
 823	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
 824	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 825			  PACKET3_UNMAP_QUEUES_ACTION(action) |
 826			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
 827			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
 828			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
 829	amdgpu_ring_write(kiq_ring,
 830			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 831
 832	if (action == PREEMPT_QUEUES_NO_UNMAP) {
 833		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
 834		amdgpu_ring_write(kiq_ring, 0);
 835		amdgpu_ring_write(kiq_ring, 0);
 836
 837	} else {
 838		amdgpu_ring_write(kiq_ring, 0);
 839		amdgpu_ring_write(kiq_ring, 0);
 840		amdgpu_ring_write(kiq_ring, 0);
 841	}
 842}
 843
 844static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
 845				   struct amdgpu_ring *ring,
 846				   u64 addr,
 847				   u64 seq)
 848{
 849	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
 850
 851	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
 852	amdgpu_ring_write(kiq_ring,
 853			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
 854			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
 855			  PACKET3_QUERY_STATUS_COMMAND(2));
 856	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
 857	amdgpu_ring_write(kiq_ring,
 858			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
 859			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
 860	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
 861	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
 862	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
 863	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
 864}
 865
 866static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
 867				uint16_t pasid, uint32_t flush_type,
 868				bool all_hub)
 869{
 870	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
 871	amdgpu_ring_write(kiq_ring,
 872			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
 873			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
 874			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
 875			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
 876}
 877
 878static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
 879	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
 880	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
 881	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
 882	.kiq_query_status = gfx_v9_0_kiq_query_status,
 883	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
 884	.set_resources_size = 8,
 885	.map_queues_size = 7,
 886	.unmap_queues_size = 6,
 887	.query_status_size = 7,
 888	.invalidate_tlbs_size = 2,
 889};
 890
 891static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
 892{
 893	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
 894}
 895
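/* Program the per-ASIC "golden" register sequences selected by GC IP
 * version, followed by the common GC 9.x sequence where it applies.
 */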
 896static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 897{
 898	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 899	case IP_VERSION(9, 0, 1):
 900		soc15_program_register_sequence(adev,
 901						golden_settings_gc_9_0,
 902						ARRAY_SIZE(golden_settings_gc_9_0));
 903		soc15_program_register_sequence(adev,
 904						golden_settings_gc_9_0_vg10,
 905						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
 906		break;
 907	case IP_VERSION(9, 2, 1):
 908		soc15_program_register_sequence(adev,
 909						golden_settings_gc_9_2_1,
 910						ARRAY_SIZE(golden_settings_gc_9_2_1));
 911		soc15_program_register_sequence(adev,
 912						golden_settings_gc_9_2_1_vg12,
 913						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 914		break;
 915	case IP_VERSION(9, 4, 0):
 916		soc15_program_register_sequence(adev,
 917						golden_settings_gc_9_0,
 918						ARRAY_SIZE(golden_settings_gc_9_0));
 919		soc15_program_register_sequence(adev,
 920						golden_settings_gc_9_0_vg20,
 921						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
 922		break;
 923	case IP_VERSION(9, 4, 1):
 924		soc15_program_register_sequence(adev,
 925						golden_settings_gc_9_4_1_arct,
 926						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
 927		break;
 928	case IP_VERSION(9, 2, 2):
 929	case IP_VERSION(9, 1, 0):
 930		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
 931						ARRAY_SIZE(golden_settings_gc_9_1));
 932		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
 933			soc15_program_register_sequence(adev,
 934							golden_settings_gc_9_1_rv2,
 935							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
 936		else
 937			soc15_program_register_sequence(adev,
 938							golden_settings_gc_9_1_rv1,
 939							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
 940		break;
 941	 case IP_VERSION(9, 3, 0):
 942		soc15_program_register_sequence(adev,
 943						golden_settings_gc_9_1_rn,
 944						ARRAY_SIZE(golden_settings_gc_9_1_rn));
  945		return; /* for renoir, don't need common golden settings */
 946	case IP_VERSION(9, 4, 2):
 947		gfx_v9_4_2_init_golden_registers(adev,
 948						 adev->smuio.funcs->get_die_id(adev));
 949		break;
 950	default:
 951		break;
 952	}
 953
 954	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
 955	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
 956		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
 957						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
 958}
 959
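/* Emit a WRITE_DATA packet that writes @val to the register at @reg,
 * optionally requesting write confirmation.
 */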
 960static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
 961				       bool wc, uint32_t reg, uint32_t val)
 962{
 963	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 964	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
 965				WRITE_DATA_DST_SEL(0) |
 966				(wc ? WR_CONFIRM : 0));
 967	amdgpu_ring_write(ring, reg);
 968	amdgpu_ring_write(ring, 0);
 969	amdgpu_ring_write(ring, val);
 970}
 971
 972static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
 973				  int mem_space, int opt, uint32_t addr0,
 974				  uint32_t addr1, uint32_t ref, uint32_t mask,
 975				  uint32_t inv)
 976{
 977	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
 978	amdgpu_ring_write(ring,
 979				 /* memory (1) or register (0) */
 980				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
 981				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
 982				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
 983				 WAIT_REG_MEM_ENGINE(eng_sel)));
 984
 985	if (mem_space)
 986		BUG_ON(addr0 & 0x3); /* Dword align */
 987	amdgpu_ring_write(ring, addr0);
 988	amdgpu_ring_write(ring, addr1);
 989	amdgpu_ring_write(ring, ref);
 990	amdgpu_ring_write(ring, mask);
 991	amdgpu_ring_write(ring, inv); /* poll interval */
 992}
 993
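/* Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a packet that
 * writes 0xDEADBEEF to it and poll until the new value is visible.
 */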
 994static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 995{
 996	struct amdgpu_device *adev = ring->adev;
 997	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
 998	uint32_t tmp = 0;
 999	unsigned i;
1000	int r;
1001
1002	WREG32(scratch, 0xCAFEDEAD);
1003	r = amdgpu_ring_alloc(ring, 3);
1004	if (r)
1005		return r;
1006
1007	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1008	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1009	amdgpu_ring_write(ring, 0xDEADBEEF);
1010	amdgpu_ring_commit(ring);
1011
1012	for (i = 0; i < adev->usec_timeout; i++) {
1013		tmp = RREG32(scratch);
1014		if (tmp == 0xDEADBEEF)
1015			break;
1016		udelay(1);
1017	}
1018
1019	if (i >= adev->usec_timeout)
1020		r = -ETIMEDOUT;
1021	return r;
1022}
1023
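/* IB test: submit an indirect buffer that writes 0xDEADBEEF to a writeback
 * slot and wait on the fence to confirm the IB executed.
 */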
1024static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1025{
1026	struct amdgpu_device *adev = ring->adev;
1027	struct amdgpu_ib ib;
1028	struct dma_fence *f = NULL;
1029
1030	unsigned index;
1031	uint64_t gpu_addr;
1032	uint32_t tmp;
1033	long r;
1034
1035	r = amdgpu_device_wb_get(adev, &index);
1036	if (r)
1037		return r;
1038
1039	gpu_addr = adev->wb.gpu_addr + (index * 4);
1040	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1041	memset(&ib, 0, sizeof(ib));
1042
1043	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1044	if (r)
1045		goto err1;
1046
1047	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1048	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1049	ib.ptr[2] = lower_32_bits(gpu_addr);
1050	ib.ptr[3] = upper_32_bits(gpu_addr);
1051	ib.ptr[4] = 0xDEADBEEF;
1052	ib.length_dw = 5;
1053
1054	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1055	if (r)
1056		goto err2;
1057
1058	r = dma_fence_wait_timeout(f, false, timeout);
1059	if (r == 0) {
1060		r = -ETIMEDOUT;
1061		goto err2;
1062	} else if (r < 0) {
1063		goto err2;
1064	}
1065
1066	tmp = adev->wb.wb[index];
1067	if (tmp == 0xDEADBEEF)
1068		r = 0;
1069	else
1070		r = -EINVAL;
1071
1072err2:
1073	amdgpu_ib_free(adev, &ib, NULL);
1074	dma_fence_put(f);
1075err1:
1076	amdgpu_device_wb_free(adev, index);
1077	return r;
1078}
1079
1080
1081static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1082{
1083	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1084	amdgpu_ucode_release(&adev->gfx.me_fw);
1085	amdgpu_ucode_release(&adev->gfx.ce_fw);
1086	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1087	amdgpu_ucode_release(&adev->gfx.mec_fw);
1088	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1089
1090	kfree(adev->gfx.rlc.register_list_format);
1091}
1092
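/* Set me/mec_fw_write_wait based on the loaded CP firmware versions and
 * warn once if the CP firmware is too old.
 */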
1093static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1094{
1095	adev->gfx.me_fw_write_wait = false;
1096	adev->gfx.mec_fw_write_wait = false;
1097
1098	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1099	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1100	     (adev->gfx.mec_feature_version < 46) ||
1101	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
1102	     (adev->gfx.pfp_feature_version < 46)))
1103		DRM_WARN_ONCE("CP firmware version too old, please update!");
1104
1105	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1106	case IP_VERSION(9, 0, 1):
1107		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1108		    (adev->gfx.me_feature_version >= 42) &&
1109		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1110		    (adev->gfx.pfp_feature_version >= 42))
1111			adev->gfx.me_fw_write_wait = true;
1112
1113		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1114		    (adev->gfx.mec_feature_version >= 42))
1115			adev->gfx.mec_fw_write_wait = true;
1116		break;
1117	case IP_VERSION(9, 2, 1):
1118		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1119		    (adev->gfx.me_feature_version >= 44) &&
1120		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1121		    (adev->gfx.pfp_feature_version >= 44))
1122			adev->gfx.me_fw_write_wait = true;
1123
1124		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1125		    (adev->gfx.mec_feature_version >= 44))
1126			adev->gfx.mec_fw_write_wait = true;
1127		break;
1128	case IP_VERSION(9, 4, 0):
1129		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1130		    (adev->gfx.me_feature_version >= 44) &&
1131		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1132		    (adev->gfx.pfp_feature_version >= 44))
1133			adev->gfx.me_fw_write_wait = true;
1134
1135		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1136		    (adev->gfx.mec_feature_version >= 44))
1137			adev->gfx.mec_fw_write_wait = true;
1138		break;
1139	case IP_VERSION(9, 1, 0):
1140	case IP_VERSION(9, 2, 2):
1141		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1142		    (adev->gfx.me_feature_version >= 42) &&
1143		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1144		    (adev->gfx.pfp_feature_version >= 42))
1145			adev->gfx.me_fw_write_wait = true;
1146
1147		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1148		    (adev->gfx.mec_feature_version >= 42))
1149			adev->gfx.mec_fw_write_wait = true;
1150		break;
1151	default:
1152		adev->gfx.me_fw_write_wait = true;
1153		adev->gfx.mec_fw_write_wait = true;
1154		break;
1155	}
1156}
1157
1158struct amdgpu_gfxoff_quirk {
1159	u16 chip_vendor;
1160	u16 chip_device;
1161	u16 subsys_vendor;
1162	u16 subsys_device;
1163	u8 revision;
1164};
1165
1166static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1167	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1168	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1169	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1170	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1171	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1172	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1173	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1174	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1175	{ 0, 0, 0, 0, 0 },
1176};
1177
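/* Return true if this device matches a quirk entry where GFXOFF is known to be unstable. */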
1178static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1179{
1180	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1181
1182	while (p && p->chip_device != 0) {
1183		if (pdev->vendor == p->chip_vendor &&
1184		    pdev->device == p->chip_device &&
1185		    pdev->subsystem_vendor == p->subsys_vendor &&
1186		    pdev->subsystem_device == p->subsys_device &&
1187		    pdev->revision == p->revision) {
1188			return true;
1189		}
1190		++p;
1191	}
1192	return false;
1193}
1194
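/* Raven parts running SMU firmware 0x41e2b or newer are "kicker" variants. */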
1195static bool is_raven_kicker(struct amdgpu_device *adev)
1196{
1197	if (adev->pm.fw_version >= 0x41e2b)
1198		return true;
1199	else
1200		return false;
1201}
1202
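/*
 * Report whether the ME firmware on GC 9.3.0 is new enough (>= 0xa5,
 * feature version 52) to require the enlarged CP doorbell range.
 */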
1203static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1204{
1205	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1206	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1207	    (adev->gfx.me_feature_version >= 52))
1208		return true;
1209	else
1210		return false;
1211}
1212
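/*
 * Disable GFXOFF on quirked boards and on Raven parts whose RLC firmware is
 * too old to support it; where GFXOFF stays enabled on APUs, turn on the
 * matching GFX power-gating flags.
 */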
1213static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1214{
1215	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1216		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1217
1218	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1219	case IP_VERSION(9, 0, 1):
1220	case IP_VERSION(9, 2, 1):
1221	case IP_VERSION(9, 4, 0):
1222		break;
1223	case IP_VERSION(9, 2, 2):
1224	case IP_VERSION(9, 1, 0):
1225		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1226		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1227		    ((!is_raven_kicker(adev) &&
1228		      adev->gfx.rlc_fw_version < 531) ||
1229		     (adev->gfx.rlc_feature_version < 1) ||
1230		     !adev->gfx.rlc.is_rlc_v2_1))
1231			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1232
1233		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1234			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1235				AMD_PG_SUPPORT_CP |
1236				AMD_PG_SUPPORT_RLC_SMU_HS;
1237		break;
1238	case IP_VERSION(9, 3, 0):
1239		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1240			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1241				AMD_PG_SUPPORT_CP |
1242				AMD_PG_SUPPORT_RLC_SMU_HS;
1243		break;
1244	default:
1245		break;
1246	}
1247}
1248
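/* Request the PFP, ME and CE firmware images and register them with the microcode framework. */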
1249static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1250					  char *chip_name)
1251{
1252	char fw_name[30];
1253	int err;
1254
1255	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1256	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
1257	if (err)
1258		goto out;
1259	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1260
1261	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1262	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1263	if (err)
1264		goto out;
1265	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1266
1267	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1268	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1269	if (err)
1270		goto out;
1271	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1272
1273out:
1274	if (err) {
1275		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1276		amdgpu_ucode_release(&adev->gfx.me_fw);
1277		amdgpu_ucode_release(&adev->gfx.ce_fw);
1278	}
1279	return err;
1280}
1281
1282static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1283				       char *chip_name)
1284{
1285	char fw_name[30];
1286	int err;
1287	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1288	uint16_t version_major;
1289	uint16_t version_minor;
1290	uint32_t smu_version;
1291
1292	/*
1294	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1295	 * instead of picasso_rlc.bin.
1296	 * Detection:
1297	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1298	 *          or revision >= 0xD8 && revision <= 0xDF;
1299	 * otherwise it is PCO FP5.
1299	 */
1300	if (!strcmp(chip_name, "picasso") &&
1301		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1302		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1303		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1304	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1305		(smu_version >= 0x41e2b))
1306		/*
1307		 * SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1308		 */
1309		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1310	else
1311		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1312	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1313	if (err)
1314		goto out;
1315	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1316
1317	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1318	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1319	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1320out:
1321	if (err)
1322		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1323
1324	return err;
1325}
1326
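/* GC 9.4.1, 9.4.2 and 9.3.0 do not ship a separate MEC2 firmware binary; MEC2 reuses the MEC firmware there. */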
1327static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1328{
1329	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1330	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1331	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1332		return false;
1333
1334	return true;
1335}
1336
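/*
 * Request the MEC (and, where supported, MEC2) firmware; SR-IOV Aldebaran
 * uses the "sjt" firmware variants. Once all CP firmware versions are known,
 * derive the gfxoff and fw_write_wait settings.
 */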
1337static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1338					      char *chip_name)
1339{
1340	char fw_name[30];
1341	int err;
1342
1343	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1344		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1345	else
1346		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1347
1348	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1349	if (err)
1350		goto out;
1351	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1352	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1353
1354	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1355		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1356			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1357		else
1358			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1359
1360		/* ignore failures to load */
1361		err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1362		if (!err) {
1363			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1364			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1365		} else {
1366			err = 0;
1367			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1368		}
1369	} else {
1370		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1371		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1372	}
1373
1374	gfx_v9_0_check_if_need_gfxoff(adev);
1375	gfx_v9_0_check_fw_write_wait(adev);
1376
1377out:
1378	if (err)
1379		amdgpu_ucode_release(&adev->gfx.mec_fw);
1380	return err;
1381}
1382
1383static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1384{
1385	char ucode_prefix[30];
1386	int r;
1387
1388	DRM_DEBUG("\n");
1389	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1390
1391	/* No CPG in Arcturus */
1392	if (adev->gfx.num_gfx_rings) {
1393		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1394		if (r)
1395			return r;
1396	}
1397
1398	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1399	if (r)
1400		return r;
1401
1402	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1403	if (r)
1404		return r;
1405
1406	return r;
1407}
1408
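/* Size of the clear-state buffer, in dwords, derived from the gfx9 clear-state definition. */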
1409static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1410{
1411	u32 count = 0;
1412	const struct cs_section_def *sect = NULL;
1413	const struct cs_extent_def *ext = NULL;
1414
1415	/* begin clear state */
1416	count += 2;
1417	/* context control state */
1418	count += 3;
1419
1420	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1421		for (ext = sect->section; ext->extent != NULL; ++ext) {
1422			if (sect->id == SECT_CONTEXT)
1423				count += 2 + ext->reg_count;
1424			else
1425				return 0;
1426		}
1427	}
1428
1429	/* end clear state */
1430	count += 2;
1431	/* clear state */
1432	count += 2;
1433
1434	return count;
1435}
1436
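/*
 * Fill the clear-state indirect buffer: preamble begin, context control,
 * the SET_CONTEXT_REG ranges from the clear-state data, preamble end and a
 * final CLEAR_STATE packet.
 */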
1437static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1438				    volatile u32 *buffer)
1439{
1440	u32 count = 0, i;
1441	const struct cs_section_def *sect = NULL;
1442	const struct cs_extent_def *ext = NULL;
1443
1444	if (adev->gfx.rlc.cs_data == NULL)
1445		return;
1446	if (buffer == NULL)
1447		return;
1448
1449	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1450	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1451
1452	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1453	buffer[count++] = cpu_to_le32(0x80000000);
1454	buffer[count++] = cpu_to_le32(0x80000000);
1455
1456	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1457		for (ext = sect->section; ext->extent != NULL; ++ext) {
1458			if (sect->id == SECT_CONTEXT) {
1459				buffer[count++] =
1460					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1461				buffer[count++] = cpu_to_le32(ext->reg_index -
1462						PACKET3_SET_CONTEXT_REG_START);
1463				for (i = 0; i < ext->reg_count; i++)
1464					buffer[count++] = cpu_to_le32(ext->extent[i]);
1465			} else {
1466				return;
1467			}
1468		}
1469	}
1470
1471	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1472	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1473
1474	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1475	buffer[count++] = cpu_to_le32(0);
1476}
1477
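/*
 * Program the RLC always-on CU masks per SE/SH: two CUs stay on for power
 * gating and 4/8/12 in total depending on the ASIC; the result is cached in
 * cu_info->ao_cu_bitmap.
 */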
1478static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1479{
1480	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1481	uint32_t pg_always_on_cu_num = 2;
1482	uint32_t always_on_cu_num;
1483	uint32_t i, j, k;
1484	uint32_t mask, cu_bitmap, counter;
1485
1486	if (adev->flags & AMD_IS_APU)
1487		always_on_cu_num = 4;
1488	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1489		always_on_cu_num = 8;
1490	else
1491		always_on_cu_num = 12;
1492
1493	mutex_lock(&adev->grbm_idx_mutex);
1494	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1495		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1496			mask = 1;
1497			cu_bitmap = 0;
1498			counter = 0;
1499			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1500
1501			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1502				if (cu_info->bitmap[0][i][j] & mask) {
1503					if (counter == pg_always_on_cu_num)
1504						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1505					if (counter < always_on_cu_num)
1506						cu_bitmap |= mask;
1507					else
1508						break;
1509					counter++;
1510				}
1511				mask <<= 1;
1512			}
1513
1514			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1515			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1516		}
1517	}
1518	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1519	mutex_unlock(&adev->grbm_idx_mutex);
1520}
1521
1522static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1523{
1524	uint32_t data;
1525
1526	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1527	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1528	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1529	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1530	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1531
1532	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1533	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1534
1535	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1536	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1537
1538	mutex_lock(&adev->grbm_idx_mutex);
1539	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1540	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1541	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1542
1543	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1544	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1545	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1546	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1547	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1548
1549	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1550	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1551	data &= 0x0000FFFF;
1552	data |= 0x00C00000;
1553	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1554
1555	/*
1556	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1557	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1558	 */
1559
1560	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1561	 * but is used here as part of the RLC_LB_CNTL configuration */
1562	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1563	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1564	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1565	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1566	mutex_unlock(&adev->grbm_idx_mutex);
1567
1568	gfx_v9_0_init_always_on_cu_mask(adev);
1569}
1570
1571static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1572{
1573	uint32_t data;
1574
1575	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1576	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1577	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1578	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1579	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1580
1581	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1582	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1583
1584	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1585	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1586
1587	mutex_lock(&adev->grbm_idx_mutex);
1588	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1589	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1590	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1591
1592	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1593	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1594	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1595	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1596	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1597
1598	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1599	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1600	data &= 0x0000FFFF;
1601	data |= 0x00C00000;
1602	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1603
1604	/*
1605	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1606	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1607	 */
1608
1609	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1610	 * but is used here as part of the RLC_LB_CNTL configuration */
1611	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1612	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1613	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1614	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1615	mutex_unlock(&adev->grbm_idx_mutex);
1616
1617	gfx_v9_0_init_always_on_cu_mask(adev);
1618}
1619
1620static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1621{
1622	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1623}
1624
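/* Number of CP jump table entries: 5 when a separate MEC2 firmware is loaded, otherwise 4. */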
1625static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1626{
1627	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1628		return 5;
1629	else
1630		return 4;
1631}
1632
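/* Record the scratch, GRBM and spare-interrupt register offsets used for RLC-gated (RLCG) register access. */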
1633static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1634{
1635	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1636
1637	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1638	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1639	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1640	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1641	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1642	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1643	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1644	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1645	adev->gfx.rlc.rlcg_reg_access_supported = true;
1646}
1647
1648static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1649{
1650	const struct cs_section_def *cs_data;
1651	int r;
1652
1653	adev->gfx.rlc.cs_data = gfx9_cs_data;
1654
1655	cs_data = adev->gfx.rlc.cs_data;
1656
1657	if (cs_data) {
1658		/* init clear state block */
1659		r = amdgpu_gfx_rlc_init_csb(adev);
1660		if (r)
1661			return r;
1662	}
1663
1664	if (adev->flags & AMD_IS_APU) {
1665		/* TODO: double check the cp_table_size for RV */
1666		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1667		r = amdgpu_gfx_rlc_init_cpt(adev);
1668		if (r)
1669			return r;
1670	}
1671
1672	return 0;
1673}
1674
1675static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1676{
1677	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1678	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1679}
1680
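/*
 * Allocate the per-queue MEC HPD/EOP buffer for all acquired compute rings
 * and copy the MEC firmware into a GTT buffer object.
 */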
1681static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1682{
1683	int r;
1684	u32 *hpd;
1685	const __le32 *fw_data;
1686	unsigned fw_size;
1687	u32 *fw;
1688	size_t mec_hpd_size;
1689
1690	const struct gfx_firmware_header_v1_0 *mec_hdr;
1691
1692	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1693
1694	/* take ownership of the relevant compute queues */
1695	amdgpu_gfx_compute_queue_acquire(adev);
1696	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1697	if (mec_hpd_size) {
1698		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1699					      AMDGPU_GEM_DOMAIN_VRAM |
1700					      AMDGPU_GEM_DOMAIN_GTT,
1701					      &adev->gfx.mec.hpd_eop_obj,
1702					      &adev->gfx.mec.hpd_eop_gpu_addr,
1703					      (void **)&hpd);
1704		if (r) {
1705			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1706			gfx_v9_0_mec_fini(adev);
1707			return r;
1708		}
1709
1710		memset(hpd, 0, mec_hpd_size);
1711
1712		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1713		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1714	}
1715
1716	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1717
1718	fw_data = (const __le32 *)
1719		(adev->gfx.mec_fw->data +
1720		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1721	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1722
1723	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1724				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1725				      &adev->gfx.mec.mec_fw_obj,
1726				      &adev->gfx.mec.mec_fw_gpu_addr,
1727				      (void **)&fw);
1728	if (r) {
1729		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1730		gfx_v9_0_mec_fini(adev);
1731		return r;
1732	}
1733
1734	memcpy(fw, fw_data, fw_size);
1735
1736	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1737	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1738
1739	return 0;
1740}
1741
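/* Read one wave register indirectly through SQ_IND_INDEX/SQ_IND_DATA. */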
1742static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1743{
1744	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1745		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1746		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1747		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1748		(SQ_IND_INDEX__FORCE_READ_MASK));
1749	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1750}
1751
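/* Read a run of wave registers using the SQ indirect index with auto-increment. */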
1752static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1753			   uint32_t wave, uint32_t thread,
1754			   uint32_t regno, uint32_t num, uint32_t *out)
1755{
1756	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1757		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1758		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1759		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1760		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1761		(SQ_IND_INDEX__FORCE_READ_MASK) |
1762		(SQ_IND_INDEX__AUTO_INCR_MASK));
1763	while (num--)
1764		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1765}
1766
1767static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1768{
1769	/* type 1 wave data */
1770	dst[(*no_fields)++] = 1;
1771	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1772	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1773	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1774	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1775	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1776	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1777	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1778	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1779	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1780	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1781	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1782	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1783	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1784	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1785	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1786}
1787
1788static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1789				     uint32_t wave, uint32_t start,
1790				     uint32_t size, uint32_t *dst)
1791{
1792	wave_read_regs(
1793		adev, simd, wave, 0,
1794		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1795}
1796
1797static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1798				     uint32_t wave, uint32_t thread,
1799				     uint32_t start, uint32_t size,
1800				     uint32_t *dst)
1801{
1802	wave_read_regs(
1803		adev, simd, wave, thread,
1804		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1805}
1806
1807static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1808				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1809{
1810	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1811}
1812
1813static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1814	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1815	.select_se_sh = &gfx_v9_0_select_se_sh,
1816	.read_wave_data = &gfx_v9_0_read_wave_data,
1817	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1818	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1819	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1820};
1821
1822const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1823	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1824	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1825	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1826};
1827
1828static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1829	.ras_block = {
1830		.hw_ops = &gfx_v9_0_ras_ops,
1831	},
1832};
1833
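/*
 * Per-ASIC gfx configuration: primitive FIFO sizes, GB_ADDR_CONFIG and the
 * pipe/bank/RB/SE counts derived from it, plus the RAS block for ASICs that
 * provide one.
 */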
1834static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1835{
1836	u32 gb_addr_config;
1837	int err;
1838
1839	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1840	case IP_VERSION(9, 0, 1):
1841		adev->gfx.config.max_hw_contexts = 8;
1842		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1843		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1844		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1845		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1846		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1847		break;
1848	case IP_VERSION(9, 2, 1):
1849		adev->gfx.config.max_hw_contexts = 8;
1850		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1851		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1852		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1853		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1854		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1855		DRM_INFO("fix gfx.config for vega12\n");
1856		break;
1857	case IP_VERSION(9, 4, 0):
1858		adev->gfx.ras = &gfx_v9_0_ras;
1859		adev->gfx.config.max_hw_contexts = 8;
1860		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1861		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1862		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1863		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1864		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1865		gb_addr_config &= ~0xf3e777ff;
1866		gb_addr_config |= 0x22014042;
1867		/* check vbios table if gpu info is not available */
1868		err = amdgpu_atomfirmware_get_gfx_info(adev);
1869		if (err)
1870			return err;
1871		break;
1872	case IP_VERSION(9, 2, 2):
1873	case IP_VERSION(9, 1, 0):
1874		adev->gfx.config.max_hw_contexts = 8;
1875		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1879		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1880			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1881		else
1882			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1883		break;
1884	case IP_VERSION(9, 4, 1):
1885		adev->gfx.ras = &gfx_v9_4_ras;
1886		adev->gfx.config.max_hw_contexts = 8;
1887		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1888		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1889		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1890		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1891		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1892		gb_addr_config &= ~0xf3e777ff;
1893		gb_addr_config |= 0x22014042;
1894		break;
1895	case IP_VERSION(9, 3, 0):
1896		adev->gfx.config.max_hw_contexts = 8;
1897		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1898		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1899		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1900		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1901		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1902		gb_addr_config &= ~0xf3e777ff;
1903		gb_addr_config |= 0x22010042;
1904		break;
1905	case IP_VERSION(9, 4, 2):
1906		adev->gfx.ras = &gfx_v9_4_2_ras;
1907		adev->gfx.config.max_hw_contexts = 8;
1908		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1909		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1910		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1911		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1912		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1913		gb_addr_config &= ~0xf3e777ff;
1914		gb_addr_config |= 0x22014042;
1915		/* check vbios table if gpu info is not available */
1916		err = amdgpu_atomfirmware_get_gfx_info(adev);
1917		if (err)
1918			return err;
1919		break;
1920	default:
1921		BUG();
1922		break;
1923	}
1924
1925	adev->gfx.config.gb_addr_config = gb_addr_config;
1926
1927	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1928			REG_GET_FIELD(
1929					adev->gfx.config.gb_addr_config,
1930					GB_ADDR_CONFIG,
1931					NUM_PIPES);
1932
1933	adev->gfx.config.max_tile_pipes =
1934		adev->gfx.config.gb_addr_config_fields.num_pipes;
1935
1936	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1937			REG_GET_FIELD(
1938					adev->gfx.config.gb_addr_config,
1939					GB_ADDR_CONFIG,
1940					NUM_BANKS);
1941	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1942			REG_GET_FIELD(
1943					adev->gfx.config.gb_addr_config,
1944					GB_ADDR_CONFIG,
1945					MAX_COMPRESSED_FRAGS);
1946	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1947			REG_GET_FIELD(
1948					adev->gfx.config.gb_addr_config,
1949					GB_ADDR_CONFIG,
1950					NUM_RB_PER_SE);
1951	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1952			REG_GET_FIELD(
1953					adev->gfx.config.gb_addr_config,
1954					GB_ADDR_CONFIG,
1955					NUM_SHADER_ENGINES);
1956	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1957			REG_GET_FIELD(
1958					adev->gfx.config.gb_addr_config,
1959					GB_ADDR_CONFIG,
1960					PIPE_INTERLEAVE_SIZE));
1961
1962	return 0;
1963}
1964
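/* Set up one compute ring: doorbell, EOP address, IRQ source and scheduling priority. */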
1965static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1966				      int mec, int pipe, int queue)
1967{
1968	unsigned irq_type;
1969	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1970	unsigned int hw_prio;
1971
1974	/* mec0 is me1 */
1975	ring->me = mec + 1;
1976	ring->pipe = pipe;
1977	ring->queue = queue;
1978
1979	ring->ring_obj = NULL;
1980	ring->use_doorbell = true;
1981	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1982	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1983				+ (ring_id * GFX9_MEC_HPD_SIZE);
1984	ring->vm_hub = AMDGPU_GFXHUB(0);
1985	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1986
1987	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1988		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1989		+ ring->pipe;
1990	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1991			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1992	/* type-2 packets are deprecated on MEC, use type-3 instead */
1993	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1994				hw_prio, NULL);
1995}
1996
1997static int gfx_v9_0_sw_init(void *handle)
1998{
1999	int i, j, k, r, ring_id;
2000	struct amdgpu_ring *ring;
2001	struct amdgpu_kiq *kiq;
2002	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2003	unsigned int hw_prio;
2004
2005	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2006	case IP_VERSION(9, 0, 1):
2007	case IP_VERSION(9, 2, 1):
2008	case IP_VERSION(9, 4, 0):
2009	case IP_VERSION(9, 2, 2):
2010	case IP_VERSION(9, 1, 0):
2011	case IP_VERSION(9, 4, 1):
2012	case IP_VERSION(9, 3, 0):
2013	case IP_VERSION(9, 4, 2):
2014		adev->gfx.mec.num_mec = 2;
2015		break;
2016	default:
2017		adev->gfx.mec.num_mec = 1;
2018		break;
2019	}
2020
2021	adev->gfx.mec.num_pipe_per_mec = 4;
2022	adev->gfx.mec.num_queue_per_pipe = 8;
2023
2024	/* EOP Event */
2025	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2026	if (r)
2027		return r;
2028
2029	/* Privileged reg */
2030	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2031			      &adev->gfx.priv_reg_irq);
2032	if (r)
2033		return r;
2034
2035	/* Privileged inst */
2036	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2037			      &adev->gfx.priv_inst_irq);
2038	if (r)
2039		return r;
2040
2041	/* ECC error */
2042	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2043			      &adev->gfx.cp_ecc_error_irq);
2044	if (r)
2045		return r;
2046
2047	/* FUE error */
2048	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2049			      &adev->gfx.cp_ecc_error_irq);
2050	if (r)
2051		return r;
2052
2053	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2054
2055	if (adev->gfx.rlc.funcs) {
2056		if (adev->gfx.rlc.funcs->init) {
2057			r = adev->gfx.rlc.funcs->init(adev);
2058			if (r) {
2059				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2060				return r;
2061			}
2062		}
2063	}
2064
2065	r = gfx_v9_0_mec_init(adev);
2066	if (r) {
2067		DRM_ERROR("Failed to init MEC BOs!\n");
2068		return r;
2069	}
2070
2071	/* set up the gfx ring */
2072	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2073		ring = &adev->gfx.gfx_ring[i];
2074		ring->ring_obj = NULL;
2075		if (!i)
2076			sprintf(ring->name, "gfx");
2077		else
2078			sprintf(ring->name, "gfx_%d", i);
2079		ring->use_doorbell = true;
2080		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2081
2082		/* disable scheduler on the real ring */
2083		ring->no_scheduler = true;
2084		ring->vm_hub = AMDGPU_GFXHUB(0);
2085		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2086				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2087				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2088		if (r)
2089			return r;
2090	}
2091
2092	/* set up the software rings */
2093	if (adev->gfx.num_gfx_rings) {
2094		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2095			ring = &adev->gfx.sw_gfx_ring[i];
2096			ring->ring_obj = NULL;
2097			sprintf(ring->name, amdgpu_sw_ring_name(i));
2098			ring->use_doorbell = true;
2099			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2100			ring->is_sw_ring = true;
2101			hw_prio = amdgpu_sw_ring_priority(i);
2102			ring->vm_hub = AMDGPU_GFXHUB(0);
2103			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2104					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2105					     NULL);
2106			if (r)
2107				return r;
2108			ring->wptr = 0;
2109		}
2110
2111		/* init the muxer and add software rings */
2112		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2113					 GFX9_NUM_SW_GFX_RINGS);
2114		if (r) {
2115			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2116			return r;
2117		}
2118		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2119			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2120							&adev->gfx.sw_gfx_ring[i]);
2121			if (r) {
2122				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2123				return r;
2124			}
2125		}
2126	}
2127
2128	/* set up the compute queues - allocate horizontally across pipes */
2129	ring_id = 0;
2130	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2131		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2132			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2133				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2134								     k, j))
2135					continue;
2136
2137				r = gfx_v9_0_compute_ring_init(adev,
2138							       ring_id,
2139							       i, k, j);
2140				if (r)
2141					return r;
2142
2143				ring_id++;
2144			}
2145		}
2146	}
2147
2148	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2149	if (r) {
2150		DRM_ERROR("Failed to init KIQ BOs!\n");
2151		return r;
2152	}
2153
2154	kiq = &adev->gfx.kiq[0];
2155	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0);
2156	if (r)
2157		return r;
2158
2159	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2160	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2161	if (r)
2162		return r;
2163
2164	adev->gfx.ce_ram_size = 0x8000;
2165
2166	r = gfx_v9_0_gpu_early_init(adev);
2167	if (r)
2168		return r;
2169
2170	if (amdgpu_gfx_ras_sw_init(adev)) {
2171		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2172		return -EINVAL;
2173	}
2174
2175	return 0;
2176}
2177
2178
2179static int gfx_v9_0_sw_fini(void *handle)
2180{
2181	int i;
2182	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2183
2184	if (adev->gfx.num_gfx_rings) {
2185		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2186			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2187		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2188	}
2189
2190	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2191		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2192	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2193		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2194
2195	amdgpu_gfx_mqd_sw_fini(adev, 0);
2196	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2197	amdgpu_gfx_kiq_fini(adev, 0);
2198
2199	gfx_v9_0_mec_fini(adev);
2200	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2201				&adev->gfx.rlc.clear_state_gpu_addr,
2202				(void **)&adev->gfx.rlc.cs_ptr);
2203	if (adev->flags & AMD_IS_APU) {
2204		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2205				&adev->gfx.rlc.cp_table_gpu_addr,
2206				(void **)&adev->gfx.rlc.cp_table_ptr);
2207	}
2208	gfx_v9_0_free_microcode(adev);
2209
2210	return 0;
2211}
2212
2213
2214static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2215{
2216	/* TODO */
2217}
2218
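/*
 * Select which shader engine / shader array / instance subsequent GRBM
 * indexed register accesses apply to; 0xffffffff selects broadcast mode.
 */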
2219void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2220			   u32 instance, int xcc_id)
2221{
2222	u32 data;
2223
2224	if (instance == 0xffffffff)
2225		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2226	else
2227		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2228
2229	if (se_num == 0xffffffff)
2230		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2231	else
2232		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2233
2234	if (sh_num == 0xffffffff)
2235		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2236	else
2237		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2238
2239	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2240}
2241
2242static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2243{
2244	u32 data, mask;
2245
2246	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2247	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2248
2249	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2250	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2251
2252	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2253					 adev->gfx.config.max_sh_per_se);
2254
2255	return (~data) & mask;
2256}
2257
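/* Collect the active render backend bitmap across all SEs/SHs and cache the enabled RB count. */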
2258static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2259{
2260	int i, j;
2261	u32 data;
2262	u32 active_rbs = 0;
2263	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2264					adev->gfx.config.max_sh_per_se;
2265
2266	mutex_lock(&adev->grbm_idx_mutex);
2267	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2268		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2269			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2270			data = gfx_v9_0_get_rb_active_bitmap(adev);
2271			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2272					       rb_bitmap_width_per_sh);
2273		}
2274	}
2275	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2276	mutex_unlock(&adev->grbm_idx_mutex);
2277
2278	adev->gfx.config.backend_enable_mask = active_rbs;
2279	adev->gfx.config.num_rbs = hweight32(active_rbs);
2280}
2281
2282static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2283				uint32_t first_vmid,
2284				uint32_t last_vmid)
2285{
2286	uint32_t data;
2287	uint32_t trap_config_vmid_mask = 0;
2288	int i;
2289
2290	/* Calculate trap config vmid mask */
2291	for (i = first_vmid; i < last_vmid; i++)
2292		trap_config_vmid_mask |= (1 << i);
2293
2294	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2295			VMID_SEL, trap_config_vmid_mask);
2296	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2297			TRAP_EN, 1);
2298	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2299	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2300
2301	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2302	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2303}
2304
2305#define DEFAULT_SH_MEM_BASES	(0x6000)
2306static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2307{
2308	int i;
2309	uint32_t sh_mem_config;
2310	uint32_t sh_mem_bases;
2311
2312	/*
2313	 * Configure apertures:
2314	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2315	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2316	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2317	 */
2318	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2319
2320	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2321			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2322			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2323
2324	mutex_lock(&adev->srbm_mutex);
2325	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2326		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2327		/* CP and shaders */
2328		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2329		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2330	}
2331	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2332	mutex_unlock(&adev->srbm_mutex);
2333
2334	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2335	   access. These should be enabled by FW for target VMIDs. */
2336	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2337		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2338		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2339		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2340		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2341	}
2342}
2343
2344static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2345{
2346	int vmid;
2347
2348	/*
2349	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2350	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2351	 * the driver can enable them for graphics. VMID0 should maintain
2352	 * access so that HWS firmware can save/restore entries.
2353	 */
2354	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2355		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2356		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2357		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2358		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2359	}
2360}
2361
2362static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2363{
2364	uint32_t tmp;
2365
2366	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2367	case IP_VERSION(9, 4, 1):
2368		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2369		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2370				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2371		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2372		break;
2373	default:
2374		break;
2375	}
2376}
2377
2378static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2379{
2380	u32 tmp;
2381	int i;
2382
2383	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2384
2385	gfx_v9_0_tiling_mode_table_init(adev);
2386
2387	if (adev->gfx.num_gfx_rings)
2388		gfx_v9_0_setup_rb(adev);
2389	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2390	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2391
2392	/* XXX SH_MEM regs */
2393	/* where to put LDS, scratch, GPUVM in FSA64 space */
2394	mutex_lock(&adev->srbm_mutex);
2395	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2396		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2397		/* CP and shaders */
2398		if (i == 0) {
2399			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2400					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2401			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2402					    !!adev->gmc.noretry);
2403			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2404			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2405		} else {
2406			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2407					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2408			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2409					    !!adev->gmc.noretry);
2410			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2411			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2412				(adev->gmc.private_aperture_start >> 48));
2413			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2414				(adev->gmc.shared_aperture_start >> 48));
2415			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2416		}
2417	}
2418	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2419
2420	mutex_unlock(&adev->srbm_mutex);
2421
2422	gfx_v9_0_init_compute_vmid(adev);
2423	gfx_v9_0_init_gds_vmid(adev);
2424	gfx_v9_0_init_sq_config(adev);
2425}
2426
2427static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2428{
2429	u32 i, j, k;
2430	u32 mask;
2431
2432	mutex_lock(&adev->grbm_idx_mutex);
2433	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2434		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2435			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2436			for (k = 0; k < adev->usec_timeout; k++) {
2437				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2438					break;
2439				udelay(1);
2440			}
2441			if (k == adev->usec_timeout) {
2442				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2443						      0xffffffff, 0xffffffff, 0);
2444				mutex_unlock(&adev->grbm_idx_mutex);
2445				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2446					 i, j);
2447				return;
2448			}
2449		}
2450	}
2451	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2452	mutex_unlock(&adev->grbm_idx_mutex);
2453
2454	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2455		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2456		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2457		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2458	for (k = 0; k < adev->usec_timeout; k++) {
2459		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2460			break;
2461		udelay(1);
2462	}
2463}
2464
2465static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2466					       bool enable)
2467{
2468	u32 tmp;
2469
2470	/* These interrupts should be enabled to drive DS clock */
2471
2472	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2473
2474	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2475	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2476	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2477	if (adev->gfx.num_gfx_rings)
2478		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2479
2480	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2481}
2482
2483static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2484{
2485	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2486	/* csib */
2487	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2488			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2489	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2490			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2491	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2492			adev->gfx.rlc.clear_state_size);
2493}
2494
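/*
 * Walk the RLC register_list_format blob starting at indirect_offset,
 * recording where each indirect block begins and collecting the unique
 * indirect register offsets that the list references.
 */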
2495static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2496				int indirect_offset,
2497				int list_size,
2498				int *unique_indirect_regs,
2499				int unique_indirect_reg_count,
2500				int *indirect_start_offsets,
2501				int *indirect_start_offsets_count,
2502				int max_start_offsets_count)
2503{
2504	int idx;
2505
2506	for (; indirect_offset < list_size; indirect_offset++) {
2507		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2508		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2509		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2510
2511		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2512			indirect_offset += 2;
2513
2514			/* look for the matching index */
2515			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2516				if (unique_indirect_regs[idx] ==
2517					register_list_format[indirect_offset] ||
2518					!unique_indirect_regs[idx])
2519					break;
2520			}
2521
2522			BUG_ON(idx >= unique_indirect_reg_count);
2523
2524			if (!unique_indirect_regs[idx])
2525				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2526
2527			indirect_offset++;
2528		}
2529	}
2530}
2531
2532static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2533{
2534	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2535	int unique_indirect_reg_count = 0;
2536
2537	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2538	int indirect_start_offsets_count = 0;
2539
2540	int list_size = 0;
2541	int i = 0, j = 0;
2542	u32 tmp = 0;
2543
2544	u32 *register_list_format =
2545		kmemdup(adev->gfx.rlc.register_list_format,
2546			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2547	if (!register_list_format)
2548		return -ENOMEM;
2549
2550	/* setup unique_indirect_regs array and indirect_start_offsets array */
2551	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2552	gfx_v9_1_parse_ind_reg_list(register_list_format,
2553				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2554				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2555				    unique_indirect_regs,
2556				    unique_indirect_reg_count,
2557				    indirect_start_offsets,
2558				    &indirect_start_offsets_count,
2559				    ARRAY_SIZE(indirect_start_offsets));
2560
2561	/* enable auto inc in case it is disabled */
2562	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2563	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2564	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2565
2566	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2567	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2568		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2569	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2570		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2571			adev->gfx.rlc.register_restore[i]);
2572
2573	/* load indirect register */
2574	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2575		adev->gfx.rlc.reg_list_format_start);
2576
2577	/* direct register portion */
2578	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2579		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2580			register_list_format[i]);
2581
2582	/* indirect register portion */
2583	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2584		if (register_list_format[i] == 0xFFFFFFFF) {
2585			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2586			continue;
2587		}
2588
2589		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2590		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2591
2592		for (j = 0; j < unique_indirect_reg_count; j++) {
2593			if (register_list_format[i] == unique_indirect_regs[j]) {
2594				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2595				break;
2596			}
2597		}
2598
2599		BUG_ON(j >= unique_indirect_reg_count);
2600
2601		i++;
2602	}
2603
2604	/* set save/restore list size */
2605	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2606	list_size = list_size >> 1;
2607	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2608		adev->gfx.rlc.reg_restore_list_size);
2609	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2610
2611	/* write the starting offsets to RLC scratch ram */
2612	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2613		adev->gfx.rlc.starting_offsets_start);
2614	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2615		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2616		       indirect_start_offsets[i]);
2617
2618	/* load unique indirect regs*/
2619	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2620		if (unique_indirect_regs[i] != 0) {
2621			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2622			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2623			       unique_indirect_regs[i] & 0x3FFFF);
2624
2625			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2626			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2627			       unique_indirect_regs[i] >> 20);
2628		}
2629	}
2630
2631	kfree(register_list_format);
2632	return 0;
2633}
2634
2635static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2636{
2637	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2638}
2639
2640static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2641					     bool enable)
2642{
2643	uint32_t data = 0;
2644	uint32_t default_data = 0;
2645
2646	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2647	if (enable) {
2648		/* enable GFXIP control over CGPG */
2649		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2650		if (default_data != data)
2651			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2652
2653		/* update status */
2654		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2655		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2656		if (default_data != data)
2657			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2658	} else {
2659		/* restore GFXIP control over GCPG */
2660		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2661		if (default_data != data)
2662			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2663	}
2664}
2665
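/*
 * Program the RLC power-gating delays and GRBM register-save idle threshold
 * used when GFX PG/SMG/DMG is enabled.
 */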
2666static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2667{
2668	uint32_t data = 0;
2669
2670	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2671			      AMD_PG_SUPPORT_GFX_SMG |
2672			      AMD_PG_SUPPORT_GFX_DMG)) {
2673		/* init IDLE_POLL_COUNT = 60 */
2674		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2675		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2676		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2677		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2678
2679		/* init RLC PG Delay */
2680		data = 0;
2681		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2682		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2683		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2684		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2685		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2686
2687		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2688		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2689		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2690		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2691
2692		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2693		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2694		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2695		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2696
2697		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2698		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2699
2700		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2701		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2702		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2703		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2704			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2705	}
2706}
2707
2708static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2709						bool enable)
2710{
2711	uint32_t data = 0;
2712	uint32_t default_data = 0;
2713
2714	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2715	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2716			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2717			     enable ? 1 : 0);
2718	if (default_data != data)
2719		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2720}
2721
2722static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2723						bool enable)
2724{
2725	uint32_t data = 0;
2726	uint32_t default_data = 0;
2727
2728	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2729	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2730			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2731			     enable ? 1 : 0);
2732	if(default_data != data)
2733		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2734}
2735
2736static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2737					bool enable)
2738{
2739	uint32_t data = 0;
2740	uint32_t default_data = 0;
2741
2742	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2743	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2744			     CP_PG_DISABLE,
2745			     enable ? 0 : 1);
2746	if(default_data != data)
2747		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2748}
2749
2750static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2751						bool enable)
2752{
2753	uint32_t data, default_data;
2754
2755	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2756	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2757			     GFX_POWER_GATING_ENABLE,
2758			     enable ? 1 : 0);
2759	if(default_data != data)
2760		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2761}
2762
2763static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2764						bool enable)
2765{
2766	uint32_t data, default_data;
2767
2768	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2769	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2770			     GFX_PIPELINE_PG_ENABLE,
2771			     enable ? 1 : 0);
2772	if(default_data != data)
2773		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2774
2775	if (!enable)
2776		/* read any GFX register to wake up GFX */
2777		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2778}
2779
2780static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2781						       bool enable)
2782{
2783	uint32_t data, default_data;
2784
2785	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2786	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2787			     STATIC_PER_CU_PG_ENABLE,
2788			     enable ? 1 : 0);
2789	if(default_data != data)
2790		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2791}
2792
2793static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2794						bool enable)
2795{
2796	uint32_t data, default_data;
2797
2798	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2799	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2800			     DYN_PER_CU_PG_ENABLE,
2801			     enable ? 1 : 0);
2802	if(default_data != data)
2803		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2804}
2805
2806static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2807{
2808	gfx_v9_0_init_csb(adev);
2809
2810	/*
2811	 * The RLC save/restore list is usable since RLC v2_1,
2812	 * and it is required by the gfxoff feature.
2813	 */
2814	if (adev->gfx.rlc.is_rlc_v2_1) {
2815		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2816			    IP_VERSION(9, 2, 1) ||
2817		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2818			gfx_v9_1_init_rlc_save_restore_list(adev);
2819		gfx_v9_0_enable_save_restore_machine(adev);
2820	}
2821
2822	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2823			      AMD_PG_SUPPORT_GFX_SMG |
2824			      AMD_PG_SUPPORT_GFX_DMG |
2825			      AMD_PG_SUPPORT_CP |
2826			      AMD_PG_SUPPORT_GDS |
2827			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2828		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2829			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2830		gfx_v9_0_init_gfx_power_gating(adev);
2831	}
2832}
2833
2834static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2835{
2836	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2837	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2838	gfx_v9_0_wait_for_rlc_serdes(adev);
2839}
2840
2841static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2842{
2843	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2844	udelay(50);
2845	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2846	udelay(50);
2847}
2848
2849static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2850{
2851#ifdef AMDGPU_RLC_DEBUG_RETRY
2852	u32 rlc_ucode_ver;
2853#endif
2854
2855	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2856	udelay(50);
2857
2858	/* on APUs the CP interrupt is enabled only after the CP has been initialized */
2859	if (!(adev->flags & AMD_IS_APU)) {
2860		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2861		udelay(50);
2862	}
2863
2864#ifdef AMDGPU_RLC_DEBUG_RETRY
2865	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2866	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2867	if(rlc_ucode_ver == 0x108) {
2868		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2869				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2870		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2871		 * default is 0x9C4 to create a 100us interval */
2872		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2873		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2874		 * to disable the page fault retry interrupts, default is
2875		 * 0x100 (256) */
2876		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2877	}
2878#endif
2879}
2880
2881static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2882{
2883	const struct rlc_firmware_header_v2_0 *hdr;
2884	const __le32 *fw_data;
2885	unsigned i, fw_size;
2886
2887	if (!adev->gfx.rlc_fw)
2888		return -EINVAL;
2889
2890	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2891	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2892
2893	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2894			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2895	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2896
2897	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2898			RLCG_UCODE_LOADING_START_ADDRESS);
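	/* the ADDR register auto-increments as each ucode dword is written
	 * to the DATA register below */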
2899	for (i = 0; i < fw_size; i++)
2900		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2901	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2902
2903	return 0;
2904}
2905
2906static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2907{
2908	int r;
2909
2910	if (amdgpu_sriov_vf(adev)) {
2911		gfx_v9_0_init_csb(adev);
2912		return 0;
2913	}
2914
2915	adev->gfx.rlc.funcs->stop(adev);
2916
2917	/* disable CG */
2918	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2919
2920	gfx_v9_0_init_pg(adev);
2921
2922	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2923		/* legacy rlc firmware loading */
2924		r = gfx_v9_0_rlc_load_microcode(adev);
2925		if (r)
2926			return r;
2927	}
2928
2929	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2930	case IP_VERSION(9, 2, 2):
2931	case IP_VERSION(9, 1, 0):
2932		gfx_v9_0_init_lbpw(adev);
2933		if (amdgpu_lbpw == 0)
2934			gfx_v9_0_enable_lbpw(adev, false);
2935		else
2936			gfx_v9_0_enable_lbpw(adev, true);
2937		break;
2938	case IP_VERSION(9, 4, 0):
2939		gfx_v9_4_init_lbpw(adev);
2940		if (amdgpu_lbpw > 0)
2941			gfx_v9_0_enable_lbpw(adev, true);
2942		else
2943			gfx_v9_0_enable_lbpw(adev, false);
2944		break;
2945	default:
2946		break;
2947	}
2948
2949	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
2950
2951	adev->gfx.rlc.funcs->start(adev);
2952
2953	return 0;
2954}
2955
2956static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2957{
 
2958	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2959
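	/* enable clears the HALT bits to let the gfx CP run; disable halts
	 * ME, PFP and CE */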
2960	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2961	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2962	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2963	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2964	udelay(50);
2965}
2966
2967static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2968{
2969	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2970	const struct gfx_firmware_header_v1_0 *ce_hdr;
2971	const struct gfx_firmware_header_v1_0 *me_hdr;
2972	const __le32 *fw_data;
2973	unsigned i, fw_size;
2974
2975	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2976		return -EINVAL;
2977
2978	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2979		adev->gfx.pfp_fw->data;
2980	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2981		adev->gfx.ce_fw->data;
2982	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2983		adev->gfx.me_fw->data;
2984
2985	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2986	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2987	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2988
2989	gfx_v9_0_cp_gfx_enable(adev, false);
2990
2991	/* PFP */
2992	fw_data = (const __le32 *)
2993		(adev->gfx.pfp_fw->data +
2994		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2995	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2996	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2997	for (i = 0; i < fw_size; i++)
2998		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2999	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3000
3001	/* CE */
3002	fw_data = (const __le32 *)
3003		(adev->gfx.ce_fw->data +
3004		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3005	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3006	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3007	for (i = 0; i < fw_size; i++)
3008		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3009	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3010
3011	/* ME */
3012	fw_data = (const __le32 *)
3013		(adev->gfx.me_fw->data +
3014		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3015	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3016	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3017	for (i = 0; i < fw_size; i++)
3018		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3019	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3020
3021	return 0;
3022}
3023
3024static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3025{
3026	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3027	const struct cs_section_def *sect = NULL;
3028	const struct cs_extent_def *ext = NULL;
3029	int r, i, tmp;
3030
3031	/* init the CP */
3032	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3033	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3034
3035	gfx_v9_0_cp_gfx_enable(adev, true);
3036
3037	/* Limit this quirk to the gfx9 APU series; the gfx10/gfx11 APUs have
3038	 * been confirmed not to need this update.
3039	 */
3040	if (adev->flags & AMD_IS_APU &&
3041			adev->in_s3 && !adev->suspend_complete) {
3042		DRM_INFO("Will skip the CSB packet resubmit\n");
3043		return 0;
3044	}
3045	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3046	if (r) {
3047		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3048		return r;
3049	}
3050
3051	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3052	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3053
3054	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3055	amdgpu_ring_write(ring, 0x80000000);
3056	amdgpu_ring_write(ring, 0x80000000);
3057
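	/* emit the SECT_CONTEXT sections of the clear-state data
	 * (gfx9_cs_data) as SET_CONTEXT_REG packets */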
3058	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3059		for (ext = sect->section; ext->extent != NULL; ++ext) {
3060			if (sect->id == SECT_CONTEXT) {
3061				amdgpu_ring_write(ring,
3062				       PACKET3(PACKET3_SET_CONTEXT_REG,
3063					       ext->reg_count));
3064				amdgpu_ring_write(ring,
3065				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3066				for (i = 0; i < ext->reg_count; i++)
3067					amdgpu_ring_write(ring, ext->extent[i]);
3068			}
3069		}
3070	}
3071
3072	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3073	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3074
3075	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3076	amdgpu_ring_write(ring, 0);
3077
3078	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3079	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3080	amdgpu_ring_write(ring, 0x8000);
3081	amdgpu_ring_write(ring, 0x8000);
3082
3083	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3084	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3085		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3086	amdgpu_ring_write(ring, tmp);
3087	amdgpu_ring_write(ring, 0);
3088
3089	amdgpu_ring_commit(ring);
3090
3091	return 0;
3092}
3093
3094static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3095{
3096	struct amdgpu_ring *ring;
3097	u32 tmp;
3098	u32 rb_bufsz;
3099	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3100
3101	/* Set the write pointer delay */
3102	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3103
3104	/* set the RB to use vmid 0 */
3105	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3106
3107	/* Set ring buffer size */
3108	ring = &adev->gfx.gfx_ring[0];
3109	rb_bufsz = order_base_2(ring->ring_size / 8);
3110	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3111	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
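	/* RB_BUFSZ and RB_BLKSZ are log2 encodings derived from the ring size;
	 * RB_BLKSZ is kept two steps (a factor of four) below RB_BUFSZ */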
3112#ifdef __BIG_ENDIAN
3113	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3114#endif
3115	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3116
3117	/* Initialize the ring buffer's write pointers */
3118	ring->wptr = 0;
3119	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3120	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3121
3122	/* set the wb address whether it's enabled or not */
3123	rptr_addr = ring->rptr_gpu_addr;
3124	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3125	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3126
3127	wptr_gpu_addr = ring->wptr_gpu_addr;
3128	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3129	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3130
3131	mdelay(1);
3132	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3133
3134	rb_addr = ring->gpu_addr >> 8;
3135	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3136	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3137
3138	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3139	if (ring->use_doorbell) {
3140		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3141				    DOORBELL_OFFSET, ring->doorbell_index);
3142		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3143				    DOORBELL_EN, 1);
3144	} else {
3145		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3146	}
3147	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3148
3149	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3150			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3151	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3152
3153	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3154		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3155
3156
3157	/* start the ring */
3158	gfx_v9_0_cp_gfx_start(adev);
 
3159
3160	return 0;
3161}
3162
3163static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3164{
 
 
3165	if (enable) {
3166		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3167	} else {
3168		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3169			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3170		adev->gfx.kiq[0].ring.sched.ready = false;
 
 
3171	}
3172	udelay(50);
3173}
3174
3175static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3176{
3177	const struct gfx_firmware_header_v1_0 *mec_hdr;
3178	const __le32 *fw_data;
3179	unsigned i;
3180	u32 tmp;
3181
3182	if (!adev->gfx.mec_fw)
3183		return -EINVAL;
3184
3185	gfx_v9_0_cp_compute_enable(adev, false);
3186
3187	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3188	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3189
3190	fw_data = (const __le32 *)
3191		(adev->gfx.mec_fw->data +
3192		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3193	tmp = 0;
3194	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3195	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3196	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3197
3198	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3199		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3200	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3201		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3202
3203	/* MEC1 */
3204	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3205			 mec_hdr->jt_offset);
3206	for (i = 0; i < mec_hdr->jt_size; i++)
3207		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3208			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3209
3210	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3211			adev->gfx.mec_fw_version);
3212	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3213
3214	return 0;
3215}
3216
3217/* KIQ functions */
3218static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3219{
3220	uint32_t tmp;
3221	struct amdgpu_device *adev = ring->adev;
3222
3223	/* tell the RLC which queue is the KIQ */
3224	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3225	tmp &= 0xffffff00;
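	/* low byte encodes the KIQ location: queue in bits [2:0],
	 * pipe in bits [4:3], me in bits [7:5] */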
3226	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3227	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3228	tmp |= 0x80;
3229	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3230}
3231
3232static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3233{
3234	struct amdgpu_device *adev = ring->adev;
3235
3236	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3237		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3238			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3239			mqd->cp_hqd_queue_priority =
3240				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
 
3241		}
3242	}
3243}
3244
3245static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3246{
3247	struct amdgpu_device *adev = ring->adev;
3248	struct v9_mqd *mqd = ring->mqd_ptr;
3249	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3250	uint32_t tmp;
3251
3252	mqd->header = 0xC0310800;
3253	mqd->compute_pipelinestat_enable = 0x00000001;
3254	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3255	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3256	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3257	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3258	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3259	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3260	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3261	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3262	mqd->compute_misc_reserved = 0x00000003;
3263
3264	mqd->dynamic_cu_mask_addr_lo =
3265		lower_32_bits(ring->mqd_gpu_addr
3266			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3267	mqd->dynamic_cu_mask_addr_hi =
3268		upper_32_bits(ring->mqd_gpu_addr
3269			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3270
3271	eop_base_addr = ring->eop_gpu_addr >> 8;
3272	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3273	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3274
3275	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3276	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3277	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3278			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
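	/* e.g. GFX9_MEC_HPD_SIZE = 2048 bytes = 512 dwords:
	 * order_base_2(512) - 1 = 8 and 2^(8+1) = 512 dwords */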
3279
3280	mqd->cp_hqd_eop_control = tmp;
3281
3282	/* enable doorbell? */
3283	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3284
3285	if (ring->use_doorbell) {
3286		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3287				    DOORBELL_OFFSET, ring->doorbell_index);
3288		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3289				    DOORBELL_EN, 1);
3290		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3291				    DOORBELL_SOURCE, 0);
3292		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3293				    DOORBELL_HIT, 0);
3294	} else {
3295		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3296					 DOORBELL_EN, 0);
3297	}
3298
3299	mqd->cp_hqd_pq_doorbell_control = tmp;
3300
3301	/* disable the queue if it's active */
3302	ring->wptr = 0;
3303	mqd->cp_hqd_dequeue_request = 0;
3304	mqd->cp_hqd_pq_rptr = 0;
3305	mqd->cp_hqd_pq_wptr_lo = 0;
3306	mqd->cp_hqd_pq_wptr_hi = 0;
3307
3308	/* set the pointer to the MQD */
3309	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3310	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3311
3312	/* set MQD vmid to 0 */
3313	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3314	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3315	mqd->cp_mqd_control = tmp;
3316
3317	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3318	hqd_gpu_addr = ring->gpu_addr >> 8;
3319	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3320	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3321
3322	/* set up the HQD, this is similar to CP_RB0_CNTL */
3323	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3324	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3325			    (order_base_2(ring->ring_size / 4) - 1));
3326	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3327			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
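	/* both fields are log2 encodings of a size in dwords, minus one:
	 * ring->ring_size and AMDGPU_GPU_PAGE_SIZE are in bytes, hence the
	 * divide by 4 */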
3328#ifdef __BIG_ENDIAN
3329	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3330#endif
3331	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3332	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3333	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3334	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3335	mqd->cp_hqd_pq_control = tmp;
3336
3337	/* set the wb address whether it's enabled or not */
3338	wb_gpu_addr = ring->rptr_gpu_addr;
3339	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3340	mqd->cp_hqd_pq_rptr_report_addr_hi =
3341		upper_32_bits(wb_gpu_addr) & 0xffff;
3342
3343	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3344	wb_gpu_addr = ring->wptr_gpu_addr;
3345	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3346	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3347
3348	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3349	ring->wptr = 0;
3350	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3351
3352	/* set the vmid for the queue */
3353	mqd->cp_hqd_vmid = 0;
3354
3355	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3356	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3357	mqd->cp_hqd_persistent_state = tmp;
3358
3359	/* set MIN_IB_AVAIL_SIZE */
3360	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3361	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3362	mqd->cp_hqd_ib_control = tmp;
3363
3364	/* set static priority for a queue/ring */
3365	gfx_v9_0_mqd_set_priority(ring, mqd);
3366	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3367
3368	/* the map_queues packet does not need to activate the queue,
3369	 * so only the KIQ needs this field set.
3370	 */
3371	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3372		mqd->cp_hqd_active = 1;
3373
3374	return 0;
3375}
3376
3377static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3378{
3379	struct amdgpu_device *adev = ring->adev;
3380	struct v9_mqd *mqd = ring->mqd_ptr;
3381	int j;
3382
3383	/* disable wptr polling */
3384	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3385
3386	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3387	       mqd->cp_hqd_eop_base_addr_lo);
3388	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3389	       mqd->cp_hqd_eop_base_addr_hi);
3390
3391	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3392	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3393	       mqd->cp_hqd_eop_control);
3394
3395	/* enable doorbell? */
3396	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3397	       mqd->cp_hqd_pq_doorbell_control);
3398
3399	/* disable the queue if it's active */
3400	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3401		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3402		for (j = 0; j < adev->usec_timeout; j++) {
3403			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3404				break;
3405			udelay(1);
3406		}
3407		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3408		       mqd->cp_hqd_dequeue_request);
3409		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3410		       mqd->cp_hqd_pq_rptr);
3411		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3412		       mqd->cp_hqd_pq_wptr_lo);
3413		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3414		       mqd->cp_hqd_pq_wptr_hi);
3415	}
3416
3417	/* set the pointer to the MQD */
3418	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3419	       mqd->cp_mqd_base_addr_lo);
3420	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3421	       mqd->cp_mqd_base_addr_hi);
3422
3423	/* set MQD vmid to 0 */
3424	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3425	       mqd->cp_mqd_control);
3426
3427	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3428	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3429	       mqd->cp_hqd_pq_base_lo);
3430	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3431	       mqd->cp_hqd_pq_base_hi);
3432
3433	/* set up the HQD, this is similar to CP_RB0_CNTL */
3434	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3435	       mqd->cp_hqd_pq_control);
3436
3437	/* set the wb address whether it's enabled or not */
3438	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3439				mqd->cp_hqd_pq_rptr_report_addr_lo);
3440	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3441				mqd->cp_hqd_pq_rptr_report_addr_hi);
3442
3443	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3444	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3445	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3446	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3447	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3448
3449	/* enable the doorbell if requested */
3450	if (ring->use_doorbell) {
3451		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3452					(adev->doorbell_index.kiq * 2) << 2);
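		/* doorbell_index values count 64-bit doorbell slots; the *2 and
		 * <<2 scaling (assumed here from the doorbell layout) converts
		 * them to the form the MEC doorbell range registers take */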
3453		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3454		 * does not wake GC up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3455		 * around this issue. This change has to be aligned with the
3456		 * corresponding firmware update.
3457		 */
3458		if (check_if_enlarge_doorbell_range(adev))
3459			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3460					(adev->doorbell.size - 4));
3461		else
3462			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3463					(adev->doorbell_index.userqueue_end * 2) << 2);
3464	}
3465
3466	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3467	       mqd->cp_hqd_pq_doorbell_control);
3468
3469	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3470	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3471	       mqd->cp_hqd_pq_wptr_lo);
3472	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3473	       mqd->cp_hqd_pq_wptr_hi);
3474
3475	/* set the vmid for the queue */
3476	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3477
3478	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3479	       mqd->cp_hqd_persistent_state);
3480
3481	/* activate the queue */
3482	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3483	       mqd->cp_hqd_active);
3484
3485	if (ring->use_doorbell)
3486		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3487
3488	return 0;
3489}
3490
3491static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3492{
3493	struct amdgpu_device *adev = ring->adev;
3494	int j;
3495
3496	/* disable the queue if it's active */
3497	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3498
3499		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3500
3501		for (j = 0; j < adev->usec_timeout; j++) {
3502			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3503				break;
3504			udelay(1);
3505		}
3506
3507		if (j == adev->usec_timeout) {
3508			DRM_DEBUG("KIQ dequeue request failed.\n");
3509
3510			/* Manual disable if dequeue request times out */
3511			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3512		}
3513
3514		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3515		      0);
3516	}
3517
3518	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3519	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3520	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3521	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3522	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3523	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3524	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3525	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3526
3527	return 0;
3528}
3529
3530static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3531{
3532	struct amdgpu_device *adev = ring->adev;
3533	struct v9_mqd *mqd = ring->mqd_ptr;
3534	struct v9_mqd *tmp_mqd;
3535
3536	gfx_v9_0_kiq_setting(ring);
3537
3538	/* The GPU could be in a bad state during probe and the driver may
3539	 * trigger a reset after loading the SMU; in this case the MQD has not
3540	 * been initialized and the driver needs to re-init it.
3541	 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3542	 */
3543	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3544	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3545		/* for GPU_RESET case , reset MQD to a clean status */
3546		if (adev->gfx.kiq[0].mqd_backup)
3547			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3548
3549		/* reset ring buffer */
3550		ring->wptr = 0;
3551		amdgpu_ring_clear_ring(ring);
3552
3553		mutex_lock(&adev->srbm_mutex);
3554		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3555		gfx_v9_0_kiq_init_register(ring);
3556		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3557		mutex_unlock(&adev->srbm_mutex);
3558	} else {
3559		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3560		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3561		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3562		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3563			amdgpu_ring_clear_ring(ring);
3564		mutex_lock(&adev->srbm_mutex);
3565		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3566		gfx_v9_0_mqd_init(ring);
3567		gfx_v9_0_kiq_init_register(ring);
3568		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3569		mutex_unlock(&adev->srbm_mutex);
3570
3571		if (adev->gfx.kiq[0].mqd_backup)
3572			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3573	}
3574
3575	return 0;
3576}
3577
3578static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3579{
3580	struct amdgpu_device *adev = ring->adev;
3581	struct v9_mqd *mqd = ring->mqd_ptr;
3582	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3583	struct v9_mqd *tmp_mqd;
3584
3585	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3586	 * mqd->cp_hqd_pq_control has not been initialized before.
3587	 */
3588	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3589
3590	if (!tmp_mqd->cp_hqd_pq_control ||
3591	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3592		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3593		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3594		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3595		mutex_lock(&adev->srbm_mutex);
3596		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3597		gfx_v9_0_mqd_init(ring);
3598		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3599		mutex_unlock(&adev->srbm_mutex);
3600
3601		if (adev->gfx.mec.mqd_backup[mqd_idx])
3602			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3603	} else {
3604		/* restore MQD to a clean status */
3605		if (adev->gfx.mec.mqd_backup[mqd_idx])
3606			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
 
3607		/* reset ring buffer */
3608		ring->wptr = 0;
3609		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
 
3610		amdgpu_ring_clear_ring(ring);
3611	}
3612
3613	return 0;
3614}
3615
3616static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3617{
3618	struct amdgpu_ring *ring;
3619	int r;
 
 
3620
3621	ring = &adev->gfx.kiq[0].ring;
3622
3623	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3624	if (unlikely(r != 0))
3625		return r;
3626
3627	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3628	if (unlikely(r != 0)) {
3629		amdgpu_bo_unreserve(ring->mqd_obj);
3630		return r;
 
3631	}
3632
3633	gfx_v9_0_kiq_init_queue(ring);
3634	amdgpu_bo_kunmap(ring->mqd_obj);
3635	ring->mqd_ptr = NULL;
3636	amdgpu_bo_unreserve(ring->mqd_obj);
3637	return 0;
3638}
3639
3640static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3641{
3642	struct amdgpu_ring *ring = NULL;
3643	int r = 0, i;
3644
3645	gfx_v9_0_cp_compute_enable(adev, true);
3646
3647	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3648		ring = &adev->gfx.compute_ring[i];
3649
3650		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3651		if (unlikely(r != 0))
3652			goto done;
3653		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3654		if (!r) {
3655			r = gfx_v9_0_kcq_init_queue(ring);
3656			amdgpu_bo_kunmap(ring->mqd_obj);
3657			ring->mqd_ptr = NULL;
3658		}
3659		amdgpu_bo_unreserve(ring->mqd_obj);
3660		if (r)
3661			goto done;
3662	}
3663
3664	r = amdgpu_gfx_enable_kcq(adev, 0);
3665done:
3666	return r;
3667}
3668
3669static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3670{
3671	int r, i;
3672	struct amdgpu_ring *ring;
3673
3674	if (!(adev->flags & AMD_IS_APU))
3675		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3676
3677	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3678		if (adev->gfx.num_gfx_rings) {
3679			/* legacy firmware loading */
3680			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3681			if (r)
3682				return r;
3683		}
3684
3685		r = gfx_v9_0_cp_compute_load_microcode(adev);
3686		if (r)
3687			return r;
3688	}
3689
3690	r = gfx_v9_0_kiq_resume(adev);
3691	if (r)
3692		return r;
3693
3694	if (adev->gfx.num_gfx_rings) {
3695		r = gfx_v9_0_cp_gfx_resume(adev);
3696		if (r)
3697			return r;
 
3698	}
3699
3700	r = gfx_v9_0_kcq_resume(adev);
 
 
3701	if (r)
3702		return r;
3703
3704	if (adev->gfx.num_gfx_rings) {
3705		ring = &adev->gfx.gfx_ring[0];
3706		r = amdgpu_ring_test_helper(ring);
3707		if (r)
3708			return r;
3709	}
3710
3711	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3712		ring = &adev->gfx.compute_ring[i];
3713		amdgpu_ring_test_helper(ring);
3714	}
3715
3716	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3717
3718	return 0;
3719}
3720
3721static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3722{
3723	u32 tmp;
3724
3725	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3726	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3727		return;
3728
3729	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3730	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3731				adev->df.hash_status.hash_64k);
3732	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3733				adev->df.hash_status.hash_2m);
3734	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3735				adev->df.hash_status.hash_1g);
3736	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3737}
3738
3739static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3740{
3741	if (adev->gfx.num_gfx_rings)
3742		gfx_v9_0_cp_gfx_enable(adev, enable);
3743	gfx_v9_0_cp_compute_enable(adev, enable);
3744}
3745
3746static int gfx_v9_0_hw_init(void *handle)
3747{
3748	int r;
3749	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3750
3751	if (!amdgpu_sriov_vf(adev))
3752		gfx_v9_0_init_golden_registers(adev);
3753
3754	gfx_v9_0_constants_init(adev);
3755
3756	gfx_v9_0_init_tcp_config(adev);
3757
3758	r = adev->gfx.rlc.funcs->resume(adev);
3759	if (r)
3760		return r;
3761
3762	r = gfx_v9_0_cp_resume(adev);
3763	if (r)
3764		return r;
3765
3766	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3767		gfx_v9_4_2_set_power_brake_sequence(adev);
 
3768
3769	return r;
3770}
3771
3772static int gfx_v9_0_hw_fini(void *handle)
3773{
3774	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
3775
3776	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3777		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3778	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3779	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3780
3781	/* DF freeze and kcq disable will fail */
3782	if (!amdgpu_ras_intr_triggered())
3783		/* disable the KCQ so the CPC stops touching memory that is no longer valid */
3784		amdgpu_gfx_disable_kcq(adev, 0);
3785
3786	if (amdgpu_sriov_vf(adev)) {
3787		gfx_v9_0_cp_gfx_enable(adev, false);
3788		/* Polling must be disabled for SRIOV once the hw has finished;
3789		 * otherwise the CPC engine may keep fetching a WB address that is
3790		 * already invalid after the sw side has finished, triggering a DMAR
3791		 * read error on the hypervisor side.
3792		 */
3793		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3794		return 0;
3795	}
3796
3797	/* Use the deinitialize sequence from CAIL when unbinding the device
3798	 * from the driver, otherwise the KIQ hangs when binding it back.
3799	 */
3800	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3801		mutex_lock(&adev->srbm_mutex);
3802		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3803				adev->gfx.kiq[0].ring.pipe,
3804				adev->gfx.kiq[0].ring.queue, 0, 0);
3805		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3806		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3807		mutex_unlock(&adev->srbm_mutex);
3808	}
3809
3810	gfx_v9_0_cp_enable(adev, false);
 
3811
3812	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3813	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3814	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
3815		dev_dbg(adev->dev, "Skipping RLC halt\n");
3816		return 0;
3817	}
3818
3819	adev->gfx.rlc.funcs->stop(adev);
3820	return 0;
3821}
3822
3823static int gfx_v9_0_suspend(void *handle)
3824{
3825	return gfx_v9_0_hw_fini(handle);
 
 
 
3826}
3827
3828static int gfx_v9_0_resume(void *handle)
3829{
3830	return gfx_v9_0_hw_init(handle);
3831}
3832
3833static bool gfx_v9_0_is_idle(void *handle)
3834{
3835	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3836
3837	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3838				GRBM_STATUS, GUI_ACTIVE))
3839		return false;
3840	else
3841		return true;
3842}
3843
3844static int gfx_v9_0_wait_for_idle(void *handle)
3845{
3846	unsigned i;
3847	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3848
3849	for (i = 0; i < adev->usec_timeout; i++) {
3850		if (gfx_v9_0_is_idle(handle))
3851			return 0;
3852		udelay(1);
3853	}
3854	return -ETIMEDOUT;
3855}
3856
3857static int gfx_v9_0_soft_reset(void *handle)
3858{
3859	u32 grbm_soft_reset = 0;
3860	u32 tmp;
3861	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3862
3863	/* GRBM_STATUS */
3864	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3865	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3866		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3867		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3868		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3869		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3870		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3871		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3872						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3873		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3874						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3875	}
3876
3877	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3878		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3879						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3880	}
3881
3882	/* GRBM_STATUS2 */
3883	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3884	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3885		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3886						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3887
3888
3889	if (grbm_soft_reset) {
3890		/* stop the rlc */
3891		adev->gfx.rlc.funcs->stop(adev);
3892
3893		if (adev->gfx.num_gfx_rings)
3894			/* Disable GFX parsing/prefetching */
3895			gfx_v9_0_cp_gfx_enable(adev, false);
3896
3897		/* Disable MEC parsing/prefetching */
3898		gfx_v9_0_cp_compute_enable(adev, false);
3899
3900		if (grbm_soft_reset) {
3901			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3902			tmp |= grbm_soft_reset;
3903			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3904			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3905			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3906
3907			udelay(50);
3908
3909			tmp &= ~grbm_soft_reset;
3910			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3911			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3912		}
3913
3914		/* Wait a little for things to settle down */
3915		udelay(50);
3916	}
3917	return 0;
3918}
3919
3920static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
3921{
3922	signed long r, cnt = 0;
3923	unsigned long flags;
3924	uint32_t seq, reg_val_offs = 0;
3925	uint64_t value = 0;
3926	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
3927	struct amdgpu_ring *ring = &kiq->ring;
3928
3929	BUG_ON(!ring->funcs->emit_rreg);
3930
3931	spin_lock_irqsave(&kiq->ring_lock, flags);
3932	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
3933		pr_err("critical bug! too many kiq readers\n");
3934		goto failed_unlock;
3935	}
3936	amdgpu_ring_alloc(ring, 32);
3937	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3938	amdgpu_ring_write(ring, 9 |	/* src: register*/
3939				(5 << 8) |	/* dst: memory */
3940				(1 << 16) |	/* count sel */
3941				(1 << 20));	/* write confirm */
3942	amdgpu_ring_write(ring, 0);
3943	amdgpu_ring_write(ring, 0);
3944	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3945				reg_val_offs * 4));
3946	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3947				reg_val_offs * 4));
3948	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
3949	if (r)
3950		goto failed_undo;
3951
3952	amdgpu_ring_commit(ring);
3953	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3954
3955	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3956
3957	/* Don't keep waiting in the GPU reset case, because that may block
3958	 * the gpu_recover() routine forever: e.g. this virt_kiq_rreg is
3959	 * triggered from TTM, and ttm_bo_lock_delayed_workqueue() will never
3960	 * return if we keep waiting in virt_kiq_rreg, which leaves
3961	 * gpu_recover() hanging there.
3962	 *
3963	 * Also don't keep waiting when in IRQ context.
3964	 */
3965	if (r < 1 && (amdgpu_in_reset(adev)))
3966		goto failed_kiq_read;
3967
3968	might_sleep();
3969	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
3970		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
3971		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
3972	}
3973
3974	if (cnt > MAX_KIQ_REG_TRY)
3975		goto failed_kiq_read;
3976
3977	mb();
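	/* the two consecutive writeback dwords hold the low and high halves
	 * of the 64-bit register value copied by the CP */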
3978	value = (uint64_t)adev->wb.wb[reg_val_offs] |
3979		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
3980	amdgpu_device_wb_free(adev, reg_val_offs);
3981	return value;
3982
3983failed_undo:
3984	amdgpu_ring_undo(ring);
3985failed_unlock:
3986	spin_unlock_irqrestore(&kiq->ring_lock, flags);
3987failed_kiq_read:
3988	if (reg_val_offs)
3989		amdgpu_device_wb_free(adev, reg_val_offs);
3990	pr_err("failed to read gpu clock\n");
3991	return ~0;
3992}
3993
3994static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3995{
3996	uint64_t clock, clock_lo, clock_hi, hi_check;
3997
3998	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3999	case IP_VERSION(9, 3, 0):
4000		preempt_disable();
4001		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4002		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4003		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4004		/* The SMUIO TSC clock runs at 100MHz, so the 32-bit counter wraps
4005		 * (carries over) roughly every 42 seconds.
4006		 */
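		/* UPPER is sampled before and after LOWER; a mismatch means a
		 * carry happened in between, so LOWER is re-read and the second
		 * UPPER value is kept */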
4007		if (hi_check != clock_hi) {
4008			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4009			clock_hi = hi_check;
4010		}
4011		preempt_enable();
4012		clock = clock_lo | (clock_hi << 32ULL);
4013		break;
4014	default:
4015		amdgpu_gfx_off_ctrl(adev, false);
4016		mutex_lock(&adev->gfx.gpu_clock_mutex);
4017		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4018			    IP_VERSION(9, 0, 1) &&
4019		    amdgpu_sriov_runtime(adev)) {
4020			clock = gfx_v9_0_kiq_read_clock(adev);
4021		} else {
4022			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4023			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4024				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4025		}
4026		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4027		amdgpu_gfx_off_ctrl(adev, true);
4028		break;
4029	}
4030	return clock;
4031}
4032
4033static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4034					  uint32_t vmid,
4035					  uint32_t gds_base, uint32_t gds_size,
4036					  uint32_t gws_base, uint32_t gws_size,
4037					  uint32_t oa_base, uint32_t oa_size)
4038{
4039	struct amdgpu_device *adev = ring->adev;
4040
4041	/* GDS Base */
4042	gfx_v9_0_write_data_to_reg(ring, 0, false,
4043				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4044				   gds_base);
4045
4046	/* GDS Size */
4047	gfx_v9_0_write_data_to_reg(ring, 0, false,
4048				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4049				   gds_size);
4050
4051	/* GWS */
4052	gfx_v9_0_write_data_to_reg(ring, 0, false,
4053				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4054				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4055
4056	/* OA */
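	/* builds a mask of oa_size consecutive bits starting at bit oa_base */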
4057	gfx_v9_0_write_data_to_reg(ring, 0, false,
4058				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4059				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4060}
4061
4062static const u32 vgpr_init_compute_shader[] =
4063{
4064	0xb07c0000, 0xbe8000ff,
4065	0x000000f8, 0xbf110800,
4066	0x7e000280, 0x7e020280,
4067	0x7e040280, 0x7e060280,
4068	0x7e080280, 0x7e0a0280,
4069	0x7e0c0280, 0x7e0e0280,
4070	0x80808800, 0xbe803200,
4071	0xbf84fff5, 0xbf9c0000,
4072	0xd28c0001, 0x0001007f,
4073	0xd28d0001, 0x0002027e,
4074	0x10020288, 0xb8810904,
4075	0xb7814000, 0xd1196a01,
4076	0x00000301, 0xbe800087,
4077	0xbefc00c1, 0xd89c4000,
4078	0x00020201, 0xd89cc080,
4079	0x00040401, 0x320202ff,
4080	0x00000800, 0x80808100,
4081	0xbf84fff8, 0x7e020280,
4082	0xbf810000, 0x00000000,
4083};
4084
4085static const u32 sgpr_init_compute_shader[] =
4086{
4087	0xb07c0000, 0xbe8000ff,
4088	0x0000005f, 0xbee50080,
4089	0xbe812c65, 0xbe822c65,
4090	0xbe832c65, 0xbe842c65,
4091	0xbe852c65, 0xb77c0005,
4092	0x80808500, 0xbf84fff8,
4093	0xbe800080, 0xbf810000,
4094};
4095
4096static const u32 vgpr_init_compute_shader_arcturus[] = {
4097	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4098	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4099	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4100	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4101	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4102	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4103	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4104	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4105	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4106	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4107	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4108	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4109	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4110	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4111	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4112	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4113	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4114	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4115	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4116	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4117	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4118	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4119	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4120	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4121	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4122	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4123	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4124	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4125	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4126	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4127	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4128	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4129	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4130	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4131	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4132	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4133	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4134	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4135	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4136	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4137	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4138	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4139	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4140	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4141	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4142	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4143	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4144	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4145	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4146	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4147	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4148	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4149	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4150	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4151	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4152	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4153	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4154	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4155	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4156	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4157	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4158	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4159	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4160	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4161	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4162	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4163	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4164	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4165	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4166	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4167	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4168	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4169	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4170	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4171	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4172	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4173	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4174	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4175	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4176	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4177	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4178	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4179	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4180	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4181	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4182	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4183	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4184	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4185	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4186	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4187	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4188	0xbf84fff8, 0xbf810000,
4189};
4190
4191/* When the register arrays below are changed, please update gpr_reg_size
4192 * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4193 * to cover all gfx9 ASICs */
4194static const struct soc15_reg_entry vgpr_init_regs[] = {
4195   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4196   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4197   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4198   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4199   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4200   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4201   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4202   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4203   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4204   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4205   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4206   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4207   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4208   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4209};
4210
4211static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4212   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4213   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4214   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4215   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4216   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4217   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4218   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4219   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4220   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4221   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4222   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4223   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4224   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4225   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4226};
4227
4228static const struct soc15_reg_entry sgpr1_init_regs[] = {
4229   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4230   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4231   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4232   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4233   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4234   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4235   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4236   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4237   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4238   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4239   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4240   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4241   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4242   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4243};
4244
4245static const struct soc15_reg_entry sgpr2_init_regs[] = {
4246   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4247   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4248   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4249   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4250   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4251   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4252   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4253   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4254   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4255   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4256   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4257   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4258   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4259   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4260};
4261
4262static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4263   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4264   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4265   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4266   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4267   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4268   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4269   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4270   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4271   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4272   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4273   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4274   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4275   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4276   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4277   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4278   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4279   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4280   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4281   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4282   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4283   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4284   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4285   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4286   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4287   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4288   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4289   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4290   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4291   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4292   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4293   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4294   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4295   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4296};
4297
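/*
 * GDS EDC workaround (annotation): with GFX RAS enabled, the function below
 * clears the whole GDS block by emitting a CP DMA_DATA packet on the first
 * compute ring and then busy-waits until the CP has consumed it
 * (wptr == rptr). The intent appears to be to initialize the GDS ECC state
 * so that stale contents do not raise spurious EDC errors.
 */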
4298static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4299{
4300	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4301	int i, r;
4302
4303	/* only support when RAS is enabled */
4304	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4305		return 0;
4306
4307	r = amdgpu_ring_alloc(ring, 7);
4308	if (r) {
4309		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4310			ring->name, r);
4311		return r;
4312	}
4313
4314	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4315	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4316
4317	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4318	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4319				PACKET3_DMA_DATA_DST_SEL(1) |
4320				PACKET3_DMA_DATA_SRC_SEL(2) |
4321				PACKET3_DMA_DATA_ENGINE(0)));
4322	amdgpu_ring_write(ring, 0);
4323	amdgpu_ring_write(ring, 0);
4324	amdgpu_ring_write(ring, 0);
4325	amdgpu_ring_write(ring, 0);
4326	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4327				adev->gds.gds_size);
4328
4329	amdgpu_ring_commit(ring);
4330
4331	for (i = 0; i < adev->usec_timeout; i++) {
4332		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4333			break;
4334		udelay(1);
4335	}
4336
4337	if (i >= adev->usec_timeout)
4338		r = -ETIMEDOUT;
4339
4340	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4341
4342	return r;
4343}
4344
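/*
 * GPR EDC workaround (annotation): the function below hand-builds a single
 * indirect buffer that runs three back-to-back compute dispatches - one with
 * the VGPR init shader and two with the SGPR init shader, each SGPR pass
 * enabling a different byte of the per-SE CU mask - so that every VGPR and
 * SGPR gets written once and its ECC state is initialized. Each dispatch is
 * made of SET_SH_REG packets for the register state, the shader start
 * address, a DISPATCH_DIRECT and a CS partial flush event.
 */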
4345static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4346{
4347	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4348	struct amdgpu_ib ib;
4349	struct dma_fence *f = NULL;
4350	int r, i;
4351	unsigned total_size, vgpr_offset, sgpr_offset;
4352	u64 gpu_addr;
4353
4354	int compute_dim_x = adev->gfx.config.max_shader_engines *
4355						adev->gfx.config.max_cu_per_sh *
4356						adev->gfx.config.max_sh_per_se;
4357	int sgpr_work_group_size = 5;
4358	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4359	int vgpr_init_shader_size;
4360	const u32 *vgpr_init_shader_ptr;
4361	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4362
4363	/* only support when RAS is enabled */
4364	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4365		return 0;
4366
4367	/* bail if the compute ring is not ready */
4368	if (!ring->sched.ready)
4369		return 0;
4370
4371	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4372		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4373		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4374		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4375	} else {
4376		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4377		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4378		vgpr_init_regs_ptr = vgpr_init_regs;
4379	}
4380
4381	total_size =
4382		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4383	total_size +=
4384		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4385	total_size +=
4386		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4387	total_size = ALIGN(total_size, 256);
4388	vgpr_offset = total_size;
4389	total_size += ALIGN(vgpr_init_shader_size, 256);
4390	sgpr_offset = total_size;
4391	total_size += sizeof(sgpr_init_compute_shader);
4392
4393	/* allocate an indirect buffer to put the commands in */
4394	memset(&ib, 0, sizeof(ib));
4395	r = amdgpu_ib_get(adev, NULL, total_size,
4396					AMDGPU_IB_POOL_DIRECT, &ib);
4397	if (r) {
4398		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4399		return r;
4400	}
4401
4402	/* load the compute shaders */
4403	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4404		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4405
4406	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4407		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4408
4409	/* init the ib length to 0 */
4410	ib.length_dw = 0;
4411
4412	/* VGPR */
4413	/* write the register state for the compute dispatch */
4414	for (i = 0; i < gpr_reg_size; i++) {
4415		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4416		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4417								- PACKET3_SET_SH_REG_START;
4418		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4419	}
4420	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4421	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4422	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4423	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4424							- PACKET3_SET_SH_REG_START;
4425	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4426	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4427
4428	/* write dispatch packet */
4429	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4430	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4431	ib.ptr[ib.length_dw++] = 1; /* y */
4432	ib.ptr[ib.length_dw++] = 1; /* z */
4433	ib.ptr[ib.length_dw++] =
4434		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4435
4436	/* write CS partial flush packet */
4437	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4438	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4439
4440	/* SGPR1 */
4441	/* write the register state for the compute dispatch */
4442	for (i = 0; i < gpr_reg_size; i++) {
4443		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4444		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4445								- PACKET3_SET_SH_REG_START;
4446		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4447	}
4448	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4449	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4450	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4451	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4452							- PACKET3_SET_SH_REG_START;
4453	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4454	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4455
4456	/* write dispatch packet */
4457	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4458	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4459	ib.ptr[ib.length_dw++] = 1; /* y */
4460	ib.ptr[ib.length_dw++] = 1; /* z */
4461	ib.ptr[ib.length_dw++] =
4462		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4463
4464	/* write CS partial flush packet */
4465	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4466	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4467
4468	/* SGPR2 */
4469	/* write the register state for the compute dispatch */
4470	for (i = 0; i < gpr_reg_size; i++) {
4471		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4472		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4473								- PACKET3_SET_SH_REG_START;
4474		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4475	}
4476	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4477	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4478	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4479	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4480							- PACKET3_SET_SH_REG_START;
4481	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4482	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4483
4484	/* write dispatch packet */
4485	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4486	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4487	ib.ptr[ib.length_dw++] = 1; /* y */
4488	ib.ptr[ib.length_dw++] = 1; /* z */
4489	ib.ptr[ib.length_dw++] =
4490		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4491
4492	/* write CS partial flush packet */
4493	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4494	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4495
4496	/* schedule the ib on the ring */
4497	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4498	if (r) {
4499		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4500		goto fail;
4501	}
4502
4503	/* wait for the GPU to finish processing the IB */
4504	r = dma_fence_wait(f, false);
4505	if (r) {
4506		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4507		goto fail;
4508	}
4509
4510fail:
4511	amdgpu_ib_free(adev, &ib, NULL);
4512	dma_fence_put(f);
4513
4514	return r;
4515}
4516
4517static int gfx_v9_0_early_init(void *handle)
4518{
4519	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4520
4521	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4522
4523	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4524	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4525		adev->gfx.num_gfx_rings = 0;
4526	else
4527		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4528	adev->gfx.xcc_mask = 1;
4529	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4530					  AMDGPU_MAX_COMPUTE_RINGS);
4531	gfx_v9_0_set_kiq_pm4_funcs(adev);
4532	gfx_v9_0_set_ring_funcs(adev);
4533	gfx_v9_0_set_irq_funcs(adev);
4534	gfx_v9_0_set_gds_init(adev);
4535	gfx_v9_0_set_rlc_funcs(adev);
4536
4537	/* init rlcg reg access ctrl */
4538	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4539
4540	return gfx_v9_0_init_microcode(adev);
4541}
4542
4543static int gfx_v9_0_ecc_late_init(void *handle)
4544{
4545	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4546	int r;
4547
4548	/*
4549	 * Temporary workaround for an issue where the CP firmware fails to
4550	 * update the read pointer while CPDMA is writing the GDS clearing
4551	 * operation during the suspend/resume sequence on several cards.
4552	 * So limit this operation to the cold boot sequence only.
4553	 */
4554	if ((!adev->in_suspend) &&
4555	    (adev->gds.gds_size)) {
4556		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4557		if (r)
4558			return r;
4559	}
4560
4561	/* requires IBs so do in late init after IB pool is initialized */
4562	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4563		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4564	else
4565		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4566
4567	if (r)
4568		return r;
4569
4570	if (adev->gfx.ras &&
4571	    adev->gfx.ras->enable_watchdog_timer)
4572		adev->gfx.ras->enable_watchdog_timer(adev);
4573
4574	return 0;
4575}
4576
4577static int gfx_v9_0_late_init(void *handle)
4578{
4579	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4580	int r;
4581
4582	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4583	if (r)
4584		return r;
4585
4586	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4587	if (r)
4588		return r;
4589
4590	r = gfx_v9_0_ecc_late_init(handle);
4591	if (r)
4592		return r;
4593
4594	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4595		gfx_v9_4_2_debug_trap_config_init(adev,
4596			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4597	else
4598		gfx_v9_0_debug_trap_config_init(adev,
4599			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4600
4601	return 0;
4602}
4603
4604static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4605{
4606	uint32_t rlc_setting;
4607
4608	/* if RLC is not enabled, do nothing */
4609	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4610	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4611		return false;
4612
4613	return true;
4614}
4615
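/*
 * RLC safe mode handshake: gfx_v9_0_set_safe_mode() writes the CMD bit plus
 * a MESSAGE of 1 to mmRLC_SAFE_MODE and polls until the RLC clears the CMD
 * bit, i.e. until the firmware acknowledges that it is safe to touch the
 * CG/PG related registers. gfx_v9_0_unset_safe_mode() only writes the CMD
 * bit (MESSAGE 0) and does not wait.
 */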
4616static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4617{
4618	uint32_t data;
4619	unsigned i;
4620
4621	data = RLC_SAFE_MODE__CMD_MASK;
4622	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4623	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4624
4625	/* wait for RLC_SAFE_MODE */
4626	for (i = 0; i < adev->usec_timeout; i++) {
4627		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4628			break;
4629		udelay(1);
4630	}
4631}
4632
4633static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4634{
4635	uint32_t data;
4636
4637	data = RLC_SAFE_MODE__CMD_MASK;
4638	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4639}
4640
4641static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4642						bool enable)
4643{
4644	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4645
4646	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4647		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4648		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4649			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4650	} else {
4651		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4652		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4653			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4654	}
4655
4656	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4657}
4658
4659static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4660						bool enable)
4661{
4662	/* TODO: double check if we need to perform under safe mode */
4663	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4664
4665	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4666		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4667	else
4668		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4669
4670	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4671		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4672	else
4673		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4674
4675	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4676}
4677
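/*
 * MGCG/MGLS control: when enabling, the function below clears the GRBM and
 * GFXIP MGCG/MGLS override bits in mmRLC_CGTT_MGCG_OVERRIDE and then turns
 * on RLC and CP memory light sleep if the corresponding cg_flags are set;
 * when disabling, it sets the override bits back and clears the light-sleep
 * enables. Everything runs inside RLC safe mode.
 */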
4678static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4679						      bool enable)
4680{
4681	uint32_t data, def;
4682
4683	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4684
4685	/* It is disabled by HW by default */
4686	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4687		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4688		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4689
4690		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4691			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4692
4693		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4694			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4695			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4696
4697		/* only for Vega10 & Raven1 */
4698		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4699
4700		if (def != data)
4701			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4702
4703		/* MGLS is a global flag to control all MGLS in GFX */
4704		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4705			/* 2 - RLC memory Light sleep */
4706			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4707				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4708				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4709				if (def != data)
4710					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4711			}
4712			/* 3 - CP memory Light sleep */
4713			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4714				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4715				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4716				if (def != data)
4717					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4718			}
4719		}
4720	} else {
4721		/* 1 - MGCG_OVERRIDE */
4722		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4723
4724		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4725			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4726
4727		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4728			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4729			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4730			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4731
4732		if (def != data)
4733			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4734
4735		/* 2 - disable MGLS in RLC */
4736		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4737		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4738			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4739			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4740		}
4741
4742		/* 3 - disable MGLS in CP */
4743		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4744		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4745			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4746			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4747		}
4748	}
4749
4750	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4751}
4752
4753static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4754					   bool enable)
4755{
4756	uint32_t data, def;
4757
4758	if (!adev->gfx.num_gfx_rings)
4759		return;
4760
4761	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4762
4763	/* Enable 3D CGCG/CGLS */
4764	if (enable) {
4765		/* write cmd to clear cgcg/cgls override */
4766		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4767		/* unset CGCG override */
4768		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4769		/* update CGCG and CGLS override bits */
4770		if (def != data)
4771			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4772
4773		/* enable 3Dcgcg FSM(0x0000363f) */
4774		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4775
4776		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4777			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4778				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4779		else
4780			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4781
4782		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4783			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4784				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4785		if (def != data)
4786			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4787
4788		/* set IDLE_POLL_COUNT(0x00900100) */
4789		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4790		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4791			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4792		if (def != data)
4793			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4794	} else {
4795		/* Disable CGCG/CGLS */
4796		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4797		/* disable cgcg, cgls should be disabled */
4798		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4799			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4800		/* disable cgcg and cgls in FSM */
4801		if (def != data)
4802			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4803	}
4804
4805	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4806}
4807
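/*
 * Coarse grain clock gating: on enable, clear the CGCG/CGLS override bits,
 * program the CGCG FSM in mmRLC_CGCG_CGLS_CTRL (a longer idle threshold is
 * used on GC 9.4.1) and set the CP_RB_WPTR_POLL_CNTL idle poll count; on
 * disable, only the CGCG/CGLS enable bits in the FSM are cleared.
 */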
4808static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4809						      bool enable)
4810{
4811	uint32_t def, data;
4812
4813	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4814
4815	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4816		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4817		/* unset CGCG override */
4818		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4819		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4820			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4821		else
4822			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4823		/* update CGCG and CGLS override bits */
4824		if (def != data)
4825			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4826
4827		/* enable cgcg FSM(0x0000363F) */
4828		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4829
4830		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
4831			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4832				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4833		else
4834			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4835				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4836		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4837			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4838				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4839		if (def != data)
4840			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4841
4842		/* set IDLE_POLL_COUNT(0x00900100) */
4843		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4844		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4845			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4846		if (def != data)
4847			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4848	} else {
4849		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4850		/* reset CGCG/CGLS bits */
4851		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4852		/* disable cgcg and cgls in FSM */
4853		if (def != data)
4854			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4855	}
4856
4857	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4858}
4859
4860static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4861					    bool enable)
4862{
4863	if (enable) {
4864		/* CGCG/CGLS should be enabled after MGCG/MGLS
4865		 * ===  MGCG + MGLS ===
4866		 */
4867		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4868		/* ===  CGCG /CGLS for GFX 3D Only === */
4869		gfx_v9_0_update_3d_clock_gating(adev, enable);
4870		/* ===  CGCG + CGLS === */
4871		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4872	} else {
4873		/* CGCG/CGLS should be disabled before MGCG/MGLS
4874		 * ===  CGCG + CGLS ===
4875		 */
4876		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4877		/* ===  CGCG /CGLS for GFX 3D Only === */
4878		gfx_v9_0_update_3d_clock_gating(adev, enable);
4879		/* ===  MGCG + MGLS === */
4880		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4881	}
4882	return 0;
4883}
4884
4885static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
4886					      unsigned int vmid)
4887{
4888	u32 reg, data;
4889
4890	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
4891	if (amdgpu_sriov_is_pp_one_vf(adev))
4892		data = RREG32_NO_KIQ(reg);
4893	else
4894		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
4895
4896	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
4897	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
4898
4899	if (amdgpu_sriov_is_pp_one_vf(adev))
4900		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
4901	else
4902		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
4903}
4904
4905static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid)
4906{
4907	amdgpu_gfx_off_ctrl(adev, false);
4908
4909	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
4910
4911	amdgpu_gfx_off_ctrl(adev, true);
4912}
4913
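/*
 * gfx_v9_0_check_rlcg_range() below does a linear scan over a table of
 * register offsets; gfx_v9_0_is_rlcg_access_range() applies it to
 * rlcg_access_gc_9_0 to decide whether an access must be routed through the
 * RLCG interface (used for indirect register access, e.g. under SR-IOV).
 */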
4914static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
4915					uint32_t offset,
4916					struct soc15_reg_rlcg *entries, int arr_size)
4917{
4918	int i;
4919	uint32_t reg;
4920
4921	if (!entries)
4922		return false;
4923
4924	for (i = 0; i < arr_size; i++) {
4925		const struct soc15_reg_rlcg *entry;
4926
4927		entry = &entries[i];
4928		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
4929		if (offset == reg)
4930			return true;
4931	}
4932
4933	return false;
4934}
4935
4936static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
4937{
4938	return gfx_v9_0_check_rlcg_range(adev, offset,
4939					(void *)rlcg_access_gc_9_0,
4940					ARRAY_SIZE(rlcg_access_gc_9_0));
4941}
4942
4943static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4944	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4945	.set_safe_mode = gfx_v9_0_set_safe_mode,
4946	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4947	.init = gfx_v9_0_rlc_init,
4948	.get_csb_size = gfx_v9_0_get_csb_size,
4949	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4950	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4951	.resume = gfx_v9_0_rlc_resume,
4952	.stop = gfx_v9_0_rlc_stop,
4953	.reset = gfx_v9_0_rlc_reset,
4954	.start = gfx_v9_0_rlc_start,
4955	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
4956	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
4957};
4958
4959static int gfx_v9_0_set_powergating_state(void *handle,
4960					  enum amd_powergating_state state)
4961{
4962	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4963	bool enable = (state == AMD_PG_STATE_GATE);
4964
4965	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4966	case IP_VERSION(9, 2, 2):
4967	case IP_VERSION(9, 1, 0):
4968	case IP_VERSION(9, 3, 0):
4969		if (!enable)
4970			amdgpu_gfx_off_ctrl(adev, false);
4971
4972		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4973			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4974			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4975		} else {
4976			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4977			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4978		}
4979
4980		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4981			gfx_v9_0_enable_cp_power_gating(adev, true);
4982		else
4983			gfx_v9_0_enable_cp_power_gating(adev, false);
4984
4985		/* update gfx cgpg state */
4986		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4987
4988		/* update mgcg state */
4989		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4990
4991		if (enable)
4992			amdgpu_gfx_off_ctrl(adev, true);
4993		break;
4994	case IP_VERSION(9, 2, 1):
4995		amdgpu_gfx_off_ctrl(adev, enable);
4996		break;
4997	default:
4998		break;
4999	}
5000
5001	return 0;
5002}
5003
5004static int gfx_v9_0_set_clockgating_state(void *handle,
5005					  enum amd_clockgating_state state)
5006{
5007	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5008
5009	if (amdgpu_sriov_vf(adev))
5010		return 0;
5011
5012	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5013	case IP_VERSION(9, 0, 1):
5014	case IP_VERSION(9, 2, 1):
5015	case IP_VERSION(9, 4, 0):
5016	case IP_VERSION(9, 2, 2):
5017	case IP_VERSION(9, 1, 0):
5018	case IP_VERSION(9, 4, 1):
5019	case IP_VERSION(9, 3, 0):
5020	case IP_VERSION(9, 4, 2):
5021		gfx_v9_0_update_gfx_clock_gating(adev,
5022						 state == AMD_CG_STATE_GATE);
5023		break;
5024	default:
5025		break;
5026	}
5027	return 0;
5028}
5029
5030static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5031{
5032	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5033	int data;
5034
5035	if (amdgpu_sriov_vf(adev))
5036		*flags = 0;
5037
5038	/* AMD_CG_SUPPORT_GFX_MGCG */
5039	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5040	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5041		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5042
5043	/* AMD_CG_SUPPORT_GFX_CGCG */
5044	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5045	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5046		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5047
5048	/* AMD_CG_SUPPORT_GFX_CGLS */
5049	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5050		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5051
5052	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5053	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5054	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5055		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5056
5057	/* AMD_CG_SUPPORT_GFX_CP_LS */
5058	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5059	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5060		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5061
5062	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5063		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5064		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5065		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5066			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5067
5068		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5069		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5070			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5071	}
5072}
5073
5074static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5075{
5076	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
5077}
5078
5079static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5080{
5081	struct amdgpu_device *adev = ring->adev;
5082	u64 wptr;
5083
5084	/* XXX check if swapping is necessary on BE */
5085	if (ring->use_doorbell) {
5086		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5087	} else {
5088		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5089		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5090	}
5091
5092	return wptr;
5093}
5094
5095static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5096{
5097	struct amdgpu_device *adev = ring->adev;
5098
5099	if (ring->use_doorbell) {
5100		/* XXX check if swapping is necessary on BE */
5101		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5102		WDOORBELL64(ring->doorbell_index, ring->wptr);
5103	} else {
5104		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5105		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5106	}
5107}
5108
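/*
 * HDP flush from the CP: pick the ref/mask bit that corresponds to this
 * ring's CP engine (cp0 for gfx, cp2/cp6 shifted by the pipe for MEC1/MEC2
 * compute rings), then emit a WAIT_REG_MEM that writes the NBIO HDP flush
 * request register and waits for the matching bit in the flush done
 * register.
 */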
5109static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5110{
5111	struct amdgpu_device *adev = ring->adev;
5112	u32 ref_and_mask, reg_mem_engine;
5113	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5114
5115	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5116		switch (ring->me) {
5117		case 1:
5118			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5119			break;
5120		case 2:
5121			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5122			break;
5123		default:
5124			return;
5125		}
5126		reg_mem_engine = 0;
5127	} else {
5128		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5129		reg_mem_engine = 1; /* pfp */
5130	}
5131
5132	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5133			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5134			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5135			      ref_and_mask, ref_and_mask, 0x20);
5136}
5137
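/*
 * gfx IB emission: constant-engine IBs use INDIRECT_BUFFER_CONST, all others
 * the regular INDIRECT_BUFFER packet. For preemptible IBs the PRE_ENB and
 * PRE_RESUME control bits are set and, for DE IBs with a VMID, the de-meta
 * data is emitted first so state can be restored after a mid-command-buffer
 * preemption.
 */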
5138static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5139					struct amdgpu_job *job,
5140					struct amdgpu_ib *ib,
5141					uint32_t flags)
5142{
5143	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5144	u32 header, control = 0;
5145
5146	if (ib->flags & AMDGPU_IB_FLAG_CE)
5147		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5148	else
5149		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5150
5151	control |= ib->length_dw | (vmid << 24);
5152
5153	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5154		control |= INDIRECT_BUFFER_PRE_ENB(1);
5155
5156		if (flags & AMDGPU_IB_PREEMPTED)
5157			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5158
5159		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5160			gfx_v9_0_ring_emit_de_meta(ring,
5161						   (!amdgpu_sriov_vf(ring->adev) &&
5162						   flags & AMDGPU_IB_PREEMPTED) ?
5163						   true : false,
5164						   job->gds_size > 0 && job->gds_base != 0);
5165	}
5166
5167	amdgpu_ring_write(ring, header);
5168	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5169	amdgpu_ring_write(ring,
5170#ifdef __BIG_ENDIAN
5171		(2 << 0) |
5172#endif
5173		lower_32_bits(ib->gpu_addr));
5174	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5175	amdgpu_ring_ib_on_emit_cntl(ring);
5176	amdgpu_ring_write(ring, control);
5177}
5178
5179static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5180				     unsigned offset)
5181{
5182	u32 control = ring->ring[offset];
5183
5184	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5185	ring->ring[offset] = control;
5186}
5187
5188static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5189					unsigned offset)
5190{
5191	struct amdgpu_device *adev = ring->adev;
5192	void *ce_payload_cpu_addr;
5193	uint64_t payload_offset, payload_size;
5194
5195	payload_size = sizeof(struct v9_ce_ib_state);
5196
5197	if (ring->is_mes_queue) {
5198		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5199					  gfx[0].gfx_meta_data) +
5200			offsetof(struct v9_gfx_meta_data, ce_payload);
5201		ce_payload_cpu_addr =
5202			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5203	} else {
5204		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5205		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5206	}
5207
5208	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5209		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5210	} else {
5211		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5212		       (ring->buf_mask + 1 - offset) << 2);
5213		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5214		memcpy((void *)&ring->ring[0],
5215		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5216		       payload_size);
5217	}
5218}
5219
5220static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5221					unsigned offset)
5222{
5223	struct amdgpu_device *adev = ring->adev;
5224	void *de_payload_cpu_addr;
5225	uint64_t payload_offset, payload_size;
5226
5227	payload_size = sizeof(struct v9_de_ib_state);
5228
5229	if (ring->is_mes_queue) {
5230		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5231					  gfx[0].gfx_meta_data) +
5232			offsetof(struct v9_gfx_meta_data, de_payload);
5233		de_payload_cpu_addr =
5234			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5235	} else {
5236		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5237		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5238	}
5239
5240	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5241		IB_COMPLETION_STATUS_PREEMPTED;
5242
5243	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5244		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5245	} else {
5246		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5247		       (ring->buf_mask + 1 - offset) << 2);
5248		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5249		memcpy((void *)&ring->ring[0],
5250		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5251		       payload_size);
5252	}
5253}
5254
5255static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5256					  struct amdgpu_job *job,
5257					  struct amdgpu_ib *ib,
5258					  uint32_t flags)
5259{
5260	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5261	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5262
5263	/* Currently, there is a high possibility to get wave ID mismatch
5264	 * between ME and GDS, leading to a hw deadlock, because ME generates
5265	 * different wave IDs than the GDS expects. This situation happens
5266	 * randomly when at least 5 compute pipes use GDS ordered append.
5267	 * The wave IDs generated by ME are also wrong after suspend/resume.
5268	 * Those are probably bugs somewhere else in the kernel driver.
5269	 *
5270	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5271	 * GDS to 0 for this ring (me/pipe).
5272	 */
5273	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5274		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5275		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5276		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5277	}
5278
5279	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5280	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5281	amdgpu_ring_write(ring,
5282#ifdef __BIG_ENDIAN
5283				(2 << 0) |
5284#endif
5285				lower_32_bits(ib->gpu_addr));
5286	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5287	amdgpu_ring_write(ring, control);
5288}
5289
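/*
 * Fence emission uses a RELEASE_MEM packet: it flushes/invalidates the TC
 * caches (or only requests a TC write-back for TC_WB_ONLY fences), writes
 * the 32 or 64 bit sequence number to "addr" and can raise an interrupt
 * when AMDGPU_FENCE_FLAG_INT is set.
 */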
5290static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5291				     u64 seq, unsigned flags)
5292{
5293	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5294	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5295	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5296	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5297	uint32_t dw2 = 0;
5298
5299	/* RELEASE_MEM - flush caches, send int */
5300	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5301
5302	if (writeback) {
5303		dw2 = EOP_TC_NC_ACTION_EN;
5304	} else {
5305		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5306				EOP_TC_MD_ACTION_EN;
5307	}
5308	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5309				EVENT_INDEX(5);
5310	if (exec)
5311		dw2 |= EOP_EXEC;
5312
5313	amdgpu_ring_write(ring, dw2);
5314	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5315
5316	/*
5317	 * the address should be Qword aligned if 64bit write, Dword
5318	 * aligned if only send 32bit data low (discard data high)
5319	 */
5320	if (write64bit)
5321		BUG_ON(addr & 0x7);
5322	else
5323		BUG_ON(addr & 0x3);
5324	amdgpu_ring_write(ring, lower_32_bits(addr));
5325	amdgpu_ring_write(ring, upper_32_bits(addr));
5326	amdgpu_ring_write(ring, lower_32_bits(seq));
5327	amdgpu_ring_write(ring, upper_32_bits(seq));
5328	amdgpu_ring_write(ring, 0);
5329}
5330
5331static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5332{
5333	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5334	uint32_t seq = ring->fence_drv.sync_seq;
5335	uint64_t addr = ring->fence_drv.gpu_addr;
5336
5337	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5338			      lower_32_bits(addr), upper_32_bits(addr),
5339			      seq, 0xffffffff, 4);
5340}
5341
5342static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5343					unsigned vmid, uint64_t pd_addr)
5344{
5345	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5346
5347	/* compute doesn't have PFP */
5348	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5349		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5350		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5351		amdgpu_ring_write(ring, 0x0);
5352	}
5353}
5354
5355static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5356{
5357	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5358}
5359
5360static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5361{
5362	u64 wptr;
5363
5364	/* XXX check if swapping is necessary on BE */
5365	if (ring->use_doorbell)
5366		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5367	else
5368		BUG();
5369	return wptr;
5370}
5371
5372static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5373{
5374	struct amdgpu_device *adev = ring->adev;
5375
5376	/* XXX check if swapping is necessary on BE */
5377	if (ring->use_doorbell) {
5378		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5379		WDOORBELL64(ring->doorbell_index, ring->wptr);
5380	} else {
5381		BUG(); /* only DOORBELL method supported on gfx9 now */
5382	}
5383}
5384
5385static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5386					 u64 seq, unsigned int flags)
5387{
5388	struct amdgpu_device *adev = ring->adev;
5389
5390	/* we only allocate 32 bits for each fence seq writeback address */
5391	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5392
5393	/* write fence seq to the "addr" */
5394	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5395	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5396				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5397	amdgpu_ring_write(ring, lower_32_bits(addr));
5398	amdgpu_ring_write(ring, upper_32_bits(addr));
5399	amdgpu_ring_write(ring, lower_32_bits(seq));
5400
5401	if (flags & AMDGPU_FENCE_FLAG_INT) {
5402		/* set register to trigger INT */
5403		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5404		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5405					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5406		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5407		amdgpu_ring_write(ring, 0);
5408		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5409	}
5410}
5411
5412static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5413{
5414	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5415	amdgpu_ring_write(ring, 0);
5416}
5417
5418static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5419{
5420	struct amdgpu_device *adev = ring->adev;
5421	struct v9_ce_ib_state ce_payload = {0};
5422	uint64_t offset, ce_payload_gpu_addr;
5423	void *ce_payload_cpu_addr;
5424	int cnt;
5425
5426	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5427
5428	if (ring->is_mes_queue) {
5429		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5430				  gfx[0].gfx_meta_data) +
5431			offsetof(struct v9_gfx_meta_data, ce_payload);
5432		ce_payload_gpu_addr =
5433			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5434		ce_payload_cpu_addr =
5435			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5436	} else {
5437		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5438		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5439		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5440	}
5441
5442	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5443	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5444				 WRITE_DATA_DST_SEL(8) |
5445				 WR_CONFIRM) |
5446				 WRITE_DATA_CACHE_POLICY(0));
5447	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5448	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5449
5450	amdgpu_ring_ib_on_emit_ce(ring);
5451
5452	if (resume)
5453		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5454					   sizeof(ce_payload) >> 2);
5455	else
5456		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5457					   sizeof(ce_payload) >> 2);
5458}
5459
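/*
 * Mid-command-buffer preemption: clear the ring's preempt cond-exec value,
 * emit a trailing fence with the EXEC flag, then ask the KIQ to preempt this
 * queue via kiq_unmap_queues(PREEMPT_QUEUES_NO_UNMAP). Completion is
 * detected by polling the trailing fence; afterwards CP_VMID_PREEMPT is
 * cleared and the cond-exec value is restored.
 */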
5460static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5461{
5462	int i, r = 0;
5463	struct amdgpu_device *adev = ring->adev;
5464	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5465	struct amdgpu_ring *kiq_ring = &kiq->ring;
5466	unsigned long flags;
5467
5468	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5469		return -EINVAL;
5470
5471	spin_lock_irqsave(&kiq->ring_lock, flags);
5472
5473	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5474		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5475		return -ENOMEM;
5476	}
5477
5478	/* assert preemption condition */
5479	amdgpu_ring_set_preempt_cond_exec(ring, false);
5480
5481	ring->trail_seq += 1;
5482	amdgpu_ring_alloc(ring, 13);
5483	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5484				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5485
5486	/* assert IB preemption, emit the trailing fence */
5487	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5488				   ring->trail_fence_gpu_addr,
5489				   ring->trail_seq);
5490
5491	amdgpu_ring_commit(kiq_ring);
5492	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5493
5494	/* poll the trailing fence */
5495	for (i = 0; i < adev->usec_timeout; i++) {
5496		if (ring->trail_seq ==
5497			le32_to_cpu(*ring->trail_fence_cpu_addr))
5498			break;
5499		udelay(1);
5500	}
5501
5502	if (i >= adev->usec_timeout) {
5503		r = -EINVAL;
5504		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5505	}
5506
5507	/* reset the CP_VMID_PREEMPT after trailing fence */
5508	amdgpu_ring_emit_wreg(ring,
5509			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5510			      0x0);
5511	amdgpu_ring_commit(ring);
5512
5513	/* deassert preemption condition */
5514	amdgpu_ring_set_preempt_cond_exec(ring, true);
5515	return r;
5516}
5517
5518static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5519{
5520	struct amdgpu_device *adev = ring->adev;
5521	struct v9_de_ib_state de_payload = {0};
5522	uint64_t offset, gds_addr, de_payload_gpu_addr;
5523	void *de_payload_cpu_addr;
5524	int cnt;
5525
5526	if (ring->is_mes_queue) {
5527		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5528				  gfx[0].gfx_meta_data) +
5529			offsetof(struct v9_gfx_meta_data, de_payload);
5530		de_payload_gpu_addr =
5531			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5532		de_payload_cpu_addr =
5533			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5534
5535		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5536				  gfx[0].gds_backup) +
5537			offsetof(struct v9_gfx_meta_data, de_payload);
5538		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5539	} else {
5540		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5541		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5542		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5543
5544		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5545				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5546				 PAGE_SIZE);
5547	}
5548
5549	if (usegds) {
5550		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5551		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5552	}
5553
5554	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5555	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5556	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5557				 WRITE_DATA_DST_SEL(8) |
5558				 WR_CONFIRM) |
5559				 WRITE_DATA_CACHE_POLICY(0));
5560	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5561	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5562
5563	amdgpu_ring_ib_on_emit_de(ring);
5564	if (resume)
5565		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5566					   sizeof(de_payload) >> 2);
5567	else
5568		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5569					   sizeof(de_payload) >> 2);
5570}
5571
5572static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5573				   bool secure)
5574{
5575	uint32_t v = secure ? FRAME_TMZ : 0;
5576
5577	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5578	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5579}
5580
5581static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5582{
5583	uint32_t dw2 = 0;
5584
5585	gfx_v9_0_ring_emit_ce_meta(ring,
5586				   (!amdgpu_sriov_vf(ring->adev) &&
5587				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5588
5589	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5590	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5591		/* set load_global_config & load_global_uconfig */
5592		dw2 |= 0x8001;
5593		/* set load_cs_sh_regs */
5594		dw2 |= 0x01000000;
5595		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5596		dw2 |= 0x10002;
5597
5598		/* set load_ce_ram if preamble presented */
5599		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5600			dw2 |= 0x10000000;
5601	} else {
5602		/* still load_ce_ram if this is the first time the preamble is
5603		 * presented, even though no context switch happens.
5604		 */
5605		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5606			dw2 |= 0x10000000;
5607	}
5608
5609	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5610	amdgpu_ring_write(ring, dw2);
5611	amdgpu_ring_write(ring, 0);
5612}
5613
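/*
 * Conditional execution: emit_init_cond_exec() writes a COND_EXEC packet
 * whose dword-count field is filled with the placeholder 0x55aa55aa and
 * returns the ring offset of that dword; patch_cond_exec() later replaces
 * the placeholder with the real number of dwords to skip (handling ring
 * buffer wrap-around) when *cond_exe_gpu_addr reads back as zero.
 */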
5614static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5615{
5616	unsigned ret;
5617	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5618	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5619	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5620	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5621	ret = ring->wptr & ring->buf_mask;
5622	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5623	return ret;
5624}
5625
5626static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5627{
5628	unsigned cur;
5629	BUG_ON(offset > ring->buf_mask);
5630	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5631
5632	cur = (ring->wptr - 1) & ring->buf_mask;
5633	if (likely(cur > offset))
5634		ring->ring[offset] = cur - offset;
5635	else
5636		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5637}
5638
5639static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5640				    uint32_t reg_val_offs)
5641{
5642	struct amdgpu_device *adev = ring->adev;
5643
5644	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5645	amdgpu_ring_write(ring, 0 |	/* src: register*/
5646				(5 << 8) |	/* dst: memory */
5647				(1 << 20));	/* write confirm */
5648	amdgpu_ring_write(ring, reg);
5649	amdgpu_ring_write(ring, 0);
5650	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5651				reg_val_offs * 4));
5652	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5653				reg_val_offs * 4));
5654}
5655
5656static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5657				    uint32_t val)
5658{
5659	uint32_t cmd = 0;
5660
5661	switch (ring->funcs->type) {
5662	case AMDGPU_RING_TYPE_GFX:
5663		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5664		break;
5665	case AMDGPU_RING_TYPE_KIQ:
5666		cmd = (1 << 16); /* no inc addr */
5667		break;
5668	default:
5669		cmd = WR_CONFIRM;
5670		break;
5671	}
5672	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5673	amdgpu_ring_write(ring, cmd);
5674	amdgpu_ring_write(ring, reg);
5675	amdgpu_ring_write(ring, 0);
5676	amdgpu_ring_write(ring, val);
5677}
5678
5679static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5680					uint32_t val, uint32_t mask)
5681{
5682	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5683}
5684
5685static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5686						  uint32_t reg0, uint32_t reg1,
5687						  uint32_t ref, uint32_t mask)
5688{
5689	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5690	struct amdgpu_device *adev = ring->adev;
5691	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5692		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5693
5694	if (fw_version_ok)
5695		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5696				      ref, mask, 0x20);
5697	else
5698		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5699							   ref, mask);
5700}
5701
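/*
 * Soft recovery hook: build an SQ_CMD value with CHECK_VMID set and the
 * guilty VMID, and write it to mmSQ_CMD so the SQ acts only on the waves of
 * that VMID (presumably killing the hung waves; the exact CMD/MODE encoding,
 * 0x03/0x01, comes from the hardware spec).
 */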
5702static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5703{
5704	struct amdgpu_device *adev = ring->adev;
5705	uint32_t value = 0;
5706
5707	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5708	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5709	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5710	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5711	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5712}
5713
5714static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5715						 enum amdgpu_interrupt_state state)
5716{
5717	switch (state) {
5718	case AMDGPU_IRQ_STATE_DISABLE:
5719	case AMDGPU_IRQ_STATE_ENABLE:
5720		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5721			       TIME_STAMP_INT_ENABLE,
5722			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5723		break;
5724	default:
5725		break;
5726	}
5727}
5728
5729static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5730						     int me, int pipe,
5731						     enum amdgpu_interrupt_state state)
5732{
5733	u32 mec_int_cntl, mec_int_cntl_reg;
5734
5735	/*
5736	 * amdgpu controls only the first MEC. That's why this function only
5737	 * handles the setting of interrupts for this specific MEC. All other
5738	 * pipes' interrupts are set by amdkfd.
5739	 */
5740
5741	if (me == 1) {
5742		switch (pipe) {
5743		case 0:
5744			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5745			break;
5746		case 1:
5747			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5748			break;
5749		case 2:
5750			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5751			break;
5752		case 3:
5753			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5754			break;
5755		default:
5756			DRM_DEBUG("invalid pipe %d\n", pipe);
5757			return;
5758		}
5759	} else {
5760		DRM_DEBUG("invalid me %d\n", me);
5761		return;
5762	}
5763
5764	switch (state) {
5765	case AMDGPU_IRQ_STATE_DISABLE:
5766		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5767		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5768					     TIME_STAMP_INT_ENABLE, 0);
5769		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5770		break;
5771	case AMDGPU_IRQ_STATE_ENABLE:
5772		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5773		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5774					     TIME_STAMP_INT_ENABLE, 1);
5775		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5776		break;
5777	default:
5778		break;
5779	}
5780}
5781
5782static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5783					     struct amdgpu_irq_src *source,
5784					     unsigned type,
5785					     enum amdgpu_interrupt_state state)
5786{
5787	switch (state) {
5788	case AMDGPU_IRQ_STATE_DISABLE:
5789	case AMDGPU_IRQ_STATE_ENABLE:
5790		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5791			       PRIV_REG_INT_ENABLE,
5792			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5793		break;
5794	default:
5795		break;
5796	}
5797
5798	return 0;
5799}
5800
5801static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5802					      struct amdgpu_irq_src *source,
5803					      unsigned type,
5804					      enum amdgpu_interrupt_state state)
5805{
5806	switch (state) {
5807	case AMDGPU_IRQ_STATE_DISABLE:
5808	case AMDGPU_IRQ_STATE_ENABLE:
5809		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5810			       PRIV_INSTR_INT_ENABLE,
5811			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5812		break;
5813	default:
5814		break;
5815	}
5816
5817	return 0;
5818}
5819
5820#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5821	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5822			CP_ECC_ERROR_INT_ENABLE, 1)
5823
5824#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5825	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5826			CP_ECC_ERROR_INT_ENABLE, 0)
5827
5828static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5829					      struct amdgpu_irq_src *source,
5830					      unsigned type,
5831					      enum amdgpu_interrupt_state state)
5832{
5833	switch (state) {
5834	case AMDGPU_IRQ_STATE_DISABLE:
5835		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5836				CP_ECC_ERROR_INT_ENABLE, 0);
5837		DISABLE_ECC_ON_ME_PIPE(1, 0);
5838		DISABLE_ECC_ON_ME_PIPE(1, 1);
5839		DISABLE_ECC_ON_ME_PIPE(1, 2);
5840		DISABLE_ECC_ON_ME_PIPE(1, 3);
5841		break;
5842
5843	case AMDGPU_IRQ_STATE_ENABLE:
5844		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5845				CP_ECC_ERROR_INT_ENABLE, 1);
5846		ENABLE_ECC_ON_ME_PIPE(1, 0);
5847		ENABLE_ECC_ON_ME_PIPE(1, 1);
5848		ENABLE_ECC_ON_ME_PIPE(1, 2);
5849		ENABLE_ECC_ON_ME_PIPE(1, 3);
5850		break;
5851	default:
5852		break;
5853	}
5854
5855	return 0;
5856}
5857
5858
5859static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5860					    struct amdgpu_irq_src *src,
5861					    unsigned type,
5862					    enum amdgpu_interrupt_state state)
5863{
5864	switch (type) {
5865	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5866		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5867		break;
5868	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5869		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5870		break;
5871	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5872		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5873		break;
5874	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5875		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5876		break;
5877	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5878		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5879		break;
5880	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5881		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5882		break;
5883	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5884		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5885		break;
5886	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5887		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5888		break;
5889	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5890		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5891		break;
5892	default:
5893		break;
5894	}
5895	return 0;
5896}
5897
5898static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5899			    struct amdgpu_irq_src *source,
5900			    struct amdgpu_iv_entry *entry)
5901{
5902	int i;
5903	u8 me_id, pipe_id, queue_id;
5904	struct amdgpu_ring *ring;
5905
5906	DRM_DEBUG("IH: CP EOP\n");
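	/* ring_id encodes the interrupt source: ME in bits [3:2],
	 * pipe in bits [1:0] and queue in bits [6:4].
	 */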
5907	me_id = (entry->ring_id & 0x0c) >> 2;
5908	pipe_id = (entry->ring_id & 0x03) >> 0;
5909	queue_id = (entry->ring_id & 0x70) >> 4;
5910
5911	switch (me_id) {
5912	case 0:
5913		if (adev->gfx.num_gfx_rings &&
5914		    !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
5915			/* Fence signals are handled on the software rings */
5916			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
5917				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
5918		}
5919		break;
5920	case 1:
5921	case 2:
5922		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5923			ring = &adev->gfx.compute_ring[i];
5924			/* Per-queue interrupt is supported for MEC starting from VI.
5925			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5926			 */
5927			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5928				amdgpu_fence_process(ring);
5929		}
5930		break;
5931	}
5932	return 0;
5933}
5934
5935static void gfx_v9_0_fault(struct amdgpu_device *adev,
5936			   struct amdgpu_iv_entry *entry)
5937{
5938	u8 me_id, pipe_id, queue_id;
5939	struct amdgpu_ring *ring;
5940	int i;
5941
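	/* Decode which ME/pipe/queue raised the fault and report it to the
	 * scheduler of the matching ring.
	 */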
5942	me_id = (entry->ring_id & 0x0c) >> 2;
5943	pipe_id = (entry->ring_id & 0x03) >> 0;
5944	queue_id = (entry->ring_id & 0x70) >> 4;
5945
5946	switch (me_id) {
5947	case 0:
5948		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5949		break;
5950	case 1:
5951	case 2:
5952		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5953			ring = &adev->gfx.compute_ring[i];
5954			if (ring->me == me_id && ring->pipe == pipe_id &&
5955			    ring->queue == queue_id)
5956				drm_sched_fault(&ring->sched);
5957		}
5958		break;
5959	}
5960}
5961
5962static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5963				 struct amdgpu_irq_src *source,
5964				 struct amdgpu_iv_entry *entry)
5965{
5966	DRM_ERROR("Illegal register access in command stream\n");
5967	gfx_v9_0_fault(adev, entry);
5968	return 0;
5969}
5970
5971static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5972				  struct amdgpu_irq_src *source,
5973				  struct amdgpu_iv_entry *entry)
5974{
5975	DRM_ERROR("Illegal instruction in command stream\n");
5976	gfx_v9_0_fault(adev, entry);
5977	return 0;
5978}
5979
5980
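/* EDC counter description table: each entry names a GFX sub-block, the
 * register that holds its error counters, and the SEC (correctable) and
 * DED (uncorrectable) count fields within that register.
 */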
5981static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5982	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5983	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5984	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5985	},
5986	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5987	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5988	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5989	},
5990	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5991	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5992	  0, 0
5993	},
5994	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5995	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
5996	  0, 0
5997	},
5998	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
5999	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6000	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6001	},
6002	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6003	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6004	  0, 0
6005	},
6006	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6007	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6008	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6009	},
6010	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6011	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6012	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6013	},
6014	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6015	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6016	  0, 0
6017	},
6018	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6019	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6020	  0, 0
6021	},
6022	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6023	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6024	  0, 0
6025	},
6026	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6027	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6028	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6029	},
6030	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6031	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6032	  0, 0
6033	},
6034	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6035	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6036	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6037	},
6038	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6039	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6040	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6041	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6042	},
6043	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6044	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6045	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6046	  0, 0
6047	},
6048	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6049	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6050	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6051	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6052	},
6053	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6054	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6055	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6056	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6057	},
6058	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6059	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6060	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6061	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6062	},
6063	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6064	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6065	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6066	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6067	},
6068	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6069	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6070	  0, 0
6071	},
6072	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6073	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6074	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6075	},
6076	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6077	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6078	  0, 0
6079	},
6080	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6081	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6082	  0, 0
6083	},
6084	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6085	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6086	  0, 0
6087	},
6088	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6089	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6090	  0, 0
6091	},
6092	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6093	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6094	  0, 0
6095	},
6096	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6097	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6098	  0, 0
6099	},
6100	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6101	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6102	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6103	},
6104	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6105	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6106	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6107	},
6108	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6109	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6110	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6111	},
6112	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6113	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6114	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6115	},
6116	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6117	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6118	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6119	},
6120	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6121	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6122	  0, 0
6123	},
6124	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6125	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6126	  0, 0
6127	},
6128	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6129	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6130	  0, 0
6131	},
6132	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6133	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6134	  0, 0
6135	},
6136	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6137	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6138	  0, 0
6139	},
6140	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6141	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6142	  0, 0
6143	},
6144	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6145	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6146	  0, 0
6147	},
6148	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6149	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6150	  0, 0
6151	},
6152	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6153	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6154	  0, 0
6155	},
6156	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6157	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6158	  0, 0
6159	},
6160	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6161	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6162	  0, 0
6163	},
6164	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6165	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6166	  0, 0
6167	},
6168	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6169	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6170	  0, 0
6171	},
6172	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6173	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6174	  0, 0
6175	},
6176	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6177	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6178	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6179	},
6180	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6181	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6182	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6183	},
6184	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6185	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6186	  0, 0
6187	},
6188	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6189	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6190	  0, 0
6191	},
6192	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6193	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6194	  0, 0
6195	},
6196	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6197	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6198	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6199	},
6200	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6201	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6202	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6203	},
6204	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6205	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6206	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6207	},
6208	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6209	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6210	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6211	},
6212	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6213	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6214	  0, 0
6215	},
6216	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6217	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6218	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6219	},
6220	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6221	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6222	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6223	},
6224	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6225	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6226	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6227	},
6228	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6229	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6230	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6231	},
6232	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6233	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6234	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6235	},
6236	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6237	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6238	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6239	},
6240	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6241	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6242	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6243	},
6244	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6245	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6246	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6247	},
6248	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6249	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6250	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6251	},
6252	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6253	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6254	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6255	},
6256	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6257	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6258	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6259	},
6260	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6261	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6262	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6263	},
6264	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6265	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6266	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6267	},
6268	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6269	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6270	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6271	},
6272	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6273	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6274	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6275	},
6276	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6277	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6278	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6279	},
6280	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6281	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6282	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6283	},
6284	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6285	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6286	  0, 0
6287	},
6288	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6289	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6290	  0, 0
6291	},
6292	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6293	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6294	  0, 0
6295	},
6296	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6297	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6298	  0, 0
6299	},
6300	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6301	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6302	  0, 0
6303	},
6304	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6305	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6306	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6307	},
6308	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6309	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6310	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6311	},
6312	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6313	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6314	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6315	},
6316	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6317	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6318	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6319	},
6320	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6321	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6322	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6323	},
6324	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6325	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6326	  0, 0
6327	},
6328	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6329	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6330	  0, 0
6331	},
6332	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6333	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6334	  0, 0
6335	},
6336	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6337	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6338	  0, 0
6339	},
6340	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6341	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6342	  0, 0
6343	},
6344	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6345	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6346	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6347	},
6348	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6349	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6350	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6351	},
6352	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6353	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6354	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6355	},
6356	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6357	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6358	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6359	},
6360	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6361	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6362	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6363	},
6364	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6365	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6366	  0, 0
6367	},
6368	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6369	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6370	  0, 0
6371	},
6372	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6373	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6374	  0, 0
6375	},
6376	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6377	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6378	  0, 0
6379	},
6380	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6381	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6382	  0, 0
6383	},
6384	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6385	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6386	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6387	},
6388	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6389	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6390	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6391	},
6392	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6393	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6394	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6395	},
6396	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6397	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6398	  0, 0
6399	},
6400	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6401	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6402	  0, 0
6403	},
6404	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6405	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6406	  0, 0
6407	},
6408	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6409	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6410	  0, 0
6411	},
6412	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6413	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6414	  0, 0
6415	},
6416	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6417	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6418	  0, 0
6419	}
6420};
6421
6422static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6423				     void *inject_if, uint32_t instance_mask)
6424{
6425	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6426	int ret;
6427	struct ta_ras_trigger_error_input block_info = { 0 };
6428
6429	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6430		return -EINVAL;
6431
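	/* Make sure the requested sub-block exists and that both the
	 * hardware and the driver support injecting this error type.
	 */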
6432	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6433		return -EINVAL;
6434
6435	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6436		return -EPERM;
6437
6438	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6439	      info->head.type)) {
6440		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6441			ras_gfx_subblocks[info->head.sub_block_index].name,
6442			info->head.type);
6443		return -EPERM;
6444	}
6445
6446	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6447	      info->head.type)) {
6448		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6449			ras_gfx_subblocks[info->head.sub_block_index].name,
6450			info->head.type);
6451		return -EPERM;
6452	}
6453
6454	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6455	block_info.sub_block_index =
6456		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6457	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6458	block_info.address = info->address;
6459	block_info.value = info->value;
6460
6461	mutex_lock(&adev->grbm_idx_mutex);
6462	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6463	mutex_unlock(&adev->grbm_idx_mutex);
6464
6465	return ret;
6466}
6467
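/* Names of the VML2, VML2 walker and ATC L2 memory instances below,
 * indexed by the value written to the corresponding *_ECC/EDC_INDEX
 * register when querying or resetting their error counters.
 */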
6468static const char * const vml2_mems[] = {
6469	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6470	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6471	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6472	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6473	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6474	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6475	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6476	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6477	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6478	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6479	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6480	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6481	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6482	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6483	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6484	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6485};
6486
6487static const char * const vml2_walker_mems[] = {
6488	"UTC_VML2_CACHE_PDE0_MEM0",
6489	"UTC_VML2_CACHE_PDE0_MEM1",
6490	"UTC_VML2_CACHE_PDE1_MEM0",
6491	"UTC_VML2_CACHE_PDE1_MEM1",
6492	"UTC_VML2_CACHE_PDE2_MEM0",
6493	"UTC_VML2_CACHE_PDE2_MEM1",
6494	"UTC_VML2_RDIF_LOG_FIFO",
6495};
6496
6497static const char * const atc_l2_cache_2m_mems[] = {
6498	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6499	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6500	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6501	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6502};
6503
6504static const char *atc_l2_cache_4k_mems[] = {
6505	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6506	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6507	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6508	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6509	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6510	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6511	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6512	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6513	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6514	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6515	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6516	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6517	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6518	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6519	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6520	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6521	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6522	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6523	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6524	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6525	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6526	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6527	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6528	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6529	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6530	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6531	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6532	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6533	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6534	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6535	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6536	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6537};
6538
6539static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6540					 struct ras_err_data *err_data)
6541{
6542	uint32_t i, data;
6543	uint32_t sec_count, ded_count;
6544
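	/* Select all instances (index 255) and clear each EDC counter
	 * before sampling the per-instance counts below.
	 */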
6545	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6546	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6547	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6548	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6549	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6550	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6551	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6552	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6553
6554	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6555		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6556		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6557
6558		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6559		if (sec_count) {
6560			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6561				"SEC %d\n", i, vml2_mems[i], sec_count);
6562			err_data->ce_count += sec_count;
6563		}
6564
6565		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6566		if (ded_count) {
6567			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6568				"DED %d\n", i, vml2_mems[i], ded_count);
6569			err_data->ue_count += ded_count;
6570		}
6571	}
6572
6573	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6574		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6575		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6576
6577		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6578						SEC_COUNT);
6579		if (sec_count) {
6580			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6581				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6582			err_data->ce_count += sec_count;
6583		}
6584
6585		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6586						DED_COUNT);
6587		if (ded_count) {
6588			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6589				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6590			err_data->ue_count += ded_count;
6591		}
6592	}
6593
6594	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6595		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6596		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6597
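		/* the SEC count sits in bits [14:13] of the 2M EDC count register */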
6598		sec_count = (data & 0x00006000L) >> 0xd;
6599		if (sec_count) {
6600			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6601				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6602				sec_count);
6603			err_data->ce_count += sec_count;
6604		}
6605	}
6606
6607	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6608		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6609		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6610
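		/* SEC count in bits [14:13], DED count in bits [16:15] */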
6611		sec_count = (data & 0x00006000L) >> 0xd;
6612		if (sec_count) {
6613			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6614				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6615				sec_count);
6616			err_data->ce_count += sec_count;
6617		}
6618
6619		ded_count = (data & 0x00018000L) >> 0xf;
6620		if (ded_count) {
6621			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6622				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6623				ded_count);
6624			err_data->ue_count += ded_count;
6625		}
6626	}
6627
6628	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6629	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6630	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6631	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6632
6633	return 0;
6634}
6635
6636static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6637	const struct soc15_reg_entry *reg,
6638	uint32_t se_id, uint32_t inst_id, uint32_t value,
6639	uint32_t *sec_count, uint32_t *ded_count)
6640{
6641	uint32_t i;
6642	uint32_t sec_cnt, ded_cnt;
6643
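	/* Accumulate the SEC/DED counts of every field that belongs to
	 * this counter register.
	 */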
6644	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6645		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6646			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6647			gfx_v9_0_ras_fields[i].inst != reg->inst)
6648			continue;
6649
6650		sec_cnt = (value &
6651				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6652				gfx_v9_0_ras_fields[i].sec_count_shift;
6653		if (sec_cnt) {
6654			dev_info(adev->dev, "GFX SubBlock %s, "
6655				"Instance[%d][%d], SEC %d\n",
6656				gfx_v9_0_ras_fields[i].name,
6657				se_id, inst_id,
6658				sec_cnt);
6659			*sec_count += sec_cnt;
6660		}
6661
6662		ded_cnt = (value &
6663				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6664				gfx_v9_0_ras_fields[i].ded_count_shift;
6665		if (ded_cnt) {
6666			dev_info(adev->dev, "GFX SubBlock %s, "
6667				"Instance[%d][%d], DED %d\n",
6668				gfx_v9_0_ras_fields[i].name,
6669				se_id, inst_id,
6670				ded_cnt);
6671			*ded_count += ded_cnt;
6672		}
6673	}
6674
6675	return 0;
6676}
6677
6678static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6679{
6680	int i, j, k;
6681
6682	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6683		return;
6684
6685	/* read back registers to clear the counters */
6686	mutex_lock(&adev->grbm_idx_mutex);
6687	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6688		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6689			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6690				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6691				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6692			}
6693		}
6694	}
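	/* restore SE/SH/instance broadcast mode */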
6695	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6696	mutex_unlock(&adev->grbm_idx_mutex);
6697
6698	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6699	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6700	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6701	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6702	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6703	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6704	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6705	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6706
6707	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6708		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6709		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6710	}
6711
6712	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6713		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6714		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6715	}
6716
6717	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6718		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6719		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6720	}
6721
6722	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6723		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6724		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6725	}
6726
6727	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6728	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6729	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6730	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6731}
6732
6733static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6734					  void *ras_error_status)
6735{
6736	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6737	uint32_t sec_count = 0, ded_count = 0;
6738	uint32_t i, j, k;
6739	uint32_t reg_value;
6740
6741	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6742		return;
6743
6744	err_data->ue_count = 0;
6745	err_data->ce_count = 0;
6746
6747	mutex_lock(&adev->grbm_idx_mutex);
6748
6749	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6750		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6751			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6752				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
6753				reg_value =
6754					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6755				if (reg_value)
6756					gfx_v9_0_ras_error_count(adev,
6757						&gfx_v9_0_edc_counter_regs[i],
6758						j, k, reg_value,
6759						&sec_count, &ded_count);
6760			}
6761		}
6762	}
6763
6764	err_data->ce_count += sec_count;
6765	err_data->ue_count += ded_count;
6766
6767	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6768	mutex_unlock(&adev->grbm_idx_mutex);
6769
6770	gfx_v9_0_query_utc_edc_status(adev, err_data);
6771}
6772
6773static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6774{
6775	const unsigned int cp_coher_cntl =
6776			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6777			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6778			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6779			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6780			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6781
6782	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6783	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6784	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6785	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6786	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6787	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6788	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6789	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6790}
6791
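/* Throttle (or restore) wave launches on one compute (CS) pipe by writing
 * its SPI_WCL_PIPE_PERCENT_CS register through the ring.
 */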
6792static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6793					uint32_t pipe, bool enable)
6794{
6795	struct amdgpu_device *adev = ring->adev;
6796	uint32_t val;
6797	uint32_t wcl_cs_reg;
6798
6799	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6800	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6801
6802	switch (pipe) {
6803	case 0:
6804		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6805		break;
6806	case 1:
6807		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6808		break;
6809	case 2:
6810		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6811		break;
6812	case 3:
6813		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6814		break;
6815	default:
6816		DRM_DEBUG("invalid pipe %d\n", pipe);
6817		return;
6818	}
6819
6820	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6821}
6822
6823static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6824{
6825	struct amdgpu_device *adev = ring->adev;
6826	uint32_t val;
6827	int i;
6828
6829	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6830	 * limit the number of gfx waves. Writing 0x1f (5 bits set) ensures
6831	 * gfx only gets around 25% of the GPU resources.
6832	 */
6834	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6835	amdgpu_ring_emit_wreg(ring,
6836			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6837			      val);
6838
6839	/* Restrict waves for normal/low priority compute queues as well
6840	 * to get the best QoS for high priority compute jobs.
6841	 *
6842	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6843	 */
6844	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6845		if (i != ring->pipe)
6846			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6847
6848	}
6849}
6850
6851static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6852	.name = "gfx_v9_0",
6853	.early_init = gfx_v9_0_early_init,
6854	.late_init = gfx_v9_0_late_init,
6855	.sw_init = gfx_v9_0_sw_init,
6856	.sw_fini = gfx_v9_0_sw_fini,
6857	.hw_init = gfx_v9_0_hw_init,
6858	.hw_fini = gfx_v9_0_hw_fini,
6859	.suspend = gfx_v9_0_suspend,
6860	.resume = gfx_v9_0_resume,
6861	.is_idle = gfx_v9_0_is_idle,
6862	.wait_for_idle = gfx_v9_0_wait_for_idle,
6863	.soft_reset = gfx_v9_0_soft_reset,
6864	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6865	.set_powergating_state = gfx_v9_0_set_powergating_state,
6866	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6867};
6868
6869static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6870	.type = AMDGPU_RING_TYPE_GFX,
6871	.align_mask = 0xff,
6872	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6873	.support_64bit_ptrs = true,
6874	.secure_submission_supported = true,
6875	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6876	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6877	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6878	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6879		5 +  /* COND_EXEC */
6880		7 +  /* PIPELINE_SYNC */
6881		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6882		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6883		2 + /* VM_FLUSH */
6884		8 +  /* FENCE for VM_FLUSH */
6885		20 + /* GDS switch */
6886		4 + /* double SWITCH_BUFFER,
6887		     * the first COND_EXEC jumps to the place just
6888		     * prior to this double SWITCH_BUFFER */
6889		5 + /* COND_EXEC */
6890		7 +	 /*	HDP_flush */
6891		4 +	 /*	VGT_flush */
6892		14 + /*	CE_META */
6893		31 + /*	DE_META */
6894		3 + /* CNTX_CTRL */
6895		5 + /* HDP_INVL */
6896		8 + 8 + /* FENCE x2 */
6897		2 + /* SWITCH_BUFFER */
6898		7, /* gfx_v9_0_emit_mem_sync */
6899	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6900	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6901	.emit_fence = gfx_v9_0_ring_emit_fence,
6902	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6903	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6904	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6905	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6906	.test_ring = gfx_v9_0_ring_test_ring,
6907	.insert_nop = amdgpu_ring_insert_nop,
6908	.pad_ib = amdgpu_ring_generic_pad_ib,
6909	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6910	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6911	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6912	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6913	.preempt_ib = gfx_v9_0_ring_preempt_ib,
6914	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6915	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6916	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6917	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6918	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6919	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6920};
6921
6922static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
6923	.type = AMDGPU_RING_TYPE_GFX,
6924	.align_mask = 0xff,
6925	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6926	.support_64bit_ptrs = true,
6927	.secure_submission_supported = true,
6928	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
6929	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
6930	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
6931	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6932		5 +  /* COND_EXEC */
6933		7 +  /* PIPELINE_SYNC */
6934		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6935		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6936		2 + /* VM_FLUSH */
6937		8 +  /* FENCE for VM_FLUSH */
6938		20 + /* GDS switch */
6939		4 + /* double SWITCH_BUFFER,
6940		     * the first COND_EXEC jumps to the place just
6941		     * prior to this double SWITCH_BUFFER
6942		     */
6943		5 + /* COND_EXEC */
6944		7 +	 /*	HDP_flush */
6945		4 +	 /*	VGT_flush */
6946		14 + /*	CE_META */
6947		31 + /*	DE_META */
6948		3 + /* CNTX_CTRL */
6949		5 + /* HDP_INVL */
6950		8 + 8 + /* FENCE x2 */
6951		2 + /* SWITCH_BUFFER */
6952		7, /* gfx_v9_0_emit_mem_sync */
6953	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6954	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6955	.emit_fence = gfx_v9_0_ring_emit_fence,
6956	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6957	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6958	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6959	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6960	.test_ring = gfx_v9_0_ring_test_ring,
6961	.test_ib = gfx_v9_0_ring_test_ib,
6962	.insert_nop = amdgpu_sw_ring_insert_nop,
6963	.pad_ib = amdgpu_ring_generic_pad_ib,
6964	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6965	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6966	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6967	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6968	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6969	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6970	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6971	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6972	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6973	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
6974	.patch_cntl = gfx_v9_0_ring_patch_cntl,
6975	.patch_de = gfx_v9_0_ring_patch_de_meta,
6976	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
6977};
6978
6979static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6980	.type = AMDGPU_RING_TYPE_COMPUTE,
6981	.align_mask = 0xff,
6982	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6983	.support_64bit_ptrs = true,
6984	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6985	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6986	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6987	.emit_frame_size =
6988		20 + /* gfx_v9_0_ring_emit_gds_switch */
6989		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6990		5 + /* hdp invalidate */
6991		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6992		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6993		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6994		2 + /* gfx_v9_0_ring_emit_vm_flush */
6995		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6996		7 + /* gfx_v9_0_emit_mem_sync */
6997		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6998		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6999	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7000	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7001	.emit_fence = gfx_v9_0_ring_emit_fence,
7002	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7003	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7004	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7005	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7006	.test_ring = gfx_v9_0_ring_test_ring,
7007	.test_ib = gfx_v9_0_ring_test_ib,
7008	.insert_nop = amdgpu_ring_insert_nop,
7009	.pad_ib = amdgpu_ring_generic_pad_ib,
7010	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7011	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7012	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7013	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7014	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7015};
7016
7017static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7018	.type = AMDGPU_RING_TYPE_KIQ,
7019	.align_mask = 0xff,
7020	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7021	.support_64bit_ptrs = true,
7022	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7023	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7024	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7025	.emit_frame_size =
7026		20 + /* gfx_v9_0_ring_emit_gds_switch */
7027		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7028		5 + /* hdp invalidate */
7029		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7030		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7031		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7032		2 + /* gfx_v9_0_ring_emit_vm_flush */
7033		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7034	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7035	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7036	.test_ring = gfx_v9_0_ring_test_ring,
7037	.insert_nop = amdgpu_ring_insert_nop,
7038	.pad_ib = amdgpu_ring_generic_pad_ib,
7039	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7040	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7041	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7042	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7043};
7044
7045static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7046{
7047	int i;
7048
7049	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7050
7051	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7052		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7053
7054	if (adev->gfx.num_gfx_rings) {
7055		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7056			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7057	}
7058
7059	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7060		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7061}
7062
7063static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7064	.set = gfx_v9_0_set_eop_interrupt_state,
7065	.process = gfx_v9_0_eop_irq,
7066};
7067
7068static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7069	.set = gfx_v9_0_set_priv_reg_fault_state,
7070	.process = gfx_v9_0_priv_reg_irq,
7071};
7072
7073static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7074	.set = gfx_v9_0_set_priv_inst_fault_state,
7075	.process = gfx_v9_0_priv_inst_irq,
7076};
7077
7078static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7079	.set = gfx_v9_0_set_cp_ecc_error_state,
7080	.process = amdgpu_gfx_cp_ecc_error_irq,
7081};
7082
7083
7084static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7085{
7086	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7087	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7088
7089	adev->gfx.priv_reg_irq.num_types = 1;
7090	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7091
7092	adev->gfx.priv_inst_irq.num_types = 1;
7093	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7094
7095	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7096	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7097}
7098
7099static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7100{
7101	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7102	case IP_VERSION(9, 0, 1):
7103	case IP_VERSION(9, 2, 1):
7104	case IP_VERSION(9, 4, 0):
7105	case IP_VERSION(9, 2, 2):
7106	case IP_VERSION(9, 1, 0):
7107	case IP_VERSION(9, 4, 1):
7108	case IP_VERSION(9, 3, 0):
7109	case IP_VERSION(9, 4, 2):
7110		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7111		break;
7112	default:
7113		break;
7114	}
7115}
7116
7117static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7118{
7119	/* init asic gds info */
7120	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7121	case IP_VERSION(9, 0, 1):
7122	case IP_VERSION(9, 2, 1):
7123	case IP_VERSION(9, 4, 0):
7124		adev->gds.gds_size = 0x10000;
7125		break;
7126	case IP_VERSION(9, 2, 2):
7127	case IP_VERSION(9, 1, 0):
7128	case IP_VERSION(9, 4, 1):
7129		adev->gds.gds_size = 0x1000;
7130		break;
7131	case IP_VERSION(9, 4, 2):
7132		/* Aldebaran removed all of the GDS internal memory;
7133		 * only GWS opcodes (e.g. barrier, semaphore) are
7134		 * supported in the kernel. */
7135		adev->gds.gds_size = 0;
7136		break;
7137	default:
7138		adev->gds.gds_size = 0x10000;
7139		break;
7140	}
7141
7142	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7143	case IP_VERSION(9, 0, 1):
7144	case IP_VERSION(9, 4, 0):
7145		adev->gds.gds_compute_max_wave_id = 0x7ff;
7146		break;
7147	case IP_VERSION(9, 2, 1):
7148		adev->gds.gds_compute_max_wave_id = 0x27f;
7149		break;
7150	case IP_VERSION(9, 2, 2):
7151	case IP_VERSION(9, 1, 0):
7152		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7153			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7154		else
7155			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7156		break;
7157	case IP_VERSION(9, 4, 1):
7158		adev->gds.gds_compute_max_wave_id = 0xfff;
7159		break;
7160	case IP_VERSION(9, 4, 2):
7161		/* deprecated for Aldebaran, no usage at all */
7162		adev->gds.gds_compute_max_wave_id = 0;
7163		break;
7164	default:
7165		/* this really depends on the chip */
7166		adev->gds.gds_compute_max_wave_id = 0x7ff;
7167		break;
7168	}
7169
7170	adev->gds.gws_size = 64;
7171	adev->gds.oa_size = 16;
7172}
7173
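/* Apply a user-requested CU disable mask to the currently selected SE/SH. */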
7174static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7175						 u32 bitmap)
7176{
7177	u32 data;
7178
7179	if (!bitmap)
7180		return;
7181
7182	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7183	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7184
7185	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7186}
7187
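/* Return the bitmap of CUs that are active in the currently selected SE/SH,
 * i.e. not marked inactive in either CC_GC_SHADER_ARRAY_CONFIG or
 * GC_USER_SHADER_ARRAY_CONFIG.
 */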
7188static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7189{
7190	u32 data, mask;
7191
7192	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7193	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7194
7195	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7196	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7197
7198	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7199
7200	return (~data) & mask;
7201}
7202
7203static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7204				 struct amdgpu_cu_info *cu_info)
7205{
7206	int i, j, k, counter, active_cu_number = 0;
7207	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7208	unsigned disable_masks[4 * 4];
7209
7210	if (!adev || !cu_info)
7211		return -EINVAL;
7212
7213	/*
7214	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7215	 */
7216	if (adev->gfx.config.max_shader_engines *
7217		adev->gfx.config.max_sh_per_se > 16)
7218		return -EINVAL;
7219
7220	amdgpu_gfx_parse_disable_cu(disable_masks,
7221				    adev->gfx.config.max_shader_engines,
7222				    adev->gfx.config.max_sh_per_se);
7223
7224	mutex_lock(&adev->grbm_idx_mutex);
7225	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7226		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7227			mask = 1;
7228			ao_bitmap = 0;
7229			counter = 0;
7230			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7231			gfx_v9_0_set_user_cu_inactive_bitmap(
7232				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7233			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7234
7235			/*
7236			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
7237			 * is a 4x4 array, which is usually suitable for Vega
7238			 * ASICs with a 4*2 SE/SH layout.
7239			 * But for Arcturus the SE/SH layout changed to 8*1.
7240			 * To minimize the impact, we make it compatible with the
7241			 * current bitmap array as below:
7242			 *    SE4,SH0 --> bitmap[0][1]
7243			 *    SE5,SH0 --> bitmap[1][1]
7244			 *    SE6,SH0 --> bitmap[2][1]
7245			 *    SE7,SH0 --> bitmap[3][1]
7246			 */
7247			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7248
7249			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7250				if (bitmap & mask) {
7251					if (counter < adev->gfx.config.max_cu_per_sh)
7252						ao_bitmap |= mask;
7253					counter++;
7254				}
7255				mask <<= 1;
7256			}
7257			active_cu_number += counter;
7258			if (i < 2 && j < 2)
7259				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7260			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7261		}
7262	}
7263	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7264	mutex_unlock(&adev->grbm_idx_mutex);
7265
7266	cu_info->number = active_cu_number;
7267	cu_info->ao_cu_mask = ao_cu_mask;
7268	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7269
7270	return 0;
7271}
7272
7273const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7274{
7275	.type = AMD_IP_BLOCK_TYPE_GFX,
7276	.major = 9,
7277	.minor = 0,
7278	.rev = 0,
7279	.funcs = &gfx_v9_0_ip_funcs,
7280};