[Scrape artifact — navigation text from the hosting site, not part of the source file below.]
Linux Audio
Check our new training course
Loading...
Note: the hosting site reports that this file does not exist in kernel v3.1.
   1/*
   2 * Copyright 2012 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24#include <linux/firmware.h>
  25#include <linux/slab.h>
  26#include <linux/module.h>
  27#include <drm/drmP.h>
  28#include "radeon.h"
  29#include "radeon_asic.h"
  30#include "radeon_audio.h"
  31#include "cikd.h"
  32#include "atom.h"
  33#include "cik_blit_shaders.h"
  34#include "radeon_ucode.h"
  35#include "clearstate_ci.h"
  36
  37#define SH_MEM_CONFIG_GFX_DEFAULT \
  38	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
  39
  40MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
  41MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
  42MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
  43MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
  44MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
  45MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
  46MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
  47MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
  48MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
  49
  50MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
  51MODULE_FIRMWARE("radeon/bonaire_me.bin");
  52MODULE_FIRMWARE("radeon/bonaire_ce.bin");
  53MODULE_FIRMWARE("radeon/bonaire_mec.bin");
  54MODULE_FIRMWARE("radeon/bonaire_mc.bin");
  55MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
  56MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
  57MODULE_FIRMWARE("radeon/bonaire_smc.bin");
  58MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
  59
  60MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
  61MODULE_FIRMWARE("radeon/HAWAII_me.bin");
  62MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
  63MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
  64MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
  65MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
  66MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
  67MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
  68MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
  69
  70MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
  71MODULE_FIRMWARE("radeon/hawaii_me.bin");
  72MODULE_FIRMWARE("radeon/hawaii_ce.bin");
  73MODULE_FIRMWARE("radeon/hawaii_mec.bin");
  74MODULE_FIRMWARE("radeon/hawaii_mc.bin");
  75MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
  76MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
  77MODULE_FIRMWARE("radeon/hawaii_smc.bin");
  78MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
  79
  80MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
  81MODULE_FIRMWARE("radeon/KAVERI_me.bin");
  82MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
  83MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
  84MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
  85MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
  86
  87MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
  88MODULE_FIRMWARE("radeon/kaveri_me.bin");
  89MODULE_FIRMWARE("radeon/kaveri_ce.bin");
  90MODULE_FIRMWARE("radeon/kaveri_mec.bin");
  91MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
  92MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
  93MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
  94
  95MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
  96MODULE_FIRMWARE("radeon/KABINI_me.bin");
  97MODULE_FIRMWARE("radeon/KABINI_ce.bin");
  98MODULE_FIRMWARE("radeon/KABINI_mec.bin");
  99MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
 100MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
 101
 102MODULE_FIRMWARE("radeon/kabini_pfp.bin");
 103MODULE_FIRMWARE("radeon/kabini_me.bin");
 104MODULE_FIRMWARE("radeon/kabini_ce.bin");
 105MODULE_FIRMWARE("radeon/kabini_mec.bin");
 106MODULE_FIRMWARE("radeon/kabini_rlc.bin");
 107MODULE_FIRMWARE("radeon/kabini_sdma.bin");
 108
 109MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
 110MODULE_FIRMWARE("radeon/MULLINS_me.bin");
 111MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
 112MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
 113MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
 114MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
 115
 116MODULE_FIRMWARE("radeon/mullins_pfp.bin");
 117MODULE_FIRMWARE("radeon/mullins_me.bin");
 118MODULE_FIRMWARE("radeon/mullins_ce.bin");
 119MODULE_FIRMWARE("radeon/mullins_mec.bin");
 120MODULE_FIRMWARE("radeon/mullins_rlc.bin");
 121MODULE_FIRMWARE("radeon/mullins_sdma.bin");
 122
 123extern int r600_ih_ring_alloc(struct radeon_device *rdev);
 124extern void r600_ih_ring_fini(struct radeon_device *rdev);
 125extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 126extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
 127extern bool evergreen_is_display_hung(struct radeon_device *rdev);
 128extern void sumo_rlc_fini(struct radeon_device *rdev);
 129extern int sumo_rlc_init(struct radeon_device *rdev);
 130extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 131extern void si_rlc_reset(struct radeon_device *rdev);
 132extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
 133static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 134extern int cik_sdma_resume(struct radeon_device *rdev);
 135extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 136extern void cik_sdma_fini(struct radeon_device *rdev);
 137extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
 138static void cik_rlc_stop(struct radeon_device *rdev);
 139static void cik_pcie_gen3_enable(struct radeon_device *rdev);
 140static void cik_program_aspm(struct radeon_device *rdev);
 141static void cik_init_pg(struct radeon_device *rdev);
 142static void cik_init_cg(struct radeon_device *rdev);
 143static void cik_fini_pg(struct radeon_device *rdev);
 144static void cik_fini_cg(struct radeon_device *rdev);
 145static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
 146					  bool enable);
 147
 148/**
 149 * cik_get_allowed_info_register - fetch the register for the info ioctl
 150 *
 151 * @rdev: radeon_device pointer
 152 * @reg: register offset in bytes
 153 * @val: register value
 154 *
 155 * Returns 0 for success or -EINVAL for an invalid register
 156 *
 157 */
 158int cik_get_allowed_info_register(struct radeon_device *rdev,
 159				  u32 reg, u32 *val)
 160{
 161	switch (reg) {
 162	case GRBM_STATUS:
 163	case GRBM_STATUS2:
 164	case GRBM_STATUS_SE0:
 165	case GRBM_STATUS_SE1:
 166	case GRBM_STATUS_SE2:
 167	case GRBM_STATUS_SE3:
 168	case SRBM_STATUS:
 169	case SRBM_STATUS2:
 170	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
 171	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
 172	case UVD_STATUS:
 173	/* TODO VCE */
 174		*val = RREG32(reg);
 175		return 0;
 176	default:
 177		return -EINVAL;
 178	}
 179}
 180
 181/*
 182 * Indirect registers accessor
 183 */
 184u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
 185{
 186	unsigned long flags;
 187	u32 r;
 188
 189	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 190	WREG32(CIK_DIDT_IND_INDEX, (reg));
 191	r = RREG32(CIK_DIDT_IND_DATA);
 192	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 193	return r;
 194}
 195
 196void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 197{
 198	unsigned long flags;
 199
 200	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 201	WREG32(CIK_DIDT_IND_INDEX, (reg));
 202	WREG32(CIK_DIDT_IND_DATA, (v));
 203	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 204}
 205
 206/* get temperature in millidegrees */
 207int ci_get_temp(struct radeon_device *rdev)
 208{
 209	u32 temp;
 210	int actual_temp = 0;
 211
 212	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
 213		CTF_TEMP_SHIFT;
 214
 215	if (temp & 0x200)
 216		actual_temp = 255;
 217	else
 218		actual_temp = temp & 0x1ff;
 219
 220	actual_temp = actual_temp * 1000;
 221
 222	return actual_temp;
 223}
 224
 225/* get temperature in millidegrees */
 226int kv_get_temp(struct radeon_device *rdev)
 227{
 228	u32 temp;
 229	int actual_temp = 0;
 230
 231	temp = RREG32_SMC(0xC0300E0C);
 232
 233	if (temp)
 234		actual_temp = (temp / 8) - 49;
 235	else
 236		actual_temp = 0;
 237
 238	actual_temp = actual_temp * 1000;
 239
 240	return actual_temp;
 241}
 242
 243/*
 244 * Indirect registers accessor
 245 */
 246u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
 247{
 248	unsigned long flags;
 249	u32 r;
 250
 251	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 252	WREG32(PCIE_INDEX, reg);
 253	(void)RREG32(PCIE_INDEX);
 254	r = RREG32(PCIE_DATA);
 255	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 256	return r;
 257}
 258
 259void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 260{
 261	unsigned long flags;
 262
 263	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 264	WREG32(PCIE_INDEX, reg);
 265	(void)RREG32(PCIE_INDEX);
 266	WREG32(PCIE_DATA, v);
 267	(void)RREG32(PCIE_DATA);
 268	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 269}
 270
 271static const u32 spectre_rlc_save_restore_register_list[] =
 272{
 273	(0x0e00 << 16) | (0xc12c >> 2),
 274	0x00000000,
 275	(0x0e00 << 16) | (0xc140 >> 2),
 276	0x00000000,
 277	(0x0e00 << 16) | (0xc150 >> 2),
 278	0x00000000,
 279	(0x0e00 << 16) | (0xc15c >> 2),
 280	0x00000000,
 281	(0x0e00 << 16) | (0xc168 >> 2),
 282	0x00000000,
 283	(0x0e00 << 16) | (0xc170 >> 2),
 284	0x00000000,
 285	(0x0e00 << 16) | (0xc178 >> 2),
 286	0x00000000,
 287	(0x0e00 << 16) | (0xc204 >> 2),
 288	0x00000000,
 289	(0x0e00 << 16) | (0xc2b4 >> 2),
 290	0x00000000,
 291	(0x0e00 << 16) | (0xc2b8 >> 2),
 292	0x00000000,
 293	(0x0e00 << 16) | (0xc2bc >> 2),
 294	0x00000000,
 295	(0x0e00 << 16) | (0xc2c0 >> 2),
 296	0x00000000,
 297	(0x0e00 << 16) | (0x8228 >> 2),
 298	0x00000000,
 299	(0x0e00 << 16) | (0x829c >> 2),
 300	0x00000000,
 301	(0x0e00 << 16) | (0x869c >> 2),
 302	0x00000000,
 303	(0x0600 << 16) | (0x98f4 >> 2),
 304	0x00000000,
 305	(0x0e00 << 16) | (0x98f8 >> 2),
 306	0x00000000,
 307	(0x0e00 << 16) | (0x9900 >> 2),
 308	0x00000000,
 309	(0x0e00 << 16) | (0xc260 >> 2),
 310	0x00000000,
 311	(0x0e00 << 16) | (0x90e8 >> 2),
 312	0x00000000,
 313	(0x0e00 << 16) | (0x3c000 >> 2),
 314	0x00000000,
 315	(0x0e00 << 16) | (0x3c00c >> 2),
 316	0x00000000,
 317	(0x0e00 << 16) | (0x8c1c >> 2),
 318	0x00000000,
 319	(0x0e00 << 16) | (0x9700 >> 2),
 320	0x00000000,
 321	(0x0e00 << 16) | (0xcd20 >> 2),
 322	0x00000000,
 323	(0x4e00 << 16) | (0xcd20 >> 2),
 324	0x00000000,
 325	(0x5e00 << 16) | (0xcd20 >> 2),
 326	0x00000000,
 327	(0x6e00 << 16) | (0xcd20 >> 2),
 328	0x00000000,
 329	(0x7e00 << 16) | (0xcd20 >> 2),
 330	0x00000000,
 331	(0x8e00 << 16) | (0xcd20 >> 2),
 332	0x00000000,
 333	(0x9e00 << 16) | (0xcd20 >> 2),
 334	0x00000000,
 335	(0xae00 << 16) | (0xcd20 >> 2),
 336	0x00000000,
 337	(0xbe00 << 16) | (0xcd20 >> 2),
 338	0x00000000,
 339	(0x0e00 << 16) | (0x89bc >> 2),
 340	0x00000000,
 341	(0x0e00 << 16) | (0x8900 >> 2),
 342	0x00000000,
 343	0x3,
 344	(0x0e00 << 16) | (0xc130 >> 2),
 345	0x00000000,
 346	(0x0e00 << 16) | (0xc134 >> 2),
 347	0x00000000,
 348	(0x0e00 << 16) | (0xc1fc >> 2),
 349	0x00000000,
 350	(0x0e00 << 16) | (0xc208 >> 2),
 351	0x00000000,
 352	(0x0e00 << 16) | (0xc264 >> 2),
 353	0x00000000,
 354	(0x0e00 << 16) | (0xc268 >> 2),
 355	0x00000000,
 356	(0x0e00 << 16) | (0xc26c >> 2),
 357	0x00000000,
 358	(0x0e00 << 16) | (0xc270 >> 2),
 359	0x00000000,
 360	(0x0e00 << 16) | (0xc274 >> 2),
 361	0x00000000,
 362	(0x0e00 << 16) | (0xc278 >> 2),
 363	0x00000000,
 364	(0x0e00 << 16) | (0xc27c >> 2),
 365	0x00000000,
 366	(0x0e00 << 16) | (0xc280 >> 2),
 367	0x00000000,
 368	(0x0e00 << 16) | (0xc284 >> 2),
 369	0x00000000,
 370	(0x0e00 << 16) | (0xc288 >> 2),
 371	0x00000000,
 372	(0x0e00 << 16) | (0xc28c >> 2),
 373	0x00000000,
 374	(0x0e00 << 16) | (0xc290 >> 2),
 375	0x00000000,
 376	(0x0e00 << 16) | (0xc294 >> 2),
 377	0x00000000,
 378	(0x0e00 << 16) | (0xc298 >> 2),
 379	0x00000000,
 380	(0x0e00 << 16) | (0xc29c >> 2),
 381	0x00000000,
 382	(0x0e00 << 16) | (0xc2a0 >> 2),
 383	0x00000000,
 384	(0x0e00 << 16) | (0xc2a4 >> 2),
 385	0x00000000,
 386	(0x0e00 << 16) | (0xc2a8 >> 2),
 387	0x00000000,
 388	(0x0e00 << 16) | (0xc2ac  >> 2),
 389	0x00000000,
 390	(0x0e00 << 16) | (0xc2b0 >> 2),
 391	0x00000000,
 392	(0x0e00 << 16) | (0x301d0 >> 2),
 393	0x00000000,
 394	(0x0e00 << 16) | (0x30238 >> 2),
 395	0x00000000,
 396	(0x0e00 << 16) | (0x30250 >> 2),
 397	0x00000000,
 398	(0x0e00 << 16) | (0x30254 >> 2),
 399	0x00000000,
 400	(0x0e00 << 16) | (0x30258 >> 2),
 401	0x00000000,
 402	(0x0e00 << 16) | (0x3025c >> 2),
 403	0x00000000,
 404	(0x4e00 << 16) | (0xc900 >> 2),
 405	0x00000000,
 406	(0x5e00 << 16) | (0xc900 >> 2),
 407	0x00000000,
 408	(0x6e00 << 16) | (0xc900 >> 2),
 409	0x00000000,
 410	(0x7e00 << 16) | (0xc900 >> 2),
 411	0x00000000,
 412	(0x8e00 << 16) | (0xc900 >> 2),
 413	0x00000000,
 414	(0x9e00 << 16) | (0xc900 >> 2),
 415	0x00000000,
 416	(0xae00 << 16) | (0xc900 >> 2),
 417	0x00000000,
 418	(0xbe00 << 16) | (0xc900 >> 2),
 419	0x00000000,
 420	(0x4e00 << 16) | (0xc904 >> 2),
 421	0x00000000,
 422	(0x5e00 << 16) | (0xc904 >> 2),
 423	0x00000000,
 424	(0x6e00 << 16) | (0xc904 >> 2),
 425	0x00000000,
 426	(0x7e00 << 16) | (0xc904 >> 2),
 427	0x00000000,
 428	(0x8e00 << 16) | (0xc904 >> 2),
 429	0x00000000,
 430	(0x9e00 << 16) | (0xc904 >> 2),
 431	0x00000000,
 432	(0xae00 << 16) | (0xc904 >> 2),
 433	0x00000000,
 434	(0xbe00 << 16) | (0xc904 >> 2),
 435	0x00000000,
 436	(0x4e00 << 16) | (0xc908 >> 2),
 437	0x00000000,
 438	(0x5e00 << 16) | (0xc908 >> 2),
 439	0x00000000,
 440	(0x6e00 << 16) | (0xc908 >> 2),
 441	0x00000000,
 442	(0x7e00 << 16) | (0xc908 >> 2),
 443	0x00000000,
 444	(0x8e00 << 16) | (0xc908 >> 2),
 445	0x00000000,
 446	(0x9e00 << 16) | (0xc908 >> 2),
 447	0x00000000,
 448	(0xae00 << 16) | (0xc908 >> 2),
 449	0x00000000,
 450	(0xbe00 << 16) | (0xc908 >> 2),
 451	0x00000000,
 452	(0x4e00 << 16) | (0xc90c >> 2),
 453	0x00000000,
 454	(0x5e00 << 16) | (0xc90c >> 2),
 455	0x00000000,
 456	(0x6e00 << 16) | (0xc90c >> 2),
 457	0x00000000,
 458	(0x7e00 << 16) | (0xc90c >> 2),
 459	0x00000000,
 460	(0x8e00 << 16) | (0xc90c >> 2),
 461	0x00000000,
 462	(0x9e00 << 16) | (0xc90c >> 2),
 463	0x00000000,
 464	(0xae00 << 16) | (0xc90c >> 2),
 465	0x00000000,
 466	(0xbe00 << 16) | (0xc90c >> 2),
 467	0x00000000,
 468	(0x4e00 << 16) | (0xc910 >> 2),
 469	0x00000000,
 470	(0x5e00 << 16) | (0xc910 >> 2),
 471	0x00000000,
 472	(0x6e00 << 16) | (0xc910 >> 2),
 473	0x00000000,
 474	(0x7e00 << 16) | (0xc910 >> 2),
 475	0x00000000,
 476	(0x8e00 << 16) | (0xc910 >> 2),
 477	0x00000000,
 478	(0x9e00 << 16) | (0xc910 >> 2),
 479	0x00000000,
 480	(0xae00 << 16) | (0xc910 >> 2),
 481	0x00000000,
 482	(0xbe00 << 16) | (0xc910 >> 2),
 483	0x00000000,
 484	(0x0e00 << 16) | (0xc99c >> 2),
 485	0x00000000,
 486	(0x0e00 << 16) | (0x9834 >> 2),
 487	0x00000000,
 488	(0x0000 << 16) | (0x30f00 >> 2),
 489	0x00000000,
 490	(0x0001 << 16) | (0x30f00 >> 2),
 491	0x00000000,
 492	(0x0000 << 16) | (0x30f04 >> 2),
 493	0x00000000,
 494	(0x0001 << 16) | (0x30f04 >> 2),
 495	0x00000000,
 496	(0x0000 << 16) | (0x30f08 >> 2),
 497	0x00000000,
 498	(0x0001 << 16) | (0x30f08 >> 2),
 499	0x00000000,
 500	(0x0000 << 16) | (0x30f0c >> 2),
 501	0x00000000,
 502	(0x0001 << 16) | (0x30f0c >> 2),
 503	0x00000000,
 504	(0x0600 << 16) | (0x9b7c >> 2),
 505	0x00000000,
 506	(0x0e00 << 16) | (0x8a14 >> 2),
 507	0x00000000,
 508	(0x0e00 << 16) | (0x8a18 >> 2),
 509	0x00000000,
 510	(0x0600 << 16) | (0x30a00 >> 2),
 511	0x00000000,
 512	(0x0e00 << 16) | (0x8bf0 >> 2),
 513	0x00000000,
 514	(0x0e00 << 16) | (0x8bcc >> 2),
 515	0x00000000,
 516	(0x0e00 << 16) | (0x8b24 >> 2),
 517	0x00000000,
 518	(0x0e00 << 16) | (0x30a04 >> 2),
 519	0x00000000,
 520	(0x0600 << 16) | (0x30a10 >> 2),
 521	0x00000000,
 522	(0x0600 << 16) | (0x30a14 >> 2),
 523	0x00000000,
 524	(0x0600 << 16) | (0x30a18 >> 2),
 525	0x00000000,
 526	(0x0600 << 16) | (0x30a2c >> 2),
 527	0x00000000,
 528	(0x0e00 << 16) | (0xc700 >> 2),
 529	0x00000000,
 530	(0x0e00 << 16) | (0xc704 >> 2),
 531	0x00000000,
 532	(0x0e00 << 16) | (0xc708 >> 2),
 533	0x00000000,
 534	(0x0e00 << 16) | (0xc768 >> 2),
 535	0x00000000,
 536	(0x0400 << 16) | (0xc770 >> 2),
 537	0x00000000,
 538	(0x0400 << 16) | (0xc774 >> 2),
 539	0x00000000,
 540	(0x0400 << 16) | (0xc778 >> 2),
 541	0x00000000,
 542	(0x0400 << 16) | (0xc77c >> 2),
 543	0x00000000,
 544	(0x0400 << 16) | (0xc780 >> 2),
 545	0x00000000,
 546	(0x0400 << 16) | (0xc784 >> 2),
 547	0x00000000,
 548	(0x0400 << 16) | (0xc788 >> 2),
 549	0x00000000,
 550	(0x0400 << 16) | (0xc78c >> 2),
 551	0x00000000,
 552	(0x0400 << 16) | (0xc798 >> 2),
 553	0x00000000,
 554	(0x0400 << 16) | (0xc79c >> 2),
 555	0x00000000,
 556	(0x0400 << 16) | (0xc7a0 >> 2),
 557	0x00000000,
 558	(0x0400 << 16) | (0xc7a4 >> 2),
 559	0x00000000,
 560	(0x0400 << 16) | (0xc7a8 >> 2),
 561	0x00000000,
 562	(0x0400 << 16) | (0xc7ac >> 2),
 563	0x00000000,
 564	(0x0400 << 16) | (0xc7b0 >> 2),
 565	0x00000000,
 566	(0x0400 << 16) | (0xc7b4 >> 2),
 567	0x00000000,
 568	(0x0e00 << 16) | (0x9100 >> 2),
 569	0x00000000,
 570	(0x0e00 << 16) | (0x3c010 >> 2),
 571	0x00000000,
 572	(0x0e00 << 16) | (0x92a8 >> 2),
 573	0x00000000,
 574	(0x0e00 << 16) | (0x92ac >> 2),
 575	0x00000000,
 576	(0x0e00 << 16) | (0x92b4 >> 2),
 577	0x00000000,
 578	(0x0e00 << 16) | (0x92b8 >> 2),
 579	0x00000000,
 580	(0x0e00 << 16) | (0x92bc >> 2),
 581	0x00000000,
 582	(0x0e00 << 16) | (0x92c0 >> 2),
 583	0x00000000,
 584	(0x0e00 << 16) | (0x92c4 >> 2),
 585	0x00000000,
 586	(0x0e00 << 16) | (0x92c8 >> 2),
 587	0x00000000,
 588	(0x0e00 << 16) | (0x92cc >> 2),
 589	0x00000000,
 590	(0x0e00 << 16) | (0x92d0 >> 2),
 591	0x00000000,
 592	(0x0e00 << 16) | (0x8c00 >> 2),
 593	0x00000000,
 594	(0x0e00 << 16) | (0x8c04 >> 2),
 595	0x00000000,
 596	(0x0e00 << 16) | (0x8c20 >> 2),
 597	0x00000000,
 598	(0x0e00 << 16) | (0x8c38 >> 2),
 599	0x00000000,
 600	(0x0e00 << 16) | (0x8c3c >> 2),
 601	0x00000000,
 602	(0x0e00 << 16) | (0xae00 >> 2),
 603	0x00000000,
 604	(0x0e00 << 16) | (0x9604 >> 2),
 605	0x00000000,
 606	(0x0e00 << 16) | (0xac08 >> 2),
 607	0x00000000,
 608	(0x0e00 << 16) | (0xac0c >> 2),
 609	0x00000000,
 610	(0x0e00 << 16) | (0xac10 >> 2),
 611	0x00000000,
 612	(0x0e00 << 16) | (0xac14 >> 2),
 613	0x00000000,
 614	(0x0e00 << 16) | (0xac58 >> 2),
 615	0x00000000,
 616	(0x0e00 << 16) | (0xac68 >> 2),
 617	0x00000000,
 618	(0x0e00 << 16) | (0xac6c >> 2),
 619	0x00000000,
 620	(0x0e00 << 16) | (0xac70 >> 2),
 621	0x00000000,
 622	(0x0e00 << 16) | (0xac74 >> 2),
 623	0x00000000,
 624	(0x0e00 << 16) | (0xac78 >> 2),
 625	0x00000000,
 626	(0x0e00 << 16) | (0xac7c >> 2),
 627	0x00000000,
 628	(0x0e00 << 16) | (0xac80 >> 2),
 629	0x00000000,
 630	(0x0e00 << 16) | (0xac84 >> 2),
 631	0x00000000,
 632	(0x0e00 << 16) | (0xac88 >> 2),
 633	0x00000000,
 634	(0x0e00 << 16) | (0xac8c >> 2),
 635	0x00000000,
 636	(0x0e00 << 16) | (0x970c >> 2),
 637	0x00000000,
 638	(0x0e00 << 16) | (0x9714 >> 2),
 639	0x00000000,
 640	(0x0e00 << 16) | (0x9718 >> 2),
 641	0x00000000,
 642	(0x0e00 << 16) | (0x971c >> 2),
 643	0x00000000,
 644	(0x0e00 << 16) | (0x31068 >> 2),
 645	0x00000000,
 646	(0x4e00 << 16) | (0x31068 >> 2),
 647	0x00000000,
 648	(0x5e00 << 16) | (0x31068 >> 2),
 649	0x00000000,
 650	(0x6e00 << 16) | (0x31068 >> 2),
 651	0x00000000,
 652	(0x7e00 << 16) | (0x31068 >> 2),
 653	0x00000000,
 654	(0x8e00 << 16) | (0x31068 >> 2),
 655	0x00000000,
 656	(0x9e00 << 16) | (0x31068 >> 2),
 657	0x00000000,
 658	(0xae00 << 16) | (0x31068 >> 2),
 659	0x00000000,
 660	(0xbe00 << 16) | (0x31068 >> 2),
 661	0x00000000,
 662	(0x0e00 << 16) | (0xcd10 >> 2),
 663	0x00000000,
 664	(0x0e00 << 16) | (0xcd14 >> 2),
 665	0x00000000,
 666	(0x0e00 << 16) | (0x88b0 >> 2),
 667	0x00000000,
 668	(0x0e00 << 16) | (0x88b4 >> 2),
 669	0x00000000,
 670	(0x0e00 << 16) | (0x88b8 >> 2),
 671	0x00000000,
 672	(0x0e00 << 16) | (0x88bc >> 2),
 673	0x00000000,
 674	(0x0400 << 16) | (0x89c0 >> 2),
 675	0x00000000,
 676	(0x0e00 << 16) | (0x88c4 >> 2),
 677	0x00000000,
 678	(0x0e00 << 16) | (0x88c8 >> 2),
 679	0x00000000,
 680	(0x0e00 << 16) | (0x88d0 >> 2),
 681	0x00000000,
 682	(0x0e00 << 16) | (0x88d4 >> 2),
 683	0x00000000,
 684	(0x0e00 << 16) | (0x88d8 >> 2),
 685	0x00000000,
 686	(0x0e00 << 16) | (0x8980 >> 2),
 687	0x00000000,
 688	(0x0e00 << 16) | (0x30938 >> 2),
 689	0x00000000,
 690	(0x0e00 << 16) | (0x3093c >> 2),
 691	0x00000000,
 692	(0x0e00 << 16) | (0x30940 >> 2),
 693	0x00000000,
 694	(0x0e00 << 16) | (0x89a0 >> 2),
 695	0x00000000,
 696	(0x0e00 << 16) | (0x30900 >> 2),
 697	0x00000000,
 698	(0x0e00 << 16) | (0x30904 >> 2),
 699	0x00000000,
 700	(0x0e00 << 16) | (0x89b4 >> 2),
 701	0x00000000,
 702	(0x0e00 << 16) | (0x3c210 >> 2),
 703	0x00000000,
 704	(0x0e00 << 16) | (0x3c214 >> 2),
 705	0x00000000,
 706	(0x0e00 << 16) | (0x3c218 >> 2),
 707	0x00000000,
 708	(0x0e00 << 16) | (0x8904 >> 2),
 709	0x00000000,
 710	0x5,
 711	(0x0e00 << 16) | (0x8c28 >> 2),
 712	(0x0e00 << 16) | (0x8c2c >> 2),
 713	(0x0e00 << 16) | (0x8c30 >> 2),
 714	(0x0e00 << 16) | (0x8c34 >> 2),
 715	(0x0e00 << 16) | (0x9600 >> 2),
 716};
 717
 718static const u32 kalindi_rlc_save_restore_register_list[] =
 719{
 720	(0x0e00 << 16) | (0xc12c >> 2),
 721	0x00000000,
 722	(0x0e00 << 16) | (0xc140 >> 2),
 723	0x00000000,
 724	(0x0e00 << 16) | (0xc150 >> 2),
 725	0x00000000,
 726	(0x0e00 << 16) | (0xc15c >> 2),
 727	0x00000000,
 728	(0x0e00 << 16) | (0xc168 >> 2),
 729	0x00000000,
 730	(0x0e00 << 16) | (0xc170 >> 2),
 731	0x00000000,
 732	(0x0e00 << 16) | (0xc204 >> 2),
 733	0x00000000,
 734	(0x0e00 << 16) | (0xc2b4 >> 2),
 735	0x00000000,
 736	(0x0e00 << 16) | (0xc2b8 >> 2),
 737	0x00000000,
 738	(0x0e00 << 16) | (0xc2bc >> 2),
 739	0x00000000,
 740	(0x0e00 << 16) | (0xc2c0 >> 2),
 741	0x00000000,
 742	(0x0e00 << 16) | (0x8228 >> 2),
 743	0x00000000,
 744	(0x0e00 << 16) | (0x829c >> 2),
 745	0x00000000,
 746	(0x0e00 << 16) | (0x869c >> 2),
 747	0x00000000,
 748	(0x0600 << 16) | (0x98f4 >> 2),
 749	0x00000000,
 750	(0x0e00 << 16) | (0x98f8 >> 2),
 751	0x00000000,
 752	(0x0e00 << 16) | (0x9900 >> 2),
 753	0x00000000,
 754	(0x0e00 << 16) | (0xc260 >> 2),
 755	0x00000000,
 756	(0x0e00 << 16) | (0x90e8 >> 2),
 757	0x00000000,
 758	(0x0e00 << 16) | (0x3c000 >> 2),
 759	0x00000000,
 760	(0x0e00 << 16) | (0x3c00c >> 2),
 761	0x00000000,
 762	(0x0e00 << 16) | (0x8c1c >> 2),
 763	0x00000000,
 764	(0x0e00 << 16) | (0x9700 >> 2),
 765	0x00000000,
 766	(0x0e00 << 16) | (0xcd20 >> 2),
 767	0x00000000,
 768	(0x4e00 << 16) | (0xcd20 >> 2),
 769	0x00000000,
 770	(0x5e00 << 16) | (0xcd20 >> 2),
 771	0x00000000,
 772	(0x6e00 << 16) | (0xcd20 >> 2),
 773	0x00000000,
 774	(0x7e00 << 16) | (0xcd20 >> 2),
 775	0x00000000,
 776	(0x0e00 << 16) | (0x89bc >> 2),
 777	0x00000000,
 778	(0x0e00 << 16) | (0x8900 >> 2),
 779	0x00000000,
 780	0x3,
 781	(0x0e00 << 16) | (0xc130 >> 2),
 782	0x00000000,
 783	(0x0e00 << 16) | (0xc134 >> 2),
 784	0x00000000,
 785	(0x0e00 << 16) | (0xc1fc >> 2),
 786	0x00000000,
 787	(0x0e00 << 16) | (0xc208 >> 2),
 788	0x00000000,
 789	(0x0e00 << 16) | (0xc264 >> 2),
 790	0x00000000,
 791	(0x0e00 << 16) | (0xc268 >> 2),
 792	0x00000000,
 793	(0x0e00 << 16) | (0xc26c >> 2),
 794	0x00000000,
 795	(0x0e00 << 16) | (0xc270 >> 2),
 796	0x00000000,
 797	(0x0e00 << 16) | (0xc274 >> 2),
 798	0x00000000,
 799	(0x0e00 << 16) | (0xc28c >> 2),
 800	0x00000000,
 801	(0x0e00 << 16) | (0xc290 >> 2),
 802	0x00000000,
 803	(0x0e00 << 16) | (0xc294 >> 2),
 804	0x00000000,
 805	(0x0e00 << 16) | (0xc298 >> 2),
 806	0x00000000,
 807	(0x0e00 << 16) | (0xc2a0 >> 2),
 808	0x00000000,
 809	(0x0e00 << 16) | (0xc2a4 >> 2),
 810	0x00000000,
 811	(0x0e00 << 16) | (0xc2a8 >> 2),
 812	0x00000000,
 813	(0x0e00 << 16) | (0xc2ac >> 2),
 814	0x00000000,
 815	(0x0e00 << 16) | (0x301d0 >> 2),
 816	0x00000000,
 817	(0x0e00 << 16) | (0x30238 >> 2),
 818	0x00000000,
 819	(0x0e00 << 16) | (0x30250 >> 2),
 820	0x00000000,
 821	(0x0e00 << 16) | (0x30254 >> 2),
 822	0x00000000,
 823	(0x0e00 << 16) | (0x30258 >> 2),
 824	0x00000000,
 825	(0x0e00 << 16) | (0x3025c >> 2),
 826	0x00000000,
 827	(0x4e00 << 16) | (0xc900 >> 2),
 828	0x00000000,
 829	(0x5e00 << 16) | (0xc900 >> 2),
 830	0x00000000,
 831	(0x6e00 << 16) | (0xc900 >> 2),
 832	0x00000000,
 833	(0x7e00 << 16) | (0xc900 >> 2),
 834	0x00000000,
 835	(0x4e00 << 16) | (0xc904 >> 2),
 836	0x00000000,
 837	(0x5e00 << 16) | (0xc904 >> 2),
 838	0x00000000,
 839	(0x6e00 << 16) | (0xc904 >> 2),
 840	0x00000000,
 841	(0x7e00 << 16) | (0xc904 >> 2),
 842	0x00000000,
 843	(0x4e00 << 16) | (0xc908 >> 2),
 844	0x00000000,
 845	(0x5e00 << 16) | (0xc908 >> 2),
 846	0x00000000,
 847	(0x6e00 << 16) | (0xc908 >> 2),
 848	0x00000000,
 849	(0x7e00 << 16) | (0xc908 >> 2),
 850	0x00000000,
 851	(0x4e00 << 16) | (0xc90c >> 2),
 852	0x00000000,
 853	(0x5e00 << 16) | (0xc90c >> 2),
 854	0x00000000,
 855	(0x6e00 << 16) | (0xc90c >> 2),
 856	0x00000000,
 857	(0x7e00 << 16) | (0xc90c >> 2),
 858	0x00000000,
 859	(0x4e00 << 16) | (0xc910 >> 2),
 860	0x00000000,
 861	(0x5e00 << 16) | (0xc910 >> 2),
 862	0x00000000,
 863	(0x6e00 << 16) | (0xc910 >> 2),
 864	0x00000000,
 865	(0x7e00 << 16) | (0xc910 >> 2),
 866	0x00000000,
 867	(0x0e00 << 16) | (0xc99c >> 2),
 868	0x00000000,
 869	(0x0e00 << 16) | (0x9834 >> 2),
 870	0x00000000,
 871	(0x0000 << 16) | (0x30f00 >> 2),
 872	0x00000000,
 873	(0x0000 << 16) | (0x30f04 >> 2),
 874	0x00000000,
 875	(0x0000 << 16) | (0x30f08 >> 2),
 876	0x00000000,
 877	(0x0000 << 16) | (0x30f0c >> 2),
 878	0x00000000,
 879	(0x0600 << 16) | (0x9b7c >> 2),
 880	0x00000000,
 881	(0x0e00 << 16) | (0x8a14 >> 2),
 882	0x00000000,
 883	(0x0e00 << 16) | (0x8a18 >> 2),
 884	0x00000000,
 885	(0x0600 << 16) | (0x30a00 >> 2),
 886	0x00000000,
 887	(0x0e00 << 16) | (0x8bf0 >> 2),
 888	0x00000000,
 889	(0x0e00 << 16) | (0x8bcc >> 2),
 890	0x00000000,
 891	(0x0e00 << 16) | (0x8b24 >> 2),
 892	0x00000000,
 893	(0x0e00 << 16) | (0x30a04 >> 2),
 894	0x00000000,
 895	(0x0600 << 16) | (0x30a10 >> 2),
 896	0x00000000,
 897	(0x0600 << 16) | (0x30a14 >> 2),
 898	0x00000000,
 899	(0x0600 << 16) | (0x30a18 >> 2),
 900	0x00000000,
 901	(0x0600 << 16) | (0x30a2c >> 2),
 902	0x00000000,
 903	(0x0e00 << 16) | (0xc700 >> 2),
 904	0x00000000,
 905	(0x0e00 << 16) | (0xc704 >> 2),
 906	0x00000000,
 907	(0x0e00 << 16) | (0xc708 >> 2),
 908	0x00000000,
 909	(0x0e00 << 16) | (0xc768 >> 2),
 910	0x00000000,
 911	(0x0400 << 16) | (0xc770 >> 2),
 912	0x00000000,
 913	(0x0400 << 16) | (0xc774 >> 2),
 914	0x00000000,
 915	(0x0400 << 16) | (0xc798 >> 2),
 916	0x00000000,
 917	(0x0400 << 16) | (0xc79c >> 2),
 918	0x00000000,
 919	(0x0e00 << 16) | (0x9100 >> 2),
 920	0x00000000,
 921	(0x0e00 << 16) | (0x3c010 >> 2),
 922	0x00000000,
 923	(0x0e00 << 16) | (0x8c00 >> 2),
 924	0x00000000,
 925	(0x0e00 << 16) | (0x8c04 >> 2),
 926	0x00000000,
 927	(0x0e00 << 16) | (0x8c20 >> 2),
 928	0x00000000,
 929	(0x0e00 << 16) | (0x8c38 >> 2),
 930	0x00000000,
 931	(0x0e00 << 16) | (0x8c3c >> 2),
 932	0x00000000,
 933	(0x0e00 << 16) | (0xae00 >> 2),
 934	0x00000000,
 935	(0x0e00 << 16) | (0x9604 >> 2),
 936	0x00000000,
 937	(0x0e00 << 16) | (0xac08 >> 2),
 938	0x00000000,
 939	(0x0e00 << 16) | (0xac0c >> 2),
 940	0x00000000,
 941	(0x0e00 << 16) | (0xac10 >> 2),
 942	0x00000000,
 943	(0x0e00 << 16) | (0xac14 >> 2),
 944	0x00000000,
 945	(0x0e00 << 16) | (0xac58 >> 2),
 946	0x00000000,
 947	(0x0e00 << 16) | (0xac68 >> 2),
 948	0x00000000,
 949	(0x0e00 << 16) | (0xac6c >> 2),
 950	0x00000000,
 951	(0x0e00 << 16) | (0xac70 >> 2),
 952	0x00000000,
 953	(0x0e00 << 16) | (0xac74 >> 2),
 954	0x00000000,
 955	(0x0e00 << 16) | (0xac78 >> 2),
 956	0x00000000,
 957	(0x0e00 << 16) | (0xac7c >> 2),
 958	0x00000000,
 959	(0x0e00 << 16) | (0xac80 >> 2),
 960	0x00000000,
 961	(0x0e00 << 16) | (0xac84 >> 2),
 962	0x00000000,
 963	(0x0e00 << 16) | (0xac88 >> 2),
 964	0x00000000,
 965	(0x0e00 << 16) | (0xac8c >> 2),
 966	0x00000000,
 967	(0x0e00 << 16) | (0x970c >> 2),
 968	0x00000000,
 969	(0x0e00 << 16) | (0x9714 >> 2),
 970	0x00000000,
 971	(0x0e00 << 16) | (0x9718 >> 2),
 972	0x00000000,
 973	(0x0e00 << 16) | (0x971c >> 2),
 974	0x00000000,
 975	(0x0e00 << 16) | (0x31068 >> 2),
 976	0x00000000,
 977	(0x4e00 << 16) | (0x31068 >> 2),
 978	0x00000000,
 979	(0x5e00 << 16) | (0x31068 >> 2),
 980	0x00000000,
 981	(0x6e00 << 16) | (0x31068 >> 2),
 982	0x00000000,
 983	(0x7e00 << 16) | (0x31068 >> 2),
 984	0x00000000,
 985	(0x0e00 << 16) | (0xcd10 >> 2),
 986	0x00000000,
 987	(0x0e00 << 16) | (0xcd14 >> 2),
 988	0x00000000,
 989	(0x0e00 << 16) | (0x88b0 >> 2),
 990	0x00000000,
 991	(0x0e00 << 16) | (0x88b4 >> 2),
 992	0x00000000,
 993	(0x0e00 << 16) | (0x88b8 >> 2),
 994	0x00000000,
 995	(0x0e00 << 16) | (0x88bc >> 2),
 996	0x00000000,
 997	(0x0400 << 16) | (0x89c0 >> 2),
 998	0x00000000,
 999	(0x0e00 << 16) | (0x88c4 >> 2),
1000	0x00000000,
1001	(0x0e00 << 16) | (0x88c8 >> 2),
1002	0x00000000,
1003	(0x0e00 << 16) | (0x88d0 >> 2),
1004	0x00000000,
1005	(0x0e00 << 16) | (0x88d4 >> 2),
1006	0x00000000,
1007	(0x0e00 << 16) | (0x88d8 >> 2),
1008	0x00000000,
1009	(0x0e00 << 16) | (0x8980 >> 2),
1010	0x00000000,
1011	(0x0e00 << 16) | (0x30938 >> 2),
1012	0x00000000,
1013	(0x0e00 << 16) | (0x3093c >> 2),
1014	0x00000000,
1015	(0x0e00 << 16) | (0x30940 >> 2),
1016	0x00000000,
1017	(0x0e00 << 16) | (0x89a0 >> 2),
1018	0x00000000,
1019	(0x0e00 << 16) | (0x30900 >> 2),
1020	0x00000000,
1021	(0x0e00 << 16) | (0x30904 >> 2),
1022	0x00000000,
1023	(0x0e00 << 16) | (0x89b4 >> 2),
1024	0x00000000,
1025	(0x0e00 << 16) | (0x3e1fc >> 2),
1026	0x00000000,
1027	(0x0e00 << 16) | (0x3c210 >> 2),
1028	0x00000000,
1029	(0x0e00 << 16) | (0x3c214 >> 2),
1030	0x00000000,
1031	(0x0e00 << 16) | (0x3c218 >> 2),
1032	0x00000000,
1033	(0x0e00 << 16) | (0x8904 >> 2),
1034	0x00000000,
1035	0x5,
1036	(0x0e00 << 16) | (0x8c28 >> 2),
1037	(0x0e00 << 16) | (0x8c2c >> 2),
1038	(0x0e00 << 16) | (0x8c30 >> 2),
1039	(0x0e00 << 16) | (0x8c34 >> 2),
1040	(0x0e00 << 16) | (0x9600 >> 2),
1041};
1042
1043static const u32 bonaire_golden_spm_registers[] =
1044{
1045	0x30800, 0xe0ffffff, 0xe0000000
1046};
1047
1048static const u32 bonaire_golden_common_registers[] =
1049{
1050	0xc770, 0xffffffff, 0x00000800,
1051	0xc774, 0xffffffff, 0x00000800,
1052	0xc798, 0xffffffff, 0x00007fbf,
1053	0xc79c, 0xffffffff, 0x00007faf
1054};
1055
/*
 * Bonaire "golden" (recommended power-on) register values;
 * {offset, mask, value} triples applied at init by
 * cik_init_golden_registers().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1100
/*
 * Bonaire medium/coarse grain clock gating (MGCG/CGCG) init sequence;
 * {offset, mask, value} triples applied before the golden registers in
 * cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1186
/*
 * Spectre (Kaveri) SPM tuning table; {offset, mask, value} triples for
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1191
/*
 * Spectre (Kaveri) common settings; {offset, mask, value} triples for
 * radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1199
/*
 * Spectre (Kaveri) "golden" register values; {offset, mask, value}
 * triples applied at init by cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1228
/*
 * Spectre (Kaveri) MGCG/CGCG clock-gating init sequence;
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1319
/*
 * Kalindi (Kabini, also reused for Mullins) SPM tuning table;
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1324
/*
 * Kalindi (Kabini/Mullins) common settings; {offset, mask, value}
 * triples for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1332
/*
 * Kalindi (Kabini) "golden" register values; {offset, mask, value}
 * triples applied at init by cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1366
/*
 * Kalindi (Kabini/Mullins) MGCG/CGCG clock-gating init sequence;
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1425
/*
 * Hawaii SPM tuning table; {offset, mask, value} triples for
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1430
/*
 * Hawaii common settings; {offset, mask, value} triples for
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1439
/*
 * Hawaii "golden" register values; {offset, mask, value} triples
 * applied at init by cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1479
/*
 * Hawaii MGCG/CGCG clock-gating init sequence; {offset, mask, value}
 * triples for radeon_program_register_sequence().  Hawaii has more
 * 0x3c0xx entries than the other ASICs (larger table at that range).
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1590
/*
 * Godavari (Mullins) "golden" register values; {offset, mask, value}
 * triples applied at init by cik_init_golden_registers() (Mullins uses
 * the kalindi tables for everything else).
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400, /* NOTE(review): 0x98302 looks like a
					  * typo of 0x9834 (the offset every
					  * sibling table pairs with this
					  * mask/value) — confirm against hw
					  * docs before changing */
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1626
1627
/**
 * cik_init_golden_registers - program the per-ASIC "golden" settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the register tables above via
 * radeon_program_register_sequence(), in the order: clock-gating init,
 * golden, common, SPM.  Kaveri uses the "spectre" tables and Kabini the
 * "kalindi" ones; Mullins (Godavari) reuses the kalindi tables except
 * for its own golden-register list.  Unknown families are left alone.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		/* Godavari shares the kalindi tables except for the
		 * golden registers */
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		/* Kaveri's GFX block is codenamed "spectre" */
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}
1705
1706/**
1707 * cik_get_xclk - get the xclk
1708 *
1709 * @rdev: radeon_device pointer
1710 *
1711 * Returns the reference clock used by the gfx engine
1712 * (CIK).
1713 */
1714u32 cik_get_xclk(struct radeon_device *rdev)
1715{
1716	u32 reference_clock = rdev->clock.spll.reference_freq;
1717
1718	if (rdev->flags & RADEON_IS_IGP) {
1719		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1720			return reference_clock / 2;
1721	} else {
1722		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1723			return reference_clock / 4;
1724	}
1725	return reference_clock;
1726}
1727
1728/**
1729 * cik_mm_rdoorbell - read a doorbell dword
1730 *
1731 * @rdev: radeon_device pointer
1732 * @index: doorbell index
1733 *
1734 * Returns the value in the doorbell aperture at the
1735 * requested doorbell index (CIK).
1736 */
1737u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1738{
1739	if (index < rdev->doorbell.num_doorbells) {
1740		return readl(rdev->doorbell.ptr + index);
1741	} else {
1742		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1743		return 0;
1744	}
1745}
1746
1747/**
1748 * cik_mm_wdoorbell - write a doorbell dword
1749 *
1750 * @rdev: radeon_device pointer
1751 * @index: doorbell index
1752 * @v: value to write
1753 *
1754 * Writes @v to the doorbell aperture at the
1755 * requested doorbell index (CIK).
1756 */
1757void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1758{
1759	if (index < rdev->doorbell.num_doorbells) {
1760		writel(v, rdev->doorbell.ptr + index);
1761	} else {
1762		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1763	}
1764}
1765
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * Bonaire MC IO debug settings for the legacy (non-unified) firmware
 * path.  Each row is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pair written by ci_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1807
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * Hawaii MC IO debug settings for the legacy (non-unified) firmware
 * path; {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs written by
 * ci_mc_load_microcode().
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1835
1836
1837/**
1838 * cik_srbm_select - select specific register instances
1839 *
1840 * @rdev: radeon_device pointer
1841 * @me: selected ME (micro engine)
1842 * @pipe: pipe
1843 * @queue: queue
1844 * @vmid: VMID
1845 *
1846 * Switches the currently active registers instances.  Some
1847 * registers are instanced per VMID, others are instanced per
1848 * me/pipe/queue combination.
1849 */
1850static void cik_srbm_select(struct radeon_device *rdev,
1851			    u32 me, u32 pipe, u32 queue, u32 vmid)
1852{
1853	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1854			     MEID(me & 0x3) |
1855			     VMID(vmid & 0xf) |
1856			     QUEUEID(queue & 0x7));
1857	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1858}
1859
1860/* ucode loading */
1861/**
1862 * ci_mc_load_microcode - load MC ucode into the hw
1863 *
1864 * @rdev: radeon_device pointer
1865 *
1866 * Load the GDDR MC ucode into the hw (CIK).
1867 * Returns 0 on success, error on failure.
1868 */
1869int ci_mc_load_microcode(struct radeon_device *rdev)
1870{
1871	const __be32 *fw_data = NULL;
1872	const __le32 *new_fw_data = NULL;
1873	u32 running, tmp;
1874	u32 *io_mc_regs = NULL;
1875	const __le32 *new_io_mc_regs = NULL;
1876	int i, regs_size, ucode_size;
1877
1878	if (!rdev->mc_fw)
1879		return -EINVAL;
1880
1881	if (rdev->new_fw) {
1882		const struct mc_firmware_header_v1_0 *hdr =
1883			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1884
1885		radeon_ucode_print_mc_hdr(&hdr->header);
1886
1887		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1888		new_io_mc_regs = (const __le32 *)
1889			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1890		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1891		new_fw_data = (const __le32 *)
1892			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1893	} else {
1894		ucode_size = rdev->mc_fw->size / 4;
1895
1896		switch (rdev->family) {
1897		case CHIP_BONAIRE:
1898			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1899			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1900			break;
1901		case CHIP_HAWAII:
1902			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1903			regs_size = HAWAII_IO_MC_REGS_SIZE;
1904			break;
1905		default:
1906			return -EINVAL;
1907		}
1908		fw_data = (const __be32 *)rdev->mc_fw->data;
1909	}
1910
1911	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1912
1913	if (running == 0) {
1914		/* reset the engine and set to writable */
1915		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1916		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1917
1918		/* load mc io regs */
1919		for (i = 0; i < regs_size; i++) {
1920			if (rdev->new_fw) {
1921				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1922				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1923			} else {
1924				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1925				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1926			}
1927		}
1928
1929		tmp = RREG32(MC_SEQ_MISC0);
1930		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1931			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1932			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1933			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1934			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1935		}
1936
1937		/* load the MC ucode */
1938		for (i = 0; i < ucode_size; i++) {
1939			if (rdev->new_fw)
1940				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1941			else
1942				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1943		}
1944
1945		/* put the engine back into the active state */
1946		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1947		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1948		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1949
1950		/* wait for training to complete */
1951		for (i = 0; i < rdev->usec_timeout; i++) {
1952			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1953				break;
1954			udelay(1);
1955		}
1956		for (i = 0; i < rdev->usec_timeout; i++) {
1957			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1958				break;
1959			udelay(1);
1960		}
1961	}
1962
1963	return 0;
1964}
1965
1966/**
1967 * cik_init_microcode - load ucode images from disk
1968 *
1969 * @rdev: radeon_device pointer
1970 *
1971 * Use the firmware interface to load the ucode images into
1972 * the driver (not loaded into hw).
1973 * Returns 0 on success, error on failure.
1974 */
1975static int cik_init_microcode(struct radeon_device *rdev)
1976{
1977	const char *chip_name;
1978	const char *new_chip_name;
1979	size_t pfp_req_size, me_req_size, ce_req_size,
1980		mec_req_size, rlc_req_size, mc_req_size = 0,
1981		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1982	char fw_name[30];
1983	int new_fw = 0;
1984	int err;
1985	int num_fw;
1986	bool new_smc = false;
1987
1988	DRM_DEBUG("\n");
1989
1990	switch (rdev->family) {
1991	case CHIP_BONAIRE:
1992		chip_name = "BONAIRE";
1993		if ((rdev->pdev->revision == 0x80) ||
1994		    (rdev->pdev->revision == 0x81) ||
1995		    (rdev->pdev->device == 0x665f))
1996			new_smc = true;
1997		new_chip_name = "bonaire";
1998		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1999		me_req_size = CIK_ME_UCODE_SIZE * 4;
2000		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2001		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2002		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2003		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2004		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2005		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2006		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2007		num_fw = 8;
2008		break;
2009	case CHIP_HAWAII:
2010		chip_name = "HAWAII";
2011		if (rdev->pdev->revision == 0x80)
2012			new_smc = true;
2013		new_chip_name = "hawaii";
2014		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023		num_fw = 8;
2024		break;
2025	case CHIP_KAVERI:
2026		chip_name = "KAVERI";
2027		new_chip_name = "kaveri";
2028		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034		num_fw = 7;
2035		break;
2036	case CHIP_KABINI:
2037		chip_name = "KABINI";
2038		new_chip_name = "kabini";
2039		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045		num_fw = 6;
2046		break;
2047	case CHIP_MULLINS:
2048		chip_name = "MULLINS";
2049		new_chip_name = "mullins";
2050		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056		num_fw = 6;
2057		break;
2058	default: BUG();
2059	}
2060
2061	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065	if (err) {
2066		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068		if (err)
2069			goto out;
2070		if (rdev->pfp_fw->size != pfp_req_size) {
2071			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2072			       rdev->pfp_fw->size, fw_name);
2073			err = -EINVAL;
2074			goto out;
2075		}
2076	} else {
2077		err = radeon_ucode_validate(rdev->pfp_fw);
2078		if (err) {
2079			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2080			       fw_name);
2081			goto out;
2082		} else {
2083			new_fw++;
2084		}
2085	}
2086
2087	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2088	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2089	if (err) {
2090		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2091		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092		if (err)
2093			goto out;
2094		if (rdev->me_fw->size != me_req_size) {
2095			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2096			       rdev->me_fw->size, fw_name);
2097			err = -EINVAL;
2098		}
2099	} else {
2100		err = radeon_ucode_validate(rdev->me_fw);
2101		if (err) {
2102			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2103			       fw_name);
2104			goto out;
2105		} else {
2106			new_fw++;
2107		}
2108	}
2109
2110	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2111	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2112	if (err) {
2113		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2114		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2115		if (err)
2116			goto out;
2117		if (rdev->ce_fw->size != ce_req_size) {
2118			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2119			       rdev->ce_fw->size, fw_name);
2120			err = -EINVAL;
2121		}
2122	} else {
2123		err = radeon_ucode_validate(rdev->ce_fw);
2124		if (err) {
2125			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2126			       fw_name);
2127			goto out;
2128		} else {
2129			new_fw++;
2130		}
2131	}
2132
2133	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2134	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2135	if (err) {
2136		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2137		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2138		if (err)
2139			goto out;
2140		if (rdev->mec_fw->size != mec_req_size) {
2141			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2142			       rdev->mec_fw->size, fw_name);
2143			err = -EINVAL;
2144		}
2145	} else {
2146		err = radeon_ucode_validate(rdev->mec_fw);
2147		if (err) {
2148			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2149			       fw_name);
2150			goto out;
2151		} else {
2152			new_fw++;
2153		}
2154	}
2155
2156	if (rdev->family == CHIP_KAVERI) {
2157		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2158		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2159		if (err) {
2160			goto out;
2161		} else {
2162			err = radeon_ucode_validate(rdev->mec2_fw);
2163			if (err) {
2164				goto out;
2165			} else {
2166				new_fw++;
2167			}
2168		}
2169	}
2170
2171	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2172	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2173	if (err) {
2174		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2175		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2176		if (err)
2177			goto out;
2178		if (rdev->rlc_fw->size != rlc_req_size) {
2179			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2180			       rdev->rlc_fw->size, fw_name);
2181			err = -EINVAL;
2182		}
2183	} else {
2184		err = radeon_ucode_validate(rdev->rlc_fw);
2185		if (err) {
2186			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2187			       fw_name);
2188			goto out;
2189		} else {
2190			new_fw++;
2191		}
2192	}
2193
2194	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2195	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2196	if (err) {
2197		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2198		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2199		if (err)
2200			goto out;
2201		if (rdev->sdma_fw->size != sdma_req_size) {
2202			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2203			       rdev->sdma_fw->size, fw_name);
2204			err = -EINVAL;
2205		}
2206	} else {
2207		err = radeon_ucode_validate(rdev->sdma_fw);
2208		if (err) {
2209			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2210			       fw_name);
2211			goto out;
2212		} else {
2213			new_fw++;
2214		}
2215	}
2216
2217	/* No SMC, MC ucode on APUs */
2218	if (!(rdev->flags & RADEON_IS_IGP)) {
2219		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2220		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2221		if (err) {
2222			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2223			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2224			if (err) {
2225				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2226				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2227				if (err)
2228					goto out;
2229			}
2230			if ((rdev->mc_fw->size != mc_req_size) &&
2231			    (rdev->mc_fw->size != mc2_req_size)){
2232				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2233				       rdev->mc_fw->size, fw_name);
2234				err = -EINVAL;
2235			}
2236			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2237		} else {
2238			err = radeon_ucode_validate(rdev->mc_fw);
2239			if (err) {
2240				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2241				       fw_name);
2242				goto out;
2243			} else {
2244				new_fw++;
2245			}
2246		}
2247
2248		if (new_smc)
2249			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2250		else
2251			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2252		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2253		if (err) {
2254			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2255			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2256			if (err) {
2257				pr_err("smc: error loading firmware \"%s\"\n",
2258				       fw_name);
2259				release_firmware(rdev->smc_fw);
2260				rdev->smc_fw = NULL;
2261				err = 0;
2262			} else if (rdev->smc_fw->size != smc_req_size) {
2263				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2264				       rdev->smc_fw->size, fw_name);
2265				err = -EINVAL;
2266			}
2267		} else {
2268			err = radeon_ucode_validate(rdev->smc_fw);
2269			if (err) {
2270				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2271				       fw_name);
2272				goto out;
2273			} else {
2274				new_fw++;
2275			}
2276		}
2277	}
2278
2279	if (new_fw == 0) {
2280		rdev->new_fw = false;
2281	} else if (new_fw < num_fw) {
2282		pr_err("ci_fw: mixing new and old firmware!\n");
2283		err = -EINVAL;
2284	} else {
2285		rdev->new_fw = true;
2286	}
2287
2288out:
2289	if (err) {
2290		if (err != -EINVAL)
2291			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2292			       fw_name);
2293		release_firmware(rdev->pfp_fw);
2294		rdev->pfp_fw = NULL;
2295		release_firmware(rdev->me_fw);
2296		rdev->me_fw = NULL;
2297		release_firmware(rdev->ce_fw);
2298		rdev->ce_fw = NULL;
2299		release_firmware(rdev->mec_fw);
2300		rdev->mec_fw = NULL;
2301		release_firmware(rdev->mec2_fw);
2302		rdev->mec2_fw = NULL;
2303		release_firmware(rdev->rlc_fw);
2304		rdev->rlc_fw = NULL;
2305		release_firmware(rdev->sdma_fw);
2306		rdev->sdma_fw = NULL;
2307		release_firmware(rdev->mc_fw);
2308		rdev->mc_fw = NULL;
2309		release_firmware(rdev->smc_fw);
2310		rdev->smc_fw = NULL;
2311	}
2312	return err;
2313}
2314
2315/*
2316 * Core functions
2317 */
2318/**
2319 * cik_tiling_mode_table_init - init the hw tiling table
2320 *
2321 * @rdev: radeon_device pointer
2322 *
2323 * Starting with SI, the tiling setup is done globally in a
2324 * set of 32 tiling modes.  Rather than selecting each set of
2325 * parameters per surface as on older asics, we just select
2326 * which index in the tiling table we want to use, and the
2327 * surface uses those parameters (CIK).
2328 */
2329static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2330{
2331	u32 *tile = rdev->config.cik.tile_mode_array;
2332	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2333	const u32 num_tile_mode_states =
2334			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2335	const u32 num_secondary_tile_mode_states =
2336			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2337	u32 reg_offset, split_equal_to_row_size;
2338	u32 num_pipe_configs;
2339	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2340		rdev->config.cik.max_shader_engines;
2341
2342	switch (rdev->config.cik.mem_row_size_in_kb) {
2343	case 1:
2344		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2345		break;
2346	case 2:
2347	default:
2348		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2349		break;
2350	case 4:
2351		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2352		break;
2353	}
2354
2355	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2356	if (num_pipe_configs > 8)
2357		num_pipe_configs = 16;
2358
2359	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2360		tile[reg_offset] = 0;
2361	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2362		macrotile[reg_offset] = 0;
2363
2364	switch(num_pipe_configs) {
2365	case 16:
2366		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2368			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2369			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2370		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2374		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2378		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2382		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385			   TILE_SPLIT(split_equal_to_row_size));
2386		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2387			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			   TILE_SPLIT(split_equal_to_row_size));
2397		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2398			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2399		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2400			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2402		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2404			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2409			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2417		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2419			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2432		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444
2445		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448			   NUM_BANKS(ADDR_SURF_16_BANK));
2449		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452			   NUM_BANKS(ADDR_SURF_16_BANK));
2453		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456			   NUM_BANKS(ADDR_SURF_16_BANK));
2457		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460			   NUM_BANKS(ADDR_SURF_16_BANK));
2461		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464			   NUM_BANKS(ADDR_SURF_8_BANK));
2465		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468			   NUM_BANKS(ADDR_SURF_4_BANK));
2469		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472			   NUM_BANKS(ADDR_SURF_2_BANK));
2473		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476			   NUM_BANKS(ADDR_SURF_16_BANK));
2477		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480			   NUM_BANKS(ADDR_SURF_16_BANK));
2481		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484			    NUM_BANKS(ADDR_SURF_16_BANK));
2485		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488			    NUM_BANKS(ADDR_SURF_8_BANK));
2489		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492			    NUM_BANKS(ADDR_SURF_4_BANK));
2493		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496			    NUM_BANKS(ADDR_SURF_2_BANK));
2497		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500			    NUM_BANKS(ADDR_SURF_2_BANK));
2501
2502		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2503			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2504		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2506		break;
2507
2508	case 8:
2509		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2511			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2513		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2517		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2521		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2525		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528			   TILE_SPLIT(split_equal_to_row_size));
2529		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2533			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			   TILE_SPLIT(split_equal_to_row_size));
2540		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2541			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2542		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2545		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2552			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2560		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2562			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2575		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2577			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587
2588		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2589				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2590				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2591				NUM_BANKS(ADDR_SURF_16_BANK));
2592		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2595				NUM_BANKS(ADDR_SURF_16_BANK));
2596		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599				NUM_BANKS(ADDR_SURF_16_BANK));
2600		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603				NUM_BANKS(ADDR_SURF_16_BANK));
2604		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2607				NUM_BANKS(ADDR_SURF_8_BANK));
2608		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611				NUM_BANKS(ADDR_SURF_4_BANK));
2612		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615				NUM_BANKS(ADDR_SURF_2_BANK));
2616		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2618				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619				NUM_BANKS(ADDR_SURF_16_BANK));
2620		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623				NUM_BANKS(ADDR_SURF_16_BANK));
2624		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2626				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627				NUM_BANKS(ADDR_SURF_16_BANK));
2628		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631				NUM_BANKS(ADDR_SURF_16_BANK));
2632		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635				NUM_BANKS(ADDR_SURF_8_BANK));
2636		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639				NUM_BANKS(ADDR_SURF_4_BANK));
2640		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643				NUM_BANKS(ADDR_SURF_2_BANK));
2644
2645		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2646			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2647		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2648			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2649		break;
2650
2651	case 4:
2652		if (num_rbs == 4) {
2653		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2656			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2661		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2665		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2669		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672			   TILE_SPLIT(split_equal_to_row_size));
2673		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2676		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2677			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			   TILE_SPLIT(split_equal_to_row_size));
2684		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2685			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2686		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2687			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2689		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2691			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2693		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2696			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2704		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2719		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2720			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2721			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731
2732		} else if (num_rbs < 4) {
2733		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2735			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2737		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2741		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2745		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2749		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752			   TILE_SPLIT(split_equal_to_row_size));
2753		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2756		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			   TILE_SPLIT(split_equal_to_row_size));
2764		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2765			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2766		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2767			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2769		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2771			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2773		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2784		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2786			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2799		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2800			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2801			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811		}
2812
2813		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816				NUM_BANKS(ADDR_SURF_16_BANK));
2817		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820				NUM_BANKS(ADDR_SURF_16_BANK));
2821		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2823				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2824				NUM_BANKS(ADDR_SURF_16_BANK));
2825		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828				NUM_BANKS(ADDR_SURF_16_BANK));
2829		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832				NUM_BANKS(ADDR_SURF_16_BANK));
2833		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836				NUM_BANKS(ADDR_SURF_8_BANK));
2837		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2840				NUM_BANKS(ADDR_SURF_4_BANK));
2841		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2842				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2843				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844				NUM_BANKS(ADDR_SURF_16_BANK));
2845		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2847				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848				NUM_BANKS(ADDR_SURF_16_BANK));
2849		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852				NUM_BANKS(ADDR_SURF_16_BANK));
2853		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856				NUM_BANKS(ADDR_SURF_16_BANK));
2857		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860				NUM_BANKS(ADDR_SURF_16_BANK));
2861		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864				NUM_BANKS(ADDR_SURF_8_BANK));
2865		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868				NUM_BANKS(ADDR_SURF_4_BANK));
2869
2870		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2871			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2872		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2873			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2874		break;
2875
2876	case 2:
2877		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879			   PIPE_CONFIG(ADDR_SURF_P2) |
2880			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883			   PIPE_CONFIG(ADDR_SURF_P2) |
2884			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2885		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887			   PIPE_CONFIG(ADDR_SURF_P2) |
2888			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2889		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891			   PIPE_CONFIG(ADDR_SURF_P2) |
2892			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2893		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895			   PIPE_CONFIG(ADDR_SURF_P2) |
2896			   TILE_SPLIT(split_equal_to_row_size));
2897		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898			   PIPE_CONFIG(ADDR_SURF_P2) |
2899			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2901			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902			   PIPE_CONFIG(ADDR_SURF_P2) |
2903			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906			   PIPE_CONFIG(ADDR_SURF_P2) |
2907			   TILE_SPLIT(split_equal_to_row_size));
2908		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2909			   PIPE_CONFIG(ADDR_SURF_P2);
2910		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2911			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912			   PIPE_CONFIG(ADDR_SURF_P2));
2913		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2915			    PIPE_CONFIG(ADDR_SURF_P2) |
2916			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919			    PIPE_CONFIG(ADDR_SURF_P2) |
2920			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923			    PIPE_CONFIG(ADDR_SURF_P2) |
2924			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926			    PIPE_CONFIG(ADDR_SURF_P2) |
2927			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2928		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2930			    PIPE_CONFIG(ADDR_SURF_P2) |
2931			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934			    PIPE_CONFIG(ADDR_SURF_P2) |
2935			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938			    PIPE_CONFIG(ADDR_SURF_P2) |
2939			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2942			    PIPE_CONFIG(ADDR_SURF_P2));
2943		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2944			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2945			    PIPE_CONFIG(ADDR_SURF_P2) |
2946			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949			    PIPE_CONFIG(ADDR_SURF_P2) |
2950			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953			    PIPE_CONFIG(ADDR_SURF_P2) |
2954			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955
2956		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2957				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959				NUM_BANKS(ADDR_SURF_16_BANK));
2960		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2962				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963				NUM_BANKS(ADDR_SURF_16_BANK));
2964		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967				NUM_BANKS(ADDR_SURF_16_BANK));
2968		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2970				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971				NUM_BANKS(ADDR_SURF_16_BANK));
2972		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975				NUM_BANKS(ADDR_SURF_16_BANK));
2976		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979				NUM_BANKS(ADDR_SURF_16_BANK));
2980		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2983				NUM_BANKS(ADDR_SURF_8_BANK));
2984		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2985				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2986				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987				NUM_BANKS(ADDR_SURF_16_BANK));
2988		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991				NUM_BANKS(ADDR_SURF_16_BANK));
2992		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2993				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995				NUM_BANKS(ADDR_SURF_16_BANK));
2996		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2998				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999				NUM_BANKS(ADDR_SURF_16_BANK));
3000		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003				NUM_BANKS(ADDR_SURF_16_BANK));
3004		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007				NUM_BANKS(ADDR_SURF_16_BANK));
3008		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011				NUM_BANKS(ADDR_SURF_8_BANK));
3012
3013		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3014			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3015		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3016			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3017		break;
3018
3019	default:
3020		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3021	}
3022}
3023
3024/**
3025 * cik_select_se_sh - select which SE, SH to address
3026 *
3027 * @rdev: radeon_device pointer
3028 * @se_num: shader engine to address
3029 * @sh_num: sh block to address
3030 *
3031 * Select which SE, SH combinations to address. Certain
3032 * registers are instanced per SE or SH.  0xffffffff means
3033 * broadcast to all SEs or SHs (CIK).
3034 */
3035static void cik_select_se_sh(struct radeon_device *rdev,
3036			     u32 se_num, u32 sh_num)
3037{
3038	u32 data = INSTANCE_BROADCAST_WRITES;
3039
3040	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3041		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3042	else if (se_num == 0xffffffff)
3043		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3044	else if (sh_num == 0xffffffff)
3045		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3046	else
3047		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3048	WREG32(GRBM_GFX_INDEX, data);
3049}
3050
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a contiguous low-order bitmask @bit_width bits wide (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 mask = 0;

	/* Shift a 1 in from the bottom once per requested bit. */
	while (bit_width--)
		mask = (mask << 1) | 1;

	return mask;
}
3069
3070/**
3071 * cik_get_rb_disabled - computes the mask of disabled RBs
3072 *
3073 * @rdev: radeon_device pointer
3074 * @max_rb_num: max RBs (render backends) for the asic
3075 * @se_num: number of SEs (shader engines) for the asic
3076 * @sh_per_se: number of SH blocks per SE for the asic
3077 *
3078 * Calculates the bitmask of disabled RBs (CIK).
3079 * Returns the disabled RB bitmask.
3080 */
3081static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3082			      u32 max_rb_num_per_se,
3083			      u32 sh_per_se)
3084{
3085	u32 data, mask;
3086
3087	data = RREG32(CC_RB_BACKEND_DISABLE);
3088	if (data & 1)
3089		data &= BACKEND_DISABLE_MASK;
3090	else
3091		data = 0;
3092	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3093
3094	data >>= BACKEND_DISABLE_SHIFT;
3095
3096	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3097
3098	return data & mask;
3099}
3100
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK): gathers the disabled-RB
 * mask across all SE/SH instances, records the enabled-RB mask in
 * rdev->config.cik, then programs PA_SC_RASTER_CONFIG per SE.
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH instance and accumulate its disabled-RB bits
	 * into one global mask; Hawaii packs more bits per SH. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast addressing after the per-instance reads. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: a bit set in enabled_rbs means that RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Save the mask before the loop below consumes it 2 bits at a time. */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			/* Pick a raster config RB mapping based on which of
			 * this SH's two RB slots are enabled. */
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3169
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable: fills in the
 * per-family limits in rdev->config.cik, programs the address
 * config / tiling registers, sets up the RBs, and writes the
 * 3D engine HW defaults.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader-engine/pipe/CU limits and the "golden"
	 * GB_ADDR_CONFIG value for each CIK variant. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 8;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but not consumed below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	/* Derive DRAM row size from the column count, capped at 4KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Propagate the address config to the blocks that need it
	 * (HDP, display DMIF, both SDMA engines, UVD). */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* Program per-SE RB / raster configuration. */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* Count active CUs across all SE/SH instances. */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* Read-modify-write the DB/CB debug registers, preserving the
	 * bits outside the masked fields. */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* Flush + invalidate together on HDP cache flush. */
	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* Read-back write of HDP_HOST_PATH_CNTL keeps its current value. */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the hardware settle before further setup */
	udelay(50);
}
3418
3419/*
3420 * GPU scratch registers helpers function.
3421 */
3422/**
3423 * cik_scratch_init - setup driver info for CP scratch regs
3424 *
3425 * @rdev: radeon_device pointer
3426 *
3427 * Set up the number and offset of the CP scratch registers.
3428 * NOTE: use of CP scratch registers is a legacy inferface and
3429 * is not used by default on newer asics (r6xx+).  On newer asics,
3430 * memory buffers are used for fences rather than scratch regs.
3431 */
3432static void cik_scratch_init(struct radeon_device *rdev)
3433{
3434	int i;
3435
3436	rdev->scratch.num_reg = 7;
3437	rdev->scratch.reg_base = SCRATCH_REG0;
3438	for (i = 0; i < rdev->scratch.num_reg; i++) {
3439		rdev->scratch.free[i] = true;
3440		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3441	}
3442}
3443
3444/**
3445 * cik_ring_test - basic gfx ring test
3446 *
3447 * @rdev: radeon_device pointer
3448 * @ring: radeon_ring structure holding ring information
3449 *
3450 * Allocate a scratch register and write to it using the gfx ring (CIK).
3451 * Provides a basic gfx ring test to verify that the ring is working.
3452 * Used by cik_cp_gfx_resume();
3453 * Returns 0 on success, error on failure.
3454 */
3455int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3456{
3457	uint32_t scratch;
3458	uint32_t tmp = 0;
3459	unsigned i;
3460	int r;
3461
3462	r = radeon_scratch_get(rdev, &scratch);
3463	if (r) {
3464		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3465		return r;
3466	}
3467	WREG32(scratch, 0xCAFEDEAD);
3468	r = radeon_ring_lock(rdev, ring, 3);
3469	if (r) {
3470		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3471		radeon_scratch_free(rdev, scratch);
3472		return r;
3473	}
3474	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3475	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3476	radeon_ring_write(ring, 0xDEADBEEF);
3477	radeon_ring_unlock_commit(rdev, ring, false);
3478
3479	for (i = 0; i < rdev->usec_timeout; i++) {
3480		tmp = RREG32(scratch);
3481		if (tmp == 0xDEADBEEF)
3482			break;
3483		DRM_UDELAY(1);
3484	}
3485	if (i < rdev->usec_timeout) {
3486		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3487	} else {
3488		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3489			  ring->idx, scratch, tmp);
3490		r = -EINVAL;
3491	}
3492	radeon_scratch_free(rdev, scratch);
3493	return r;
3494}
3495
3496/**
3497 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3498 *
3499 * @rdev: radeon_device pointer
3500 * @ridx: radeon ring index
3501 *
3502 * Emits an hdp flush on the cp.
3503 */
3504static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3505				       int ridx)
3506{
3507	struct radeon_ring *ring = &rdev->ring[ridx];
3508	u32 ref_and_mask;
3509
3510	switch (ring->idx) {
3511	case CAYMAN_RING_TYPE_CP1_INDEX:
3512	case CAYMAN_RING_TYPE_CP2_INDEX:
3513	default:
3514		switch (ring->me) {
3515		case 0:
3516			ref_and_mask = CP2 << ring->pipe;
3517			break;
3518		case 1:
3519			ref_and_mask = CP6 << ring->pipe;
3520			break;
3521		default:
3522			return;
3523		}
3524		break;
3525	case RADEON_RING_TYPE_GFX_INDEX:
3526		ref_and_mask = CP0;
3527		break;
3528	}
3529
3530	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3531	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3532				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3533				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3534	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3535	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3536	radeon_ring_write(ring, ref_and_mask);
3537	radeon_ring_write(ring, ref_and_mask);
3538	radeon_ring_write(ring, 0x20); /* poll interval */
3539}
3540
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* NOTE(review): the dummy event uses INT_SEL(0) while the real
	 * one below uses INT_SEL(2) — presumably so only the real event
	 * raises the fence interrupt; confirm against CIK PM4 docs. */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3581
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* low dword of the fence address must be dword-aligned */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3609
3610/**
3611 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3612 *
3613 * @rdev: radeon_device pointer
3614 * @ring: radeon ring buffer object
3615 * @semaphore: radeon semaphore object
3616 * @emit_wait: Is this a sempahore wait?
3617 *
3618 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3619 * from running ahead of semaphore waits.
3620 */
3621bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3622			     struct radeon_ring *ring,
3623			     struct radeon_semaphore *semaphore,
3624			     bool emit_wait)
3625{
3626	uint64_t addr = semaphore->gpu_addr;
3627	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3628
3629	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3630	radeon_ring_write(ring, lower_32_bits(addr));
3631	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3632
3633	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3634		/* Prevent the PFP from running ahead of the semaphore wait */
3635		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3636		radeon_ring_write(ring, 0x0);
3637	}
3638
3639	return true;
3640}
3641
3642/**
3643 * cik_copy_cpdma - copy pages using the CP DMA engine
3644 *
3645 * @rdev: radeon_device pointer
3646 * @src_offset: src GPU address
3647 * @dst_offset: dst GPU address
3648 * @num_gpu_pages: number of GPU pages to xfer
3649 * @resv: reservation object to sync to
3650 *
3651 * Copy GPU paging using the CP DMA engine (CIK+).
3652 * Used by the radeon ttm implementation to move pages if
3653 * registered as the asic copy callback.
3654 */
3655struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3656				    uint64_t src_offset, uint64_t dst_offset,
3657				    unsigned num_gpu_pages,
3658				    struct reservation_object *resv)
3659{
3660	struct radeon_fence *fence;
3661	struct radeon_sync sync;
3662	int ring_index = rdev->asic->copy.blit_ring_index;
3663	struct radeon_ring *ring = &rdev->ring[ring_index];
3664	u32 size_in_bytes, cur_size_in_bytes, control;
3665	int i, num_loops;
3666	int r = 0;
3667
3668	radeon_sync_create(&sync);
3669
3670	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3671	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3672	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3673	if (r) {
3674		DRM_ERROR("radeon: moving bo (%d).\n", r);
3675		radeon_sync_free(rdev, &sync, NULL);
3676		return ERR_PTR(r);
3677	}
3678
3679	radeon_sync_resv(rdev, &sync, resv, false);
3680	radeon_sync_rings(rdev, &sync, ring->idx);
3681
3682	for (i = 0; i < num_loops; i++) {
3683		cur_size_in_bytes = size_in_bytes;
3684		if (cur_size_in_bytes > 0x1fffff)
3685			cur_size_in_bytes = 0x1fffff;
3686		size_in_bytes -= cur_size_in_bytes;
3687		control = 0;
3688		if (size_in_bytes == 0)
3689			control |= PACKET3_DMA_DATA_CP_SYNC;
3690		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3691		radeon_ring_write(ring, control);
3692		radeon_ring_write(ring, lower_32_bits(src_offset));
3693		radeon_ring_write(ring, upper_32_bits(src_offset));
3694		radeon_ring_write(ring, lower_32_bits(dst_offset));
3695		radeon_ring_write(ring, upper_32_bits(dst_offset));
3696		radeon_ring_write(ring, cur_size_in_bytes);
3697		src_offset += cur_size_in_bytes;
3698		dst_offset += cur_size_in_bytes;
3699	}
3700
3701	r = radeon_fence_emit(rdev, &fence, ring->idx);
3702	if (r) {
3703		radeon_ring_unlock_undo(rdev, ring);
3704		radeon_sync_free(rdev, &sync, NULL);
3705		return ERR_PTR(r);
3706	}
3707
3708	radeon_ring_unlock_commit(rdev, ring, false);
3709	radeon_sync_free(rdev, &sync, fence);
3710
3711	return fence;
3712}
3713
3714/*
3715 * IB stuff
3716 */
3717/**
3718 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3719 *
3720 * @rdev: radeon_device pointer
3721 * @ib: radeon indirect buffer object
3722 *
3723 * Emits a DE (drawing engine) or CE (constant engine) IB
3724 * on the gfx ring.  IBs are usually generated by userspace
3725 * acceleration drivers and submitted to the kernel for
3726 * scheduling on the ring.  This function schedules the IB
3727 * on the gfx ring for execution by the GPU.
3728 */
3729void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3730{
3731	struct radeon_ring *ring = &rdev->ring[ib->ring];
3732	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3733	u32 header, control = INDIRECT_BUFFER_VALID;
3734
3735	if (ib->is_const_ib) {
3736		/* set switch buffer packet before const IB */
3737		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3738		radeon_ring_write(ring, 0);
3739
3740		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3741	} else {
3742		u32 next_rptr;
3743		if (ring->rptr_save_reg) {
3744			next_rptr = ring->wptr + 3 + 4;
3745			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3746			radeon_ring_write(ring, ((ring->rptr_save_reg -
3747						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3748			radeon_ring_write(ring, next_rptr);
3749		} else if (rdev->wb.enabled) {
3750			next_rptr = ring->wptr + 5 + 4;
3751			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3752			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3753			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3754			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3755			radeon_ring_write(ring, next_rptr);
3756		}
3757
3758		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3759	}
3760
3761	control |= ib->length_dw | (vm_id << 24);
3762
3763	radeon_ring_write(ring, header);
3764	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3765	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3766	radeon_ring_write(ring, control);
3767}
3768
3769/**
3770 * cik_ib_test - basic gfx ring IB test
3771 *
3772 * @rdev: radeon_device pointer
3773 * @ring: radeon_ring structure holding ring information
3774 *
3775 * Allocate an IB and execute it on the gfx ring (CIK).
3776 * Provides a basic gfx ring test to verify that IBs are working.
3777 * Returns 0 on success, error on failure.
3778 */
3779int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3780{
3781	struct radeon_ib ib;
3782	uint32_t scratch;
3783	uint32_t tmp = 0;
3784	unsigned i;
3785	int r;
3786
3787	r = radeon_scratch_get(rdev, &scratch);
3788	if (r) {
3789		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3790		return r;
3791	}
3792	WREG32(scratch, 0xCAFEDEAD);
3793	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3794	if (r) {
3795		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3796		radeon_scratch_free(rdev, scratch);
3797		return r;
3798	}
3799	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3800	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3801	ib.ptr[2] = 0xDEADBEEF;
3802	ib.length_dw = 3;
3803	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3804	if (r) {
3805		radeon_scratch_free(rdev, scratch);
3806		radeon_ib_free(rdev, &ib);
3807		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3808		return r;
3809	}
3810	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3811		RADEON_USEC_IB_TEST_TIMEOUT));
3812	if (r < 0) {
3813		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3814		radeon_scratch_free(rdev, scratch);
3815		radeon_ib_free(rdev, &ib);
3816		return r;
3817	} else if (r == 0) {
3818		DRM_ERROR("radeon: fence wait timed out.\n");
3819		radeon_scratch_free(rdev, scratch);
3820		radeon_ib_free(rdev, &ib);
3821		return -ETIMEDOUT;
3822	}
3823	r = 0;
3824	for (i = 0; i < rdev->usec_timeout; i++) {
3825		tmp = RREG32(scratch);
3826		if (tmp == 0xDEADBEEF)
3827			break;
3828		DRM_UDELAY(1);
3829	}
3830	if (i < rdev->usec_timeout) {
3831		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3832	} else {
3833		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3834			  scratch, tmp);
3835		r = -EINVAL;
3836	}
3837	radeon_scratch_free(rdev, scratch);
3838	radeon_ib_free(rdev, &ib);
3839	return r;
3840}
3841
3842/*
3843 * CP.
3844 * On CIK, gfx and compute now have independant command processors.
3845 *
3846 * GFX
3847 * Gfx consists of a single ring and can process both gfx jobs and
3848 * compute jobs.  The gfx CP consists of three microengines (ME):
3849 * PFP - Pre-Fetch Parser
3850 * ME - Micro Engine
3851 * CE - Constant Engine
3852 * The PFP and ME make up what is considered the Drawing Engine (DE).
3853 * The CE is an asynchronous engine used for updating buffer desciptors
3854 * used by the DE so that they can be loaded into cache in parallel
3855 * while the DE is processing state update packets.
3856 *
3857 * Compute
3858 * The compute CP consists of two microengines (ME):
3859 * MEC1 - Compute MicroEngine 1
3860 * MEC2 - Compute MicroEngine 2
3861 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3862 * The queues are exposed to userspace and are programmed directly
3863 * by the compute runtime.
3864 */
3865/**
3866 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3867 *
3868 * @rdev: radeon_device pointer
3869 * @enable: enable or disable the MEs
3870 *
3871 * Halts or unhalts the gfx MEs.
3872 */
3873static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3874{
3875	if (enable)
3876		WREG32(CP_ME_CNTL, 0);
3877	else {
3878		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3879			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3880		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3881		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3882	}
3883	udelay(50);
3884}
3885
3886/**
3887 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3888 *
3889 * @rdev: radeon_device pointer
3890 *
3891 * Loads the gfx PFP, ME, and CE ucode.
3892 * Returns 0 for success, -EINVAL if the ucode is not available.
3893 */
3894static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3895{
3896	int i;
3897
3898	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3899		return -EINVAL;
3900
3901	cik_cp_gfx_enable(rdev, false);
3902
3903	if (rdev->new_fw) {
3904		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3905			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3906		const struct gfx_firmware_header_v1_0 *ce_hdr =
3907			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3908		const struct gfx_firmware_header_v1_0 *me_hdr =
3909			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3910		const __le32 *fw_data;
3911		u32 fw_size;
3912
3913		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3914		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3915		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3916
3917		/* PFP */
3918		fw_data = (const __le32 *)
3919			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3920		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3921		WREG32(CP_PFP_UCODE_ADDR, 0);
3922		for (i = 0; i < fw_size; i++)
3923			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3924		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3925
3926		/* CE */
3927		fw_data = (const __le32 *)
3928			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3929		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3930		WREG32(CP_CE_UCODE_ADDR, 0);
3931		for (i = 0; i < fw_size; i++)
3932			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3933		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3934
3935		/* ME */
3936		fw_data = (const __be32 *)
3937			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3938		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3939		WREG32(CP_ME_RAM_WADDR, 0);
3940		for (i = 0; i < fw_size; i++)
3941			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3942		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3943		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3944	} else {
3945		const __be32 *fw_data;
3946
3947		/* PFP */
3948		fw_data = (const __be32 *)rdev->pfp_fw->data;
3949		WREG32(CP_PFP_UCODE_ADDR, 0);
3950		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3951			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3952		WREG32(CP_PFP_UCODE_ADDR, 0);
3953
3954		/* CE */
3955		fw_data = (const __be32 *)rdev->ce_fw->data;
3956		WREG32(CP_CE_UCODE_ADDR, 0);
3957		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3958			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3959		WREG32(CP_CE_UCODE_ADDR, 0);
3960
3961		/* ME */
3962		fw_data = (const __be32 *)rdev->me_fw->data;
3963		WREG32(CP_ME_RAM_WADDR, 0);
3964		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3965			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3966		WREG32(CP_ME_RAM_WADDR, 0);
3967	}
3968
3969	return 0;
3970}
3971
3972/**
3973 * cik_cp_gfx_start - start the gfx ring
3974 *
3975 * @rdev: radeon_device pointer
3976 *
3977 * Enables the ring and loads the clear state context and other
3978 * packets required to init the ring.
3979 * Returns 0 for success, error for failure.
3980 */
3981static int cik_cp_gfx_start(struct radeon_device *rdev)
3982{
3983	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3984	int r, i;
3985
3986	/* init the CP */
3987	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3988	WREG32(CP_ENDIAN_SWAP, 0);
3989	WREG32(CP_DEVICE_ID, 1);
3990
3991	cik_cp_gfx_enable(rdev, true);
3992
3993	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3994	if (r) {
3995		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3996		return r;
3997	}
3998
3999	/* init the CE partitions.  CE only used for gfx on CIK */
4000	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4001	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4002	radeon_ring_write(ring, 0x8000);
4003	radeon_ring_write(ring, 0x8000);
4004
4005	/* setup clear context state */
4006	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4008
4009	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4010	radeon_ring_write(ring, 0x80000000);
4011	radeon_ring_write(ring, 0x80000000);
4012
4013	for (i = 0; i < cik_default_size; i++)
4014		radeon_ring_write(ring, cik_default_state[i]);
4015
4016	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4017	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4018
4019	/* set clear context state */
4020	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4021	radeon_ring_write(ring, 0);
4022
4023	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4024	radeon_ring_write(ring, 0x00000316);
4025	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4026	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4027
4028	radeon_ring_unlock_commit(rdev, ring, false);
4029
4030	return 0;
4031}
4032
4033/**
4034 * cik_cp_gfx_fini - stop the gfx ring
4035 *
4036 * @rdev: radeon_device pointer
4037 *
4038 * Stop the gfx ring and tear down the driver ring
4039 * info.
4040 */
4041static void cik_cp_gfx_fini(struct radeon_device *rdev)
4042{
4043	cik_cp_gfx_enable(rdev, false);
4044	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4045}
4046
4047/**
4048 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4049 *
4050 * @rdev: radeon_device pointer
4051 *
4052 * Program the location and size of the gfx ring buffer
4053 * and test it to make sure it's working.
4054 * Returns 0 for success, error for failure.
4055 */
4056static int cik_cp_gfx_resume(struct radeon_device *rdev)
4057{
4058	struct radeon_ring *ring;
4059	u32 tmp;
4060	u32 rb_bufsz;
4061	u64 rb_addr;
4062	int r;
4063
4064	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4065	if (rdev->family != CHIP_HAWAII)
4066		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4067
4068	/* Set the write pointer delay */
4069	WREG32(CP_RB_WPTR_DELAY, 0);
4070
4071	/* set the RB to use vmid 0 */
4072	WREG32(CP_RB_VMID, 0);
4073
4074	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4075
4076	/* ring 0 - compute and gfx */
4077	/* Set ring buffer size */
4078	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4079	rb_bufsz = order_base_2(ring->ring_size / 8);
4080	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4081#ifdef __BIG_ENDIAN
4082	tmp |= BUF_SWAP_32BIT;
4083#endif
4084	WREG32(CP_RB0_CNTL, tmp);
4085
4086	/* Initialize the ring buffer's read and write pointers */
4087	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4088	ring->wptr = 0;
4089	WREG32(CP_RB0_WPTR, ring->wptr);
4090
4091	/* set the wb address wether it's enabled or not */
4092	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4093	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4094
4095	/* scratch register shadowing is no longer supported */
4096	WREG32(SCRATCH_UMSK, 0);
4097
4098	if (!rdev->wb.enabled)
4099		tmp |= RB_NO_UPDATE;
4100
4101	mdelay(1);
4102	WREG32(CP_RB0_CNTL, tmp);
4103
4104	rb_addr = ring->gpu_addr >> 8;
4105	WREG32(CP_RB0_BASE, rb_addr);
4106	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4107
4108	/* start the ring */
4109	cik_cp_gfx_start(rdev);
4110	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4111	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4112	if (r) {
4113		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4114		return r;
4115	}
4116
4117	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4118		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4119
4120	return 0;
4121}
4122
4123u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4124		     struct radeon_ring *ring)
4125{
4126	u32 rptr;
4127
4128	if (rdev->wb.enabled)
4129		rptr = rdev->wb.wb[ring->rptr_offs/4];
4130	else
4131		rptr = RREG32(CP_RB0_RPTR);
4132
4133	return rptr;
4134}
4135
/* Return the gfx ring write pointer directly from the register. */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	return RREG32(CP_RB0_WPTR);
}
4141
/* Commit the gfx ring write pointer to the hardware. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted write */
	(void)RREG32(CP_RB0_WPTR);
}
4148
4149u32 cik_compute_get_rptr(struct radeon_device *rdev,
4150			 struct radeon_ring *ring)
4151{
4152	u32 rptr;
4153
4154	if (rdev->wb.enabled) {
4155		rptr = rdev->wb.wb[ring->rptr_offs/4];
4156	} else {
4157		mutex_lock(&rdev->srbm_mutex);
4158		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4159		rptr = RREG32(CP_HQD_PQ_RPTR);
4160		cik_srbm_select(rdev, 0, 0, 0, 0);
4161		mutex_unlock(&rdev->srbm_mutex);
4162	}
4163
4164	return rptr;
4165}
4166
4167u32 cik_compute_get_wptr(struct radeon_device *rdev,
4168			 struct radeon_ring *ring)
4169{
4170	u32 wptr;
4171
4172	if (rdev->wb.enabled) {
4173		/* XXX check if swapping is necessary on BE */
4174		wptr = rdev->wb.wb[ring->wptr_offs/4];
4175	} else {
4176		mutex_lock(&rdev->srbm_mutex);
4177		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4178		wptr = RREG32(CP_HQD_PQ_WPTR);
4179		cik_srbm_select(rdev, 0, 0, 0, 0);
4180		mutex_unlock(&rdev->srbm_mutex);
4181	}
4182
4183	return wptr;
4184}
4185
/* Commit the compute ring write pointer: update the writeback slot and
 * ring the queue's doorbell so the CP picks up the new value. */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4193
/* Quiesce one compute queue: disable wptr polling, request an HQD
 * dequeue and wait for the queue to go inactive, then clear the queue
 * pointers.  Caller must hold srbm_mutex (see cik_cp_compute_enable). */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4218
4219/**
4220 * cik_cp_compute_enable - enable/disable the compute CP MEs
4221 *
4222 * @rdev: radeon_device pointer
4223 * @enable: enable or disable the MEs
4224 *
4225 * Halts or unhalts the compute MEs.
4226 */
4227static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4228{
4229	if (enable)
4230		WREG32(CP_MEC_CNTL, 0);
4231	else {
4232		/*
4233		 * To make hibernation reliable we need to clear compute ring
4234		 * configuration before halting the compute ring.
4235		 */
4236		mutex_lock(&rdev->srbm_mutex);
4237		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4238		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4239		mutex_unlock(&rdev->srbm_mutex);
4240
4241		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4242		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4243		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4244	}
4245	udelay(50);
4246}
4247
4248/**
4249 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4250 *
4251 * @rdev: radeon_device pointer
4252 *
4253 * Loads the compute MEC1&2 ucode.
4254 * Returns 0 for success, -EINVAL if the ucode is not available.
4255 */
4256static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4257{
4258	int i;
4259
4260	if (!rdev->mec_fw)
4261		return -EINVAL;
4262
4263	cik_cp_compute_enable(rdev, false);
4264
4265	if (rdev->new_fw) {
4266		const struct gfx_firmware_header_v1_0 *mec_hdr =
4267			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4268		const __le32 *fw_data;
4269		u32 fw_size;
4270
4271		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4272
4273		/* MEC1 */
4274		fw_data = (const __le32 *)
4275			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4276		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4277		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4278		for (i = 0; i < fw_size; i++)
4279			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4280		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4281
4282		/* MEC2 */
4283		if (rdev->family == CHIP_KAVERI) {
4284			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4285				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4286
4287			fw_data = (const __le32 *)
4288				(rdev->mec2_fw->data +
4289				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4290			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4291			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4292			for (i = 0; i < fw_size; i++)
4293				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4294			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4295		}
4296	} else {
4297		const __be32 *fw_data;
4298
4299		/* MEC1 */
4300		fw_data = (const __be32 *)rdev->mec_fw->data;
4301		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4302		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4303			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4304		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4305
4306		if (rdev->family == CHIP_KAVERI) {
4307			/* MEC2 */
4308			fw_data = (const __be32 *)rdev->mec_fw->data;
4309			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4310			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4311				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4312			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4313		}
4314	}
4315
4316	return 0;
4317}
4318
4319/**
4320 * cik_cp_compute_start - start the compute queues
4321 *
4322 * @rdev: radeon_device pointer
4323 *
4324 * Enable the compute queues.
4325 * Returns 0 for success, error for failure.
4326 */
4327static int cik_cp_compute_start(struct radeon_device *rdev)
4328{
4329	cik_cp_compute_enable(rdev, true);
4330
4331	return 0;
4332}
4333
4334/**
4335 * cik_cp_compute_fini - stop the compute queues
4336 *
4337 * @rdev: radeon_device pointer
4338 *
4339 * Stop the compute queues and tear down the driver queue
4340 * info.
4341 */
4342static void cik_cp_compute_fini(struct radeon_device *rdev)
4343{
4344	int i, idx, r;
4345
4346	cik_cp_compute_enable(rdev, false);
4347
4348	for (i = 0; i < 2; i++) {
4349		if (i == 0)
4350			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4351		else
4352			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4353
4354		if (rdev->ring[idx].mqd_obj) {
4355			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4356			if (unlikely(r != 0))
4357				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4358
4359			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4360			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4361
4362			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4363			rdev->ring[idx].mqd_obj = NULL;
4364		}
4365	}
4366}
4367
4368static void cik_mec_fini(struct radeon_device *rdev)
4369{
4370	int r;
4371
4372	if (rdev->mec.hpd_eop_obj) {
4373		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4374		if (unlikely(r != 0))
4375			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4376		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4377		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4378
4379		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4380		rdev->mec.hpd_eop_obj = NULL;
4381	}
4382}
4383
4384#define MEC_HPD_SIZE 2048
4385
/* Allocate, pin, and zero the HPD EOP buffer shared by all compute
 * pipes, and record the MEC/pipe/queue topology for this family.
 * Returns 0 on success, error on failure (partially-created state is
 * torn down via cik_mec_fini()). */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one MEC_HPD_SIZE*2 slot per pipe across all MECs */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4441
/* Shadow copy of the per-queue HQD register block kept inside the MQD
 * (see struct bonaire_mqd).  Field order mirrors the CP_HQD_*/
/* CP_MQD_* registers programmed in cik_cp_compute_resume(). */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4480
/* Memory Queue Descriptor for a CIK compute queue.  This structure lives
 * in a GPU buffer object (see cik_cp_compute_resume()) and is read by the
 * MEC firmware; do not reorder or resize fields. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* HQD register shadow */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4508
4509/**
4510 * cik_cp_compute_resume - setup the compute queue registers
4511 *
4512 * @rdev: radeon_device pointer
4513 *
4514 * Program the compute queues and test them to make sure they
4515 * are working.
4516 * Returns 0 for success, error for failure.
4517 */
4518static int cik_cp_compute_resume(struct radeon_device *rdev)
4519{
4520	int r, i, j, idx;
4521	u32 tmp;
4522	bool use_doorbell = true;
4523	u64 hqd_gpu_addr;
4524	u64 mqd_gpu_addr;
4525	u64 eop_gpu_addr;
4526	u64 wb_gpu_addr;
4527	u32 *buf;
4528	struct bonaire_mqd *mqd;
4529
4530	r = cik_cp_compute_start(rdev);
4531	if (r)
4532		return r;
4533
4534	/* fix up chicken bits */
4535	tmp = RREG32(CP_CPF_DEBUG);
4536	tmp |= (1 << 23);
4537	WREG32(CP_CPF_DEBUG, tmp);
4538
4539	/* init the pipes */
4540	mutex_lock(&rdev->srbm_mutex);
4541
4542	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4543		int me = (i < 4) ? 1 : 2;
4544		int pipe = (i < 4) ? i : (i - 4);
4545
4546		cik_srbm_select(rdev, me, pipe, 0, 0);
4547
4548		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4549		/* write the EOP addr */
4550		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4551		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4552
4553		/* set the VMID assigned */
4554		WREG32(CP_HPD_EOP_VMID, 0);
4555
4556		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4557		tmp = RREG32(CP_HPD_EOP_CONTROL);
4558		tmp &= ~EOP_SIZE_MASK;
4559		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4560		WREG32(CP_HPD_EOP_CONTROL, tmp);
4561
4562	}
4563	cik_srbm_select(rdev, 0, 0, 0, 0);
4564	mutex_unlock(&rdev->srbm_mutex);
4565
4566	/* init the queues.  Just two for now. */
4567	for (i = 0; i < 2; i++) {
4568		if (i == 0)
4569			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4570		else
4571			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4572
4573		if (rdev->ring[idx].mqd_obj == NULL) {
4574			r = radeon_bo_create(rdev,
4575					     sizeof(struct bonaire_mqd),
4576					     PAGE_SIZE, true,
4577					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4578					     NULL, &rdev->ring[idx].mqd_obj);
4579			if (r) {
4580				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4581				return r;
4582			}
4583		}
4584
4585		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4586		if (unlikely(r != 0)) {
4587			cik_cp_compute_fini(rdev);
4588			return r;
4589		}
4590		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4591				  &mqd_gpu_addr);
4592		if (r) {
4593			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4594			cik_cp_compute_fini(rdev);
4595			return r;
4596		}
4597		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4598		if (r) {
4599			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4600			cik_cp_compute_fini(rdev);
4601			return r;
4602		}
4603
4604		/* init the mqd struct */
4605		memset(buf, 0, sizeof(struct bonaire_mqd));
4606
4607		mqd = (struct bonaire_mqd *)buf;
4608		mqd->header = 0xC0310800;
4609		mqd->static_thread_mgmt01[0] = 0xffffffff;
4610		mqd->static_thread_mgmt01[1] = 0xffffffff;
4611		mqd->static_thread_mgmt23[0] = 0xffffffff;
4612		mqd->static_thread_mgmt23[1] = 0xffffffff;
4613
4614		mutex_lock(&rdev->srbm_mutex);
4615		cik_srbm_select(rdev, rdev->ring[idx].me,
4616				rdev->ring[idx].pipe,
4617				rdev->ring[idx].queue, 0);
4618
4619		/* disable wptr polling */
4620		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4621		tmp &= ~WPTR_POLL_EN;
4622		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4623
4624		/* enable doorbell? */
4625		mqd->queue_state.cp_hqd_pq_doorbell_control =
4626			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4627		if (use_doorbell)
4628			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4629		else
4630			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4631		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4632		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4633
4634		/* disable the queue if it's active */
4635		mqd->queue_state.cp_hqd_dequeue_request = 0;
4636		mqd->queue_state.cp_hqd_pq_rptr = 0;
4637		mqd->queue_state.cp_hqd_pq_wptr= 0;
4638		if (RREG32(CP_HQD_ACTIVE) & 1) {
4639			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4640			for (j = 0; j < rdev->usec_timeout; j++) {
4641				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4642					break;
4643				udelay(1);
4644			}
4645			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4646			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4647			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4648		}
4649
4650		/* set the pointer to the MQD */
4651		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4652		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4653		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4654		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4655		/* set MQD vmid to 0 */
4656		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4657		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4658		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4659
4660		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4661		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4662		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4663		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4664		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4665		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4666
4667		/* set up the HQD, this is similar to CP_RB0_CNTL */
4668		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4669		mqd->queue_state.cp_hqd_pq_control &=
4670			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4671
4672		mqd->queue_state.cp_hqd_pq_control |=
4673			order_base_2(rdev->ring[idx].ring_size / 8);
4674		mqd->queue_state.cp_hqd_pq_control |=
4675			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4676#ifdef __BIG_ENDIAN
4677		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4678#endif
4679		mqd->queue_state.cp_hqd_pq_control &=
4680			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4681		mqd->queue_state.cp_hqd_pq_control |=
4682			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4683		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4684
4685		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4686		if (i == 0)
4687			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4688		else
4689			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4690		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4691		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4692		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4693		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4694		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4695
4696		/* set the wb address wether it's enabled or not */
4697		if (i == 0)
4698			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4699		else
4700			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4701		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4702		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4703			upper_32_bits(wb_gpu_addr) & 0xffff;
4704		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4705		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4706		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4707		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4708
4709		/* enable the doorbell if requested */
4710		if (use_doorbell) {
4711			mqd->queue_state.cp_hqd_pq_doorbell_control =
4712				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4713			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4714			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4715				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4716			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4717			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4718				~(DOORBELL_SOURCE | DOORBELL_HIT);
4719
4720		} else {
4721			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4722		}
4723		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4724		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4725
4726		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4727		rdev->ring[idx].wptr = 0;
4728		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4729		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4730		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4731
4732		/* set the vmid for the queue */
4733		mqd->queue_state.cp_hqd_vmid = 0;
4734		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4735
4736		/* activate the queue */
4737		mqd->queue_state.cp_hqd_active = 1;
4738		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4739
4740		cik_srbm_select(rdev, 0, 0, 0, 0);
4741		mutex_unlock(&rdev->srbm_mutex);
4742
4743		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4744		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4745
4746		rdev->ring[idx].ready = true;
4747		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4748		if (r)
4749			rdev->ring[idx].ready = false;
4750	}
4751
4752	return 0;
4753}
4754
/* Enable or disable both command processors (GFX ring CP and the
 * compute MEC CPs) together.  Thin convenience wrapper used by the
 * common CP resume/fini paths.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4760
/* Load the microcode for both the GFX command processor and the
 * compute MECs.  Stops at the first failure.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r = cik_cp_gfx_load_microcode(rdev);

	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4774
/* Tear down both the GFX and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4780
4781static int cik_cp_resume(struct radeon_device *rdev)
4782{
4783	int r;
4784
4785	cik_enable_gui_idle_interrupt(rdev, false);
4786
4787	r = cik_cp_load_microcode(rdev);
4788	if (r)
4789		return r;
4790
4791	r = cik_cp_gfx_resume(rdev);
4792	if (r)
4793		return r;
4794	r = cik_cp_compute_resume(rdev);
4795	if (r)
4796		return r;
4797
4798	cik_enable_gui_idle_interrupt(rdev, true);
4799
4800	return 0;
4801}
4802
/* Dump the GRBM/SRBM, SDMA and CP status registers to the kernel log.
 * Used for diagnosing GPU hangs before/after a soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share one register layout at different offsets */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4842
4843/**
4844 * cik_gpu_check_soft_reset - check which blocks are busy
4845 *
4846 * @rdev: radeon_device pointer
4847 *
4848 * Check which blocks are busy and return the relevant reset
4849 * mask to be used by cik_gpu_soft_reset().
4850 * Returns a mask of the blocks to be reset.
4851 */
4852u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4853{
4854	u32 reset_mask = 0;
4855	u32 tmp;
4856
4857	/* GRBM_STATUS */
4858	tmp = RREG32(GRBM_STATUS);
4859	if (tmp & (PA_BUSY | SC_BUSY |
4860		   BCI_BUSY | SX_BUSY |
4861		   TA_BUSY | VGT_BUSY |
4862		   DB_BUSY | CB_BUSY |
4863		   GDS_BUSY | SPI_BUSY |
4864		   IA_BUSY | IA_BUSY_NO_DMA))
4865		reset_mask |= RADEON_RESET_GFX;
4866
4867	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4868		reset_mask |= RADEON_RESET_CP;
4869
4870	/* GRBM_STATUS2 */
4871	tmp = RREG32(GRBM_STATUS2);
4872	if (tmp & RLC_BUSY)
4873		reset_mask |= RADEON_RESET_RLC;
4874
4875	/* SDMA0_STATUS_REG */
4876	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4877	if (!(tmp & SDMA_IDLE))
4878		reset_mask |= RADEON_RESET_DMA;
4879
4880	/* SDMA1_STATUS_REG */
4881	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4882	if (!(tmp & SDMA_IDLE))
4883		reset_mask |= RADEON_RESET_DMA1;
4884
4885	/* SRBM_STATUS2 */
4886	tmp = RREG32(SRBM_STATUS2);
4887	if (tmp & SDMA_BUSY)
4888		reset_mask |= RADEON_RESET_DMA;
4889
4890	if (tmp & SDMA1_BUSY)
4891		reset_mask |= RADEON_RESET_DMA1;
4892
4893	/* SRBM_STATUS */
4894	tmp = RREG32(SRBM_STATUS);
4895
4896	if (tmp & IH_BUSY)
4897		reset_mask |= RADEON_RESET_IH;
4898
4899	if (tmp & SEM_BUSY)
4900		reset_mask |= RADEON_RESET_SEM;
4901
4902	if (tmp & GRBM_RQ_PENDING)
4903		reset_mask |= RADEON_RESET_GRBM;
4904
4905	if (tmp & VMC_BUSY)
4906		reset_mask |= RADEON_RESET_VMC;
4907
4908	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4909		   MCC_BUSY | MCD_BUSY))
4910		reset_mask |= RADEON_RESET_MC;
4911
4912	if (evergreen_is_display_hung(rdev))
4913		reset_mask |= RADEON_RESET_DISPLAY;
4914
4915	/* Skip MC reset as it's mostly likely not hung, just busy */
4916	if (reset_mask & RADEON_RESET_MC) {
4917		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4918		reset_mask &= ~RADEON_RESET_MC;
4919	}
4920
4921	return reset_mask;
4922}
4923
4924/**
4925 * cik_gpu_soft_reset - soft reset GPU
4926 *
4927 * @rdev: radeon_device pointer
4928 * @reset_mask: mask of which blocks to reset
4929 *
4930 * Soft reset the blocks specified in @reset_mask.
4931 */
4932static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4933{
4934	struct evergreen_mc_save save;
4935	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4936	u32 tmp;
4937
4938	if (reset_mask == 0)
4939		return;
4940
4941	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4942
4943	cik_print_gpu_status_regs(rdev);
4944	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4945		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4946	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4947		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4948
4949	/* disable CG/PG */
4950	cik_fini_pg(rdev);
4951	cik_fini_cg(rdev);
4952
4953	/* stop the rlc */
4954	cik_rlc_stop(rdev);
4955
4956	/* Disable GFX parsing/prefetching */
4957	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4958
4959	/* Disable MEC parsing/prefetching */
4960	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4961
4962	if (reset_mask & RADEON_RESET_DMA) {
4963		/* sdma0 */
4964		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4965		tmp |= SDMA_HALT;
4966		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4967	}
4968	if (reset_mask & RADEON_RESET_DMA1) {
4969		/* sdma1 */
4970		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4971		tmp |= SDMA_HALT;
4972		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4973	}
4974
4975	evergreen_mc_stop(rdev, &save);
4976	if (evergreen_mc_wait_for_idle(rdev)) {
4977		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4978	}
4979
4980	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4981		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4982
4983	if (reset_mask & RADEON_RESET_CP) {
4984		grbm_soft_reset |= SOFT_RESET_CP;
4985
4986		srbm_soft_reset |= SOFT_RESET_GRBM;
4987	}
4988
4989	if (reset_mask & RADEON_RESET_DMA)
4990		srbm_soft_reset |= SOFT_RESET_SDMA;
4991
4992	if (reset_mask & RADEON_RESET_DMA1)
4993		srbm_soft_reset |= SOFT_RESET_SDMA1;
4994
4995	if (reset_mask & RADEON_RESET_DISPLAY)
4996		srbm_soft_reset |= SOFT_RESET_DC;
4997
4998	if (reset_mask & RADEON_RESET_RLC)
4999		grbm_soft_reset |= SOFT_RESET_RLC;
5000
5001	if (reset_mask & RADEON_RESET_SEM)
5002		srbm_soft_reset |= SOFT_RESET_SEM;
5003
5004	if (reset_mask & RADEON_RESET_IH)
5005		srbm_soft_reset |= SOFT_RESET_IH;
5006
5007	if (reset_mask & RADEON_RESET_GRBM)
5008		srbm_soft_reset |= SOFT_RESET_GRBM;
5009
5010	if (reset_mask & RADEON_RESET_VMC)
5011		srbm_soft_reset |= SOFT_RESET_VMC;
5012
5013	if (!(rdev->flags & RADEON_IS_IGP)) {
5014		if (reset_mask & RADEON_RESET_MC)
5015			srbm_soft_reset |= SOFT_RESET_MC;
5016	}
5017
5018	if (grbm_soft_reset) {
5019		tmp = RREG32(GRBM_SOFT_RESET);
5020		tmp |= grbm_soft_reset;
5021		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5022		WREG32(GRBM_SOFT_RESET, tmp);
5023		tmp = RREG32(GRBM_SOFT_RESET);
5024
5025		udelay(50);
5026
5027		tmp &= ~grbm_soft_reset;
5028		WREG32(GRBM_SOFT_RESET, tmp);
5029		tmp = RREG32(GRBM_SOFT_RESET);
5030	}
5031
5032	if (srbm_soft_reset) {
5033		tmp = RREG32(SRBM_SOFT_RESET);
5034		tmp |= srbm_soft_reset;
5035		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5036		WREG32(SRBM_SOFT_RESET, tmp);
5037		tmp = RREG32(SRBM_SOFT_RESET);
5038
5039		udelay(50);
5040
5041		tmp &= ~srbm_soft_reset;
5042		WREG32(SRBM_SOFT_RESET, tmp);
5043		tmp = RREG32(SRBM_SOFT_RESET);
5044	}
5045
5046	/* Wait a little for things to settle down */
5047	udelay(50);
5048
5049	evergreen_mc_resume(rdev, &save);
5050	udelay(50);
5051
5052	cik_print_gpu_status_regs(rdev);
5053}
5054
/* GMCON register state saved around a pci config reset on KV (APU)
 * parts; see kv_save_regs_for_reset()/kv_restore_regs_for_reset().
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* saved GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* saved GMCON_MISC */
	u32 gmcon_misc3;	/* saved GMCON_MISC3 */
};
5060
/* Save the GMCON registers that a pci config reset clobbers on KV and
 * disable the render engine's execute-on-power-up/reg-update triggers
 * (and stutter mode) so they don't fire during the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5072
5073static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5074				      struct kv_reset_save_regs *save)
5075{
5076	int i;
5077
5078	WREG32(GMCON_PGFSM_WRITE, 0);
5079	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5080
5081	for (i = 0; i < 5; i++)
5082		WREG32(GMCON_PGFSM_WRITE, 0);
5083
5084	WREG32(GMCON_PGFSM_WRITE, 0);
5085	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5086
5087	for (i = 0; i < 5; i++)
5088		WREG32(GMCON_PGFSM_WRITE, 0);
5089
5090	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5091	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5092
5093	for (i = 0; i < 5; i++)
5094		WREG32(GMCON_PGFSM_WRITE, 0);
5095
5096	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5097	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5098
5099	for (i = 0; i < 5; i++)
5100		WREG32(GMCON_PGFSM_WRITE, 0);
5101
5102	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5103	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5104
5105	for (i = 0; i < 5; i++)
5106		WREG32(GMCON_PGFSM_WRITE, 0);
5107
5108	WREG32(GMCON_PGFSM_WRITE, 0);
5109	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5110
5111	for (i = 0; i < 5; i++)
5112		WREG32(GMCON_PGFSM_WRITE, 0);
5113
5114	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5115	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5116
5117	for (i = 0; i < 5; i++)
5118		WREG32(GMCON_PGFSM_WRITE, 0);
5119
5120	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5121	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5122
5123	for (i = 0; i < 5; i++)
5124		WREG32(GMCON_PGFSM_WRITE, 0);
5125
5126	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5127	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5128
5129	for (i = 0; i < 5; i++)
5130		WREG32(GMCON_PGFSM_WRITE, 0);
5131
5132	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5133	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5134
5135	for (i = 0; i < 5; i++)
5136		WREG32(GMCON_PGFSM_WRITE, 0);
5137
5138	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5139	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5140
5141	WREG32(GMCON_MISC3, save->gmcon_misc3);
5142	WREG32(GMCON_MISC, save->gmcon_misc);
5143	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5144}
5145
/* Perform a full-chip reset via pci config space.
 *
 * Heavier hammer than cik_gpu_soft_reset(): halts every engine, stops
 * the MC, saves APU-specific GMCON state, disables bus mastering and
 * then triggers the reset through pci config space, waiting for the
 * asic to come back before restoring the GMCON state.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs lose GMCON state across the reset; save it first */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5208
5209/**
5210 * cik_asic_reset - soft reset GPU
5211 *
5212 * @rdev: radeon_device pointer
5213 * @hard: force hard reset
5214 *
5215 * Look up which blocks are hung and attempt
5216 * to reset them.
5217 * Returns 0 for success.
5218 */
5219int cik_asic_reset(struct radeon_device *rdev, bool hard)
5220{
5221	u32 reset_mask;
5222
5223	if (hard) {
5224		cik_gpu_pci_config_reset(rdev);
5225		return 0;
5226	}
5227
5228	reset_mask = cik_gpu_check_soft_reset(rdev);
5229
5230	if (reset_mask)
5231		r600_set_bios_scratch_engine_hung(rdev, true);
5232
5233	/* try soft reset */
5234	cik_gpu_soft_reset(rdev, reset_mask);
5235
5236	reset_mask = cik_gpu_check_soft_reset(rdev);
5237
5238	/* try pci config reset */
5239	if (reset_mask && radeon_hard_reset)
5240		cik_gpu_pci_config_reset(rdev);
5241
5242	reset_mask = cik_gpu_check_soft_reset(rdev);
5243
5244	if (!reset_mask)
5245		r600_set_bios_scratch_engine_hung(rdev, false);
5246
5247	return 0;
5248}
5249
5250/**
5251 * cik_gfx_is_lockup - check if the 3D engine is locked up
5252 *
5253 * @rdev: radeon_device pointer
5254 * @ring: radeon_ring structure holding ring information
5255 *
5256 * Check if the 3D engine is locked up (CIK).
5257 * Returns true if the engine is locked, false if not.
5258 */
5259bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5260{
5261	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5262
5263	if (!(reset_mask & (RADEON_RESET_GFX |
5264			    RADEON_RESET_COMPUTE |
5265			    RADEON_RESET_CP))) {
5266		radeon_ring_lockup_update(rdev, ring);
5267		return false;
5268	}
5269	return radeon_ring_test_lockup(rdev, ring);
5270}
5271
5272/* MC */
5273/**
5274 * cik_mc_program - program the GPU memory controller
5275 *
5276 * @rdev: radeon_device pointer
5277 *
5278 * Set the location of vram, gart, and AGP in the GPU's
5279 * physical address space (CIK).
5280 */
5281static void cik_mc_program(struct radeon_device *rdev)
5282{
5283	struct evergreen_mc_save save;
5284	u32 tmp;
5285	int i, j;
5286
5287	/* Initialize HDP */
5288	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5289		WREG32((0x2c14 + j), 0x00000000);
5290		WREG32((0x2c18 + j), 0x00000000);
5291		WREG32((0x2c1c + j), 0x00000000);
5292		WREG32((0x2c20 + j), 0x00000000);
5293		WREG32((0x2c24 + j), 0x00000000);
5294	}
5295	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5296
5297	evergreen_mc_stop(rdev, &save);
5298	if (radeon_mc_wait_for_idle(rdev)) {
5299		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5300	}
5301	/* Lockout access through VGA aperture*/
5302	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5303	/* Update configuration */
5304	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5305	       rdev->mc.vram_start >> 12);
5306	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5307	       rdev->mc.vram_end >> 12);
5308	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5309	       rdev->vram_scratch.gpu_addr >> 12);
5310	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5311	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5312	WREG32(MC_VM_FB_LOCATION, tmp);
5313	/* XXX double check these! */
5314	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5315	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5316	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5317	WREG32(MC_VM_AGP_BASE, 0);
5318	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5319	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5320	if (radeon_mc_wait_for_idle(rdev)) {
5321		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5322	}
5323	evergreen_mc_resume(rdev, &save);
5324	/* we need to own VRAM, so turn off the VGA renderer here
5325	 * to stop it overwriting our objects */
5326	rv515_vga_render_disable(rdev);
5327}
5328
5329/**
5330 * cik_mc_init - initialize the memory controller driver params
5331 *
5332 * @rdev: radeon_device pointer
5333 *
5334 * Look up the amount of vram, vram width, and decide how to place
5335 * vram and gart within the GPU's physical address space (CIK).
5336 * Returns 0 for success.
5337 */
5338static int cik_mc_init(struct radeon_device *rdev)
5339{
5340	u32 tmp;
5341	int chansize, numchan;
5342
5343	/* Get VRAM informations */
5344	rdev->mc.vram_is_ddr = true;
5345	tmp = RREG32(MC_ARB_RAMCFG);
5346	if (tmp & CHANSIZE_MASK) {
5347		chansize = 64;
5348	} else {
5349		chansize = 32;
5350	}
5351	tmp = RREG32(MC_SHARED_CHMAP);
5352	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5353	case 0:
5354	default:
5355		numchan = 1;
5356		break;
5357	case 1:
5358		numchan = 2;
5359		break;
5360	case 2:
5361		numchan = 4;
5362		break;
5363	case 3:
5364		numchan = 8;
5365		break;
5366	case 4:
5367		numchan = 3;
5368		break;
5369	case 5:
5370		numchan = 6;
5371		break;
5372	case 6:
5373		numchan = 10;
5374		break;
5375	case 7:
5376		numchan = 12;
5377		break;
5378	case 8:
5379		numchan = 16;
5380		break;
5381	}
5382	rdev->mc.vram_width = numchan * chansize;
5383	/* Could aper size report 0 ? */
5384	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5385	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5386	/* size in MB on si */
5387	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5388	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5389	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5390	si_vram_gtt_location(rdev, &rdev->mc);
5391	radeon_update_bandwidth_info(rdev);
5392
5393	return 0;
5394}
5395
5396/*
5397 * GART
5398 * VMID 0 is the physical GPU addresses as used by the kernel.
5399 * VMIDs 1-15 are used for userspace clients and are handled
5400 * by the radeon vm/hsa code.
5401 */
5402/**
5403 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5404 *
5405 * @rdev: radeon_device pointer
5406 *
5407 * Flush the TLB for the VMID 0 page table (CIK).
5408 */
5409void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5410{
5411	/* flush hdp cache */
5412	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5413
5414	/* bits 0-15 are the VM contexts0-15 */
5415	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5416}
5417
5418/**
5419 * cik_pcie_gart_enable - gart enable
5420 *
5421 * @rdev: radeon_device pointer
5422 *
5423 * This sets up the TLBs, programs the page tables for VMID0,
5424 * sets up the hw for VMIDs 1-15 which are allocated on
5425 * demand, and sets up the global locations for the LDS, GDS,
5426 * and GPUVM for FSA64 clients (CIK).
5427 * Returns 0 for success, errors for failure.
5428 */
5429static int cik_pcie_gart_enable(struct radeon_device *rdev)
5430{
5431	int r, i;
5432
5433	if (rdev->gart.robj == NULL) {
5434		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5435		return -EINVAL;
5436	}
5437	r = radeon_gart_table_vram_pin(rdev);
5438	if (r)
5439		return r;
5440	/* Setup TLB control */
5441	WREG32(MC_VM_MX_L1_TLB_CNTL,
5442	       (0xA << 7) |
5443	       ENABLE_L1_TLB |
5444	       ENABLE_L1_FRAGMENT_PROCESSING |
5445	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5446	       ENABLE_ADVANCED_DRIVER_MODEL |
5447	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5448	/* Setup L2 cache */
5449	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5450	       ENABLE_L2_FRAGMENT_PROCESSING |
5451	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5452	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5453	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5454	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5455	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5456	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5457	       BANK_SELECT(4) |
5458	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5459	/* setup context0 */
5460	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5461	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5462	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5463	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5464			(u32)(rdev->dummy_page.addr >> 12));
5465	WREG32(VM_CONTEXT0_CNTL2, 0);
5466	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5467				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5468
5469	WREG32(0x15D4, 0);
5470	WREG32(0x15D8, 0);
5471	WREG32(0x15DC, 0);
5472
5473	/* restore context1-15 */
5474	/* set vm size, must be a multiple of 4 */
5475	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5476	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5477	for (i = 1; i < 16; i++) {
5478		if (i < 8)
5479			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5480			       rdev->vm_manager.saved_table_addr[i]);
5481		else
5482			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5483			       rdev->vm_manager.saved_table_addr[i]);
5484	}
5485
5486	/* enable context1-15 */
5487	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5488	       (u32)(rdev->dummy_page.addr >> 12));
5489	WREG32(VM_CONTEXT1_CNTL2, 4);
5490	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5491				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5492				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5494				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5495				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5496				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5498				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5500				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5502				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5504
5505	if (rdev->family == CHIP_KAVERI) {
5506		u32 tmp = RREG32(CHUB_CONTROL);
5507		tmp &= ~BYPASS_VM;
5508		WREG32(CHUB_CONTROL, tmp);
5509	}
5510
5511	/* XXX SH_MEM regs */
5512	/* where to put LDS, scratch, GPUVM in FSA64 space */
5513	mutex_lock(&rdev->srbm_mutex);
5514	for (i = 0; i < 16; i++) {
5515		cik_srbm_select(rdev, 0, 0, 0, i);
5516		/* CP and shaders */
5517		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5518		WREG32(SH_MEM_APE1_BASE, 1);
5519		WREG32(SH_MEM_APE1_LIMIT, 0);
5520		WREG32(SH_MEM_BASES, 0);
5521		/* SDMA GFX */
5522		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5523		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5524		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5525		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5526		/* XXX SDMA RLC - todo */
5527	}
5528	cik_srbm_select(rdev, 0, 0, 0, 0);
5529	mutex_unlock(&rdev->srbm_mutex);
5530
5531	cik_pcie_gart_tlb_flush(rdev);
5532	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5533		 (unsigned)(rdev->mc.gtt_size >> 20),
5534		 (unsigned long long)rdev->gart.table_addr);
5535	rdev->gart.ready = true;
5536	return 0;
5537}
5538
5539/**
5540 * cik_pcie_gart_disable - gart disable
5541 *
5542 * @rdev: radeon_device pointer
5543 *
5544 * This disables all VM page table (CIK).
5545 */
5546static void cik_pcie_gart_disable(struct radeon_device *rdev)
5547{
5548	unsigned i;
5549
5550	for (i = 1; i < 16; ++i) {
5551		uint32_t reg;
5552		if (i < 8)
5553			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5554		else
5555			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5556		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5557	}
5558
5559	/* Disable all tables */
5560	WREG32(VM_CONTEXT0_CNTL, 0);
5561	WREG32(VM_CONTEXT1_CNTL, 0);
5562	/* Setup TLB control */
5563	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5564	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5565	/* Setup L2 cache */
5566	WREG32(VM_L2_CNTL,
5567	       ENABLE_L2_FRAGMENT_PROCESSING |
5568	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5569	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5570	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5571	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5572	WREG32(VM_L2_CNTL2, 0);
5573	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5574	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5575	radeon_gart_table_vram_unpin(rdev);
5576}
5577
5578/**
5579 * cik_pcie_gart_fini - vm fini callback
5580 *
5581 * @rdev: radeon_device pointer
5582 *
5583 * Tears down the driver GART/VM setup (CIK).
5584 */
5585static void cik_pcie_gart_fini(struct radeon_device *rdev)
5586{
5587	cik_pcie_gart_disable(rdev);
5588	radeon_gart_table_vram_free(rdev);
5589	radeon_gart_fini(rdev);
5590}
5591
5592/* vm parser */
5593/**
5594 * cik_ib_parse - vm ib_parse callback
5595 *
5596 * @rdev: radeon_device pointer
5597 * @ib: indirect buffer pointer
5598 *
5599 * CIK uses hw IB checking so this is a nop (CIK).
5600 */
5601int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5602{
5603	return 0;
5604}
5605
5606/*
5607 * vm
5608 * VMID 0 is the physical GPU addresses as used by the kernel.
5609 * VMIDs 1-15 are used for userspace clients and are handled
5610 * by the radeon vm/hsa code.
5611 */
5612/**
5613 * cik_vm_init - cik vm init callback
5614 *
5615 * @rdev: radeon_device pointer
5616 *
5617 * Inits cik specific vm parameters (number of VMs, base of vram for
5618 * VMIDs 1-15) (CIK).
5619 * Returns 0 for success.
5620 */
5621int cik_vm_init(struct radeon_device *rdev)
5622{
5623	/*
5624	 * number of VMs
5625	 * VMID 0 is reserved for System
5626	 * radeon graphics/compute will use VMIDs 1-15
5627	 */
5628	rdev->vm_manager.nvm = 16;
5629	/* base offset of vram pages */
5630	if (rdev->flags & RADEON_IS_IGP) {
5631		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5632		tmp <<= 22;
5633		rdev->vm_manager.vram_base_offset = tmp;
5634	} else
5635		rdev->vm_manager.vram_base_offset = 0;
5636
5637	return 0;
5638}
5639
5640/**
5641 * cik_vm_fini - cik vm fini callback
5642 *
5643 * @rdev: radeon_device pointer
5644 *
5645 * Tear down any asic specific VM setup (CIK).
5646 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do: cik_vm_init() acquires no resources */
}
5650
5651/**
5652 * cik_vm_decode_fault - print human readable fault info
5653 *
5654 * @rdev: radeon_device pointer
5655 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5656 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5657 *
5658 * Print human readable fault information (CIK).
5659 */
5660static void cik_vm_decode_fault(struct radeon_device *rdev,
5661				u32 status, u32 addr, u32 mc_client)
5662{
5663	u32 mc_id;
5664	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5665	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5666	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5667		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5668
5669	if (rdev->family == CHIP_HAWAII)
5670		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5671	else
5672		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5673
5674	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5675	       protections, vmid, addr,
5676	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5677	       block, mc_client, mc_id);
5678}
5679
5680/**
5681 * cik_vm_flush - cik vm flush using the CP
5682 *
5683 * @rdev: radeon_device pointer
5684 *
5685 * Update the page table base and flush the VM TLB
5686 * using the CP (CIK).
5687 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* PFP-engine writes are only valid on the GFX ring; compute
	 * rings have no PFP, so emit with the ME engine there. */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page directory base for this VMID; contexts 0-7
	 * and 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* point SRBM at this VMID so the SH_MEM writes below land in
	 * its register set */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 (the kernel's context) */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5761
5762/*
5763 * RLC
5764 * The RLC is a multi-purpose microengine that handles a
5765 * variety of functions, the most important of which is
5766 * the interrupt controller.
5767 */
5768static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5769					  bool enable)
5770{
5771	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5772
5773	if (enable)
5774		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5775	else
5776		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5777	WREG32(CP_INT_CNTL_RING0, tmp);
5778}
5779
5780static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5781{
5782	u32 tmp;
5783
5784	tmp = RREG32(RLC_LB_CNTL);
5785	if (enable)
5786		tmp |= LOAD_BALANCE_ENABLE;
5787	else
5788		tmp &= ~LOAD_BALANCE_ENABLE;
5789	WREG32(RLC_LB_CNTL, tmp);
5790}
5791
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* wait for the per-CU serdes masters on every SE/SH to go idle */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast (all SEs/SHs) selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) to drain as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5816
5817static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5818{
5819	u32 tmp;
5820
5821	tmp = RREG32(RLC_CNTL);
5822	if (tmp != rlc)
5823		WREG32(RLC_CNTL, rlc);
5824}
5825
5826static u32 cik_halt_rlc(struct radeon_device *rdev)
5827{
5828	u32 data, orig;
5829
5830	orig = data = RREG32(RLC_CNTL);
5831
5832	if (data & RLC_ENABLE) {
5833		u32 i;
5834
5835		data &= ~RLC_ENABLE;
5836		WREG32(RLC_CNTL, data);
5837
5838		for (i = 0; i < rdev->usec_timeout; i++) {
5839			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5840				break;
5841			udelay(1);
5842		}
5843
5844		cik_wait_for_rlc_serdes(rdev);
5845	}
5846
5847	return orig;
5848}
5849
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* request safe mode from the RLC */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for both power and clock status to report ready */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to acknowledge by clearing the REQ bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5870
5871void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5872{
5873	u32 tmp;
5874
5875	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5876	WREG32(RLC_GPR_REG2, tmp);
5877}
5878
5879/**
5880 * cik_rlc_stop - stop the RLC ME
5881 *
5882 * @rdev: radeon_device pointer
5883 *
5884 * Halt the RLC ME (MicroEngine) (CIK).
5885 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* halt the RLC microengine */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure all serdes transactions have drained before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5894
5895/**
5896 * cik_rlc_start - start the RLC ME
5897 *
5898 * @rdev: radeon_device pointer
5899 *
5900 * Unhalt the RLC ME (MicroEngine) (CIK).
5901 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	/* unhalt the RLC microengine */
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC some time to come up */
	udelay(50);
}
5910
5911/**
5912 * cik_rlc_resume - setup the RLC hw
5913 *
5914 * @rdev: radeon_device pointer
5915 *
5916 * Initialize the RLC registers, load the ucode,
5917 * and start the RLC (CIK).
5918 * Returns 0 for success, -EINVAL if the ucode is not available.
5919 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	/* can't bring up the RLC without its microcode */
	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	/* reset sequence shared with SI */
	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load-balancing counters */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program LB parameters with broadcast (all SEs/SHs) selected */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: header describes a payload of
		 * little-endian words */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: headerless big-endian words with a
		 * fixed, per-family size */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6000
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) for the GFX block. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads of the same
		 * register; presumably a settling/flush delay before
		 * disabling CGCG — confirm against HW docs */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6036
/* Enable/disable medium-grain clock gating (MGCG) for the GFX block,
 * along with the related memory light-sleep and CGTS features. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* CP memory light sleep, only if MGLS + CP_LS are supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		/* CGTS (texture clock gating) configuration */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force the overrides on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* override CGTS so it has no effect */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);
	}
}
6116
/* Memory-controller clock-gating control registers.  Each register
 * carries MC_LS_ENABLE and MC_CG_ENABLE bits, toggled as a group by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6129
6130static void cik_enable_mc_ls(struct radeon_device *rdev,
6131			     bool enable)
6132{
6133	int i;
6134	u32 orig, data;
6135
6136	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6137		orig = data = RREG32(mc_cg_registers[i]);
6138		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6139			data |= MC_LS_ENABLE;
6140		else
6141			data &= ~MC_LS_ENABLE;
6142		if (data != orig)
6143			WREG32(mc_cg_registers[i], data);
6144	}
6145}
6146
6147static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6148			       bool enable)
6149{
6150	int i;
6151	u32 orig, data;
6152
6153	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6154		orig = data = RREG32(mc_cg_registers[i]);
6155		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6156			data |= MC_CG_ENABLE;
6157		else
6158			data &= ~MC_CG_ENABLE;
6159		if (data != orig)
6160			WREG32(mc_cg_registers[i], data);
6161	}
6162}
6163
6164static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6165				 bool enable)
6166{
6167	u32 orig, data;
6168
6169	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6170		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6171		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6172	} else {
6173		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6174		data |= 0xff000000;
6175		if (data != orig)
6176			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6177
6178		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6179		data |= 0xff000000;
6180		if (data != orig)
6181			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6182	}
6183}
6184
6185static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6186				 bool enable)
6187{
6188	u32 orig, data;
6189
6190	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6191		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6192		data |= 0x100;
6193		if (orig != data)
6194			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6195
6196		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6197		data |= 0x100;
6198		if (orig != data)
6199			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6200	} else {
6201		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6202		data &= ~0x100;
6203		if (orig != data)
6204			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6205
6206		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6207		data &= ~0x100;
6208		if (orig != data)
6209			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6210	}
6211}
6212
/* Enable/disable medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is immediately
		 * overwritten, so the read contributes nothing to the
		 * value written — presumably kept as a register access
		 * latch/ordering quirk; confirm against HW docs */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6238
6239static void cik_enable_bif_mgls(struct radeon_device *rdev,
6240			       bool enable)
6241{
6242	u32 orig, data;
6243
6244	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6245
6246	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6247		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6248			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6249	else
6250		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6251			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6252
6253	if (orig != data)
6254		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6255}
6256
6257static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6258				bool enable)
6259{
6260	u32 orig, data;
6261
6262	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6263
6264	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6265		data &= ~CLOCK_GATING_DIS;
6266	else
6267		data |= CLOCK_GATING_DIS;
6268
6269	if (orig != data)
6270		WREG32(HDP_HOST_PATH_CNTL, data);
6271}
6272
6273static void cik_enable_hdp_ls(struct radeon_device *rdev,
6274			      bool enable)
6275{
6276	u32 orig, data;
6277
6278	orig = data = RREG32(HDP_MEM_POWER_LS);
6279
6280	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6281		data |= HDP_LS_ENABLE;
6282	else
6283		data &= ~HDP_LS_ENABLE;
6284
6285	if (orig != data)
6286		WREG32(HDP_MEM_POWER_LS, data);
6287}
6288
/* Toggle clock gating for the selected IP blocks.
 * @block: bitmask of RADEON_CG_BLOCK_* values
 * @enable: true to enable gating, false to disable */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* the MC is not programmable on APUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6336
/* Enable clock gating on all supported blocks at init time. */
static void cik_init_cg(struct radeon_device *rdev)
{

	/* GFX gating first */
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	/* then the remaining blocks */
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6351
/* Disable clock gating on all blocks, in reverse of cik_init_cg(). */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6362
6363static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6364					  bool enable)
6365{
6366	u32 data, orig;
6367
6368	orig = data = RREG32(RLC_PG_CNTL);
6369	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6370		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6371	else
6372		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6373	if (orig != data)
6374		WREG32(RLC_PG_CNTL, data);
6375}
6376
6377static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6378					  bool enable)
6379{
6380	u32 data, orig;
6381
6382	orig = data = RREG32(RLC_PG_CNTL);
6383	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6384		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6385	else
6386		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6387	if (orig != data)
6388		WREG32(RLC_PG_CNTL, data);
6389}
6390
6391static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6392{
6393	u32 data, orig;
6394
6395	orig = data = RREG32(RLC_PG_CNTL);
6396	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6397		data &= ~DISABLE_CP_PG;
6398	else
6399		data |= DISABLE_CP_PG;
6400	if (orig != data)
6401		WREG32(RLC_PG_CNTL, data);
6402}
6403
6404static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6405{
6406	u32 data, orig;
6407
6408	orig = data = RREG32(RLC_PG_CNTL);
6409	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6410		data &= ~DISABLE_GDS_PG;
6411	else
6412		data |= DISABLE_GDS_PG;
6413	if (orig != data)
6414		WREG32(RLC_PG_CNTL, data);
6415}
6416
/* layout of the CP jump tables inside legacy (headerless) firmware */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the CP microengine jump tables (CE, PFP, ME, MEC, and MEC2 on
 * Kaveri) into the RLC's cp_table buffer so the RLC can restore CP
 * state after power gating. */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri has a second MEC, so one extra table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: table offset/size come from
			 * the per-engine firmware header */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed table offsets and size;
			 * me == 4 (Kaveri MEC2) falls through to the MEC
			 * table here */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6504
/* Enable/disable coarse-grain GFX power gating and its automatic
 * (RLC-driven) trigger. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): the value read here is discarded;
		 * presumably a read to flush/settle after disabling PG —
		 * confirm against HW docs */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6534
6535static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6536{
6537	u32 mask = 0, tmp, tmp1;
6538	int i;
6539
6540	cik_select_se_sh(rdev, se, sh);
6541	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6542	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6543	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6544
6545	tmp &= 0xffff0000;
6546
6547	tmp |= tmp1;
6548	tmp >>= 16;
6549
6550	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6551		mask <<= 1;
6552		mask |= 1;
6553	}
6554
6555	return (~tmp) & mask;
6556}
6557
6558static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6559{
6560	u32 i, j, k, active_cu_number = 0;
6561	u32 mask, counter, cu_bitmap;
6562	u32 tmp = 0;
6563
6564	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6565		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6566			mask = 1;
6567			cu_bitmap = 0;
6568			counter = 0;
6569			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6570				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6571					if (counter < 2)
6572						cu_bitmap |= mask;
6573					counter ++;
6574				}
6575				mask <<= 1;
6576			}
6577
6578			active_cu_number += counter;
6579			tmp |= (cu_bitmap << (i * 16 + j * 8));
6580		}
6581	}
6582
6583	WREG32(RLC_PG_AO_CU_MASK, tmp);
6584
6585	tmp = RREG32(RLC_MAX_PG_CU);
6586	tmp &= ~MAX_PU_CU_MASK;
6587	tmp |= MAX_PU_CU(active_cu_number);
6588	WREG32(RLC_MAX_PG_CU, tmp);
6589}
6590
6591static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6592				       bool enable)
6593{
6594	u32 data, orig;
6595
6596	orig = data = RREG32(RLC_PG_CNTL);
6597	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6598		data |= STATIC_PER_CU_PG_ENABLE;
6599	else
6600		data &= ~STATIC_PER_CU_PG_ENABLE;
6601	if (orig != data)
6602		WREG32(RLC_PG_CNTL, data);
6603}
6604
6605static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6606					bool enable)
6607{
6608	u32 data, orig;
6609
6610	orig = data = RREG32(RLC_PG_CNTL);
6611	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6612		data |= DYN_PER_CU_PG_ENABLE;
6613	else
6614		data &= ~DYN_PER_CU_PG_ENABLE;
6615	if (orig != data)
6616		WREG32(RLC_PG_CNTL, data);
6617}
6618
/* offsets into the RLC GPM scratch space */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC with everything it needs for GFX power gating:
 * clear-state descriptor, save/restore register list, buffer
 * addresses, and the PG timing parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* point the RLC at the clear state buffer (hi/lo addr + size) */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state buffer: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffer base addresses, in 256-byte units */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	/* power gating delay parameters */
	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6670
/* Toggle all GFX power gating features together. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6677
6678u32 cik_get_csb_size(struct radeon_device *rdev)
6679{
6680	u32 count = 0;
6681	const struct cs_section_def *sect = NULL;
6682	const struct cs_extent_def *ext = NULL;
6683
6684	if (rdev->rlc.cs_data == NULL)
6685		return 0;
6686
6687	/* begin clear state */
6688	count += 2;
6689	/* context control state */
6690	count += 3;
6691
6692	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6693		for (ext = sect->section; ext->extent != NULL; ++ext) {
6694			if (sect->id == SECT_CONTEXT)
6695				count += 2 + ext->reg_count;
6696			else
6697				return 0;
6698		}
6699	}
6700	/* pa_sc_raster_config/pa_sc_raster_config1 */
6701	count += 4;
6702	/* end clear state */
6703	count += 2;
6704	/* clear state */
6705	count += 2;
6706
6707	return count;
6708}
6709
/* Fill @buffer with the clear state packets; the layout must match
 * the dword count computed by cik_get_csb_size(). */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-family pa_sc_raster_config / pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6774
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * Set up the powergating features that are enabled in
 * rdev->pg_flags: clock slowdown on power up/down, and - when
 * gfx powergating is supported - the gfx CGPG setup, CP and GDS
 * powergating.  Finishes by programming the always-on CU mask and
 * enabling the gfx powergating features (CIK).
 * No-op when no pg flags are set.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6789
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disable the powergating features enabled by cik_init_pg():
 * gfx powergating first, then CP and GDS powergating when gfx
 * powergating is supported (CIK).  No-op when no pg flags are set.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6800
6801/*
6802 * Interrupts
6803 * Starting with r6xx, interrupts are handled via a ring buffer.
6804 * Ring buffers are areas of GPU accessible memory that the GPU
6805 * writes interrupt vectors into and the host reads vectors out of.
6806 * There is a rptr (read pointer) that determines where the
6807 * host is currently reading, and a wptr (write pointer)
6808 * which determines where the GPU has written.  When the
6809 * pointers are equal, the ring is idle.  When the GPU
6810 * writes vectors to the ring buffer, it increments the
6811 * wptr.  When there is an interrupt, the host then starts
6812 * fetching commands and processing them until the pointers are
6813 * equal again at which point it updates the rptr.
6814 */
6815
6816/**
6817 * cik_enable_interrupts - Enable the interrupt ring buffer
6818 *
6819 * @rdev: radeon_device pointer
6820 *
6821 * Enable the interrupt ring buffer (CIK).
6822 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	/* read both control registers before writing either one back */
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	/* turn on the interrupt controller and the IH ring buffer */
	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	/* mirror the hw state in the driver */
	rdev->ih.enabled = true;
}
6834
6835/**
6836 * cik_disable_interrupts - Disable the interrupt ring buffer
6837 *
6838 * @rdev: radeon_device pointer
6839 *
6840 * Disable the interrupt ring buffer (CIK).
6841 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	/* stop the IH ring first, then the interrupt controller */
	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	/* keep the driver's view of the ring consistent with the hw */
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
6857
6858/**
6859 * cik_disable_interrupt_state - Disable all interrupt sources
6860 *
6861 * @rdev: radeon_device pointer
6862 *
6863 * Clear all interrupt enable bits used by the driver (CIK).
6864 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables, clear the rest */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: both MEs, all four pipes each */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: preserve the polarity bit, clear everything else */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6934
6935/**
6936 * cik_irq_init - init and enable the interrupt ring
6937 *
6938 * @rdev: radeon_device pointer
6939 *
6940 * Allocate a ring buffer for the interrupt controller,
6941 * enable the RLC, disable interrupts, enable the IH
6942 * ring buffer and enable it (CIK).
 * Called at device load and resume.
6944 * Returns 0 for success, errors for failure.
6945 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* free the IH ring again on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size is encoded as log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7016
7017/**
7018 * cik_irq_set - enable/disable interrupt sources
7019 *
7020 * @rdev: radeon_device pointer
7021 *
7022 * Enable interrupt sources on the GPU (vblanks, hpd,
7023 * etc.) (CIK).
7024 * Returns 0 for success, errors for failure.
7025 */
7026int cik_irq_set(struct radeon_device *rdev)
7027{
7028	u32 cp_int_cntl;
7029	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7030	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7031	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7032	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7033	u32 grbm_int_cntl = 0;
7034	u32 dma_cntl, dma_cntl1;
7035
7036	if (!rdev->irq.installed) {
7037		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7038		return -EINVAL;
7039	}
7040	/* don't enable anything if the ih is disabled */
7041	if (!rdev->ih.enabled) {
7042		cik_disable_interrupts(rdev);
7043		/* force the active interrupt state to all disabled */
7044		cik_disable_interrupt_state(rdev);
7045		return 0;
7046	}
7047
7048	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7049		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7050	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7051
7052	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7053	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7054	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7055	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7056	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058
7059	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7060	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7061
7062	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7063	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7064	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7065	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7066	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070
7071	/* enable CP interrupts on all rings */
7072	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7073		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7074		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7075	}
7076	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7077		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7078		DRM_DEBUG("si_irq_set: sw int cp1\n");
7079		if (ring->me == 1) {
7080			switch (ring->pipe) {
7081			case 0:
7082				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7083				break;
7084			case 1:
7085				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7086				break;
7087			case 2:
7088				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7089				break;
7090			case 3:
7091				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7092				break;
7093			default:
7094				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7095				break;
7096			}
7097		} else if (ring->me == 2) {
7098			switch (ring->pipe) {
7099			case 0:
7100				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7101				break;
7102			case 1:
7103				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7104				break;
7105			case 2:
7106				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7107				break;
7108			case 3:
7109				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7110				break;
7111			default:
7112				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7113				break;
7114			}
7115		} else {
7116			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7117		}
7118	}
7119	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7120		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7121		DRM_DEBUG("si_irq_set: sw int cp2\n");
7122		if (ring->me == 1) {
7123			switch (ring->pipe) {
7124			case 0:
7125				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7126				break;
7127			case 1:
7128				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7129				break;
7130			case 2:
7131				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7132				break;
7133			case 3:
7134				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7135				break;
7136			default:
7137				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7138				break;
7139			}
7140		} else if (ring->me == 2) {
7141			switch (ring->pipe) {
7142			case 0:
7143				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7144				break;
7145			case 1:
7146				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7147				break;
7148			case 2:
7149				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7150				break;
7151			case 3:
7152				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7153				break;
7154			default:
7155				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7156				break;
7157			}
7158		} else {
7159			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7160		}
7161	}
7162
7163	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7164		DRM_DEBUG("cik_irq_set: sw int dma\n");
7165		dma_cntl |= TRAP_ENABLE;
7166	}
7167
7168	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7169		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7170		dma_cntl1 |= TRAP_ENABLE;
7171	}
7172
7173	if (rdev->irq.crtc_vblank_int[0] ||
7174	    atomic_read(&rdev->irq.pflip[0])) {
7175		DRM_DEBUG("cik_irq_set: vblank 0\n");
7176		crtc1 |= VBLANK_INTERRUPT_MASK;
7177	}
7178	if (rdev->irq.crtc_vblank_int[1] ||
7179	    atomic_read(&rdev->irq.pflip[1])) {
7180		DRM_DEBUG("cik_irq_set: vblank 1\n");
7181		crtc2 |= VBLANK_INTERRUPT_MASK;
7182	}
7183	if (rdev->irq.crtc_vblank_int[2] ||
7184	    atomic_read(&rdev->irq.pflip[2])) {
7185		DRM_DEBUG("cik_irq_set: vblank 2\n");
7186		crtc3 |= VBLANK_INTERRUPT_MASK;
7187	}
7188	if (rdev->irq.crtc_vblank_int[3] ||
7189	    atomic_read(&rdev->irq.pflip[3])) {
7190		DRM_DEBUG("cik_irq_set: vblank 3\n");
7191		crtc4 |= VBLANK_INTERRUPT_MASK;
7192	}
7193	if (rdev->irq.crtc_vblank_int[4] ||
7194	    atomic_read(&rdev->irq.pflip[4])) {
7195		DRM_DEBUG("cik_irq_set: vblank 4\n");
7196		crtc5 |= VBLANK_INTERRUPT_MASK;
7197	}
7198	if (rdev->irq.crtc_vblank_int[5] ||
7199	    atomic_read(&rdev->irq.pflip[5])) {
7200		DRM_DEBUG("cik_irq_set: vblank 5\n");
7201		crtc6 |= VBLANK_INTERRUPT_MASK;
7202	}
7203	if (rdev->irq.hpd[0]) {
7204		DRM_DEBUG("cik_irq_set: hpd 1\n");
7205		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7206	}
7207	if (rdev->irq.hpd[1]) {
7208		DRM_DEBUG("cik_irq_set: hpd 2\n");
7209		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210	}
7211	if (rdev->irq.hpd[2]) {
7212		DRM_DEBUG("cik_irq_set: hpd 3\n");
7213		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214	}
7215	if (rdev->irq.hpd[3]) {
7216		DRM_DEBUG("cik_irq_set: hpd 4\n");
7217		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218	}
7219	if (rdev->irq.hpd[4]) {
7220		DRM_DEBUG("cik_irq_set: hpd 5\n");
7221		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222	}
7223	if (rdev->irq.hpd[5]) {
7224		DRM_DEBUG("cik_irq_set: hpd 6\n");
7225		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226	}
7227
7228	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7229
7230	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7231	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7232
7233	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7234	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7235	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7236	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7237	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7238	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7239	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7240	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7241
7242	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7243
7244	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7245	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7246	if (rdev->num_crtc >= 4) {
7247		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7248		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7249	}
7250	if (rdev->num_crtc >= 6) {
7251		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7252		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7253	}
7254
7255	if (rdev->num_crtc >= 2) {
7256		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7257		       GRPH_PFLIP_INT_MASK);
7258		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7259		       GRPH_PFLIP_INT_MASK);
7260	}
7261	if (rdev->num_crtc >= 4) {
7262		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7263		       GRPH_PFLIP_INT_MASK);
7264		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7265		       GRPH_PFLIP_INT_MASK);
7266	}
7267	if (rdev->num_crtc >= 6) {
7268		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7269		       GRPH_PFLIP_INT_MASK);
7270		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7271		       GRPH_PFLIP_INT_MASK);
7272	}
7273
7274	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7275	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7276	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7277	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7278	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7279	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7280
7281	/* posting read */
7282	RREG32(SRBM_STATUS);
7283
7284	return 0;
7285}
7286
7287/**
7288 * cik_irq_ack - ack interrupt sources
7289 *
7290 * @rdev: radeon_device pointer
7291 *
7292 * Ack interrupt sources on the GPU (vblanks, hpd,
7293 * etc.) (CIK).  Certain interrupts sources are sw
7294 * generated and do not require an explicit ack.
7295 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers for later processing */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* per-crtc pageflip status */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pageflip and vblank/vline interrupts on crtc 0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* ack crtc 2/3 if present */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack crtc 4/5 if present */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hpd connect/disconnect interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hpd rx interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7435
7436/**
7437 * cik_irq_disable - disable interrupts
7438 *
7439 * @rdev: radeon_device pointer
7440 *
7441 * Disable interrupts on the hw (CIK).
7442 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* clear all the interrupt source enables */
	cik_disable_interrupt_state(rdev);
}
7451
7452/**
 * cik_irq_suspend - disable interrupts for suspend
7454 *
7455 * @rdev: radeon_device pointer
7456 *
7457 * Disable interrupts and stop the RLC (CIK).
7458 * Used for suspend.
7459 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable and ack all interrupt sources, then stop the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7465
7466/**
7467 * cik_irq_fini - tear down interrupt support
7468 *
7469 * @rdev: radeon_device pointer
7470 *
7471 * Disable interrupts on the hw and free the IH ring
7472 * buffer (CIK).
7473 * Used for driver unload.
7474 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hardware before freeing the IH ring */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7480
7481/**
7482 * cik_get_ih_wptr - get the IH ring buffer wptr
7483 *
7484 * @rdev: radeon_device pointer
7485 *
7486 * Get the IH ring buffer wptr from either the register
7487 * or the writeback memory buffer (CIK).  Also check for
7488 * ring buffer overflow and deal with it.
7489 * Used by cik_irq_process().
7490 * Returns the value of the wptr.
7491 */
7492static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7493{
7494	u32 wptr, tmp;
7495
7496	if (rdev->wb.enabled)
7497		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7498	else
7499		wptr = RREG32(IH_RB_WPTR);
7500
7501	if (wptr & RB_OVERFLOW) {
7502		wptr &= ~RB_OVERFLOW;
7503		/* When a ring buffer overflow happen start parsing interrupt
7504		 * from the last not overwritten vector (wptr + 16). Hopefully
7505		 * this should allow us to catchup.
7506		 */
7507		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7508			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7509		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7510		tmp = RREG32(IH_RB_CNTL);
7511		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7512		WREG32(IH_RB_CNTL, tmp);
7513	}
7514	return (wptr & rdev->ih.ptr_mask);
7515}
7516
7517/*        CIK IV Ring
7518 * Each IV ring entry is 128 bits:
7519 * [7:0]    - interrupt source id
7520 * [31:8]   - reserved
7521 * [59:32]  - interrupt source data
7522 * [63:60]  - reserved
7523 * [71:64]  - RINGID
7524 *            CP:
7525 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7526 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7527 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7528 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7529 *            PIPE_ID - ME0 0=3D
7530 *                    - ME1&2 compute dispatcher (4 pipes each)
7531 *            SDMA:
7532 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7533 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7534 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7535 * [79:72]  - VMID
7536 * [95:80]  - PASID
7537 * [127:96] - reserved
7538 */
7539/**
7540 * cik_irq_process - interrupt handler
7541 *
7542 * @rdev: radeon_device pointer
7543 *
7544 * Interrupt hander (CIK).  Walk the IH ring,
7545 * ack interrupts and schedule work to handle
7546 * interrupt events.
7547 * Returns irq process return code.
7548 */
7549int cik_irq_process(struct radeon_device *rdev)
7550{
7551	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7552	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7553	u32 wptr;
7554	u32 rptr;
7555	u32 src_id, src_data, ring_id;
7556	u8 me_id, pipe_id, queue_id;
7557	u32 ring_index;
7558	bool queue_hotplug = false;
7559	bool queue_dp = false;
7560	bool queue_reset = false;
7561	u32 addr, status, mc_client;
7562	bool queue_thermal = false;
7563
7564	if (!rdev->ih.enabled || rdev->shutdown)
7565		return IRQ_NONE;
7566
7567	wptr = cik_get_ih_wptr(rdev);
7568
7569restart_ih:
7570	/* is somebody else already processing irqs? */
7571	if (atomic_xchg(&rdev->ih.lock, 1))
7572		return IRQ_NONE;
7573
7574	rptr = rdev->ih.rptr;
7575	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7576
7577	/* Order reading of wptr vs. reading of IH ring data */
7578	rmb();
7579
7580	/* display interrupts */
7581	cik_irq_ack(rdev);
7582
7583	while (rptr != wptr) {
7584		/* wptr/rptr are in bytes! */
7585		ring_index = rptr / 4;
7586
7587		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7588		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7589		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7590
7591		switch (src_id) {
7592		case 1: /* D1 vblank/vline */
7593			switch (src_data) {
7594			case 0: /* D1 vblank */
7595				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7596					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7597
7598				if (rdev->irq.crtc_vblank_int[0]) {
7599					drm_handle_vblank(rdev->ddev, 0);
7600					rdev->pm.vblank_sync = true;
7601					wake_up(&rdev->irq.vblank_queue);
7602				}
7603				if (atomic_read(&rdev->irq.pflip[0]))
7604					radeon_crtc_handle_vblank(rdev, 0);
7605				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7606				DRM_DEBUG("IH: D1 vblank\n");
7607
7608				break;
7609			case 1: /* D1 vline */
7610				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7611					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7612
7613				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7614				DRM_DEBUG("IH: D1 vline\n");
7615
7616				break;
7617			default:
7618				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7619				break;
7620			}
7621			break;
7622		case 2: /* D2 vblank/vline */
7623			switch (src_data) {
7624			case 0: /* D2 vblank */
7625				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7626					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7627
7628				if (rdev->irq.crtc_vblank_int[1]) {
7629					drm_handle_vblank(rdev->ddev, 1);
7630					rdev->pm.vblank_sync = true;
7631					wake_up(&rdev->irq.vblank_queue);
7632				}
7633				if (atomic_read(&rdev->irq.pflip[1]))
7634					radeon_crtc_handle_vblank(rdev, 1);
7635				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7636				DRM_DEBUG("IH: D2 vblank\n");
7637
7638				break;
7639			case 1: /* D2 vline */
7640				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7641					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7642
7643				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7644				DRM_DEBUG("IH: D2 vline\n");
7645
7646				break;
7647			default:
7648				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7649				break;
7650			}
7651			break;
7652		case 3: /* D3 vblank/vline */
7653			switch (src_data) {
7654			case 0: /* D3 vblank */
7655				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7656					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7657
7658				if (rdev->irq.crtc_vblank_int[2]) {
7659					drm_handle_vblank(rdev->ddev, 2);
7660					rdev->pm.vblank_sync = true;
7661					wake_up(&rdev->irq.vblank_queue);
7662				}
7663				if (atomic_read(&rdev->irq.pflip[2]))
7664					radeon_crtc_handle_vblank(rdev, 2);
7665				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7666				DRM_DEBUG("IH: D3 vblank\n");
7667
7668				break;
7669			case 1: /* D3 vline */
7670				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7671					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7672
7673				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7674				DRM_DEBUG("IH: D3 vline\n");
7675
7676				break;
7677			default:
7678				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7679				break;
7680			}
7681			break;
7682		case 4: /* D4 vblank/vline */
7683			switch (src_data) {
7684			case 0: /* D4 vblank */
7685				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7686					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7687
7688				if (rdev->irq.crtc_vblank_int[3]) {
7689					drm_handle_vblank(rdev->ddev, 3);
7690					rdev->pm.vblank_sync = true;
7691					wake_up(&rdev->irq.vblank_queue);
7692				}
7693				if (atomic_read(&rdev->irq.pflip[3]))
7694					radeon_crtc_handle_vblank(rdev, 3);
7695				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7696				DRM_DEBUG("IH: D4 vblank\n");
7697
7698				break;
7699			case 1: /* D4 vline */
7700				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7701					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7702
7703				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7704				DRM_DEBUG("IH: D4 vline\n");
7705
7706				break;
7707			default:
7708				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7709				break;
7710			}
7711			break;
7712		case 5: /* D5 vblank/vline */
7713			switch (src_data) {
7714			case 0: /* D5 vblank */
7715				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7716					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7717
7718				if (rdev->irq.crtc_vblank_int[4]) {
7719					drm_handle_vblank(rdev->ddev, 4);
7720					rdev->pm.vblank_sync = true;
7721					wake_up(&rdev->irq.vblank_queue);
7722				}
7723				if (atomic_read(&rdev->irq.pflip[4]))
7724					radeon_crtc_handle_vblank(rdev, 4);
7725				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7726				DRM_DEBUG("IH: D5 vblank\n");
7727
7728				break;
7729			case 1: /* D5 vline */
7730				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7731					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7732
7733				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7734				DRM_DEBUG("IH: D5 vline\n");
7735
7736				break;
7737			default:
7738				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7739				break;
7740			}
7741			break;
7742		case 6: /* D6 vblank/vline */
7743			switch (src_data) {
7744			case 0: /* D6 vblank */
7745				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7746					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7747
7748				if (rdev->irq.crtc_vblank_int[5]) {
7749					drm_handle_vblank(rdev->ddev, 5);
7750					rdev->pm.vblank_sync = true;
7751					wake_up(&rdev->irq.vblank_queue);
7752				}
7753				if (atomic_read(&rdev->irq.pflip[5]))
7754					radeon_crtc_handle_vblank(rdev, 5);
7755				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7756				DRM_DEBUG("IH: D6 vblank\n");
7757
7758				break;
7759			case 1: /* D6 vline */
7760				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7761					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7762
7763				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7764				DRM_DEBUG("IH: D6 vline\n");
7765
7766				break;
7767			default:
7768				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7769				break;
7770			}
7771			break;
7772		case 8: /* D1 page flip */
7773		case 10: /* D2 page flip */
7774		case 12: /* D3 page flip */
7775		case 14: /* D4 page flip */
7776		case 16: /* D5 page flip */
7777		case 18: /* D6 page flip */
7778			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7779			if (radeon_use_pflipirq > 0)
7780				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7781			break;
7782		case 42: /* HPD hotplug */
7783			switch (src_data) {
7784			case 0:
7785				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7786					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7787
7788				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7789				queue_hotplug = true;
7790				DRM_DEBUG("IH: HPD1\n");
7791
7792				break;
7793			case 1:
7794				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7795					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7796
7797				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7798				queue_hotplug = true;
7799				DRM_DEBUG("IH: HPD2\n");
7800
7801				break;
7802			case 2:
7803				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7804					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7805
7806				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7807				queue_hotplug = true;
7808				DRM_DEBUG("IH: HPD3\n");
7809
7810				break;
7811			case 3:
7812				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7813					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7814
7815				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7816				queue_hotplug = true;
7817				DRM_DEBUG("IH: HPD4\n");
7818
7819				break;
7820			case 4:
7821				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7822					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7823
7824				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7825				queue_hotplug = true;
7826				DRM_DEBUG("IH: HPD5\n");
7827
7828				break;
7829			case 5:
7830				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7831					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7832
7833				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7834				queue_hotplug = true;
7835				DRM_DEBUG("IH: HPD6\n");
7836
7837				break;
7838			case 6:
7839				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7840					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7841
7842				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7843				queue_dp = true;
7844				DRM_DEBUG("IH: HPD_RX 1\n");
7845
7846				break;
7847			case 7:
7848				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7849					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7850
7851				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7852				queue_dp = true;
7853				DRM_DEBUG("IH: HPD_RX 2\n");
7854
7855				break;
7856			case 8:
7857				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7858					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7859
7860				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7861				queue_dp = true;
7862				DRM_DEBUG("IH: HPD_RX 3\n");
7863
7864				break;
7865			case 9:
7866				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7867					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7868
7869				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7870				queue_dp = true;
7871				DRM_DEBUG("IH: HPD_RX 4\n");
7872
7873				break;
7874			case 10:
7875				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7876					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7877
7878				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7879				queue_dp = true;
7880				DRM_DEBUG("IH: HPD_RX 5\n");
7881
7882				break;
7883			case 11:
7884				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7885					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7886
7887				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7888				queue_dp = true;
7889				DRM_DEBUG("IH: HPD_RX 6\n");
7890
7891				break;
7892			default:
7893				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7894				break;
7895			}
7896			break;
7897		case 96:
7898			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7899			WREG32(SRBM_INT_ACK, 0x1);
7900			break;
7901		case 124: /* UVD */
7902			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7903			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7904			break;
7905		case 146:
7906		case 147:
7907			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7908			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7909			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7910			/* reset addr and status */
7911			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7912			if (addr == 0x0 && status == 0x0)
7913				break;
7914			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7915			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7916				addr);
7917			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7918				status);
7919			cik_vm_decode_fault(rdev, status, addr, mc_client);
7920			break;
7921		case 167: /* VCE */
7922			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7923			switch (src_data) {
7924			case 0:
7925				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7926				break;
7927			case 1:
7928				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7929				break;
7930			default:
7931				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7932				break;
7933			}
7934			break;
7935		case 176: /* GFX RB CP_INT */
7936		case 177: /* GFX IB CP_INT */
7937			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7938			break;
7939		case 181: /* CP EOP event */
7940			DRM_DEBUG("IH: CP EOP\n");
7941			/* XXX check the bitfield order! */
7942			me_id = (ring_id & 0x60) >> 5;
7943			pipe_id = (ring_id & 0x18) >> 3;
7944			queue_id = (ring_id & 0x7) >> 0;
7945			switch (me_id) {
7946			case 0:
7947				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7948				break;
7949			case 1:
7950			case 2:
7951				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7952					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7953				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7954					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7955				break;
7956			}
7957			break;
7958		case 184: /* CP Privileged reg access */
7959			DRM_ERROR("Illegal register access in command stream\n");
7960			/* XXX check the bitfield order! */
7961			me_id = (ring_id & 0x60) >> 5;
7962			pipe_id = (ring_id & 0x18) >> 3;
7963			queue_id = (ring_id & 0x7) >> 0;
7964			switch (me_id) {
7965			case 0:
7966				/* This results in a full GPU reset, but all we need to do is soft
7967				 * reset the CP for gfx
7968				 */
7969				queue_reset = true;
7970				break;
7971			case 1:
7972				/* XXX compute */
7973				queue_reset = true;
7974				break;
7975			case 2:
7976				/* XXX compute */
7977				queue_reset = true;
7978				break;
7979			}
7980			break;
7981		case 185: /* CP Privileged inst */
7982			DRM_ERROR("Illegal instruction in command stream\n");
7983			/* XXX check the bitfield order! */
7984			me_id = (ring_id & 0x60) >> 5;
7985			pipe_id = (ring_id & 0x18) >> 3;
7986			queue_id = (ring_id & 0x7) >> 0;
7987			switch (me_id) {
7988			case 0:
7989				/* This results in a full GPU reset, but all we need to do is soft
7990				 * reset the CP for gfx
7991				 */
7992				queue_reset = true;
7993				break;
7994			case 1:
7995				/* XXX compute */
7996				queue_reset = true;
7997				break;
7998			case 2:
7999				/* XXX compute */
8000				queue_reset = true;
8001				break;
8002			}
8003			break;
8004		case 224: /* SDMA trap event */
8005			/* XXX check the bitfield order! */
8006			me_id = (ring_id & 0x3) >> 0;
8007			queue_id = (ring_id & 0xc) >> 2;
8008			DRM_DEBUG("IH: SDMA trap\n");
8009			switch (me_id) {
8010			case 0:
8011				switch (queue_id) {
8012				case 0:
8013					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8014					break;
8015				case 1:
8016					/* XXX compute */
8017					break;
8018				case 2:
8019					/* XXX compute */
8020					break;
8021				}
8022				break;
8023			case 1:
8024				switch (queue_id) {
8025				case 0:
8026					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8027					break;
8028				case 1:
8029					/* XXX compute */
8030					break;
8031				case 2:
8032					/* XXX compute */
8033					break;
8034				}
8035				break;
8036			}
8037			break;
8038		case 230: /* thermal low to high */
8039			DRM_DEBUG("IH: thermal low to high\n");
8040			rdev->pm.dpm.thermal.high_to_low = false;
8041			queue_thermal = true;
8042			break;
8043		case 231: /* thermal high to low */
8044			DRM_DEBUG("IH: thermal high to low\n");
8045			rdev->pm.dpm.thermal.high_to_low = true;
8046			queue_thermal = true;
8047			break;
8048		case 233: /* GUI IDLE */
8049			DRM_DEBUG("IH: GUI idle\n");
8050			break;
8051		case 241: /* SDMA Privileged inst */
8052		case 247: /* SDMA Privileged inst */
8053			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8054			/* XXX check the bitfield order! */
8055			me_id = (ring_id & 0x3) >> 0;
8056			queue_id = (ring_id & 0xc) >> 2;
8057			switch (me_id) {
8058			case 0:
8059				switch (queue_id) {
8060				case 0:
8061					queue_reset = true;
8062					break;
8063				case 1:
8064					/* XXX compute */
8065					queue_reset = true;
8066					break;
8067				case 2:
8068					/* XXX compute */
8069					queue_reset = true;
8070					break;
8071				}
8072				break;
8073			case 1:
8074				switch (queue_id) {
8075				case 0:
8076					queue_reset = true;
8077					break;
8078				case 1:
8079					/* XXX compute */
8080					queue_reset = true;
8081					break;
8082				case 2:
8083					/* XXX compute */
8084					queue_reset = true;
8085					break;
8086				}
8087				break;
8088			}
8089			break;
8090		default:
8091			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8092			break;
8093		}
8094
8095		/* wptr/rptr are in bytes! */
8096		rptr += 16;
8097		rptr &= rdev->ih.ptr_mask;
8098		WREG32(IH_RB_RPTR, rptr);
8099	}
8100	if (queue_dp)
8101		schedule_work(&rdev->dp_work);
8102	if (queue_hotplug)
8103		schedule_delayed_work(&rdev->hotplug_work, 0);
8104	if (queue_reset) {
8105		rdev->needs_reset = true;
8106		wake_up_all(&rdev->fence_queue);
8107	}
8108	if (queue_thermal)
8109		schedule_work(&rdev->pm.dpm.thermal.work);
8110	rdev->ih.rptr = rptr;
8111	atomic_set(&rdev->ih.lock, 0);
8112
8113	/* make sure wptr hasn't changed while processing */
8114	wptr = cik_get_ih_wptr(rdev);
8115	if (wptr != rptr)
8116		goto restart_ih;
8117
8118	return IRQ_HANDLED;
8119}
8120
8121/*
8122 * startup/shutdown callbacks
8123 */
8124static void cik_uvd_init(struct radeon_device *rdev)
8125{
8126	int r;
8127
8128	if (!rdev->has_uvd)
8129		return;
8130
8131	r = radeon_uvd_init(rdev);
8132	if (r) {
8133		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8134		/*
8135		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8136		 * to early fails cik_uvd_start() and thus nothing happens
8137		 * there. So it is pointless to try to go through that code
8138		 * hence why we disable uvd here.
8139		 */
8140		rdev->has_uvd = 0;
8141		return;
8142	}
8143	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8144	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8145}
8146
8147static void cik_uvd_start(struct radeon_device *rdev)
8148{
8149	int r;
8150
8151	if (!rdev->has_uvd)
8152		return;
8153
8154	r = radeon_uvd_resume(rdev);
8155	if (r) {
8156		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8157		goto error;
8158	}
8159	r = uvd_v4_2_resume(rdev);
8160	if (r) {
8161		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8162		goto error;
8163	}
8164	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8165	if (r) {
8166		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8167		goto error;
8168	}
8169	return;
8170
8171error:
8172	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8173}
8174
8175static void cik_uvd_resume(struct radeon_device *rdev)
8176{
8177	struct radeon_ring *ring;
8178	int r;
8179
8180	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8181		return;
8182
8183	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8184	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8185	if (r) {
8186		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8187		return;
8188	}
8189	r = uvd_v1_0_init(rdev);
8190	if (r) {
8191		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8192		return;
8193	}
8194}
8195
8196static void cik_vce_init(struct radeon_device *rdev)
8197{
8198	int r;
8199
8200	if (!rdev->has_vce)
8201		return;
8202
8203	r = radeon_vce_init(rdev);
8204	if (r) {
8205		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8206		/*
8207		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8208		 * to early fails cik_vce_start() and thus nothing happens
8209		 * there. So it is pointless to try to go through that code
8210		 * hence why we disable vce here.
8211		 */
8212		rdev->has_vce = 0;
8213		return;
8214	}
8215	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8216	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8217	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8218	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8219}
8220
8221static void cik_vce_start(struct radeon_device *rdev)
8222{
8223	int r;
8224
8225	if (!rdev->has_vce)
8226		return;
8227
8228	r = radeon_vce_resume(rdev);
8229	if (r) {
8230		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8231		goto error;
8232	}
8233	r = vce_v2_0_resume(rdev);
8234	if (r) {
8235		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8236		goto error;
8237	}
8238	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8239	if (r) {
8240		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8241		goto error;
8242	}
8243	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8244	if (r) {
8245		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8246		goto error;
8247	}
8248	return;
8249
8250error:
8251	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8252	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8253}
8254
8255static void cik_vce_resume(struct radeon_device *rdev)
8256{
8257	struct radeon_ring *ring;
8258	int r;
8259
8260	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8261		return;
8262
8263	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8264	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8265	if (r) {
8266		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8267		return;
8268	}
8269	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8270	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8271	if (r) {
8272		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8273		return;
8274	}
8275	r = vce_v1_0_init(rdev);
8276	if (r) {
8277		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8278		return;
8279	}
8280}
8281
8282/**
8283 * cik_startup - program the asic to a functional state
8284 *
8285 * @rdev: radeon_device pointer
8286 *
8287 * Programs the asic to a functional state (CIK).
8288 * Called by cik_init() and cik_resume().
8289 * Returns 0 for success, error for failure.
8290 */
8291static int cik_startup(struct radeon_device *rdev)
8292{
8293	struct radeon_ring *ring;
8294	u32 nop;
8295	int r;
8296
8297	/* enable pcie gen2/3 link */
8298	cik_pcie_gen3_enable(rdev);
8299	/* enable aspm */
8300	cik_program_aspm(rdev);
8301
8302	/* scratch needs to be initialized before MC */
8303	r = r600_vram_scratch_init(rdev);
8304	if (r)
8305		return r;
8306
8307	cik_mc_program(rdev);
8308
8309	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8310		r = ci_mc_load_microcode(rdev);
8311		if (r) {
8312			DRM_ERROR("Failed to load MC firmware!\n");
8313			return r;
8314		}
8315	}
8316
8317	r = cik_pcie_gart_enable(rdev);
8318	if (r)
8319		return r;
8320	cik_gpu_init(rdev);
8321
8322	/* allocate rlc buffers */
8323	if (rdev->flags & RADEON_IS_IGP) {
8324		if (rdev->family == CHIP_KAVERI) {
8325			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8326			rdev->rlc.reg_list_size =
8327				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8328		} else {
8329			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8330			rdev->rlc.reg_list_size =
8331				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8332		}
8333	}
8334	rdev->rlc.cs_data = ci_cs_data;
8335	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8336	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8337	r = sumo_rlc_init(rdev);
8338	if (r) {
8339		DRM_ERROR("Failed to init rlc BOs!\n");
8340		return r;
8341	}
8342
8343	/* allocate wb buffer */
8344	r = radeon_wb_init(rdev);
8345	if (r)
8346		return r;
8347
8348	/* allocate mec buffers */
8349	r = cik_mec_init(rdev);
8350	if (r) {
8351		DRM_ERROR("Failed to init MEC BOs!\n");
8352		return r;
8353	}
8354
8355	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8356	if (r) {
8357		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8358		return r;
8359	}
8360
8361	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8362	if (r) {
8363		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8364		return r;
8365	}
8366
8367	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8368	if (r) {
8369		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8370		return r;
8371	}
8372
8373	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8374	if (r) {
8375		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8376		return r;
8377	}
8378
8379	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8380	if (r) {
8381		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8382		return r;
8383	}
8384
8385	cik_uvd_start(rdev);
8386	cik_vce_start(rdev);
8387
8388	/* Enable IRQ */
8389	if (!rdev->irq.installed) {
8390		r = radeon_irq_kms_init(rdev);
8391		if (r)
8392			return r;
8393	}
8394
8395	r = cik_irq_init(rdev);
8396	if (r) {
8397		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8398		radeon_irq_kms_fini(rdev);
8399		return r;
8400	}
8401	cik_irq_set(rdev);
8402
8403	if (rdev->family == CHIP_HAWAII) {
8404		if (rdev->new_fw)
8405			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8406		else
8407			nop = RADEON_CP_PACKET2;
8408	} else {
8409		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8410	}
8411
8412	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8413	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8414			     nop);
8415	if (r)
8416		return r;
8417
8418	/* set up the compute queues */
8419	/* type-2 packets are deprecated on MEC, use type-3 instead */
8420	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8421	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8422			     nop);
8423	if (r)
8424		return r;
8425	ring->me = 1; /* first MEC */
8426	ring->pipe = 0; /* first pipe */
8427	ring->queue = 0; /* first queue */
8428	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8429
8430	/* type-2 packets are deprecated on MEC, use type-3 instead */
8431	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8432	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8433			     nop);
8434	if (r)
8435		return r;
8436	/* dGPU only have 1 MEC */
8437	ring->me = 1; /* first MEC */
8438	ring->pipe = 0; /* first pipe */
8439	ring->queue = 1; /* second queue */
8440	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8441
8442	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8443	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8444			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8445	if (r)
8446		return r;
8447
8448	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8449	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8450			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8451	if (r)
8452		return r;
8453
8454	r = cik_cp_resume(rdev);
8455	if (r)
8456		return r;
8457
8458	r = cik_sdma_resume(rdev);
8459	if (r)
8460		return r;
8461
8462	cik_uvd_resume(rdev);
8463	cik_vce_resume(rdev);
8464
8465	r = radeon_ib_pool_init(rdev);
8466	if (r) {
8467		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8468		return r;
8469	}
8470
8471	r = radeon_vm_manager_init(rdev);
8472	if (r) {
8473		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8474		return r;
8475	}
8476
8477	r = radeon_audio_init(rdev);
8478	if (r)
8479		return r;
8480
8481	return 0;
8482}
8483
8484/**
8485 * cik_resume - resume the asic to a functional state
8486 *
8487 * @rdev: radeon_device pointer
8488 *
8489 * Programs the asic to a functional state (CIK).
8490 * Called at resume.
8491 * Returns 0 for success, error for failure.
8492 */
8493int cik_resume(struct radeon_device *rdev)
8494{
8495	int r;
8496
8497	/* post card */
8498	atom_asic_init(rdev->mode_info.atom_context);
8499
8500	/* init golden registers */
8501	cik_init_golden_registers(rdev);
8502
8503	if (rdev->pm.pm_method == PM_METHOD_DPM)
8504		radeon_pm_resume(rdev);
8505
8506	rdev->accel_working = true;
8507	r = cik_startup(rdev);
8508	if (r) {
8509		DRM_ERROR("cik startup failed on resume\n");
8510		rdev->accel_working = false;
8511		return r;
8512	}
8513
8514	return r;
8515
8516}
8517
8518/**
8519 * cik_suspend - suspend the asic
8520 *
8521 * @rdev: radeon_device pointer
8522 *
8523 * Bring the chip into a state suitable for suspend (CIK).
8524 * Called at suspend.
8525 * Returns 0 for success.
8526 */
8527int cik_suspend(struct radeon_device *rdev)
8528{
8529	radeon_pm_suspend(rdev);
8530	radeon_audio_fini(rdev);
8531	radeon_vm_manager_fini(rdev);
8532	cik_cp_enable(rdev, false);
8533	cik_sdma_enable(rdev, false);
8534	if (rdev->has_uvd) {
8535		uvd_v1_0_fini(rdev);
8536		radeon_uvd_suspend(rdev);
8537	}
8538	if (rdev->has_vce)
8539		radeon_vce_suspend(rdev);
8540	cik_fini_pg(rdev);
8541	cik_fini_cg(rdev);
8542	cik_irq_suspend(rdev);
8543	radeon_wb_disable(rdev);
8544	cik_pcie_gart_disable(rdev);
8545	return 0;
8546}
8547
8548/* Plan is to move initialization in that function and use
8549 * helper function so that radeon_device_init pretty much
8550 * do nothing more than calling asic specific function. This
8551 * should also allow to remove a bunch of callback function
8552 * like vram_info.
8553 */
8554/**
8555 * cik_init - asic specific driver and hw init
8556 *
8557 * @rdev: radeon_device pointer
8558 *
8559 * Setup asic specific driver variables and program the hw
8560 * to a functional state (CIK).
8561 * Called at driver startup.
8562 * Returns 0 for success, errors for failure.
8563 */
8564int cik_init(struct radeon_device *rdev)
8565{
8566	struct radeon_ring *ring;
8567	int r;
8568
8569	/* Read BIOS */
8570	if (!radeon_get_bios(rdev)) {
8571		if (ASIC_IS_AVIVO(rdev))
8572			return -EINVAL;
8573	}
8574	/* Must be an ATOMBIOS */
8575	if (!rdev->is_atom_bios) {
8576		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8577		return -EINVAL;
8578	}
8579	r = radeon_atombios_init(rdev);
8580	if (r)
8581		return r;
8582
8583	/* Post card if necessary */
8584	if (!radeon_card_posted(rdev)) {
8585		if (!rdev->bios) {
8586			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8587			return -EINVAL;
8588		}
8589		DRM_INFO("GPU not posted. posting now...\n");
8590		atom_asic_init(rdev->mode_info.atom_context);
8591	}
8592	/* init golden registers */
8593	cik_init_golden_registers(rdev);
8594	/* Initialize scratch registers */
8595	cik_scratch_init(rdev);
8596	/* Initialize surface registers */
8597	radeon_surface_init(rdev);
8598	/* Initialize clocks */
8599	radeon_get_clock_info(rdev->ddev);
8600
8601	/* Fence driver */
8602	r = radeon_fence_driver_init(rdev);
8603	if (r)
8604		return r;
8605
8606	/* initialize memory controller */
8607	r = cik_mc_init(rdev);
8608	if (r)
8609		return r;
8610	/* Memory manager */
8611	r = radeon_bo_init(rdev);
8612	if (r)
8613		return r;
8614
8615	if (rdev->flags & RADEON_IS_IGP) {
8616		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8617		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8618			r = cik_init_microcode(rdev);
8619			if (r) {
8620				DRM_ERROR("Failed to load firmware!\n");
8621				return r;
8622			}
8623		}
8624	} else {
8625		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8626		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8627		    !rdev->mc_fw) {
8628			r = cik_init_microcode(rdev);
8629			if (r) {
8630				DRM_ERROR("Failed to load firmware!\n");
8631				return r;
8632			}
8633		}
8634	}
8635
8636	/* Initialize power management */
8637	radeon_pm_init(rdev);
8638
8639	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8640	ring->ring_obj = NULL;
8641	r600_ring_init(rdev, ring, 1024 * 1024);
8642
8643	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8644	ring->ring_obj = NULL;
8645	r600_ring_init(rdev, ring, 1024 * 1024);
8646	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8647	if (r)
8648		return r;
8649
8650	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8651	ring->ring_obj = NULL;
8652	r600_ring_init(rdev, ring, 1024 * 1024);
8653	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8654	if (r)
8655		return r;
8656
8657	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8658	ring->ring_obj = NULL;
8659	r600_ring_init(rdev, ring, 256 * 1024);
8660
8661	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8662	ring->ring_obj = NULL;
8663	r600_ring_init(rdev, ring, 256 * 1024);
8664
8665	cik_uvd_init(rdev);
8666	cik_vce_init(rdev);
8667
8668	rdev->ih.ring_obj = NULL;
8669	r600_ih_ring_init(rdev, 64 * 1024);
8670
8671	r = r600_pcie_gart_init(rdev);
8672	if (r)
8673		return r;
8674
8675	rdev->accel_working = true;
8676	r = cik_startup(rdev);
8677	if (r) {
8678		dev_err(rdev->dev, "disabling GPU acceleration\n");
8679		cik_cp_fini(rdev);
8680		cik_sdma_fini(rdev);
8681		cik_irq_fini(rdev);
8682		sumo_rlc_fini(rdev);
8683		cik_mec_fini(rdev);
8684		radeon_wb_fini(rdev);
8685		radeon_ib_pool_fini(rdev);
8686		radeon_vm_manager_fini(rdev);
8687		radeon_irq_kms_fini(rdev);
8688		cik_pcie_gart_fini(rdev);
8689		rdev->accel_working = false;
8690	}
8691
	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
8696	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8697		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8698		return -EINVAL;
8699	}
8700
8701	return 0;
8702}
8703
8704/**
8705 * cik_fini - asic specific driver and hw fini
8706 *
8707 * @rdev: radeon_device pointer
8708 *
8709 * Tear down the asic specific driver variables and program the hw
8710 * to an idle state (CIK).
8711 * Called at driver unload.
8712 */
8713void cik_fini(struct radeon_device *rdev)
8714{
8715	radeon_pm_fini(rdev);
8716	cik_cp_fini(rdev);
8717	cik_sdma_fini(rdev);
8718	cik_fini_pg(rdev);
8719	cik_fini_cg(rdev);
8720	cik_irq_fini(rdev);
8721	sumo_rlc_fini(rdev);
8722	cik_mec_fini(rdev);
8723	radeon_wb_fini(rdev);
8724	radeon_vm_manager_fini(rdev);
8725	radeon_ib_pool_fini(rdev);
8726	radeon_irq_kms_fini(rdev);
8727	uvd_v1_0_fini(rdev);
8728	radeon_uvd_fini(rdev);
8729	radeon_vce_fini(rdev);
8730	cik_pcie_gart_fini(rdev);
8731	r600_vram_scratch_fini(rdev);
8732	radeon_gem_fini(rdev);
8733	radeon_fence_driver_fini(rdev);
8734	radeon_bo_fini(rdev);
8735	radeon_atombios_fini(rdev);
8736	kfree(rdev->bios);
8737	rdev->bios = NULL;
8738}
8739
8740void dce8_program_fmt(struct drm_encoder *encoder)
8741{
8742	struct drm_device *dev = encoder->dev;
8743	struct radeon_device *rdev = dev->dev_private;
8744	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8745	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8746	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8747	int bpc = 0;
8748	u32 tmp = 0;
8749	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8750
8751	if (connector) {
8752		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8753		bpc = radeon_get_monitor_bpc(connector);
8754		dither = radeon_connector->dither;
8755	}
8756
8757	/* LVDS/eDP FMT is set up by atom */
8758	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8759		return;
8760
8761	/* not needed for analog */
8762	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8763	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8764		return;
8765
8766	if (bpc == 0)
8767		return;
8768
8769	switch (bpc) {
8770	case 6:
8771		if (dither == RADEON_FMT_DITHER_ENABLE)
8772			/* XXX sort out optimal dither settings */
8773			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8775		else
8776			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8777		break;
8778	case 8:
8779		if (dither == RADEON_FMT_DITHER_ENABLE)
8780			/* XXX sort out optimal dither settings */
8781			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8782				FMT_RGB_RANDOM_ENABLE |
8783				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8784		else
8785			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8786		break;
8787	case 10:
8788		if (dither == RADEON_FMT_DITHER_ENABLE)
8789			/* XXX sort out optimal dither settings */
8790			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8791				FMT_RGB_RANDOM_ENABLE |
8792				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8793		else
8794			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8795		break;
8796	default:
8797		/* not needed */
8798		break;
8799	}
8800
8801	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8802}
8803
8804/* display watermark setup */
8805/**
8806 * dce8_line_buffer_adjust - Set up the line buffer
8807 *
8808 * @rdev: radeon_device pointer
8809 * @radeon_crtc: the selected display controller
8810 * @mode: the current display mode on the selected display
8811 * controller
8812 *
8813 * Setup up the line buffer allocation for
8814 * the selected display controller (CIK).
8815 * Returns the line buffer size in pixels.
8816 */
8817static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8818				   struct radeon_crtc *radeon_crtc,
8819				   struct drm_display_mode *mode)
8820{
8821	u32 tmp, buffer_alloc, i;
8822	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8823	/*
8824	 * Line Buffer Setup
8825	 * There are 6 line buffers, one for each display controllers.
8826	 * There are 3 partitions per LB. Select the number of partitions
8827	 * to enable based on the display width.  For display widths larger
8828	 * than 4096, you need use to use 2 display controllers and combine
8829	 * them using the stereo blender.
8830	 */
8831	if (radeon_crtc->base.enabled && mode) {
8832		if (mode->crtc_hdisplay < 1920) {
8833			tmp = 1;
8834			buffer_alloc = 2;
8835		} else if (mode->crtc_hdisplay < 2560) {
8836			tmp = 2;
8837			buffer_alloc = 2;
8838		} else if (mode->crtc_hdisplay < 4096) {
8839			tmp = 0;
8840			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8841		} else {
8842			DRM_DEBUG_KMS("Mode too big for LB!\n");
8843			tmp = 0;
8844			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845		}
8846	} else {
8847		tmp = 1;
8848		buffer_alloc = 0;
8849	}
8850
8851	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8852	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8853
8854	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8855	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8856	for (i = 0; i < rdev->usec_timeout; i++) {
8857		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8858		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8859			break;
8860		udelay(1);
8861	}
8862
8863	if (radeon_crtc->base.enabled && mode) {
8864		switch (tmp) {
8865		case 0:
8866		default:
8867			return 4096 * 2;
8868		case 1:
8869			return 1920 * 2;
8870		case 2:
8871			return 2560 * 2;
8872		}
8873	}
8874
8875	/* controller not enabled, so no lb used */
8876	return 0;
8877}
8878
8879/**
8880 * cik_get_number_of_dram_channels - get the number of dram channels
8881 *
8882 * @rdev: radeon_device pointer
8883 *
8884 * Look up the number of video ram channels (CIK).
8885 * Used for display watermark bandwidth calculations
8886 * Returns the number of dram channels
8887 */
8888static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8889{
8890	u32 tmp = RREG32(MC_SHARED_CHMAP);
8891
8892	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8893	case 0:
8894	default:
8895		return 1;
8896	case 1:
8897		return 2;
8898	case 2:
8899		return 4;
8900	case 3:
8901		return 8;
8902	case 4:
8903		return 3;
8904	case 5:
8905		return 6;
8906	case 6:
8907		return 10;
8908	case 7:
8909		return 12;
8910	case 8:
8911		return 16;
8912	}
8913}
8914
/* dce8_wm_params - input parameters for the DCE8 display watermark
 * calculations below.  Filled in by dce8_program_watermarks().
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8930
8931/**
8932 * dce8_dram_bandwidth - get the dram bandwidth
8933 *
8934 * @wm: watermark calculation data
8935 *
8936 * Calculate the raw dram bandwidth (CIK).
8937 * Used for display watermark bandwidth calculations
8938 * Returns the dram bandwidth in MBytes/s
8939 */
8940static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8941{
8942	/* Calculate raw DRAM Bandwidth */
8943	fixed20_12 dram_efficiency; /* 0.7 */
8944	fixed20_12 yclk, dram_channels, bandwidth;
8945	fixed20_12 a;
8946
8947	a.full = dfixed_const(1000);
8948	yclk.full = dfixed_const(wm->yclk);
8949	yclk.full = dfixed_div(yclk, a);
8950	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8951	a.full = dfixed_const(10);
8952	dram_efficiency.full = dfixed_const(7);
8953	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8954	bandwidth.full = dfixed_mul(dram_channels, yclk);
8955	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8956
8957	return dfixed_trunc(bandwidth);
8958}
8959
8960/**
8961 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8962 *
8963 * @wm: watermark calculation data
8964 *
8965 * Calculate the dram bandwidth used for display (CIK).
8966 * Used for display watermark bandwidth calculations
8967 * Returns the dram bandwidth for display in MBytes/s
8968 */
8969static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8970{
8971	/* Calculate DRAM Bandwidth and the part allocated to display. */
8972	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8973	fixed20_12 yclk, dram_channels, bandwidth;
8974	fixed20_12 a;
8975
8976	a.full = dfixed_const(1000);
8977	yclk.full = dfixed_const(wm->yclk);
8978	yclk.full = dfixed_div(yclk, a);
8979	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8980	a.full = dfixed_const(10);
8981	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8982	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8983	bandwidth.full = dfixed_mul(dram_channels, yclk);
8984	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8985
8986	return dfixed_trunc(bandwidth);
8987}
8988
8989/**
8990 * dce8_data_return_bandwidth - get the data return bandwidth
8991 *
8992 * @wm: watermark calculation data
8993 *
8994 * Calculate the data return bandwidth used for display (CIK).
8995 * Used for display watermark bandwidth calculations
8996 * Returns the data return bandwidth in MBytes/s
8997 */
8998static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8999{
9000	/* Calculate the display Data return Bandwidth */
9001	fixed20_12 return_efficiency; /* 0.8 */
9002	fixed20_12 sclk, bandwidth;
9003	fixed20_12 a;
9004
9005	a.full = dfixed_const(1000);
9006	sclk.full = dfixed_const(wm->sclk);
9007	sclk.full = dfixed_div(sclk, a);
9008	a.full = dfixed_const(10);
9009	return_efficiency.full = dfixed_const(8);
9010	return_efficiency.full = dfixed_div(return_efficiency, a);
9011	a.full = dfixed_const(32);
9012	bandwidth.full = dfixed_mul(a, sclk);
9013	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9014
9015	return dfixed_trunc(bandwidth);
9016}
9017
9018/**
9019 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9020 *
9021 * @wm: watermark calculation data
9022 *
9023 * Calculate the dmif bandwidth used for display (CIK).
9024 * Used for display watermark bandwidth calculations
9025 * Returns the dmif bandwidth in MBytes/s
9026 */
9027static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9028{
9029	/* Calculate the DMIF Request Bandwidth */
9030	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9031	fixed20_12 disp_clk, bandwidth;
9032	fixed20_12 a, b;
9033
9034	a.full = dfixed_const(1000);
9035	disp_clk.full = dfixed_const(wm->disp_clk);
9036	disp_clk.full = dfixed_div(disp_clk, a);
9037	a.full = dfixed_const(32);
9038	b.full = dfixed_mul(a, disp_clk);
9039
9040	a.full = dfixed_const(10);
9041	disp_clk_request_efficiency.full = dfixed_const(8);
9042	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9043
9044	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9045
9046	return dfixed_trunc(bandwidth);
9047}
9048
9049/**
9050 * dce8_available_bandwidth - get the min available bandwidth
9051 *
9052 * @wm: watermark calculation data
9053 *
9054 * Calculate the min available bandwidth used for display (CIK).
9055 * Used for display watermark bandwidth calculations
9056 * Returns the min available bandwidth in MBytes/s
9057 */
9058static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9059{
9060	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9061	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9062	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9063	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9064
9065	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9066}
9067
9068/**
9069 * dce8_average_bandwidth - get the average available bandwidth
9070 *
9071 * @wm: watermark calculation data
9072 *
9073 * Calculate the average available bandwidth used for display (CIK).
9074 * Used for display watermark bandwidth calculations
9075 * Returns the average available bandwidth in MBytes/s
9076 */
9077static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9078{
9079	/* Calculate the display mode Average Bandwidth
9080	 * DisplayMode should contain the source and destination dimensions,
9081	 * timing, etc.
9082	 */
9083	fixed20_12 bpp;
9084	fixed20_12 line_time;
9085	fixed20_12 src_width;
9086	fixed20_12 bandwidth;
9087	fixed20_12 a;
9088
9089	a.full = dfixed_const(1000);
9090	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9091	line_time.full = dfixed_div(line_time, a);
9092	bpp.full = dfixed_const(wm->bytes_per_pixel);
9093	src_width.full = dfixed_const(wm->src_width);
9094	bandwidth.full = dfixed_mul(src_width, bpp);
9095	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9096	bandwidth.full = dfixed_div(bandwidth, line_time);
9097
9098	return dfixed_trunc(bandwidth);
9099}
9100
9101/**
9102 * dce8_latency_watermark - get the latency watermark
9103 *
9104 * @wm: watermark calculation data
9105 *
9106 * Calculate the latency watermark (CIK).
9107 * Used for display watermark bandwidth calculations
9108 * Returns the latency watermark in ns
9109 */
9110static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9111{
9112	/* First calculate the latency in ns */
9113	u32 mc_latency = 2000; /* 2000 ns. */
9114	u32 available_bandwidth = dce8_available_bandwidth(wm);
9115	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9116	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9117	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9118	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9119		(wm->num_heads * cursor_line_pair_return_time);
9120	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9121	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9122	u32 tmp, dmif_size = 12288;
9123	fixed20_12 a, b, c;
9124
9125	if (wm->num_heads == 0)
9126		return 0;
9127
9128	a.full = dfixed_const(2);
9129	b.full = dfixed_const(1);
9130	if ((wm->vsc.full > a.full) ||
9131	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9132	    (wm->vtaps >= 5) ||
9133	    ((wm->vsc.full >= a.full) && wm->interlaced))
9134		max_src_lines_per_dst_line = 4;
9135	else
9136		max_src_lines_per_dst_line = 2;
9137
9138	a.full = dfixed_const(available_bandwidth);
9139	b.full = dfixed_const(wm->num_heads);
9140	a.full = dfixed_div(a, b);
9141	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9142	tmp = min(dfixed_trunc(a), tmp);
9143
9144	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9145
9146	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9147	b.full = dfixed_const(1000);
9148	c.full = dfixed_const(lb_fill_bw);
9149	b.full = dfixed_div(c, b);
9150	a.full = dfixed_div(a, b);
9151	line_fill_time = dfixed_trunc(a);
9152
9153	if (line_fill_time < wm->active_time)
9154		return latency;
9155	else
9156		return latency + (line_fill_time - wm->active_time);
9157
9158}
9159
9160/**
9161 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9162 * average and available dram bandwidth
9163 *
9164 * @wm: watermark calculation data
9165 *
9166 * Check if the display average bandwidth fits in the display
9167 * dram bandwidth (CIK).
9168 * Used for display watermark bandwidth calculations
9169 * Returns true if the display fits, false if not.
9170 */
9171static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9172{
9173	if (dce8_average_bandwidth(wm) <=
9174	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9175		return true;
9176	else
9177		return false;
9178}
9179
9180/**
9181 * dce8_average_bandwidth_vs_available_bandwidth - check
9182 * average and available bandwidth
9183 *
9184 * @wm: watermark calculation data
9185 *
9186 * Check if the display average bandwidth fits in the display
9187 * available bandwidth (CIK).
9188 * Used for display watermark bandwidth calculations
9189 * Returns true if the display fits, false if not.
9190 */
9191static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9192{
9193	if (dce8_average_bandwidth(wm) <=
9194	    (dce8_available_bandwidth(wm) / wm->num_heads))
9195		return true;
9196	else
9197		return false;
9198}
9199
9200/**
9201 * dce8_check_latency_hiding - check latency hiding
9202 *
9203 * @wm: watermark calculation data
9204 *
9205 * Check latency hiding (CIK).
9206 * Used for display watermark bandwidth calculations
9207 * Returns true if the display fits, false if not.
9208 */
9209static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9210{
9211	u32 lb_partitions = wm->lb_size / wm->src_width;
9212	u32 line_time = wm->active_time + wm->blank_time;
9213	u32 latency_tolerant_lines;
9214	u32 latency_hiding;
9215	fixed20_12 a;
9216
9217	a.full = dfixed_const(1);
9218	if (wm->vsc.full > a.full)
9219		latency_tolerant_lines = 1;
9220	else {
9221		if (lb_partitions <= (wm->vtaps + 1))
9222			latency_tolerant_lines = 1;
9223		else
9224			latency_tolerant_lines = 2;
9225	}
9226
9227	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9228
9229	if (dce8_latency_watermark(wm) <= latency_hiding)
9230		return true;
9231	else
9232		return false;
9233}
9234
9235/**
9236 * dce8_program_watermarks - program display watermarks
9237 *
9238 * @rdev: radeon_device pointer
9239 * @radeon_crtc: the selected display controller
9240 * @lb_size: line buffer size
9241 * @num_heads: number of display controllers in use
9242 *
9243 * Calculate and program the display watermarks for the
9244 * selected display controller (CIK).
9245 */
9246static void dce8_program_watermarks(struct radeon_device *rdev,
9247				    struct radeon_crtc *radeon_crtc,
9248				    u32 lb_size, u32 num_heads)
9249{
9250	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9251	struct dce8_wm_params wm_low, wm_high;
9252	u32 active_time;
9253	u32 line_time = 0;
9254	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9255	u32 tmp, wm_mask;
9256
9257	if (radeon_crtc->base.enabled && num_heads && mode) {
9258		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9259					    (u32)mode->clock);
9260		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9261					  (u32)mode->clock);
9262		line_time = min(line_time, (u32)65535);
9263
9264		/* watermark for high clocks */
9265		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9266		    rdev->pm.dpm_enabled) {
9267			wm_high.yclk =
9268				radeon_dpm_get_mclk(rdev, false) * 10;
9269			wm_high.sclk =
9270				radeon_dpm_get_sclk(rdev, false) * 10;
9271		} else {
9272			wm_high.yclk = rdev->pm.current_mclk * 10;
9273			wm_high.sclk = rdev->pm.current_sclk * 10;
9274		}
9275
9276		wm_high.disp_clk = mode->clock;
9277		wm_high.src_width = mode->crtc_hdisplay;
9278		wm_high.active_time = active_time;
9279		wm_high.blank_time = line_time - wm_high.active_time;
9280		wm_high.interlaced = false;
9281		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9282			wm_high.interlaced = true;
9283		wm_high.vsc = radeon_crtc->vsc;
9284		wm_high.vtaps = 1;
9285		if (radeon_crtc->rmx_type != RMX_OFF)
9286			wm_high.vtaps = 2;
9287		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9288		wm_high.lb_size = lb_size;
9289		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9290		wm_high.num_heads = num_heads;
9291
9292		/* set for high clocks */
9293		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9294
9295		/* possibly force display priority to high */
9296		/* should really do this at mode validation time... */
9297		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9298		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9299		    !dce8_check_latency_hiding(&wm_high) ||
9300		    (rdev->disp_priority == 2)) {
9301			DRM_DEBUG_KMS("force priority to high\n");
9302		}
9303
9304		/* watermark for low clocks */
9305		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9306		    rdev->pm.dpm_enabled) {
9307			wm_low.yclk =
9308				radeon_dpm_get_mclk(rdev, true) * 10;
9309			wm_low.sclk =
9310				radeon_dpm_get_sclk(rdev, true) * 10;
9311		} else {
9312			wm_low.yclk = rdev->pm.current_mclk * 10;
9313			wm_low.sclk = rdev->pm.current_sclk * 10;
9314		}
9315
9316		wm_low.disp_clk = mode->clock;
9317		wm_low.src_width = mode->crtc_hdisplay;
9318		wm_low.active_time = active_time;
9319		wm_low.blank_time = line_time - wm_low.active_time;
9320		wm_low.interlaced = false;
9321		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9322			wm_low.interlaced = true;
9323		wm_low.vsc = radeon_crtc->vsc;
9324		wm_low.vtaps = 1;
9325		if (radeon_crtc->rmx_type != RMX_OFF)
9326			wm_low.vtaps = 2;
9327		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9328		wm_low.lb_size = lb_size;
9329		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9330		wm_low.num_heads = num_heads;
9331
9332		/* set for low clocks */
9333		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9334
9335		/* possibly force display priority to high */
9336		/* should really do this at mode validation time... */
9337		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9338		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9339		    !dce8_check_latency_hiding(&wm_low) ||
9340		    (rdev->disp_priority == 2)) {
9341			DRM_DEBUG_KMS("force priority to high\n");
9342		}
9343
9344		/* Save number of lines the linebuffer leads before the scanout */
9345		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9346	}
9347
9348	/* select wm A */
9349	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9350	tmp = wm_mask;
9351	tmp &= ~LATENCY_WATERMARK_MASK(3);
9352	tmp |= LATENCY_WATERMARK_MASK(1);
9353	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9354	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9355	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9356		LATENCY_HIGH_WATERMARK(line_time)));
9357	/* select wm B */
9358	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9359	tmp &= ~LATENCY_WATERMARK_MASK(3);
9360	tmp |= LATENCY_WATERMARK_MASK(2);
9361	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9362	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9363	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9364		LATENCY_HIGH_WATERMARK(line_time)));
9365	/* restore original selection */
9366	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9367
9368	/* save values for DPM */
9369	radeon_crtc->line_time = line_time;
9370	radeon_crtc->wm_high = latency_watermark_a;
9371	radeon_crtc->wm_low = latency_watermark_b;
9372}
9373
9374/**
9375 * dce8_bandwidth_update - program display watermarks
9376 *
9377 * @rdev: radeon_device pointer
9378 *
9379 * Calculate and program the display watermarks and line
9380 * buffer allocation (CIK).
9381 */
9382void dce8_bandwidth_update(struct radeon_device *rdev)
9383{
9384	struct drm_display_mode *mode = NULL;
9385	u32 num_heads = 0, lb_size;
9386	int i;
9387
9388	if (!rdev->mode_info.mode_config_initialized)
9389		return;
9390
9391	radeon_update_display_priority(rdev);
9392
9393	for (i = 0; i < rdev->num_crtc; i++) {
9394		if (rdev->mode_info.crtcs[i]->base.enabled)
9395			num_heads++;
9396	}
9397	for (i = 0; i < rdev->num_crtc; i++) {
9398		mode = &rdev->mode_info.crtcs[i]->base.mode;
9399		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9400		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9401	}
9402}
9403
9404/**
9405 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9406 *
9407 * @rdev: radeon_device pointer
9408 *
9409 * Fetches a GPU clock counter snapshot (SI).
9410 * Returns the 64 bit clock counter snapshot.
9411 */
9412uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9413{
9414	uint64_t clock;
9415
9416	mutex_lock(&rdev->gpu_clock_mutex);
9417	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9418	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9419		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9420	mutex_unlock(&rdev->gpu_clock_mutex);
9421	return clock;
9422}
9423
9424static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9425			     u32 cntl_reg, u32 status_reg)
9426{
9427	int r, i;
9428	struct atom_clock_dividers dividers;
9429	uint32_t tmp;
9430
9431	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9432					   clock, false, &dividers);
9433	if (r)
9434		return r;
9435
9436	tmp = RREG32_SMC(cntl_reg);
9437	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9438	tmp |= dividers.post_divider;
9439	WREG32_SMC(cntl_reg, tmp);
9440
9441	for (i = 0; i < 100; i++) {
9442		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9443			break;
9444		mdelay(10);
9445	}
9446	if (i == 100)
9447		return -ETIMEDOUT;
9448
9449	return 0;
9450}
9451
9452int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9453{
9454	int r = 0;
9455
9456	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9457	if (r)
9458		return r;
9459
9460	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9461	return r;
9462}
9463
9464int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9465{
9466	int r, i;
9467	struct atom_clock_dividers dividers;
9468	u32 tmp;
9469
9470	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9471					   ecclk, false, &dividers);
9472	if (r)
9473		return r;
9474
9475	for (i = 0; i < 100; i++) {
9476		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9477			break;
9478		mdelay(10);
9479	}
9480	if (i == 100)
9481		return -ETIMEDOUT;
9482
9483	tmp = RREG32_SMC(CG_ECLK_CNTL);
9484	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9485	tmp |= dividers.post_divider;
9486	WREG32_SMC(CG_ECLK_CNTL, tmp);
9487
9488	for (i = 0; i < 100; i++) {
9489		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9490			break;
9491		mdelay(10);
9492	}
9493	if (i == 100)
9494		return -ETIMEDOUT;
9495
9496	return 0;
9497}
9498
/* cik_pcie_gen3_enable - bring the PCIe link up to gen2/gen3 speeds
 *
 * Checks what the root port and GPU both support, runs the gen3
 * equalization retry sequence if needed, then initiates the speed
 * change and waits for it to complete.  No-op on IGPs, non-PCIE parts,
 * or when disabled via the radeon.pcie_gen2 module parameter.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* bail early if the link already runs at the best supported rate */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends must expose a PCIe capability for config-space access */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save current LNKCTL and enable HW autonomous width */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			/* widen the link back to its maximum if it narrowed */
			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo equalization up to 10 times, restoring the saved
			 * LNKCTL/LNKCTL2 state around each attempt
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then trigger equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore enter-compliance and transmit-margin bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* set the target link speed in LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9658
/**
 * cik_program_aspm - configure PCIe ASPM (Active State Power Management)
 * on CIK parts.
 * @rdev: radeon device handle
 *
 * Programs the PCIe link power-saving behavior: L0s/L1 inactivity timers,
 * PLL power-down in L1, dynamic lane power states, and (when the upstream
 * bridge supports clock power management) CLKREQ#-based clock gating.
 * Bails out early when ASPM is disabled via the radeon_aspm module
 * parameter, on IGPs, or on non-PCIE parts.
 *
 * The disable_* locals are hard-wired false here; they exist as named
 * knobs for the conditional paths below (presumably tweaked per-ASIC
 * during bring-up — the "disabled" branches are intentionally kept).
 *
 * Throughout, the "if (orig != data)" pattern skips the register write
 * when nothing changed, avoiding unnecessary PCIe/SMC register traffic.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* ASPM globally disabled via module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	/* ASPM only applies to PCIE links */
	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted
	 * before entering L0s (0x24 — hardware-team value, TODO confirm)
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* start from a clean slate: clear both inactivity timers and
	 * block PMI-initiated L1 entry by default
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* enable L1: set the inactivity timer and re-allow
		 * PMI-to-L1 transitions
		 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PCIe PLLs to power down while in L1/TXS2
			 * (same value programmed for both PIF blocks,
			 * lanes 0 and 1 banks)
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* enable dynamic lane power state management */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# is only usable if the upstream bridge
			 * advertises Clock Power Management in its link
			 * capabilities; pci_is_root_bus() guards against
			 * dereferencing a NULL bus->self
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow power-down in L1/L2-L3 and switch the
				 * thermal monitor, deep-sleep/zclk and MPLL
				 * bypass clocks away from the (gateable)
				 * reference clock — register semantics per
				 * hardware docs, TODO confirm exact selects
				 */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the PCIE_LC_CNTL value built above
		 * (L0s timer possibly set, L1 timer cleared, PMI-to-L1 off)
		 */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the BIF memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the link partner reports the maximum N_FTS value and the
		 * lanes are reversed in both directions, back L0s off again by
		 * clearing the inactivity timer (hardware erratum workaround,
		 * presumably — TODO confirm)
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}