/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register.
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
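/*
 * Usage sketch (illustrative, not part of the original file): the
 * RADEON_INFO_READ_REG path of the info ioctl would use this helper to
 * vet a userspace-supplied offset before touching the hardware:
 *
 *	u32 val;
 *
 *	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &val) == 0)
 *		DRM_DEBUG("GRBM_STATUS = 0x%08x\n", val);
 *	else
 *		DRM_DEBUG("register is not on the read whitelist\n");
 */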

/*
 * Indirect registers accessor
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
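/*
 * Read-modify-write sketch for the DIDT accessors (illustrative only;
 * DIDT_SQ_CTRL0 and DIDT_CTRL_EN stand in for whatever field a caller
 * actually changes).  The spinlock inside each accessor keeps the
 * index/data pair atomic against concurrent users:
 *
 *	u32 tmp = cik_didt_rreg(rdev, DIDT_SQ_CTRL0);
 *	tmp |= DIDT_CTRL_EN;
 *	cik_didt_wreg(rdev, DIDT_SQ_CTRL0, tmp);
 */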

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
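/*
 * Both helpers return millidegrees Celsius, which is what the hwmon
 * interface expects.  Worked example (raw values assumed): a CTF reading
 * of 0x3c from ci_get_temp() yields 60 * 1000 = 60000 (60 C), and a raw
 * SMC reading of 872 from kv_get_temp() yields (872 / 8 - 49) * 1000 =
 * 60000 as well.
 */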

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
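/*
 * Note on the (void)RREG32() calls above: reading the index register
 * back flushes the posted write so the index has landed before the data
 * access, and the trailing data read does the same for the write path.
 * A read-modify-write sketch (register and bit names are placeholders
 * for illustration):
 *
 *	u32 tmp = cik_pciep_rreg(rdev, PCIE_LC_LINK_WIDTH_CNTL);
 *	tmp |= LC_RECONFIG_NOW;
 *	cik_pciep_wreg(rdev, PCIE_LC_LINK_WIDTH_CNTL, tmp);
 */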

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
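/*
 * Layout note (added for readability; best-effort interpretation): each
 * 32-bit entry above packs a GRBM_GFX_INDEX selector in the high 16 bits
 * and a dword register offset (byte offset >> 2) in the low 16 bits,
 * e.g. (0x0e00 << 16) | (0xc12c >> 2) selects the broadcast instance of
 * register 0xc12c.  The interleaved 0x00000000 words and the bare counts
 * (0x3, 0x5) are consumed by the RLC firmware, which walks this table to
 * save and restore state around power gating.
 */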

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}
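/*
 * Format note and usage sketch (added for clarity; the helper lives
 * elsewhere in the driver): the golden-register tables above are
 * triplets of { offset, and_mask, or_value }.
 * radeon_program_register_sequence() applies each entry roughly as
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= (or_value & and_mask);
 *	WREG32(reg, tmp);
 *
 * with a full 0xffffffff mask acting as a plain overwrite, so e.g.
 * { 0xc770, 0xffffffff, 0x00000800 } forces register 0xc770 to
 * 0x00000800, while a partial mask only touches the masked bits.
 */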

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
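/*
 * Worked example (numbers assumed for illustration): the driver keeps
 * clock frequencies in 10 kHz units, so a 100 MHz SPLL reference is
 * reference_freq == 10000.  On a dGPU with XTALIN_DIVIDE set this
 * returns 10000 / 4 = 2500 (25 MHz); on an IGP with GPU_COUNTER_CLK set
 * it returns 10000 / 2 = 5000 (50 MHz); otherwise the reference clock
 * is returned unchanged.
 */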

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
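/*
 * Usage sketch (illustrative only): compute rings on CIK kick the
 * hardware by writing the ring's write pointer to its doorbell rather
 * than to a dedicated MMIO register:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 *
 * The bounds checks above turn a bad index into a DRM_ERROR instead of
 * a stray access beyond the mapped doorbell aperture.
 */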
1766
1767#define BONAIRE_IO_MC_REGS_SIZE 36
1768
1769static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1770{
1771	{0x00000070, 0x04400000},
1772	{0x00000071, 0x80c01803},
1773	{0x00000072, 0x00004004},
1774	{0x00000073, 0x00000100},
1775	{0x00000074, 0x00ff0000},
1776	{0x00000075, 0x34000000},
1777	{0x00000076, 0x08000014},
1778	{0x00000077, 0x00cc08ec},
1779	{0x00000078, 0x00000400},
1780	{0x00000079, 0x00000000},
1781	{0x0000007a, 0x04090000},
1782	{0x0000007c, 0x00000000},
1783	{0x0000007e, 0x4408a8e8},
1784	{0x0000007f, 0x00000304},
1785	{0x00000080, 0x00000000},
1786	{0x00000082, 0x00000001},
1787	{0x00000083, 0x00000002},
1788	{0x00000084, 0xf3e4f400},
1789	{0x00000085, 0x052024e3},
1790	{0x00000087, 0x00000000},
1791	{0x00000088, 0x01000000},
1792	{0x0000008a, 0x1c0a0000},
1793	{0x0000008b, 0xff010000},
1794	{0x0000008d, 0xffffefff},
1795	{0x0000008e, 0xfff3efff},
1796	{0x0000008f, 0xfff3efbf},
1797	{0x00000092, 0xf7ffffff},
1798	{0x00000093, 0xffffff7f},
1799	{0x00000095, 0x00101101},
1800	{0x00000096, 0x00000fff},
1801	{0x00000097, 0x00116fff},
1802	{0x00000098, 0x60010000},
1803	{0x00000099, 0x10010000},
1804	{0x0000009a, 0x00006000},
1805	{0x0000009b, 0x00001000},
1806	{0x0000009f, 0x00b48000}
1807};
1808
1809#define HAWAII_IO_MC_REGS_SIZE 22
1810
1811static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1812{
1813	{0x0000007d, 0x40000000},
1814	{0x0000007e, 0x40180304},
1815	{0x0000007f, 0x0000ff00},
1816	{0x00000081, 0x00000000},
1817	{0x00000083, 0x00000800},
1818	{0x00000086, 0x00000000},
1819	{0x00000087, 0x00000100},
1820	{0x00000088, 0x00020100},
1821	{0x00000089, 0x00000000},
1822	{0x0000008b, 0x00040000},
1823	{0x0000008c, 0x00000100},
1824	{0x0000008e, 0xff010000},
1825	{0x00000090, 0xffffefff},
1826	{0x00000091, 0xfff3efff},
1827	{0x00000092, 0xfff3efbf},
1828	{0x00000093, 0xf7ffffff},
1829	{0x00000094, 0xffffff7f},
1830	{0x00000095, 0x00000fff},
1831	{0x00000096, 0x00116fff},
1832	{0x00000097, 0x60010000},
1833	{0x00000098, 0x10010000},
1834	{0x0000009f, 0x00c79000}
1835};
1836
1838/**
1839 * cik_srbm_select - select specific register instances
1840 *
1841 * @rdev: radeon_device pointer
1842 * @me: selected ME (micro engine)
1843 * @pipe: pipe
1844 * @queue: queue
1845 * @vmid: VMID
1846 *
1847 * Switches the currently active register instances.  Some
1848 * registers are instanced per VMID, others are instanced per
1849 * me/pipe/queue combination.
1850 */
1851static void cik_srbm_select(struct radeon_device *rdev,
1852			    u32 me, u32 pipe, u32 queue, u32 vmid)
1853{
1854	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1855			     MEID(me & 0x3) |
1856			     VMID(vmid & 0xf) |
1857			     QUEUEID(queue & 0x7));
1858	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1859}
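
/*
 * Usage sketch (not verbatim driver code): callers serialize on
 * rdev->srbm_mutex, select the instance they want, program the
 * per-instance registers, then restore the default selection:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... program per-queue or per-VMID registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */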
1860
1861/* ucode loading */
1862/**
1863 * ci_mc_load_microcode - load MC ucode into the hw
1864 *
1865 * @rdev: radeon_device pointer
1866 *
1867 * Load the GDDR MC ucode into the hw (CIK).
1868 * Returns 0 on success, error on failure.
1869 */
1870int ci_mc_load_microcode(struct radeon_device *rdev)
1871{
1872	const __be32 *fw_data = NULL;
1873	const __le32 *new_fw_data = NULL;
1874	u32 running, tmp;
1875	u32 *io_mc_regs = NULL;
1876	const __le32 *new_io_mc_regs = NULL;
1877	int i, regs_size, ucode_size;
1878
1879	if (!rdev->mc_fw)
1880		return -EINVAL;
1881
1882	if (rdev->new_fw) {
1883		const struct mc_firmware_header_v1_0 *hdr =
1884			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1885
1886		radeon_ucode_print_mc_hdr(&hdr->header);
1887
1888		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1889		new_io_mc_regs = (const __le32 *)
1890			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1891		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1892		new_fw_data = (const __le32 *)
1893			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1894	} else {
1895		ucode_size = rdev->mc_fw->size / 4;
1896
1897		switch (rdev->family) {
1898		case CHIP_BONAIRE:
1899			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1900			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1901			break;
1902		case CHIP_HAWAII:
1903			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1904			regs_size = HAWAII_IO_MC_REGS_SIZE;
1905			break;
1906		default:
1907			return -EINVAL;
1908		}
1909		fw_data = (const __be32 *)rdev->mc_fw->data;
1910	}
1911
1912	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1913
1914	if (running == 0) {
1915		/* reset the engine and set to writable */
1916		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1917		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1918
1919		/* load mc io regs */
1920		for (i = 0; i < regs_size; i++) {
1921			if (rdev->new_fw) {
1922				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1923				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1924			} else {
1925				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1926				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1927			}
1928		}
1929
1930		tmp = RREG32(MC_SEQ_MISC0);
1931		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1932			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1933			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1934			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1935			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1936		}
1937
1938		/* load the MC ucode */
1939		for (i = 0; i < ucode_size; i++) {
1940			if (rdev->new_fw)
1941				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1942			else
1943				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1944		}
1945
1946		/* put the engine back into the active state */
1947		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1948		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1949		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1950
1951		/* wait for training to complete */
1952		for (i = 0; i < rdev->usec_timeout; i++) {
1953			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1954				break;
1955			udelay(1);
1956		}
1957		for (i = 0; i < rdev->usec_timeout; i++) {
1958			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1959				break;
1960			udelay(1);
1961		}
1962	}
1963
1964	return 0;
1965}
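
/*
 * Typical call site (sketch): invoked from hw init/resume before the
 * memory controller is brought up, e.g.
 *
 *	r = ci_mc_load_microcode(rdev);
 *	if (r)
 *		DRM_ERROR("Failed to load MC firmware!\n");
 *
 * If RUN_MASK is already set the sequencer is live and the function
 * returns 0 without reprogramming it.
 */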
1966
1967/**
1968 * cik_init_microcode - load ucode images from disk
1969 *
1970 * @rdev: radeon_device pointer
1971 *
1972 * Use the firmware interface to load the ucode images into
1973 * the driver (not loaded into hw).
1974 * Returns 0 on success, error on failure.
1975 */
1976static int cik_init_microcode(struct radeon_device *rdev)
1977{
1978	const char *chip_name;
1979	const char *new_chip_name;
1980	size_t pfp_req_size, me_req_size, ce_req_size,
1981		mec_req_size, rlc_req_size, mc_req_size = 0,
1982		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1983	char fw_name[30];
1984	int new_fw = 0;
1985	int err;
1986	int num_fw;
1987	bool new_smc = false;
1988
1989	DRM_DEBUG("\n");
1990
1991	switch (rdev->family) {
1992	case CHIP_BONAIRE:
1993		chip_name = "BONAIRE";
1994		if ((rdev->pdev->revision == 0x80) ||
1995		    (rdev->pdev->revision == 0x81) ||
1996		    (rdev->pdev->device == 0x665f))
1997			new_smc = true;
1998		new_chip_name = "bonaire";
1999		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2000		me_req_size = CIK_ME_UCODE_SIZE * 4;
2001		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2002		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2003		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2004		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2005		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2006		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2007		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2008		num_fw = 8;
2009		break;
2010	case CHIP_HAWAII:
2011		chip_name = "HAWAII";
2012		if (rdev->pdev->revision == 0x80)
2013			new_smc = true;
2014		new_chip_name = "hawaii";
2015		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2016		me_req_size = CIK_ME_UCODE_SIZE * 4;
2017		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2018		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2019		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2020		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2021		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2022		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2024		num_fw = 8;
2025		break;
2026	case CHIP_KAVERI:
2027		chip_name = "KAVERI";
2028		new_chip_name = "kaveri";
2029		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2030		me_req_size = CIK_ME_UCODE_SIZE * 4;
2031		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2032		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2033		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2034		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2035		num_fw = 7;
2036		break;
2037	case CHIP_KABINI:
2038		chip_name = "KABINI";
2039		new_chip_name = "kabini";
2040		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2041		me_req_size = CIK_ME_UCODE_SIZE * 4;
2042		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2043		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2044		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2045		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2046		num_fw = 6;
2047		break;
2048	case CHIP_MULLINS:
2049		chip_name = "MULLINS";
2050		new_chip_name = "mullins";
2051		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2052		me_req_size = CIK_ME_UCODE_SIZE * 4;
2053		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2054		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2055		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2056		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2057		num_fw = 6;
2058		break;
2059	default: BUG();
2060	}
2061
2062	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2063
2064	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2065	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2066	if (err) {
2067		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2068		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069		if (err)
2070			goto out;
2071		if (rdev->pfp_fw->size != pfp_req_size) {
2072			printk(KERN_ERR
2073			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2074			       rdev->pfp_fw->size, fw_name);
2075			err = -EINVAL;
2076			goto out;
2077		}
2078	} else {
2079		err = radeon_ucode_validate(rdev->pfp_fw);
2080		if (err) {
2081			printk(KERN_ERR
2082			       "cik_fw: validation failed for firmware \"%s\"\n",
2083			       fw_name);
2084			goto out;
2085		} else {
2086			new_fw++;
2087		}
2088	}
2089
2090	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2091	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2092	if (err) {
2093		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2094		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2095		if (err)
2096			goto out;
2097		if (rdev->me_fw->size != me_req_size) {
2098			printk(KERN_ERR
2099			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100			       rdev->me_fw->size, fw_name);
2101			err = -EINVAL;
2102		}
2103	} else {
2104		err = radeon_ucode_validate(rdev->me_fw);
2105		if (err) {
2106			printk(KERN_ERR
2107			       "cik_fw: validation failed for firmware \"%s\"\n",
2108			       fw_name);
2109			goto out;
2110		} else {
2111			new_fw++;
2112		}
2113	}
2114
2115	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2116	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2117	if (err) {
2118		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2119		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2120		if (err)
2121			goto out;
2122		if (rdev->ce_fw->size != ce_req_size) {
2123			printk(KERN_ERR
2124			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2125			       rdev->ce_fw->size, fw_name);
2126			err = -EINVAL;
2127		}
2128	} else {
2129		err = radeon_ucode_validate(rdev->ce_fw);
2130		if (err) {
2131			printk(KERN_ERR
2132			       "cik_fw: validation failed for firmware \"%s\"\n",
2133			       fw_name);
2134			goto out;
2135		} else {
2136			new_fw++;
2137		}
2138	}
2139
2140	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2141	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142	if (err) {
2143		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2144		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2145		if (err)
2146			goto out;
2147		if (rdev->mec_fw->size != mec_req_size) {
2148			printk(KERN_ERR
2149			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2150			       rdev->mec_fw->size, fw_name);
2151			err = -EINVAL;
2152		}
2153	} else {
2154		err = radeon_ucode_validate(rdev->mec_fw);
2155		if (err) {
2156			printk(KERN_ERR
2157			       "cik_fw: validation failed for firmware \"%s\"\n",
2158			       fw_name);
2159			goto out;
2160		} else {
2161			new_fw++;
2162		}
2163	}
2164
2165	if (rdev->family == CHIP_KAVERI) {
2166		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2167		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2168		if (err) {
2169			goto out;
2170		} else {
2171			err = radeon_ucode_validate(rdev->mec2_fw);
2172			if (err) {
2173				goto out;
2174			} else {
2175				new_fw++;
2176			}
2177		}
2178	}
2179
2180	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2181	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2182	if (err) {
2183		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2184		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2185		if (err)
2186			goto out;
2187		if (rdev->rlc_fw->size != rlc_req_size) {
2188			printk(KERN_ERR
2189			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2190			       rdev->rlc_fw->size, fw_name);
2191			err = -EINVAL;
2192		}
2193	} else {
2194		err = radeon_ucode_validate(rdev->rlc_fw);
2195		if (err) {
2196			printk(KERN_ERR
2197			       "cik_fw: validation failed for firmware \"%s\"\n",
2198			       fw_name);
2199			goto out;
2200		} else {
2201			new_fw++;
2202		}
2203	}
2204
2205	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2206	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2207	if (err) {
2208		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2209		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2210		if (err)
2211			goto out;
2212		if (rdev->sdma_fw->size != sdma_req_size) {
2213			printk(KERN_ERR
2214			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2215			       rdev->sdma_fw->size, fw_name);
2216			err = -EINVAL;
2217		}
2218	} else {
2219		err = radeon_ucode_validate(rdev->sdma_fw);
2220		if (err) {
2221			printk(KERN_ERR
2222			       "cik_fw: validation failed for firmware \"%s\"\n",
2223			       fw_name);
2224			goto out;
2225		} else {
2226			new_fw++;
2227		}
2228	}
2229
2230	/* No SMC, MC ucode on APUs */
2231	if (!(rdev->flags & RADEON_IS_IGP)) {
2232		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2233		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2234		if (err) {
2235			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2236			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2237			if (err) {
2238				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2239				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2240				if (err)
2241					goto out;
2242			}
2243			if ((rdev->mc_fw->size != mc_req_size) &&
2244			    (rdev->mc_fw->size != mc2_req_size)) {
2245				printk(KERN_ERR
2246				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2247				       rdev->mc_fw->size, fw_name);
2248				err = -EINVAL;
2249			}
2250			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2251		} else {
2252			err = radeon_ucode_validate(rdev->mc_fw);
2253			if (err) {
2254				printk(KERN_ERR
2255				       "cik_fw: validation failed for firmware \"%s\"\n",
2256				       fw_name);
2257				goto out;
2258			} else {
2259				new_fw++;
2260			}
2261		}
2262
2263		if (new_smc)
2264			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2265		else
2266			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2267		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2268		if (err) {
2269			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2270			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2271			if (err) {
2272				printk(KERN_ERR
2273				       "smc: error loading firmware \"%s\"\n",
2274				       fw_name);
2275				release_firmware(rdev->smc_fw);
2276				rdev->smc_fw = NULL;
2277				err = 0;
2278			} else if (rdev->smc_fw->size != smc_req_size) {
2279				printk(KERN_ERR
2280				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2281				       rdev->smc_fw->size, fw_name);
2282				err = -EINVAL;
2283			}
2284		} else {
2285			err = radeon_ucode_validate(rdev->smc_fw);
2286			if (err) {
2287				printk(KERN_ERR
2288				       "cik_fw: validation failed for firmware \"%s\"\n",
2289				       fw_name);
2290				goto out;
2291			} else {
2292				new_fw++;
2293			}
2294		}
2295	}
2296
2297	if (new_fw == 0) {
2298		rdev->new_fw = false;
2299	} else if (new_fw < num_fw) {
2300		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2301		err = -EINVAL;
2302	} else {
2303		rdev->new_fw = true;
2304	}
2305
2306out:
2307	if (err) {
2308		if (err != -EINVAL)
2309			printk(KERN_ERR
2310			       "cik_cp: Failed to load firmware \"%s\"\n",
2311			       fw_name);
2312		release_firmware(rdev->pfp_fw);
2313		rdev->pfp_fw = NULL;
2314		release_firmware(rdev->me_fw);
2315		rdev->me_fw = NULL;
2316		release_firmware(rdev->ce_fw);
2317		rdev->ce_fw = NULL;
2318		release_firmware(rdev->mec_fw);
2319		rdev->mec_fw = NULL;
2320		release_firmware(rdev->mec2_fw);
2321		rdev->mec2_fw = NULL;
2322		release_firmware(rdev->rlc_fw);
2323		rdev->rlc_fw = NULL;
2324		release_firmware(rdev->sdma_fw);
2325		rdev->sdma_fw = NULL;
2326		release_firmware(rdev->mc_fw);
2327		rdev->mc_fw = NULL;
2328		release_firmware(rdev->smc_fw);
2329		rdev->smc_fw = NULL;
2330	}
2331	return err;
2332}
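
/*
 * The request/validate/fallback pattern above repeats once per ucode
 * image.  A hypothetical helper (sketch only, not part of this file)
 * that factors out the common case might look like:
 *
 *	static int cik_request_one_fw(struct radeon_device *rdev,
 *				      const struct firmware **fw,
 *				      const char *new_name,
 *				      const char *old_name,
 *				      size_t old_req_size, int *new_fw)
 *	{
 *		char fw_name[30];
 *		int err;
 *
 *		snprintf(fw_name, sizeof(fw_name), "radeon/%s.bin", new_name);
 *		err = request_firmware(fw, fw_name, rdev->dev);
 *		if (!err) {
 *			err = radeon_ucode_validate(*fw);
 *			if (!err)
 *				(*new_fw)++;
 *			return err;
 *		}
 *		snprintf(fw_name, sizeof(fw_name), "radeon/%s.bin", old_name);
 *		err = request_firmware(fw, fw_name, rdev->dev);
 *		if (err)
 *			return err;
 *		return ((*fw)->size == old_req_size) ? 0 : -EINVAL;
 *	}
 *
 * The open-coded version remains because several images (SMC, MC)
 * deviate from the common pattern.
 */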
2333
2334/*
2335 * Core functions
2336 */
2337/**
2338 * cik_tiling_mode_table_init - init the hw tiling table
2339 *
2340 * @rdev: radeon_device pointer
2341 *
2342 * Starting with SI, the tiling setup is done globally in a
2343 * set of 32 tiling modes.  Rather than selecting each set of
2344 * parameters per surface as on older asics, we just select
2345 * which index in the tiling table we want to use, and the
2346 * surface uses those parameters (CIK).
2347 */
2348static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2349{
2350	u32 *tile = rdev->config.cik.tile_mode_array;
2351	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2352	const u32 num_tile_mode_states =
2353			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2354	const u32 num_secondary_tile_mode_states =
2355			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2356	u32 reg_offset, split_equal_to_row_size;
2357	u32 num_pipe_configs;
2358	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2359		rdev->config.cik.max_shader_engines;
2360
2361	switch (rdev->config.cik.mem_row_size_in_kb) {
2362	case 1:
2363		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2364		break;
2365	case 2:
2366	default:
2367		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2368		break;
2369	case 4:
2370		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2371		break;
2372	}
2373
2374	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2375	if (num_pipe_configs > 8)
2376		num_pipe_configs = 16;
2377
2378	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2379		tile[reg_offset] = 0;
2380	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2381		macrotile[reg_offset] = 0;
2382
2383	switch (num_pipe_configs) {
2384	case 16:
2385		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2389		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2401		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404			   TILE_SPLIT(split_equal_to_row_size));
2405		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2412		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2413			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2414			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415			   TILE_SPLIT(split_equal_to_row_size));
2416		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2417			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2418		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2421		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2436		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2451		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2458			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463
2464		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467			   NUM_BANKS(ADDR_SURF_16_BANK));
2468		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471			   NUM_BANKS(ADDR_SURF_16_BANK));
2472		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475			   NUM_BANKS(ADDR_SURF_16_BANK));
2476		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479			   NUM_BANKS(ADDR_SURF_16_BANK));
2480		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483			   NUM_BANKS(ADDR_SURF_8_BANK));
2484		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487			   NUM_BANKS(ADDR_SURF_4_BANK));
2488		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491			   NUM_BANKS(ADDR_SURF_2_BANK));
2492		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495			   NUM_BANKS(ADDR_SURF_16_BANK));
2496		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2498			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499			   NUM_BANKS(ADDR_SURF_16_BANK));
2500		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503			    NUM_BANKS(ADDR_SURF_16_BANK));
2504		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507			    NUM_BANKS(ADDR_SURF_8_BANK));
2508		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511			    NUM_BANKS(ADDR_SURF_4_BANK));
2512		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515			    NUM_BANKS(ADDR_SURF_2_BANK));
2516		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519			    NUM_BANKS(ADDR_SURF_2_BANK));
2520
2521		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2522			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2523		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2524			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2525		break;
2526
2527	case 8:
2528		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2532		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2536		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2544		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547			   TILE_SPLIT(split_equal_to_row_size));
2548		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2551		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2556			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558			   TILE_SPLIT(split_equal_to_row_size));
2559		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2561		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2564		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2579		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2594		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606
2607		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610				NUM_BANKS(ADDR_SURF_16_BANK));
2611		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614				NUM_BANKS(ADDR_SURF_16_BANK));
2615		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618				NUM_BANKS(ADDR_SURF_16_BANK));
2619		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622				NUM_BANKS(ADDR_SURF_16_BANK));
2623		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626				NUM_BANKS(ADDR_SURF_8_BANK));
2627		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630				NUM_BANKS(ADDR_SURF_4_BANK));
2631		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634				NUM_BANKS(ADDR_SURF_2_BANK));
2635		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2637				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638				NUM_BANKS(ADDR_SURF_16_BANK));
2639		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642				NUM_BANKS(ADDR_SURF_16_BANK));
2643		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646				NUM_BANKS(ADDR_SURF_16_BANK));
2647		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650				NUM_BANKS(ADDR_SURF_16_BANK));
2651		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654				NUM_BANKS(ADDR_SURF_8_BANK));
2655		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658				NUM_BANKS(ADDR_SURF_4_BANK));
2659		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662				NUM_BANKS(ADDR_SURF_2_BANK));
2663
2664		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2665			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2666		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2667			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2668		break;
2669
2670	case 4:
2671		if (num_rbs == 4) {
2672		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2676		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2680		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2688		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691			   TILE_SPLIT(split_equal_to_row_size));
2692		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2699		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2700			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2701			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702			   TILE_SPLIT(split_equal_to_row_size));
2703		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2704			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2705		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2723		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2738		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2739			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750
2751		} else if (num_rbs < 4) {
2752		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2756		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2760		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2768		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771			   TILE_SPLIT(split_equal_to_row_size));
2772		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2775		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2780			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782			   TILE_SPLIT(split_equal_to_row_size));
2783		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2785		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2788		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2803		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2818		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830		}
2831
2832		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835				NUM_BANKS(ADDR_SURF_16_BANK));
2836		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839				NUM_BANKS(ADDR_SURF_16_BANK));
2840		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843				NUM_BANKS(ADDR_SURF_16_BANK));
2844		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847				NUM_BANKS(ADDR_SURF_16_BANK));
2848		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851				NUM_BANKS(ADDR_SURF_16_BANK));
2852		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855				NUM_BANKS(ADDR_SURF_8_BANK));
2856		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859				NUM_BANKS(ADDR_SURF_4_BANK));
2860		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863				NUM_BANKS(ADDR_SURF_16_BANK));
2864		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867				NUM_BANKS(ADDR_SURF_16_BANK));
2868		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871				NUM_BANKS(ADDR_SURF_16_BANK));
2872		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875				NUM_BANKS(ADDR_SURF_16_BANK));
2876		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879				NUM_BANKS(ADDR_SURF_16_BANK));
2880		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883				NUM_BANKS(ADDR_SURF_8_BANK));
2884		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887				NUM_BANKS(ADDR_SURF_4_BANK));
2888
2889		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2891		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2892			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2893		break;
2894
2895	case 2:
2896		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898			   PIPE_CONFIG(ADDR_SURF_P2) |
2899			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2900		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902			   PIPE_CONFIG(ADDR_SURF_P2) |
2903			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2904		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906			   PIPE_CONFIG(ADDR_SURF_P2) |
2907			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910			   PIPE_CONFIG(ADDR_SURF_P2) |
2911			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2912		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914			   PIPE_CONFIG(ADDR_SURF_P2) |
2915			   TILE_SPLIT(split_equal_to_row_size));
2916		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917			   PIPE_CONFIG(ADDR_SURF_P2) |
2918			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921			   PIPE_CONFIG(ADDR_SURF_P2) |
2922			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2923		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2925			   PIPE_CONFIG(ADDR_SURF_P2) |
2926			   TILE_SPLIT(split_equal_to_row_size));
2927		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928			   PIPE_CONFIG(ADDR_SURF_P2));
2929		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931			   PIPE_CONFIG(ADDR_SURF_P2));
2932		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934			    PIPE_CONFIG(ADDR_SURF_P2) |
2935			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938			    PIPE_CONFIG(ADDR_SURF_P2) |
2939			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942			    PIPE_CONFIG(ADDR_SURF_P2) |
2943			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945			    PIPE_CONFIG(ADDR_SURF_P2) |
2946			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2947		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949			    PIPE_CONFIG(ADDR_SURF_P2) |
2950			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953			    PIPE_CONFIG(ADDR_SURF_P2) |
2954			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957			    PIPE_CONFIG(ADDR_SURF_P2) |
2958			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2960			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961			    PIPE_CONFIG(ADDR_SURF_P2));
2962		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2963			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964			    PIPE_CONFIG(ADDR_SURF_P2) |
2965			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968			    PIPE_CONFIG(ADDR_SURF_P2) |
2969			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972			    PIPE_CONFIG(ADDR_SURF_P2) |
2973			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974
2975		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2977				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978				NUM_BANKS(ADDR_SURF_16_BANK));
2979		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982				NUM_BANKS(ADDR_SURF_16_BANK));
2983		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2985				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986				NUM_BANKS(ADDR_SURF_16_BANK));
2987		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990				NUM_BANKS(ADDR_SURF_16_BANK));
2991		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994				NUM_BANKS(ADDR_SURF_16_BANK));
2995		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998				NUM_BANKS(ADDR_SURF_16_BANK));
2999		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002				NUM_BANKS(ADDR_SURF_8_BANK));
3003		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006				NUM_BANKS(ADDR_SURF_16_BANK));
3007		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3008				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010				NUM_BANKS(ADDR_SURF_16_BANK));
3011		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014				NUM_BANKS(ADDR_SURF_16_BANK));
3015		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3016				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018				NUM_BANKS(ADDR_SURF_16_BANK));
3019		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022				NUM_BANKS(ADDR_SURF_16_BANK));
3023		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026				NUM_BANKS(ADDR_SURF_16_BANK));
3027		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030				NUM_BANKS(ADDR_SURF_8_BANK));
3031
3032		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3033			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3034		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3035			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3036		break;
3037
3038	default:
3039		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3040	}
3041}
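
/*
 * The tables are kept in rdev->config.cik.tile_mode_array and
 * .macrotile_mode_array in addition to being written to the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register ranges, so the rest of
 * the driver (e.g. userspace info queries) can report the values that
 * were actually programmed.
 */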
3042
3043/**
3044 * cik_select_se_sh - select which SE, SH to address
3045 *
3046 * @rdev: radeon_device pointer
3047 * @se_num: shader engine to address
3048 * @sh_num: sh block to address
3049 *
3050 * Select which SE, SH combinations to address. Certain
3051 * registers are instanced per SE or SH.  0xffffffff means
3052 * broadcast to all SEs or SHs (CIK).
3053 */
3054static void cik_select_se_sh(struct radeon_device *rdev,
3055			     u32 se_num, u32 sh_num)
3056{
3057	u32 data = INSTANCE_BROADCAST_WRITES;
3058
3059	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3060		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3061	else if (se_num == 0xffffffff)
3062		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3063	else if (sh_num == 0xffffffff)
3064		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3065	else
3066		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3067	WREG32(GRBM_GFX_INDEX, data);
3068}
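
/*
 * Callers are expected to hold rdev->grbm_idx_mutex around a
 * select/program/restore sequence and to return the selection to
 * broadcast (0xffffffff, 0xffffffff) when done, as cik_setup_rb()
 * below demonstrates.
 */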
3069
3070/**
3071 * cik_create_bitmask - create a bitmask
3072 *
3073 * @bit_width: length of the mask
3074 *
3075 * Create a variable-length bit mask (CIK).
3076 * Returns the bitmask.
3077 */
3078static u32 cik_create_bitmask(u32 bit_width)
3079{
3080	u32 i, mask = 0;
3081
3082	for (i = 0; i < bit_width; i++) {
3083		mask <<= 1;
3084		mask |= 1;
3085	}
3086	return mask;
3087}
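
/*
 * Equivalent closed form (sketch): for bit_width < 32 the loop yields
 * (1U << bit_width) - 1.  The loop form also stays well defined for
 * bit_width == 32, where the shift itself would be undefined
 * behaviour.
 */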
3088
3089/**
3090 * cik_get_rb_disabled - computes the mask of disabled RBs
3091 *
3092 * @rdev: radeon_device pointer
3093 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3095 * @sh_per_se: number of SH blocks per SE for the asic
3096 *
3097 * Calculates the bitmask of disabled RBs (CIK).
3098 * Returns the disabled RB bitmask.
3099 */
3100static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3101			      u32 max_rb_num_per_se,
3102			      u32 sh_per_se)
3103{
3104	u32 data, mask;
3105
3106	data = RREG32(CC_RB_BACKEND_DISABLE);
3107	if (data & 1)
3108		data &= BACKEND_DISABLE_MASK;
3109	else
3110		data = 0;
3111	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3112
3113	data >>= BACKEND_DISABLE_SHIFT;
3114
3115	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3116
3117	return data & mask;
3118}
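
/*
 * Worked example (hypothetical values): with max_rb_num_per_se = 2 and
 * sh_per_se = 1 the mask is 0x3; hardware reporting RB1 fused off in
 * the BACKEND_DISABLE field therefore yields a return value of 0x2.
 */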
3119
3120/**
3121 * cik_setup_rb - setup the RBs on the asic
3122 *
3123 * @rdev: radeon_device pointer
3124 * @se_num: number of SEs (shader engines) for the asic
3125 * @sh_per_se: number of SH blocks per SE for the asic
3126 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3127 *
3128 * Configures per-SE/SH RB registers (CIK).
3129 */
3130static void cik_setup_rb(struct radeon_device *rdev,
3131			 u32 se_num, u32 sh_per_se,
3132			 u32 max_rb_num_per_se)
3133{
3134	int i, j;
3135	u32 data, mask;
3136	u32 disabled_rbs = 0;
3137	u32 enabled_rbs = 0;
3138
3139	mutex_lock(&rdev->grbm_idx_mutex);
3140	for (i = 0; i < se_num; i++) {
3141		for (j = 0; j < sh_per_se; j++) {
3142			cik_select_se_sh(rdev, i, j);
3143			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3144			if (rdev->family == CHIP_HAWAII)
3145				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3146			else
3147				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3148		}
3149	}
3150	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3151	mutex_unlock(&rdev->grbm_idx_mutex);
3152
3153	mask = 1;
3154	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3155		if (!(disabled_rbs & mask))
3156			enabled_rbs |= mask;
3157		mask <<= 1;
3158	}
3159
3160	rdev->config.cik.backend_enable_mask = enabled_rbs;
3161
3162	mutex_lock(&rdev->grbm_idx_mutex);
3163	for (i = 0; i < se_num; i++) {
3164		cik_select_se_sh(rdev, i, 0xffffffff);
3165		data = 0;
3166		for (j = 0; j < sh_per_se; j++) {
3167			switch (enabled_rbs & 3) {
3168			case 0:
3169				if (j == 0)
3170					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3171				else
3172					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3173				break;
3174			case 1:
3175				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3176				break;
3177			case 2:
3178				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3179				break;
3180			case 3:
3181			default:
3182				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3183				break;
3184			}
3185			enabled_rbs >>= 2;
3186		}
3187		WREG32(PA_SC_RASTER_CONFIG, data);
3188	}
3189	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3190	mutex_unlock(&rdev->grbm_idx_mutex);
3191}
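
/*
 * Worked example (a sketch): with se_num = 2, sh_per_se = 1 and
 * max_rb_num_per_se = 2 the loop above scans 4 RB bits; if RB 1 is
 * fused off (disabled_rbs = 0x2) then enabled_rbs = 0xd, which is
 * saved in rdev->config.cik.backend_enable_mask before the
 * PA_SC_RASTER_CONFIG mapping is programmed per SE.
 */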
3192
3193/**
3194 * cik_gpu_init - setup the 3D engine
3195 *
3196 * @rdev: radeon_device pointer
3197 *
3198 * Configures the 3D engine and tiling configuration
3199 * registers so that the 3D engine is usable.
3200 */
3201static void cik_gpu_init(struct radeon_device *rdev)
3202{
3203	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3204	u32 mc_shared_chmap, mc_arb_ramcfg;
3205	u32 hdp_host_path_cntl;
3206	u32 tmp;
3207	int i, j;
3208
3209	switch (rdev->family) {
3210	case CHIP_BONAIRE:
3211		rdev->config.cik.max_shader_engines = 2;
3212		rdev->config.cik.max_tile_pipes = 4;
3213		rdev->config.cik.max_cu_per_sh = 7;
3214		rdev->config.cik.max_sh_per_se = 1;
3215		rdev->config.cik.max_backends_per_se = 2;
3216		rdev->config.cik.max_texture_channel_caches = 4;
3217		rdev->config.cik.max_gprs = 256;
3218		rdev->config.cik.max_gs_threads = 32;
3219		rdev->config.cik.max_hw_contexts = 8;
3220
3221		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3222		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3223		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3224		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3225		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3226		break;
3227	case CHIP_HAWAII:
3228		rdev->config.cik.max_shader_engines = 4;
3229		rdev->config.cik.max_tile_pipes = 16;
3230		rdev->config.cik.max_cu_per_sh = 11;
3231		rdev->config.cik.max_sh_per_se = 1;
3232		rdev->config.cik.max_backends_per_se = 4;
3233		rdev->config.cik.max_texture_channel_caches = 16;
3234		rdev->config.cik.max_gprs = 256;
3235		rdev->config.cik.max_gs_threads = 32;
3236		rdev->config.cik.max_hw_contexts = 8;
3237
3238		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3239		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3240		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3241		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3242		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3243		break;
3244	case CHIP_KAVERI:
3245		rdev->config.cik.max_shader_engines = 1;
3246		rdev->config.cik.max_tile_pipes = 4;
3247		if ((rdev->pdev->device == 0x1304) ||
3248		    (rdev->pdev->device == 0x1305) ||
3249		    (rdev->pdev->device == 0x130C) ||
3250		    (rdev->pdev->device == 0x130F) ||
3251		    (rdev->pdev->device == 0x1310) ||
3252		    (rdev->pdev->device == 0x1311) ||
3253		    (rdev->pdev->device == 0x131C)) {
3254			rdev->config.cik.max_cu_per_sh = 8;
3255			rdev->config.cik.max_backends_per_se = 2;
3256		} else if ((rdev->pdev->device == 0x1309) ||
3257			   (rdev->pdev->device == 0x130A) ||
3258			   (rdev->pdev->device == 0x130D) ||
3259			   (rdev->pdev->device == 0x1313) ||
3260			   (rdev->pdev->device == 0x131D)) {
3261			rdev->config.cik.max_cu_per_sh = 6;
3262			rdev->config.cik.max_backends_per_se = 2;
3263		} else if ((rdev->pdev->device == 0x1306) ||
3264			   (rdev->pdev->device == 0x1307) ||
3265			   (rdev->pdev->device == 0x130B) ||
3266			   (rdev->pdev->device == 0x130E) ||
3267			   (rdev->pdev->device == 0x1315) ||
3268			   (rdev->pdev->device == 0x1318) ||
3269			   (rdev->pdev->device == 0x131B)) {
3270			rdev->config.cik.max_cu_per_sh = 4;
3271			rdev->config.cik.max_backends_per_se = 1;
3272		} else {
3273			rdev->config.cik.max_cu_per_sh = 3;
3274			rdev->config.cik.max_backends_per_se = 1;
3275		}
3276		rdev->config.cik.max_sh_per_se = 1;
3277		rdev->config.cik.max_texture_channel_caches = 4;
3278		rdev->config.cik.max_gprs = 256;
3279		rdev->config.cik.max_gs_threads = 16;
3280		rdev->config.cik.max_hw_contexts = 8;
3281
3282		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3283		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3284		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3285		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3286		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3287		break;
3288	case CHIP_KABINI:
3289	case CHIP_MULLINS:
3290	default:
3291		rdev->config.cik.max_shader_engines = 1;
3292		rdev->config.cik.max_tile_pipes = 2;
3293		rdev->config.cik.max_cu_per_sh = 2;
3294		rdev->config.cik.max_sh_per_se = 1;
3295		rdev->config.cik.max_backends_per_se = 1;
3296		rdev->config.cik.max_texture_channel_caches = 2;
3297		rdev->config.cik.max_gprs = 256;
3298		rdev->config.cik.max_gs_threads = 16;
3299		rdev->config.cik.max_hw_contexts = 8;
3300
3301		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3302		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3303		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3304		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3305		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3306		break;
3307	}
3308
3309	/* Initialize HDP */
3310	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3311		WREG32((0x2c14 + j), 0x00000000);
3312		WREG32((0x2c18 + j), 0x00000000);
3313		WREG32((0x2c1c + j), 0x00000000);
3314		WREG32((0x2c20 + j), 0x00000000);
3315		WREG32((0x2c24 + j), 0x00000000);
3316	}
3317
3318	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3319	WREG32(SRBM_INT_CNTL, 0x1);
3320	WREG32(SRBM_INT_ACK, 0x1);
3321
3322	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3323
3324	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3325	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3326
3327	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3328	rdev->config.cik.mem_max_burst_length_bytes = 256;
3329	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3330	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3331	if (rdev->config.cik.mem_row_size_in_kb > 4)
3332		rdev->config.cik.mem_row_size_in_kb = 4;
3333	/* XXX use MC settings? */
3334	rdev->config.cik.shader_engine_tile_size = 32;
3335	rdev->config.cik.num_gpus = 1;
3336	rdev->config.cik.multi_gpu_tile_size = 64;
3337
3338	/* fix up row size */
3339	gb_addr_config &= ~ROW_SIZE_MASK;
3340	switch (rdev->config.cik.mem_row_size_in_kb) {
3341	case 1:
3342	default:
3343		gb_addr_config |= ROW_SIZE(0);
3344		break;
3345	case 2:
3346		gb_addr_config |= ROW_SIZE(1);
3347		break;
3348	case 4:
3349		gb_addr_config |= ROW_SIZE(2);
3350		break;
3351	}
3352
3353	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3354	 * not have bank info, so create a custom tiling dword.
3355	 * bits 3:0   num_pipes
3356	 * bits 7:4   num_banks
3357	 * bits 11:8  group_size
3358	 * bits 15:12 row_size
3359	 */
3360	rdev->config.cik.tile_config = 0;
3361	switch (rdev->config.cik.num_tile_pipes) {
3362	case 1:
3363		rdev->config.cik.tile_config |= (0 << 0);
3364		break;
3365	case 2:
3366		rdev->config.cik.tile_config |= (1 << 0);
3367		break;
3368	case 4:
3369		rdev->config.cik.tile_config |= (2 << 0);
3370		break;
3371	case 8:
3372	default:
3373		/* XXX what about 12? */
3374		rdev->config.cik.tile_config |= (3 << 0);
3375		break;
3376	}
3377	rdev->config.cik.tile_config |=
3378		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3379	rdev->config.cik.tile_config |=
3380		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3381	rdev->config.cik.tile_config |=
3382		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
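
	/*
	 * Worked example (a sketch): with num_tile_pipes = 4 the low
	 * nibble is 0x2; the bank count from MC_ARB_RAMCFG fills bits
	 * 7:4, and the pipe-interleave and row-size fields from
	 * GB_ADDR_CONFIG fill bits 11:8 and 15:12 respectively.
	 */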
3383
3384	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3385	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3386	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3387	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3388	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3389	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3390	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3391	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3392
3393	cik_tiling_mode_table_init(rdev);
3394
3395	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3396		     rdev->config.cik.max_sh_per_se,
3397		     rdev->config.cik.max_backends_per_se);
3398
3399	rdev->config.cik.active_cus = 0;
3400	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3401		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3402			rdev->config.cik.active_cus +=
3403				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3404		}
3405	}
3406
3407	/* set HW defaults for 3D engine */
3408	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3409
3410	mutex_lock(&rdev->grbm_idx_mutex);
3411	/*
3412	 * make sure that the following register writes are broadcast
3413	 * to all the shaders
3414	 */
3415	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3416	WREG32(SX_DEBUG_1, 0x20);
3417
3418	WREG32(TA_CNTL_AUX, 0x00010000);
3419
3420	tmp = RREG32(SPI_CONFIG_CNTL);
3421	tmp |= 0x03000000;
3422	WREG32(SPI_CONFIG_CNTL, tmp);
3423
3424	WREG32(SQ_CONFIG, 1);
3425
3426	WREG32(DB_DEBUG, 0);
3427
3428	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3429	tmp |= 0x00000400;
3430	WREG32(DB_DEBUG2, tmp);
3431
3432	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3433	tmp |= 0x00020200;
3434	WREG32(DB_DEBUG3, tmp);
3435
3436	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3437	tmp |= 0x00018208;
3438	WREG32(CB_HW_CONTROL, tmp);
3439
3440	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3441
3442	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3443				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3444				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3445				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3446
3447	WREG32(VGT_NUM_INSTANCES, 1);
3448
3449	WREG32(CP_PERFMON_CNTL, 0);
3450
3451	WREG32(SQ_CONFIG, 0);
3452
3453	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3454					  FORCE_EOV_MAX_REZ_CNT(255)));
3455
3456	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3457	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3458
3459	WREG32(VGT_GS_VERTEX_REUSE, 16);
3460	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3461
3462	tmp = RREG32(HDP_MISC_CNTL);
3463	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3464	WREG32(HDP_MISC_CNTL, tmp);
3465
3466	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3467	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3468
3469	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3470	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3471	mutex_unlock(&rdev->grbm_idx_mutex);
3472
3473	udelay(50);
3474}
3475
3476/*
3477 * GPU scratch registers helpers function.
3478 */
3479/**
3480 * cik_scratch_init - setup driver info for CP scratch regs
3481 *
3482 * @rdev: radeon_device pointer
3483 *
3484 * Set up the number and offset of the CP scratch registers.
3485 * NOTE: use of CP scratch registers is a legacy interface and
3486 * is not used by default on newer asics (r6xx+).  On newer asics,
3487 * memory buffers are used for fences rather than scratch regs.
3488 */
3489static void cik_scratch_init(struct radeon_device *rdev)
3490{
3491	int i;
3492
3493	rdev->scratch.num_reg = 7;
3494	rdev->scratch.reg_base = SCRATCH_REG0;
3495	for (i = 0; i < rdev->scratch.num_reg; i++) {
3496		rdev->scratch.free[i] = true;
3497		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3498	}
3499}
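
/*
 * Typical use of the scratch registers set up above (sketch, following
 * cik_ring_test() below):
 *
 *	r = radeon_scratch_get(rdev, &scratch);
 *	WREG32(scratch, 0xCAFEDEAD);
 *	... emit a packet that writes 0xDEADBEEF to the register ...
 *	radeon_scratch_free(rdev, scratch);
 */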
3500
3501/**
3502 * cik_ring_test - basic gfx ring test
3503 *
3504 * @rdev: radeon_device pointer
3505 * @ring: radeon_ring structure holding ring information
3506 *
3507 * Allocate a scratch register and write to it using the gfx ring (CIK).
3508 * Provides a basic gfx ring test to verify that the ring is working.
3509 * Used by cik_cp_gfx_resume().
3510 * Returns 0 on success, error on failure.
3511 */
3512int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3513{
3514	uint32_t scratch;
3515	uint32_t tmp = 0;
3516	unsigned i;
3517	int r;
3518
3519	r = radeon_scratch_get(rdev, &scratch);
3520	if (r) {
3521		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3522		return r;
3523	}
3524	WREG32(scratch, 0xCAFEDEAD);
3525	r = radeon_ring_lock(rdev, ring, 3);
3526	if (r) {
3527		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3528		radeon_scratch_free(rdev, scratch);
3529		return r;
3530	}
3531	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3532	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3533	radeon_ring_write(ring, 0xDEADBEEF);
3534	radeon_ring_unlock_commit(rdev, ring, false);
3535
3536	for (i = 0; i < rdev->usec_timeout; i++) {
3537		tmp = RREG32(scratch);
3538		if (tmp == 0xDEADBEEF)
3539			break;
3540		DRM_UDELAY(1);
3541	}
3542	if (i < rdev->usec_timeout) {
3543		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3544	} else {
3545		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3546			  ring->idx, scratch, tmp);
3547		r = -EINVAL;
3548	}
3549	radeon_scratch_free(rdev, scratch);
3550	return r;
3551}
3552
3553/**
3554 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3555 *
3556 * @rdev: radeon_device pointer
3557 * @ridx: radeon ring index
3558 *
3559 * Emits an hdp flush on the cp.
3560 */
3561static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3562				       int ridx)
3563{
3564	struct radeon_ring *ring = &rdev->ring[ridx];
3565	u32 ref_and_mask;
3566
3567	switch (ring->idx) {
3568	case CAYMAN_RING_TYPE_CP1_INDEX:
3569	case CAYMAN_RING_TYPE_CP2_INDEX:
3570	default:
3571		switch (ring->me) {
3572		case 0:
3573			ref_and_mask = CP2 << ring->pipe;
3574			break;
3575		case 1:
3576			ref_and_mask = CP6 << ring->pipe;
3577			break;
3578		default:
3579			return;
3580		}
3581		break;
3582	case RADEON_RING_TYPE_GFX_INDEX:
3583		ref_and_mask = CP0;
3584		break;
3585	}
3586
3587	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3588	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3589				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3590				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3591	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3592	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3593	radeon_ring_write(ring, ref_and_mask);
3594	radeon_ring_write(ring, ref_and_mask);
3595	radeon_ring_write(ring, 0x20); /* poll interval */
3596}
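
/*
 * In other words (per the flags emitted above): the CP writes
 * ref_and_mask to GPU_HDP_FLUSH_REQ, then polls GPU_HDP_FLUSH_DONE
 * every 0x20 clocks until the masked value compares equal, so the
 * packet does not retire until the HDP flush has completed.
 */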
3597
3598/**
3599 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3600 *
3601 * @rdev: radeon_device pointer
3602 * @fence: radeon fence object
3603 *
3604 * Emits a fence sequence number on the gfx ring and flushes
3605 * GPU caches.
3606 */
3607void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3608			     struct radeon_fence *fence)
3609{
3610	struct radeon_ring *ring = &rdev->ring[fence->ring];
3611	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3612
3613	/* Workaround for cache flush problems. First send a dummy EOP
3614	 * event down the pipe with a sequence number one below the real one.
3615	 */
3616	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3617	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3618				 EOP_TC_ACTION_EN |
3619				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3620				 EVENT_INDEX(5)));
3621	radeon_ring_write(ring, addr & 0xfffffffc);
3622	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3623				DATA_SEL(1) | INT_SEL(0));
3624	radeon_ring_write(ring, fence->seq - 1);
3625	radeon_ring_write(ring, 0);
3626
3627	/* Then send the real EOP event down the pipe. */
3628	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3629	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3630				 EOP_TC_ACTION_EN |
3631				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3632				 EVENT_INDEX(5)));
3633	radeon_ring_write(ring, addr & 0xfffffffc);
3634	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3635	radeon_ring_write(ring, fence->seq);
3636	radeon_ring_write(ring, 0);
3637}
3638
3639/**
3640 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3641 *
3642 * @rdev: radeon_device pointer
3643 * @fence: radeon fence object
3644 *
3645 * Emits a fence sequence number on the compute ring and flushes
3646 * GPU caches.
3647 */
3648void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3649				 struct radeon_fence *fence)
3650{
3651	struct radeon_ring *ring = &rdev->ring[fence->ring];
3652	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3653
3654	/* RELEASE_MEM - flush caches, send int */
3655	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3656	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3657				 EOP_TC_ACTION_EN |
3658				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3659				 EVENT_INDEX(5)));
3660	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3661	radeon_ring_write(ring, addr & 0xfffffffc);
3662	radeon_ring_write(ring, upper_32_bits(addr));
3663	radeon_ring_write(ring, fence->seq);
3664	radeon_ring_write(ring, 0);
3665}
3666
3667/**
3668 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3669 *
3670 * @rdev: radeon_device pointer
3671 * @ring: radeon ring buffer object
3672 * @semaphore: radeon semaphore object
3673 * @emit_wait: Is this a semaphore wait?
3674 *
3675 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3676 * from running ahead of semaphore waits.
3677 */
3678bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3679			     struct radeon_ring *ring,
3680			     struct radeon_semaphore *semaphore,
3681			     bool emit_wait)
3682{
3683	uint64_t addr = semaphore->gpu_addr;
3684	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3685
3686	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3687	radeon_ring_write(ring, lower_32_bits(addr));
3688	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3689
3690	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3691		/* Prevent the PFP from running ahead of the semaphore wait */
3692		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3693		radeon_ring_write(ring, 0x0);
3694	}
3695
3696	return true;
3697}
3698
3699/**
3700 * cik_copy_cpdma - copy pages using the CP DMA engine
3701 *
3702 * @rdev: radeon_device pointer
3703 * @src_offset: src GPU address
3704 * @dst_offset: dst GPU address
3705 * @num_gpu_pages: number of GPU pages to xfer
3706 * @resv: reservation object to sync to
3707 *
3708 * Copy GPU pages using the CP DMA engine (CIK+).
3709 * Used by the radeon ttm implementation to move pages if
3710 * registered as the asic copy callback.
3711 */
3712struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3713				    uint64_t src_offset, uint64_t dst_offset,
3714				    unsigned num_gpu_pages,
3715				    struct reservation_object *resv)
3716{
3717	struct radeon_fence *fence;
3718	struct radeon_sync sync;
3719	int ring_index = rdev->asic->copy.blit_ring_index;
3720	struct radeon_ring *ring = &rdev->ring[ring_index];
3721	u32 size_in_bytes, cur_size_in_bytes, control;
3722	int i, num_loops;
3723	int r = 0;
3724
3725	radeon_sync_create(&sync);
3726
3727	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3728	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3729	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3730	if (r) {
3731		DRM_ERROR("radeon: moving bo (%d).\n", r);
3732		radeon_sync_free(rdev, &sync, NULL);
3733		return ERR_PTR(r);
3734	}
3735
3736	radeon_sync_resv(rdev, &sync, resv, false);
3737	radeon_sync_rings(rdev, &sync, ring->idx);
3738
3739	for (i = 0; i < num_loops; i++) {
3740		cur_size_in_bytes = size_in_bytes;
3741		if (cur_size_in_bytes > 0x1fffff)
3742			cur_size_in_bytes = 0x1fffff;
3743		size_in_bytes -= cur_size_in_bytes;
3744		control = 0;
3745		if (size_in_bytes == 0)
3746			control |= PACKET3_DMA_DATA_CP_SYNC;
3747		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3748		radeon_ring_write(ring, control);
3749		radeon_ring_write(ring, lower_32_bits(src_offset));
3750		radeon_ring_write(ring, upper_32_bits(src_offset));
3751		radeon_ring_write(ring, lower_32_bits(dst_offset));
3752		radeon_ring_write(ring, upper_32_bits(dst_offset));
3753		radeon_ring_write(ring, cur_size_in_bytes);
3754		src_offset += cur_size_in_bytes;
3755		dst_offset += cur_size_in_bytes;
3756	}
3757
3758	r = radeon_fence_emit(rdev, &fence, ring->idx);
3759	if (r) {
3760		radeon_ring_unlock_undo(rdev, ring);
3761		radeon_sync_free(rdev, &sync, NULL);
3762		return ERR_PTR(r);
3763	}
3764
3765	radeon_ring_unlock_commit(rdev, ring, false);
3766	radeon_sync_free(rdev, &sync, fence);
3767
3768	return fence;
3769}
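
/*
 * Worked example: copying 2048 GPU pages (8 MiB with 4 KiB pages) gives
 * size_in_bytes = 0x800000, so num_loops = DIV_ROUND_UP(0x800000, 0x1fffff)
 * = 5: four DMA_DATA packets of 0x1fffff bytes and a final 4-byte one,
 * which also carries PACKET3_DMA_DATA_CP_SYNC.
 */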
3770
3771/*
3772 * IB stuff
3773 */
3774/**
3775 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3776 *
3777 * @rdev: radeon_device pointer
3778 * @ib: radeon indirect buffer object
3779 *
3780 * Emits a DE (drawing engine) or CE (constant engine) IB
3781 * on the gfx ring.  IBs are usually generated by userspace
3782 * acceleration drivers and submitted to the kernel for
3783 * scheduling on the ring.  This function schedules the IB
3784 * on the gfx ring for execution by the GPU.
3785 */
3786void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3787{
3788	struct radeon_ring *ring = &rdev->ring[ib->ring];
3789	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3790	u32 header, control = INDIRECT_BUFFER_VALID;
3791
3792	if (ib->is_const_ib) {
3793		/* set switch buffer packet before const IB */
3794		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3795		radeon_ring_write(ring, 0);
3796
3797		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3798	} else {
3799		u32 next_rptr;
3800		if (ring->rptr_save_reg) {
3801			next_rptr = ring->wptr + 3 + 4;
3802			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3803			radeon_ring_write(ring, ((ring->rptr_save_reg -
3804						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3805			radeon_ring_write(ring, next_rptr);
3806		} else if (rdev->wb.enabled) {
3807			next_rptr = ring->wptr + 5 + 4;
3808			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3809			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3810			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3811			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3812			radeon_ring_write(ring, next_rptr);
3813		}
3814
3815		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3816	}
3817
3818	control |= ib->length_dw | (vm_id << 24);
3819
3820	radeon_ring_write(ring, header);
3821	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3822	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3823	radeon_ring_write(ring, control);
3824}
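
/*
 * Example control dword (a sketch): a 256-dword DE IB in VM 1 yields
 * control = INDIRECT_BUFFER_VALID | 256 | (1 << 24), i.e. the IB size
 * in dwords in the low bits and the VM ID shifted into bits 24+.
 */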
3825
3826/**
3827 * cik_ib_test - basic gfx ring IB test
3828 *
3829 * @rdev: radeon_device pointer
3830 * @ring: radeon_ring structure holding ring information
3831 *
3832 * Allocate an IB and execute it on the gfx ring (CIK).
3833 * Provides a basic gfx ring test to verify that IBs are working.
3834 * Returns 0 on success, error on failure.
3835 */
3836int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3837{
3838	struct radeon_ib ib;
3839	uint32_t scratch;
3840	uint32_t tmp = 0;
3841	unsigned i;
3842	int r;
3843
3844	r = radeon_scratch_get(rdev, &scratch);
3845	if (r) {
3846		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3847		return r;
3848	}
3849	WREG32(scratch, 0xCAFEDEAD);
3850	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3851	if (r) {
3852		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3853		radeon_scratch_free(rdev, scratch);
3854		return r;
3855	}
3856	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3857	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3858	ib.ptr[2] = 0xDEADBEEF;
3859	ib.length_dw = 3;
3860	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3861	if (r) {
3862		radeon_scratch_free(rdev, scratch);
3863		radeon_ib_free(rdev, &ib);
3864		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3865		return r;
3866	}
3867	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3868		RADEON_USEC_IB_TEST_TIMEOUT));
3869	if (r < 0) {
3870		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3871		radeon_scratch_free(rdev, scratch);
3872		radeon_ib_free(rdev, &ib);
3873		return r;
3874	} else if (r == 0) {
3875		DRM_ERROR("radeon: fence wait timed out.\n");
3876		radeon_scratch_free(rdev, scratch);
3877		radeon_ib_free(rdev, &ib);
3878		return -ETIMEDOUT;
3879	}
3880	r = 0;
3881	for (i = 0; i < rdev->usec_timeout; i++) {
3882		tmp = RREG32(scratch);
3883		if (tmp == 0xDEADBEEF)
3884			break;
3885		DRM_UDELAY(1);
3886	}
3887	if (i < rdev->usec_timeout) {
3888		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3889	} else {
3890		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3891			  scratch, tmp);
3892		r = -EINVAL;
3893	}
3894	radeon_scratch_free(rdev, scratch);
3895	radeon_ib_free(rdev, &ib);
3896	return r;
3897}
3898
3899/*
3900 * CP.
3901 * On CIK, gfx and compute now have independent command processors.
3902 *
3903 * GFX
3904 * Gfx consists of a single ring and can process both gfx jobs and
3905 * compute jobs.  The gfx CP consists of three microengines (ME):
3906 * PFP - Pre-Fetch Parser
3907 * ME - Micro Engine
3908 * CE - Constant Engine
3909 * The PFP and ME make up what is considered the Drawing Engine (DE).
3910 * The CE is an asynchronous engine used for updating buffer descriptors
3911 * used by the DE so that they can be loaded into cache in parallel
3912 * while the DE is processing state update packets.
3913 *
3914 * Compute
3915 * The compute CP consists of two microengines (ME):
3916 * MEC1 - Compute MicroEngine 1
3917 * MEC2 - Compute MicroEngine 2
3918 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3919 * The queues are exposed to userspace and are programmed directly
3920 * by the compute runtime.
3921 */
3922/**
3923 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3924 *
3925 * @rdev: radeon_device pointer
3926 * @enable: enable or disable the MEs
3927 *
3928 * Halts or unhalts the gfx MEs.
3929 */
3930static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3931{
3932	if (enable) {
3933		WREG32(CP_ME_CNTL, 0);
3934	} else {
3935		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3936			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3937		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3938		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3939	}
3940	udelay(50);
3941}
3942
3943/**
3944 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3945 *
3946 * @rdev: radeon_device pointer
3947 *
3948 * Loads the gfx PFP, ME, and CE ucode.
3949 * Returns 0 for success, -EINVAL if the ucode is not available.
3950 */
3951static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3952{
3953	int i;
3954
3955	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3956		return -EINVAL;
3957
3958	cik_cp_gfx_enable(rdev, false);
3959
3960	if (rdev->new_fw) {
3961		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3962			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3963		const struct gfx_firmware_header_v1_0 *ce_hdr =
3964			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3965		const struct gfx_firmware_header_v1_0 *me_hdr =
3966			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3967		const __le32 *fw_data;
3968		u32 fw_size;
3969
3970		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3971		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3972		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3973
3974		/* PFP */
3975		fw_data = (const __le32 *)
3976			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3977		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3978		WREG32(CP_PFP_UCODE_ADDR, 0);
3979		for (i = 0; i < fw_size; i++)
3980			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3981		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3982
3983		/* CE */
3984		fw_data = (const __le32 *)
3985			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3986		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3987		WREG32(CP_CE_UCODE_ADDR, 0);
3988		for (i = 0; i < fw_size; i++)
3989			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3990		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3991
3992		/* ME */
3993		fw_data = (const __le32 *)
3994			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3995		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3996		WREG32(CP_ME_RAM_WADDR, 0);
3997		for (i = 0; i < fw_size; i++)
3998			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3999		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4000		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4001	} else {
4002		const __be32 *fw_data;
4003
4004		/* PFP */
4005		fw_data = (const __be32 *)rdev->pfp_fw->data;
4006		WREG32(CP_PFP_UCODE_ADDR, 0);
4007		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4008			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4009		WREG32(CP_PFP_UCODE_ADDR, 0);
4010
4011		/* CE */
4012		fw_data = (const __be32 *)rdev->ce_fw->data;
4013		WREG32(CP_CE_UCODE_ADDR, 0);
4014		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4015			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4016		WREG32(CP_CE_UCODE_ADDR, 0);
4017
4018		/* ME */
4019		fw_data = (const __be32 *)rdev->me_fw->data;
4020		WREG32(CP_ME_RAM_WADDR, 0);
4021		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4022			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4023		WREG32(CP_ME_RAM_WADDR, 0);
4024	}
4025
4026	return 0;
4027}
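
/*
 * The PFP/CE/ME upload loops above all follow the same ADDR/DATA
 * register pattern; a generic helper would look roughly like this
 * (sketch only, not used by the driver):
 */
static void __maybe_unused cik_load_ucode(struct radeon_device *rdev,
					  u32 addr_reg, u32 data_reg,
					  const __le32 *fw_data, u32 fw_size)
{
	u32 i;

	/* reset the write address, stream the words, then clear it again
	 * (the new-firmware path writes the ucode version here instead)
	 */
	WREG32(addr_reg, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(data_reg, le32_to_cpup(fw_data++));
	WREG32(addr_reg, 0);
}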
4028
4029/**
4030 * cik_cp_gfx_start - start the gfx ring
4031 *
4032 * @rdev: radeon_device pointer
4033 *
4034 * Enables the ring and loads the clear state context and other
4035 * packets required to init the ring.
4036 * Returns 0 for success, error for failure.
4037 */
4038static int cik_cp_gfx_start(struct radeon_device *rdev)
4039{
4040	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4041	int r, i;
4042
4043	/* init the CP */
4044	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4045	WREG32(CP_ENDIAN_SWAP, 0);
4046	WREG32(CP_DEVICE_ID, 1);
4047
4048	cik_cp_gfx_enable(rdev, true);
4049
4050	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4051	if (r) {
4052		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4053		return r;
4054	}
4055
4056	/* init the CE partitions.  CE only used for gfx on CIK */
4057	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4058	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4059	radeon_ring_write(ring, 0x8000);
4060	radeon_ring_write(ring, 0x8000);
4061
4062	/* setup clear context state */
4063	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4064	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4065
4066	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4067	radeon_ring_write(ring, 0x80000000);
4068	radeon_ring_write(ring, 0x80000000);
4069
4070	for (i = 0; i < cik_default_size; i++)
4071		radeon_ring_write(ring, cik_default_state[i]);
4072
4073	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4074	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4075
4076	/* set clear context state */
4077	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4078	radeon_ring_write(ring, 0);
4079
4080	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4081	radeon_ring_write(ring, 0x00000316);
4082	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4083	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4084
4085	radeon_ring_unlock_commit(rdev, ring, false);
4086
4087	return 0;
4088}
4089
4090/**
4091 * cik_cp_gfx_fini - stop the gfx ring
4092 *
4093 * @rdev: radeon_device pointer
4094 *
4095 * Stop the gfx ring and tear down the driver ring
4096 * info.
4097 */
4098static void cik_cp_gfx_fini(struct radeon_device *rdev)
4099{
4100	cik_cp_gfx_enable(rdev, false);
4101	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102}
4103
4104/**
4105 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4106 *
4107 * @rdev: radeon_device pointer
4108 *
4109 * Program the location and size of the gfx ring buffer
4110 * and test it to make sure it's working.
4111 * Returns 0 for success, error for failure.
4112 */
4113static int cik_cp_gfx_resume(struct radeon_device *rdev)
4114{
4115	struct radeon_ring *ring;
4116	u32 tmp;
4117	u32 rb_bufsz;
4118	u64 rb_addr;
4119	int r;
4120
4121	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4122	if (rdev->family != CHIP_HAWAII)
4123		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4124
4125	/* Set the write pointer delay */
4126	WREG32(CP_RB_WPTR_DELAY, 0);
4127
4128	/* set the RB to use vmid 0 */
4129	WREG32(CP_RB_VMID, 0);
4130
4131	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4132
4133	/* ring 0 - compute and gfx */
4134	/* Set ring buffer size */
4135	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4136	rb_bufsz = order_base_2(ring->ring_size / 8);
4137	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4138#ifdef __BIG_ENDIAN
4139	tmp |= BUF_SWAP_32BIT;
4140#endif
4141	WREG32(CP_RB0_CNTL, tmp);
4142
4143	/* Initialize the ring buffer's read and write pointers */
4144	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4145	ring->wptr = 0;
4146	WREG32(CP_RB0_WPTR, ring->wptr);
4147
4148	/* set the wb address whether it's enabled or not */
4149	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4150	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4151
4152	/* scratch register shadowing is no longer supported */
4153	WREG32(SCRATCH_UMSK, 0);
4154
4155	if (!rdev->wb.enabled)
4156		tmp |= RB_NO_UPDATE;
4157
4158	mdelay(1);
4159	WREG32(CP_RB0_CNTL, tmp);
4160
4161	rb_addr = ring->gpu_addr >> 8;
4162	WREG32(CP_RB0_BASE, rb_addr);
4163	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4164
4165	/* start the ring */
4166	cik_cp_gfx_start(rdev);
4167	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4168	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4169	if (r) {
4170		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4171		return r;
4172	}
4173
4174	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4175		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4176
4177	return 0;
4178}
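
/*
 * Worked example for CP_RB0_CNTL above (a sketch): a 1 MiB gfx ring
 * gives rb_bufsz = order_base_2(0x100000 / 8) = 17, and with 4 KiB GPU
 * pages the rptr-report block size is order_base_2(4096 / 8) = 9, so
 * tmp = (9 << 8) | 17 before any endian-swap or RB_NO_UPDATE bits are
 * OR'd in.
 */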
4179
4180u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4181		     struct radeon_ring *ring)
4182{
4183	u32 rptr;
4184
4185	if (rdev->wb.enabled)
4186		rptr = rdev->wb.wb[ring->rptr_offs/4];
4187	else
4188		rptr = RREG32(CP_RB0_RPTR);
4189
4190	return rptr;
4191}
4192
4193u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4194		     struct radeon_ring *ring)
4195{
4196	return RREG32(CP_RB0_WPTR);
4197}
4198
4199void cik_gfx_set_wptr(struct radeon_device *rdev,
4200		      struct radeon_ring *ring)
4201{
4202	WREG32(CP_RB0_WPTR, ring->wptr);
4203	(void)RREG32(CP_RB0_WPTR);
4204}
4205
4206u32 cik_compute_get_rptr(struct radeon_device *rdev,
4207			 struct radeon_ring *ring)
4208{
4209	u32 rptr;
4210
4211	if (rdev->wb.enabled) {
4212		rptr = rdev->wb.wb[ring->rptr_offs/4];
4213	} else {
4214		mutex_lock(&rdev->srbm_mutex);
4215		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4216		rptr = RREG32(CP_HQD_PQ_RPTR);
4217		cik_srbm_select(rdev, 0, 0, 0, 0);
4218		mutex_unlock(&rdev->srbm_mutex);
4219	}
4220
4221	return rptr;
4222}
4223
4224u32 cik_compute_get_wptr(struct radeon_device *rdev,
4225			 struct radeon_ring *ring)
4226{
4227	u32 wptr;
4228
4229	if (rdev->wb.enabled) {
4230		/* XXX check if swapping is necessary on BE */
4231		wptr = rdev->wb.wb[ring->wptr_offs/4];
4232	} else {
4233		mutex_lock(&rdev->srbm_mutex);
4234		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4235		wptr = RREG32(CP_HQD_PQ_WPTR);
4236		cik_srbm_select(rdev, 0, 0, 0, 0);
4237		mutex_unlock(&rdev->srbm_mutex);
4238	}
4239
4240	return wptr;
4241}
4242
4243void cik_compute_set_wptr(struct radeon_device *rdev,
4244			  struct radeon_ring *ring)
4245{
4246	/* XXX check if swapping is necessary on BE */
4247	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4248	WDOORBELL32(ring->doorbell_index, ring->wptr);
4249}
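
/*
 * Note the ordering above: the write-back copy of the wptr is updated
 * before the doorbell is rung, so the CP always samples a current
 * value when the doorbell wakes it.
 */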
4250
4251static void cik_compute_stop(struct radeon_device *rdev,
4252			     struct radeon_ring *ring)
4253{
4254	u32 j, tmp;
4255
4256	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4257	/* Disable wptr polling. */
4258	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4259	tmp &= ~WPTR_POLL_EN;
4260	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4261	/* Disable HQD. */
4262	if (RREG32(CP_HQD_ACTIVE) & 1) {
4263		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4264		for (j = 0; j < rdev->usec_timeout; j++) {
4265			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4266				break;
4267			udelay(1);
4268		}
4269		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4270		WREG32(CP_HQD_PQ_RPTR, 0);
4271		WREG32(CP_HQD_PQ_WPTR, 0);
4272	}
4273	cik_srbm_select(rdev, 0, 0, 0, 0);
4274}
4275
4276/**
4277 * cik_cp_compute_enable - enable/disable the compute CP MEs
4278 *
4279 * @rdev: radeon_device pointer
4280 * @enable: enable or disable the MEs
4281 *
4282 * Halts or unhalts the compute MEs.
4283 */
4284static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4285{
4286	if (enable) {
4287		WREG32(CP_MEC_CNTL, 0);
4288	} else {
4289		/*
4290		 * To make hibernation reliable we need to clear compute ring
4291		 * configuration before halting the compute ring.
4292		 */
4293		mutex_lock(&rdev->srbm_mutex);
4294		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4295		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4296		mutex_unlock(&rdev->srbm_mutex);
4297
4298		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4299		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4300		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4301	}
4302	udelay(50);
4303}
4304
4305/**
4306 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4307 *
4308 * @rdev: radeon_device pointer
4309 *
4310 * Loads the compute MEC1&2 ucode.
4311 * Returns 0 for success, -EINVAL if the ucode is not available.
4312 */
4313static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4314{
4315	int i;
4316
4317	if (!rdev->mec_fw)
4318		return -EINVAL;
4319
4320	cik_cp_compute_enable(rdev, false);
4321
4322	if (rdev->new_fw) {
4323		const struct gfx_firmware_header_v1_0 *mec_hdr =
4324			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4325		const __le32 *fw_data;
4326		u32 fw_size;
4327
4328		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4329
4330		/* MEC1 */
4331		fw_data = (const __le32 *)
4332			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4333		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4334		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4335		for (i = 0; i < fw_size; i++)
4336			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4337		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4338
4339		/* MEC2 */
4340		if (rdev->family == CHIP_KAVERI) {
4341			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4342				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4343
4344			fw_data = (const __le32 *)
4345				(rdev->mec2_fw->data +
4346				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4347			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4348			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4349			for (i = 0; i < fw_size; i++)
4350				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4351			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4352		}
4353	} else {
4354		const __be32 *fw_data;
4355
4356		/* MEC1 */
4357		fw_data = (const __be32 *)rdev->mec_fw->data;
4358		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4359		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4360			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4361		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4362
4363		if (rdev->family == CHIP_KAVERI) {
4364			/* MEC2 */
4365			fw_data = (const __be32 *)rdev->mec_fw->data;
4366			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4367			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4368				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4369			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4370		}
4371	}
4372
4373	return 0;
4374}
4375
4376/**
4377 * cik_cp_compute_start - start the compute queues
4378 *
4379 * @rdev: radeon_device pointer
4380 *
4381 * Enable the compute queues.
4382 * Returns 0 for success, error for failure.
4383 */
4384static int cik_cp_compute_start(struct radeon_device *rdev)
4385{
4386	cik_cp_compute_enable(rdev, true);
4387
4388	return 0;
4389}
4390
4391/**
4392 * cik_cp_compute_fini - stop the compute queues
4393 *
4394 * @rdev: radeon_device pointer
4395 *
4396 * Stop the compute queues and tear down the driver queue
4397 * info.
4398 */
4399static void cik_cp_compute_fini(struct radeon_device *rdev)
4400{
4401	int i, idx, r;
4402
4403	cik_cp_compute_enable(rdev, false);
4404
4405	for (i = 0; i < 2; i++) {
4406		if (i == 0)
4407			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4408		else
4409			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4410
4411		if (rdev->ring[idx].mqd_obj) {
4412			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4413			if (unlikely(r != 0))
4414				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4415
4416			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4417			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4418
4419			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4420			rdev->ring[idx].mqd_obj = NULL;
4421		}
4422	}
4423}
4424
4425static void cik_mec_fini(struct radeon_device *rdev)
4426{
4427	int r;
4428
4429	if (rdev->mec.hpd_eop_obj) {
4430		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4431		if (unlikely(r != 0))
4432			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4433		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4434		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4435
4436		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4437		rdev->mec.hpd_eop_obj = NULL;
4438	}
4439}
4440
4441#define MEC_HPD_SIZE 2048
4442
4443static int cik_mec_init(struct radeon_device *rdev)
4444{
4445	int r;
4446	u32 *hpd;
4447
4448	/*
4449	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4450	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4451	 * Nonetheless, we assign only 1 pipe because all other pipes will
4452	 * be handled by KFD
4453	 */
4454	rdev->mec.num_mec = 1;
4455	rdev->mec.num_pipe = 1;
4456	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4457
4458	if (rdev->mec.hpd_eop_obj == NULL) {
4459		r = radeon_bo_create(rdev,
4460				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4461				     PAGE_SIZE, true,
4462				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4463				     &rdev->mec.hpd_eop_obj);
4464		if (r) {
4465			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4466			return r;
4467		}
4468	}
4469
4470	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4471	if (unlikely(r != 0)) {
4472		cik_mec_fini(rdev);
4473		return r;
4474	}
4475	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4476			  &rdev->mec.hpd_eop_gpu_addr);
4477	if (r) {
4478		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4479		cik_mec_fini(rdev);
4480		return r;
4481	}
4482	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4483	if (r) {
4484		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4485		cik_mec_fini(rdev);
4486		return r;
4487	}
4488
4489	/* clear memory.  Not sure if this is required or not */
4490	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4491
4492	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4493	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4494
4495	return 0;
4496}
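
/*
 * Size check for the allocation above: with num_mec = num_pipe = 1 the
 * HPD EOP bo is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes, i.e. a single
 * page for the one pipe managed here (the remaining pipes are KFD's).
 */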
4497
4498struct hqd_registers
4499{
4500	u32 cp_mqd_base_addr;
4501	u32 cp_mqd_base_addr_hi;
4502	u32 cp_hqd_active;
4503	u32 cp_hqd_vmid;
4504	u32 cp_hqd_persistent_state;
4505	u32 cp_hqd_pipe_priority;
4506	u32 cp_hqd_queue_priority;
4507	u32 cp_hqd_quantum;
4508	u32 cp_hqd_pq_base;
4509	u32 cp_hqd_pq_base_hi;
4510	u32 cp_hqd_pq_rptr;
4511	u32 cp_hqd_pq_rptr_report_addr;
4512	u32 cp_hqd_pq_rptr_report_addr_hi;
4513	u32 cp_hqd_pq_wptr_poll_addr;
4514	u32 cp_hqd_pq_wptr_poll_addr_hi;
4515	u32 cp_hqd_pq_doorbell_control;
4516	u32 cp_hqd_pq_wptr;
4517	u32 cp_hqd_pq_control;
4518	u32 cp_hqd_ib_base_addr;
4519	u32 cp_hqd_ib_base_addr_hi;
4520	u32 cp_hqd_ib_rptr;
4521	u32 cp_hqd_ib_control;
4522	u32 cp_hqd_iq_timer;
4523	u32 cp_hqd_iq_rptr;
4524	u32 cp_hqd_dequeue_request;
4525	u32 cp_hqd_dma_offload;
4526	u32 cp_hqd_sema_cmd;
4527	u32 cp_hqd_msg_type;
4528	u32 cp_hqd_atomic0_preop_lo;
4529	u32 cp_hqd_atomic0_preop_hi;
4530	u32 cp_hqd_atomic1_preop_lo;
4531	u32 cp_hqd_atomic1_preop_hi;
4532	u32 cp_hqd_hq_scheduler0;
4533	u32 cp_hqd_hq_scheduler1;
4534	u32 cp_mqd_control;
4535};
4536
4537struct bonaire_mqd
4538{
4539	u32 header;
4540	u32 dispatch_initiator;
4541	u32 dimensions[3];
4542	u32 start_idx[3];
4543	u32 num_threads[3];
4544	u32 pipeline_stat_enable;
4545	u32 perf_counter_enable;
4546	u32 pgm[2];
4547	u32 tba[2];
4548	u32 tma[2];
4549	u32 pgm_rsrc[2];
4550	u32 vmid;
4551	u32 resource_limits;
4552	u32 static_thread_mgmt01[2];
4553	u32 tmp_ring_size;
4554	u32 static_thread_mgmt23[2];
4555	u32 restart[3];
4556	u32 thread_trace_enable;
4557	u32 reserved1;
4558	u32 user_data[16];
4559	u32 vgtcs_invoke_count[2];
4560	struct hqd_registers queue_state;
4561	u32 dequeue_cntr;
4562	u32 interrupt_queue[64];
4563};
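
/*
 * cik_cp_compute_resume() below allocates the MQD bo with
 * sizeof(struct bonaire_mqd) and PAGE_SIZE alignment; a compile-time
 * guard such as this sketch would document the assumption that the
 * whole descriptor fits in one page:
 *
 *	BUILD_BUG_ON(sizeof(struct bonaire_mqd) > PAGE_SIZE);
 */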
4564
4565/**
4566 * cik_cp_compute_resume - setup the compute queue registers
4567 *
4568 * @rdev: radeon_device pointer
4569 *
4570 * Program the compute queues and test them to make sure they
4571 * are working.
4572 * Returns 0 for success, error for failure.
4573 */
4574static int cik_cp_compute_resume(struct radeon_device *rdev)
4575{
4576	int r, i, j, idx;
4577	u32 tmp;
4578	bool use_doorbell = true;
4579	u64 hqd_gpu_addr;
4580	u64 mqd_gpu_addr;
4581	u64 eop_gpu_addr;
4582	u64 wb_gpu_addr;
4583	u32 *buf;
4584	struct bonaire_mqd *mqd;
4585
4586	r = cik_cp_compute_start(rdev);
4587	if (r)
4588		return r;
4589
4590	/* fix up chicken bits */
4591	tmp = RREG32(CP_CPF_DEBUG);
4592	tmp |= (1 << 23);
4593	WREG32(CP_CPF_DEBUG, tmp);
4594
4595	/* init the pipes */
4596	mutex_lock(&rdev->srbm_mutex);
4597
4598	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4599
4600	cik_srbm_select(rdev, 0, 0, 0, 0);
4601
4602	/* write the EOP addr */
4603	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4604	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4605
4606	/* set the VMID assigned */
4607	WREG32(CP_HPD_EOP_VMID, 0);
4608
4609	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4610	tmp = RREG32(CP_HPD_EOP_CONTROL);
4611	tmp &= ~EOP_SIZE_MASK;
4612	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4613	WREG32(CP_HPD_EOP_CONTROL, tmp);
4614
4615	mutex_unlock(&rdev->srbm_mutex);
4616
4617	/* init the queues.  Just two for now. */
4618	for (i = 0; i < 2; i++) {
4619		if (i == 0)
4620			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4621		else
4622			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4623
4624		if (rdev->ring[idx].mqd_obj == NULL) {
4625			r = radeon_bo_create(rdev,
4626					     sizeof(struct bonaire_mqd),
4627					     PAGE_SIZE, true,
4628					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4629					     NULL, &rdev->ring[idx].mqd_obj);
4630			if (r) {
4631				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4632				return r;
4633			}
4634		}
4635
4636		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4637		if (unlikely(r != 0)) {
4638			cik_cp_compute_fini(rdev);
4639			return r;
4640		}
4641		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4642				  &mqd_gpu_addr);
4643		if (r) {
4644			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4645			cik_cp_compute_fini(rdev);
4646			return r;
4647		}
4648		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4649		if (r) {
4650			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4651			cik_cp_compute_fini(rdev);
4652			return r;
4653		}
4654
4655		/* init the mqd struct */
4656		memset(buf, 0, sizeof(struct bonaire_mqd));
4657
4658		mqd = (struct bonaire_mqd *)buf;
4659		mqd->header = 0xC0310800;
4660		mqd->static_thread_mgmt01[0] = 0xffffffff;
4661		mqd->static_thread_mgmt01[1] = 0xffffffff;
4662		mqd->static_thread_mgmt23[0] = 0xffffffff;
4663		mqd->static_thread_mgmt23[1] = 0xffffffff;
4664
4665		mutex_lock(&rdev->srbm_mutex);
4666		cik_srbm_select(rdev, rdev->ring[idx].me,
4667				rdev->ring[idx].pipe,
4668				rdev->ring[idx].queue, 0);
4669
4670		/* disable wptr polling */
4671		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4672		tmp &= ~WPTR_POLL_EN;
4673		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4674
4675		/* enable doorbell? */
4676		mqd->queue_state.cp_hqd_pq_doorbell_control =
4677			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4678		if (use_doorbell)
4679			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4680		else
4681			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4682		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4683		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4684
4685		/* disable the queue if it's active */
4686		mqd->queue_state.cp_hqd_dequeue_request = 0;
4687		mqd->queue_state.cp_hqd_pq_rptr = 0;
4688		mqd->queue_state.cp_hqd_pq_wptr = 0;
4689		if (RREG32(CP_HQD_ACTIVE) & 1) {
4690			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4691			for (j = 0; j < rdev->usec_timeout; j++) {
4692				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4693					break;
4694				udelay(1);
4695			}
4696			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4697			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4698			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4699		}
4700
4701		/* set the pointer to the MQD */
4702		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4703		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4704		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4705		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4706		/* set MQD vmid to 0 */
4707		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4708		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4709		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4710
4711		/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
4712		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4713		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4714		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4715		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4716		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4717
4718		/* set up the HQD, this is similar to CP_RB0_CNTL */
4719		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4720		mqd->queue_state.cp_hqd_pq_control &=
4721			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4722
4723		mqd->queue_state.cp_hqd_pq_control |=
4724			order_base_2(rdev->ring[idx].ring_size / 8);
4725		mqd->queue_state.cp_hqd_pq_control |=
4726			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4727#ifdef __BIG_ENDIAN
4728		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4729#endif
4730		mqd->queue_state.cp_hqd_pq_control &=
4731			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4732		mqd->queue_state.cp_hqd_pq_control |=
4733			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4734		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4735
4736		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4737		if (i == 0)
4738			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4739		else
4740			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4741		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4742		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4743		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4744		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4745		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4746
4747		/* set the wb address whether it's enabled or not */
4748		if (i == 0)
4749			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4750		else
4751			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4752		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4753		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4754			upper_32_bits(wb_gpu_addr) & 0xffff;
4755		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4756		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4757		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4758		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4759
4760		/* enable the doorbell if requested */
4761		if (use_doorbell) {
4762			mqd->queue_state.cp_hqd_pq_doorbell_control =
4763				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4764			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4765			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4766				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4767			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4768			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4769				~(DOORBELL_SOURCE | DOORBELL_HIT);
4770
4771		} else {
4772			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4773		}
4774		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4775		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4776
4777		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4778		rdev->ring[idx].wptr = 0;
4779		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4780		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4781		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4782
4783		/* set the vmid for the queue */
4784		mqd->queue_state.cp_hqd_vmid = 0;
4785		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4786
4787		/* activate the queue */
4788		mqd->queue_state.cp_hqd_active = 1;
4789		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4790
4791		cik_srbm_select(rdev, 0, 0, 0, 0);
4792		mutex_unlock(&rdev->srbm_mutex);
4793
4794		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4795		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4796
4797		rdev->ring[idx].ready = true;
4798		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4799		if (r)
4800			rdev->ring[idx].ready = false;
4801	}
4802
4803	return 0;
4804}
4805
4806static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4807{
4808	cik_cp_gfx_enable(rdev, enable);
4809	cik_cp_compute_enable(rdev, enable);
4810}
4811
4812static int cik_cp_load_microcode(struct radeon_device *rdev)
4813{
4814	int r;
4815
4816	r = cik_cp_gfx_load_microcode(rdev);
4817	if (r)
4818		return r;
4819	r = cik_cp_compute_load_microcode(rdev);
4820	if (r)
4821		return r;
4822
4823	return 0;
4824}
4825
4826static void cik_cp_fini(struct radeon_device *rdev)
4827{
4828	cik_cp_gfx_fini(rdev);
4829	cik_cp_compute_fini(rdev);
4830}
4831
4832static int cik_cp_resume(struct radeon_device *rdev)
4833{
4834	int r;
4835
4836	cik_enable_gui_idle_interrupt(rdev, false);
4837
4838	r = cik_cp_load_microcode(rdev);
4839	if (r)
4840		return r;
4841
4842	r = cik_cp_gfx_resume(rdev);
4843	if (r)
4844		return r;
4845	r = cik_cp_compute_resume(rdev);
4846	if (r)
4847		return r;
4848
4849	cik_enable_gui_idle_interrupt(rdev, true);
4850
4851	return 0;
4852}
4853
4854static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4855{
4856	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4857		RREG32(GRBM_STATUS));
4858	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4859		RREG32(GRBM_STATUS2));
4860	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4861		RREG32(GRBM_STATUS_SE0));
4862	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4863		RREG32(GRBM_STATUS_SE1));
4864	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4865		RREG32(GRBM_STATUS_SE2));
4866	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4867		RREG32(GRBM_STATUS_SE3));
4868	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4869		RREG32(SRBM_STATUS));
4870	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4871		RREG32(SRBM_STATUS2));
4872	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4873		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4874	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4875		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4876	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4877	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4878		 RREG32(CP_STALLED_STAT1));
4879	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4880		 RREG32(CP_STALLED_STAT2));
4881	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4882		 RREG32(CP_STALLED_STAT3));
4883	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4884		 RREG32(CP_CPF_BUSY_STAT));
4885	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4886		 RREG32(CP_CPF_STALLED_STAT1));
4887	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4888	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4889	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4890		 RREG32(CP_CPC_STALLED_STAT1));
4891	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4892}
4893
4894/**
4895 * cik_gpu_check_soft_reset - check which blocks are busy
4896 *
4897 * @rdev: radeon_device pointer
4898 *
4899 * Check which blocks are busy and return the relevant reset
4900 * mask to be used by cik_gpu_soft_reset().
4901 * Returns a mask of the blocks to be reset.
4902 */
4903u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4904{
4905	u32 reset_mask = 0;
4906	u32 tmp;
4907
4908	/* GRBM_STATUS */
4909	tmp = RREG32(GRBM_STATUS);
4910	if (tmp & (PA_BUSY | SC_BUSY |
4911		   BCI_BUSY | SX_BUSY |
4912		   TA_BUSY | VGT_BUSY |
4913		   DB_BUSY | CB_BUSY |
4914		   GDS_BUSY | SPI_BUSY |
4915		   IA_BUSY | IA_BUSY_NO_DMA))
4916		reset_mask |= RADEON_RESET_GFX;
4917
4918	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4919		reset_mask |= RADEON_RESET_CP;
4920
4921	/* GRBM_STATUS2 */
4922	tmp = RREG32(GRBM_STATUS2);
4923	if (tmp & RLC_BUSY)
4924		reset_mask |= RADEON_RESET_RLC;
4925
4926	/* SDMA0_STATUS_REG */
4927	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4928	if (!(tmp & SDMA_IDLE))
4929		reset_mask |= RADEON_RESET_DMA;
4930
4931	/* SDMA1_STATUS_REG */
4932	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4933	if (!(tmp & SDMA_IDLE))
4934		reset_mask |= RADEON_RESET_DMA1;
4935
4936	/* SRBM_STATUS2 */
4937	tmp = RREG32(SRBM_STATUS2);
4938	if (tmp & SDMA_BUSY)
4939		reset_mask |= RADEON_RESET_DMA;
4940
4941	if (tmp & SDMA1_BUSY)
4942		reset_mask |= RADEON_RESET_DMA1;
4943
4944	/* SRBM_STATUS */
4945	tmp = RREG32(SRBM_STATUS);
4946
4947	if (tmp & IH_BUSY)
4948		reset_mask |= RADEON_RESET_IH;
4949
4950	if (tmp & SEM_BUSY)
4951		reset_mask |= RADEON_RESET_SEM;
4952
4953	if (tmp & GRBM_RQ_PENDING)
4954		reset_mask |= RADEON_RESET_GRBM;
4955
4956	if (tmp & VMC_BUSY)
4957		reset_mask |= RADEON_RESET_VMC;
4958
4959	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4960		   MCC_BUSY | MCD_BUSY))
4961		reset_mask |= RADEON_RESET_MC;
4962
4963	if (evergreen_is_display_hung(rdev))
4964		reset_mask |= RADEON_RESET_DISPLAY;
4965
4966	/* Skip MC reset as it's most likely not hung, just busy */
4967	if (reset_mask & RADEON_RESET_MC) {
4968		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4969		reset_mask &= ~RADEON_RESET_MC;
4970	}
4971
4972	return reset_mask;
4973}
4974
4975/**
4976 * cik_gpu_soft_reset - soft reset GPU
4977 *
4978 * @rdev: radeon_device pointer
4979 * @reset_mask: mask of which blocks to reset
4980 *
4981 * Soft reset the blocks specified in @reset_mask.
4982 */
4983static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4984{
4985	struct evergreen_mc_save save;
4986	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4987	u32 tmp;
4988
4989	if (reset_mask == 0)
4990		return;
4991
4992	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4993
4994	cik_print_gpu_status_regs(rdev);
4995	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4996		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4997	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4998		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4999
5000	/* disable CG/PG */
5001	cik_fini_pg(rdev);
5002	cik_fini_cg(rdev);
5003
5004	/* stop the rlc */
5005	cik_rlc_stop(rdev);
5006
5007	/* Disable GFX parsing/prefetching */
5008	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5009
5010	/* Disable MEC parsing/prefetching */
5011	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5012
5013	if (reset_mask & RADEON_RESET_DMA) {
5014		/* sdma0 */
5015		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5016		tmp |= SDMA_HALT;
5017		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5018	}
5019	if (reset_mask & RADEON_RESET_DMA1) {
5020		/* sdma1 */
5021		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5022		tmp |= SDMA_HALT;
5023		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5024	}
5025
5026	evergreen_mc_stop(rdev, &save);
5027	if (evergreen_mc_wait_for_idle(rdev)) {
5028		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5029	}
5030
5031	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5032		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5033
5034	if (reset_mask & RADEON_RESET_CP) {
5035		grbm_soft_reset |= SOFT_RESET_CP;
5036
5037		srbm_soft_reset |= SOFT_RESET_GRBM;
5038	}
5039
5040	if (reset_mask & RADEON_RESET_DMA)
5041		srbm_soft_reset |= SOFT_RESET_SDMA;
5042
5043	if (reset_mask & RADEON_RESET_DMA1)
5044		srbm_soft_reset |= SOFT_RESET_SDMA1;
5045
5046	if (reset_mask & RADEON_RESET_DISPLAY)
5047		srbm_soft_reset |= SOFT_RESET_DC;
5048
5049	if (reset_mask & RADEON_RESET_RLC)
5050		grbm_soft_reset |= SOFT_RESET_RLC;
5051
5052	if (reset_mask & RADEON_RESET_SEM)
5053		srbm_soft_reset |= SOFT_RESET_SEM;
5054
5055	if (reset_mask & RADEON_RESET_IH)
5056		srbm_soft_reset |= SOFT_RESET_IH;
5057
5058	if (reset_mask & RADEON_RESET_GRBM)
5059		srbm_soft_reset |= SOFT_RESET_GRBM;
5060
5061	if (reset_mask & RADEON_RESET_VMC)
5062		srbm_soft_reset |= SOFT_RESET_VMC;
5063
5064	if (!(rdev->flags & RADEON_IS_IGP)) {
5065		if (reset_mask & RADEON_RESET_MC)
5066			srbm_soft_reset |= SOFT_RESET_MC;
5067	}
5068
5069	if (grbm_soft_reset) {
5070		tmp = RREG32(GRBM_SOFT_RESET);
5071		tmp |= grbm_soft_reset;
5072		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5073		WREG32(GRBM_SOFT_RESET, tmp);
5074		tmp = RREG32(GRBM_SOFT_RESET);
5075
5076		udelay(50);
5077
5078		tmp &= ~grbm_soft_reset;
5079		WREG32(GRBM_SOFT_RESET, tmp);
5080		tmp = RREG32(GRBM_SOFT_RESET);
5081	}
5082
5083	if (srbm_soft_reset) {
5084		tmp = RREG32(SRBM_SOFT_RESET);
5085		tmp |= srbm_soft_reset;
5086		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5087		WREG32(SRBM_SOFT_RESET, tmp);
5088		tmp = RREG32(SRBM_SOFT_RESET);
5089
5090		udelay(50);
5091
5092		tmp &= ~srbm_soft_reset;
5093		WREG32(SRBM_SOFT_RESET, tmp);
5094		tmp = RREG32(SRBM_SOFT_RESET);
5095	}
5096
5097	/* Wait a little for things to settle down */
5098	udelay(50);
5099
5100	evergreen_mc_resume(rdev, &save);
5101	udelay(50);
5102
5103	cik_print_gpu_status_regs(rdev);
5104}
5105
5106struct kv_reset_save_regs {
5107	u32 gmcon_reng_execute;
5108	u32 gmcon_misc;
5109	u32 gmcon_misc3;
5110};
5111
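/**
 * kv_save_regs_for_reset - save GMCON registers before a reset
 *
 * @rdev: radeon_device pointer
 * @save: kv_reset_save_regs structure to save the registers into
 *
 * Save the GMCON registers and disable register-engine execution
 * before a pci config reset on KV/KB parts.
 */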
5112static void kv_save_regs_for_reset(struct radeon_device *rdev,
5113				   struct kv_reset_save_regs *save)
5114{
5115	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5116	save->gmcon_misc = RREG32(GMCON_MISC);
5117	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5118
5119	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5120	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5121						STCTRL_STUTTER_EN));
5122}
5123
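/**
 * kv_restore_regs_for_reset - restore GMCON registers after a reset
 *
 * @rdev: radeon_device pointer
 * @save: kv_reset_save_regs structure holding the saved registers
 *
 * Step the GMCON PGFSM through its sequence and then restore
 * the registers saved by kv_save_regs_for_reset() (KV/KB).
 */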
5124static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5125				      struct kv_reset_save_regs *save)
5126{
5127	int i;
5128
5129	WREG32(GMCON_PGFSM_WRITE, 0);
5130	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5131
5132	for (i = 0; i < 5; i++)
5133		WREG32(GMCON_PGFSM_WRITE, 0);
5134
5135	WREG32(GMCON_PGFSM_WRITE, 0);
5136	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5137
5138	for (i = 0; i < 5; i++)
5139		WREG32(GMCON_PGFSM_WRITE, 0);
5140
5141	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5142	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5143
5144	for (i = 0; i < 5; i++)
5145		WREG32(GMCON_PGFSM_WRITE, 0);
5146
5147	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5148	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5149
5150	for (i = 0; i < 5; i++)
5151		WREG32(GMCON_PGFSM_WRITE, 0);
5152
5153	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5154	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5155
5156	for (i = 0; i < 5; i++)
5157		WREG32(GMCON_PGFSM_WRITE, 0);
5158
5159	WREG32(GMCON_PGFSM_WRITE, 0);
5160	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5161
5162	for (i = 0; i < 5; i++)
5163		WREG32(GMCON_PGFSM_WRITE, 0);
5164
5165	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5166	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5167
5168	for (i = 0; i < 5; i++)
5169		WREG32(GMCON_PGFSM_WRITE, 0);
5170
5171	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5172	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5173
5174	for (i = 0; i < 5; i++)
5175		WREG32(GMCON_PGFSM_WRITE, 0);
5176
5177	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5178	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5179
5180	for (i = 0; i < 5; i++)
5181		WREG32(GMCON_PGFSM_WRITE, 0);
5182
5183	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5184	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5185
5186	for (i = 0; i < 5; i++)
5187		WREG32(GMCON_PGFSM_WRITE, 0);
5188
5189	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5190	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5191
5192	WREG32(GMCON_MISC3, save->gmcon_misc3);
5193	WREG32(GMCON_MISC, save->gmcon_misc);
5194	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5195}
5196
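/**
 * cik_gpu_pci_config_reset - reset the GPU via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP, MEC and SDMA engines, stop the RLC and MC,
 * and reset the whole GPU through the pci config space (CIK).
 */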
5197static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5198{
5199	struct evergreen_mc_save save;
5200	struct kv_reset_save_regs kv_save = { 0 };
5201	u32 tmp, i;
5202
5203	dev_info(rdev->dev, "GPU pci config reset\n");
5204
5205	/* disable dpm? */
5206
5207	/* disable cg/pg */
5208	cik_fini_pg(rdev);
5209	cik_fini_cg(rdev);
5210
5211	/* Disable GFX parsing/prefetching */
5212	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5213
5214	/* Disable MEC parsing/prefetching */
5215	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5216
5217	/* sdma0 */
5218	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5219	tmp |= SDMA_HALT;
5220	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5221	/* sdma1 */
5222	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5223	tmp |= SDMA_HALT;
5224	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5225	/* XXX other engines? */
5226
5227	/* halt the rlc, disable cp internal ints */
5228	cik_rlc_stop(rdev);
5229
5230	udelay(50);
5231
5232	/* disable mem access */
5233	evergreen_mc_stop(rdev, &save);
5234	if (evergreen_mc_wait_for_idle(rdev)) {
5235		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5236	}
5237
5238	if (rdev->flags & RADEON_IS_IGP)
5239		kv_save_regs_for_reset(rdev, &kv_save);
5240
5241	/* disable BM */
5242	pci_clear_master(rdev->pdev);
5243	/* reset */
5244	radeon_pci_config_reset(rdev);
5245
5246	udelay(100);
5247
5248	/* wait for asic to come out of reset */
5249	for (i = 0; i < rdev->usec_timeout; i++) {
5250		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5251			break;
5252		udelay(1);
5253	}
5254
5255	/* does asic init need to be run first??? */
5256	if (rdev->flags & RADEON_IS_IGP)
5257		kv_restore_regs_for_reset(rdev, &kv_save);
5258}
5259
5260/**
5261 * cik_asic_reset - soft reset GPU
5262 *
5263 * @rdev: radeon_device pointer
5264 * @hard: force hard reset
5265 *
5266 * Look up which blocks are hung and attempt
5267 * to reset them.
5268 * Returns 0 for success.
5269 */
5270int cik_asic_reset(struct radeon_device *rdev, bool hard)
5271{
5272	u32 reset_mask;
5273
5274	if (hard) {
5275		cik_gpu_pci_config_reset(rdev);
5276		return 0;
5277	}
5278
5279	reset_mask = cik_gpu_check_soft_reset(rdev);
5280
5281	if (reset_mask)
5282		r600_set_bios_scratch_engine_hung(rdev, true);
5283
5284	/* try soft reset */
5285	cik_gpu_soft_reset(rdev, reset_mask);
5286
5287	reset_mask = cik_gpu_check_soft_reset(rdev);
5288
5289	/* try pci config reset */
5290	if (reset_mask && radeon_hard_reset)
5291		cik_gpu_pci_config_reset(rdev);
5292
5293	reset_mask = cik_gpu_check_soft_reset(rdev);
5294
5295	if (!reset_mask)
5296		r600_set_bios_scratch_engine_hung(rdev, false);
5297
5298	return 0;
5299}
5300
5301/**
5302 * cik_gfx_is_lockup - check if the 3D engine is locked up
5303 *
5304 * @rdev: radeon_device pointer
5305 * @ring: radeon_ring structure holding ring information
5306 *
5307 * Check if the 3D engine is locked up (CIK).
5308 * Returns true if the engine is locked, false if not.
5309 */
5310bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5311{
5312	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5313
5314	if (!(reset_mask & (RADEON_RESET_GFX |
5315			    RADEON_RESET_COMPUTE |
5316			    RADEON_RESET_CP))) {
5317		radeon_ring_lockup_update(rdev, ring);
5318		return false;
5319	}
5320	return radeon_ring_test_lockup(rdev, ring);
5321}
5322
5323/* MC */
5324/**
5325 * cik_mc_program - program the GPU memory controller
5326 *
5327 * @rdev: radeon_device pointer
5328 *
5329 * Set the location of vram, gart, and AGP in the GPU's
5330 * physical address space (CIK).
5331 */
5332static void cik_mc_program(struct radeon_device *rdev)
5333{
5334	struct evergreen_mc_save save;
5335	u32 tmp;
5336	int i, j;
5337
5338	/* Initialize HDP */
5339	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5340		WREG32((0x2c14 + j), 0x00000000);
5341		WREG32((0x2c18 + j), 0x00000000);
5342		WREG32((0x2c1c + j), 0x00000000);
5343		WREG32((0x2c20 + j), 0x00000000);
5344		WREG32((0x2c24 + j), 0x00000000);
5345	}
5346	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5347
5348	evergreen_mc_stop(rdev, &save);
5349	if (radeon_mc_wait_for_idle(rdev)) {
5350		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5351	}
5352	/* Lock out access through the VGA aperture */
5353	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5354	/* Update configuration */
5355	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5356	       rdev->mc.vram_start >> 12);
5357	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5358	       rdev->mc.vram_end >> 12);
5359	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5360	       rdev->vram_scratch.gpu_addr >> 12);
5361	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5362	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5363	WREG32(MC_VM_FB_LOCATION, tmp);
5364	/* XXX double check these! */
5365	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5366	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5367	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5368	WREG32(MC_VM_AGP_BASE, 0);
5369	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5370	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5371	if (radeon_mc_wait_for_idle(rdev)) {
5372		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5373	}
5374	evergreen_mc_resume(rdev, &save);
5375	/* we need to own VRAM, so turn off the VGA renderer here
5376	 * to stop it overwriting our objects */
5377	rv515_vga_render_disable(rdev);
5378}
5379
5380/**
5381 * cik_mc_init - initialize the memory controller driver params
5382 *
5383 * @rdev: radeon_device pointer
5384 *
5385 * Look up the amount of vram, vram width, and decide how to place
5386 * vram and gart within the GPU's physical address space (CIK).
5387 * Returns 0 for success.
5388 */
5389static int cik_mc_init(struct radeon_device *rdev)
5390{
5391	u32 tmp;
5392	int chansize, numchan;
5393
5394	/* Get VRAM information */
5395	rdev->mc.vram_is_ddr = true;
5396	tmp = RREG32(MC_ARB_RAMCFG);
5397	if (tmp & CHANSIZE_MASK) {
5398		chansize = 64;
5399	} else {
5400		chansize = 32;
5401	}
5402	tmp = RREG32(MC_SHARED_CHMAP);
5403	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5404	case 0:
5405	default:
5406		numchan = 1;
5407		break;
5408	case 1:
5409		numchan = 2;
5410		break;
5411	case 2:
5412		numchan = 4;
5413		break;
5414	case 3:
5415		numchan = 8;
5416		break;
5417	case 4:
5418		numchan = 3;
5419		break;
5420	case 5:
5421		numchan = 6;
5422		break;
5423	case 6:
5424		numchan = 10;
5425		break;
5426	case 7:
5427		numchan = 12;
5428		break;
5429	case 8:
5430		numchan = 16;
5431		break;
5432	}
5433	rdev->mc.vram_width = numchan * chansize;
5434	/* Could the aperture size report 0? */
5435	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5436	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5437	/* size in MB on CIK */
5438	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5439	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
5440	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5441	si_vram_gtt_location(rdev, &rdev->mc);
5442	radeon_update_bandwidth_info(rdev);
5443
5444	return 0;
5445}
5446
5447/*
5448 * GART
5449 * VMID 0 is the physical GPU addresses as used by the kernel.
5450 * VMIDs 1-15 are used for userspace clients and are handled
5451 * by the radeon vm/hsa code.
5452 */
5453/**
5454 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5455 *
5456 * @rdev: radeon_device pointer
5457 *
5458 * Flush the TLB for the VMID 0 page table (CIK).
5459 */
5460void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5461{
5462	/* flush hdp cache */
5463	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5464
5465	/* bits 0-15 are the VM contexts 0-15 */
5466	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5467}
5468
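/**
 * cik_pcie_init_compute_vmid - set up the compute VMIDs
 *
 * @rdev: radeon_device pointer
 *
 * Program the SH_MEM registers for VMIDs 8-15, which are
 * reserved for the amdkfd compute driver (CIK).
 */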
5469static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5470{
5471	int i;
5472	uint32_t sh_mem_bases, sh_mem_config;
5473
5474	sh_mem_bases = 0x6000 | 0x6000 << 16;
5475	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5476	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5477
5478	mutex_lock(&rdev->srbm_mutex);
5479	for (i = 8; i < 16; i++) {
5480		cik_srbm_select(rdev, 0, 0, 0, i);
5481		/* CP and shaders */
5482		WREG32(SH_MEM_CONFIG, sh_mem_config);
5483		WREG32(SH_MEM_APE1_BASE, 1);
5484		WREG32(SH_MEM_APE1_LIMIT, 0);
5485		WREG32(SH_MEM_BASES, sh_mem_bases);
5486	}
5487	cik_srbm_select(rdev, 0, 0, 0, 0);
5488	mutex_unlock(&rdev->srbm_mutex);
5489}
5490
5491/**
5492 * cik_pcie_gart_enable - gart enable
5493 *
5494 * @rdev: radeon_device pointer
5495 *
5496 * This sets up the TLBs, programs the page tables for VMID0,
5497 * sets up the hw for VMIDs 1-15 which are allocated on
5498 * demand, and sets up the global locations for the LDS, GDS,
5499 * and GPUVM for FSA64 clients (CIK).
5500 * Returns 0 for success, errors for failure.
5501 */
5502static int cik_pcie_gart_enable(struct radeon_device *rdev)
5503{
5504	int r, i;
5505
5506	if (rdev->gart.robj == NULL) {
5507		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5508		return -EINVAL;
5509	}
5510	r = radeon_gart_table_vram_pin(rdev);
5511	if (r)
5512		return r;
5513	/* Setup TLB control */
5514	WREG32(MC_VM_MX_L1_TLB_CNTL,
5515	       (0xA << 7) |
5516	       ENABLE_L1_TLB |
5517	       ENABLE_L1_FRAGMENT_PROCESSING |
5518	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5519	       ENABLE_ADVANCED_DRIVER_MODEL |
5520	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5521	/* Setup L2 cache */
5522	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5523	       ENABLE_L2_FRAGMENT_PROCESSING |
5524	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5525	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5526	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5527	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5528	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5529	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5530	       BANK_SELECT(4) |
5531	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5532	/* setup context0 */
5533	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5534	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5535	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5536	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5537			(u32)(rdev->dummy_page.addr >> 12));
5538	WREG32(VM_CONTEXT0_CNTL2, 0);
5539	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5540				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5541
5542	WREG32(0x15D4, 0);
5543	WREG32(0x15D8, 0);
5544	WREG32(0x15DC, 0);
5545
5546	/* restore context1-15 */
5547	/* set vm size, must be a multiple of 4 */
5548	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5549	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5550	for (i = 1; i < 16; i++) {
5551		if (i < 8)
5552			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5553			       rdev->vm_manager.saved_table_addr[i]);
5554		else
5555			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5556			       rdev->vm_manager.saved_table_addr[i]);
5557	}
5558
5559	/* enable context1-15 */
5560	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5561	       (u32)(rdev->dummy_page.addr >> 12));
5562	WREG32(VM_CONTEXT1_CNTL2, 4);
5563	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5564				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5565				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5566				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5567				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5568				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5569				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5570				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5571				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5572				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5573				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5574				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5575				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5576				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5577
5578	if (rdev->family == CHIP_KAVERI) {
5579		u32 tmp = RREG32(CHUB_CONTROL);
5580		tmp &= ~BYPASS_VM;
5581		WREG32(CHUB_CONTROL, tmp);
5582	}
5583
5584	/* XXX SH_MEM regs */
5585	/* where to put LDS, scratch, GPUVM in FSA64 space */
5586	mutex_lock(&rdev->srbm_mutex);
5587	for (i = 0; i < 16; i++) {
5588		cik_srbm_select(rdev, 0, 0, 0, i);
5589		/* CP and shaders */
5590		WREG32(SH_MEM_CONFIG, 0);
5591		WREG32(SH_MEM_APE1_BASE, 1);
5592		WREG32(SH_MEM_APE1_LIMIT, 0);
5593		WREG32(SH_MEM_BASES, 0);
5594		/* SDMA GFX */
5595		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5596		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5597		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5598		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5599		/* XXX SDMA RLC - todo */
5600	}
5601	cik_srbm_select(rdev, 0, 0, 0, 0);
5602	mutex_unlock(&rdev->srbm_mutex);
5603
5604	cik_pcie_init_compute_vmid(rdev);
5605
5606	cik_pcie_gart_tlb_flush(rdev);
5607	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5608		 (unsigned)(rdev->mc.gtt_size >> 20),
5609		 (unsigned long long)rdev->gart.table_addr);
5610	rdev->gart.ready = true;
5611	return 0;
5612}
5613
5614/**
5615 * cik_pcie_gart_disable - gart disable
5616 *
5617 * @rdev: radeon_device pointer
5618 *
5619 * This disables all VM page tables (CIK).
5620 */
5621static void cik_pcie_gart_disable(struct radeon_device *rdev)
5622{
5623	unsigned i;
5624
5625	for (i = 1; i < 16; ++i) {
5626		uint32_t reg;
5627		if (i < 8)
5628			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5629		else
5630			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5631		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5632	}
5633
5634	/* Disable all tables */
5635	WREG32(VM_CONTEXT0_CNTL, 0);
5636	WREG32(VM_CONTEXT1_CNTL, 0);
5637	/* Setup TLB control */
5638	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5639	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5640	/* Setup L2 cache */
5641	WREG32(VM_L2_CNTL,
5642	       ENABLE_L2_FRAGMENT_PROCESSING |
5643	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5644	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5645	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5646	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5647	WREG32(VM_L2_CNTL2, 0);
5648	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5649	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5650	radeon_gart_table_vram_unpin(rdev);
5651}
5652
5653/**
5654 * cik_pcie_gart_fini - vm fini callback
5655 *
5656 * @rdev: radeon_device pointer
5657 *
5658 * Tears down the driver GART/VM setup (CIK).
5659 */
5660static void cik_pcie_gart_fini(struct radeon_device *rdev)
5661{
5662	cik_pcie_gart_disable(rdev);
5663	radeon_gart_table_vram_free(rdev);
5664	radeon_gart_fini(rdev);
5665}
5666
5667/* vm parser */
5668/**
5669 * cik_ib_parse - vm ib_parse callback
5670 *
5671 * @rdev: radeon_device pointer
5672 * @ib: indirect buffer pointer
5673 *
5674 * CIK uses hw IB checking so this is a nop (CIK).
5675 */
5676int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5677{
5678	return 0;
5679}
5680
5681/*
5682 * vm
5683 * VMID 0 is the physical GPU addresses as used by the kernel.
5684 * VMIDs 1-15 are used for userspace clients and are handled
5685 * by the radeon vm/hsa code.
5686 */
5687/**
5688 * cik_vm_init - cik vm init callback
5689 *
5690 * @rdev: radeon_device pointer
5691 *
5692 * Inits cik specific vm parameters (number of VMs, base of vram for
5693 * VMIDs 1-15) (CIK).
5694 * Returns 0 for success.
5695 */
5696int cik_vm_init(struct radeon_device *rdev)
5697{
5698	/*
5699	 * number of VMs
5700	 * VMID 0 is reserved for System
5701	 * radeon graphics/compute will use VMIDs 1-7
5702	 * amdkfd will use VMIDs 8-15
5703	 */
5704	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5705	/* base offset of vram pages */
5706	if (rdev->flags & RADEON_IS_IGP) {
5707		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5708		tmp <<= 22;
5709		rdev->vm_manager.vram_base_offset = tmp;
5710	} else {
5711		rdev->vm_manager.vram_base_offset = 0;
	}
5712
5713	return 0;
5714}
5715
5716/**
5717 * cik_vm_fini - cik vm fini callback
5718 *
5719 * @rdev: radeon_device pointer
5720 *
5721 * Tear down any asic specific VM setup (CIK).
5722 */
5723void cik_vm_fini(struct radeon_device *rdev)
5724{
5725}
5726
5727/**
5728 * cik_vm_decode_fault - print human readable fault info
5729 *
5730 * @rdev: radeon_device pointer
5731 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5732 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5733 *
5734 * Print human readable fault information (CIK).
5735 */
5736static void cik_vm_decode_fault(struct radeon_device *rdev,
5737				u32 status, u32 addr, u32 mc_client)
5738{
5739	u32 mc_id;
5740	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5741	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5742	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5743		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5744
5745	if (rdev->family == CHIP_HAWAII)
5746		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5747	else
5748		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5749
5750	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5751	       protections, vmid, addr,
5752	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5753	       block, mc_client, mc_id);
5754}
5755
5756/**
5757 * cik_vm_flush - cik vm flush using the CP
5758 *
5759 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the VM to flush
 * @pd_addr: physical address of the page directory
5760 *
5761 * Update the page table base and flush the VM TLB
5762 * using the CP (CIK).
5763 */
5764void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5765		  unsigned vm_id, uint64_t pd_addr)
5766{
5767	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5768
5769	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5770	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5771				 WRITE_DATA_DST_SEL(0)));
5772	if (vm_id < 8) {
5773		radeon_ring_write(ring,
5774				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5775	} else {
5776		radeon_ring_write(ring,
5777				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5778	}
5779	radeon_ring_write(ring, 0);
5780	radeon_ring_write(ring, pd_addr >> 12);
5781
5782	/* update SH_MEM_* regs */
5783	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5784	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5785				 WRITE_DATA_DST_SEL(0)));
5786	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5787	radeon_ring_write(ring, 0);
5788	radeon_ring_write(ring, VMID(vm_id));
5789
5790	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5791	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5792				 WRITE_DATA_DST_SEL(0)));
5793	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5794	radeon_ring_write(ring, 0);
5795
5796	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5797	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5798	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5799	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5800
5801	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5802	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5803				 WRITE_DATA_DST_SEL(0)));
5804	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5805	radeon_ring_write(ring, 0);
5806	radeon_ring_write(ring, VMID(0));
5807
5808	/* HDP flush */
5809	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5810
5811	/* bits 0-15 are the VM contexts 0-15 */
5812	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5813	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5814				 WRITE_DATA_DST_SEL(0)));
5815	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5816	radeon_ring_write(ring, 0);
5817	radeon_ring_write(ring, 1 << vm_id);
5818
5819	/* wait for the invalidate to complete */
5820	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5821	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5822				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5823				 WAIT_REG_MEM_ENGINE(0))); /* me */
5824	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5825	radeon_ring_write(ring, 0);
5826	radeon_ring_write(ring, 0); /* ref */
5827	radeon_ring_write(ring, 0); /* mask */
5828	radeon_ring_write(ring, 0x20); /* poll interval */
5829
5830	/* compute doesn't have PFP */
5831	if (usepfp) {
5832		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5833		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5834		radeon_ring_write(ring, 0x0);
5835	}
5836}
5837
5838/*
5839 * RLC
5840 * The RLC is a multi-purpose microengine that handles a
5841 * variety of functions, the most important of which is
5842 * the interrupt controller.
5843 */
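/**
 * cik_enable_gui_idle_interrupt - enable/disable the gui idle interrupts
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupts
 *
 * Enable or disable the context busy/empty interrupts on
 * gfx ring 0 (CIK).
 */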
5844static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5845					  bool enable)
5846{
5847	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5848
5849	if (enable)
5850		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5851	else
5852		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5853	WREG32(CP_INT_CNTL_RING0, tmp);
5854}
5855
5856static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5857{
5858	u32 tmp;
5859
5860	tmp = RREG32(RLC_LB_CNTL);
5861	if (enable)
5862		tmp |= LOAD_BALANCE_ENABLE;
5863	else
5864		tmp &= ~LOAD_BALANCE_ENABLE;
5865	WREG32(RLC_LB_CNTL, tmp);
5866}
5867
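/**
 * cik_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Wait for the CU master of each SE/SH and the non-CU masters
 * to report idle, or until the usec timeout expires (CIK).
 */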
5868static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5869{
5870	u32 i, j, k;
5871	u32 mask;
5872
5873	mutex_lock(&rdev->grbm_idx_mutex);
5874	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5875		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5876			cik_select_se_sh(rdev, i, j);
5877			for (k = 0; k < rdev->usec_timeout; k++) {
5878				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5879					break;
5880				udelay(1);
5881			}
5882		}
5883	}
5884	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5885	mutex_unlock(&rdev->grbm_idx_mutex);
5886
5887	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5888	for (k = 0; k < rdev->usec_timeout; k++) {
5889		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5890			break;
5891		udelay(1);
5892	}
5893}
5894
5895static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5896{
5897	u32 tmp;
5898
5899	tmp = RREG32(RLC_CNTL);
5900	if (tmp != rlc)
5901		WREG32(RLC_CNTL, rlc);
5902}
5903
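/**
 * cik_halt_rlc - halt the RLC and wait for it to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC and wait for it to go idle (CIK).
 * Returns the previous RLC_CNTL value so it can be
 * restored later with cik_update_rlc().
 */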
5904static u32 cik_halt_rlc(struct radeon_device *rdev)
5905{
5906	u32 data, orig;
5907
5908	orig = data = RREG32(RLC_CNTL);
5909
5910	if (data & RLC_ENABLE) {
5911		u32 i;
5912
5913		data &= ~RLC_ENABLE;
5914		WREG32(RLC_CNTL, data);
5915
5916		for (i = 0; i < rdev->usec_timeout; i++) {
5917			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5918				break;
5919			udelay(1);
5920		}
5921
5922		cik_wait_for_rlc_serdes(rdev);
5923	}
5924
5925	return orig;
5926}
5927
5928void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5929{
5930	u32 tmp, i, mask;
5931
5932	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5933	WREG32(RLC_GPR_REG2, tmp);
5934
5935	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5936	for (i = 0; i < rdev->usec_timeout; i++) {
5937		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5938			break;
5939		udelay(1);
5940	}
5941
5942	for (i = 0; i < rdev->usec_timeout; i++) {
5943		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5944			break;
5945		udelay(1);
5946	}
5947}
5948
5949void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5950{
5951	u32 tmp;
5952
5953	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5954	WREG32(RLC_GPR_REG2, tmp);
5955}
5956
5957/**
5958 * cik_rlc_stop - stop the RLC ME
5959 *
5960 * @rdev: radeon_device pointer
5961 *
5962 * Halt the RLC ME (MicroEngine) (CIK).
5963 */
5964static void cik_rlc_stop(struct radeon_device *rdev)
5965{
5966	WREG32(RLC_CNTL, 0);
5967
5968	cik_enable_gui_idle_interrupt(rdev, false);
5969
5970	cik_wait_for_rlc_serdes(rdev);
5971}
5972
5973/**
5974 * cik_rlc_start - start the RLC ME
5975 *
5976 * @rdev: radeon_device pointer
5977 *
5978 * Unhalt the RLC ME (MicroEngine) (CIK).
5979 */
5980static void cik_rlc_start(struct radeon_device *rdev)
5981{
5982	WREG32(RLC_CNTL, RLC_ENABLE);
5983
5984	cik_enable_gui_idle_interrupt(rdev, true);
5985
5986	udelay(50);
5987}
5988
5989/**
5990 * cik_rlc_resume - setup the RLC hw
5991 *
5992 * @rdev: radeon_device pointer
5993 *
5994 * Initialize the RLC registers, load the ucode,
5995 * and start the RLC (CIK).
5996 * Returns 0 for success, -EINVAL if the ucode is not available.
5997 */
5998static int cik_rlc_resume(struct radeon_device *rdev)
5999{
6000	u32 i, size, tmp;
6001
6002	if (!rdev->rlc_fw)
6003		return -EINVAL;
6004
6005	cik_rlc_stop(rdev);
6006
6007	/* disable CG */
6008	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6009	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6010
6011	si_rlc_reset(rdev);
6012
6013	cik_init_pg(rdev);
6014
6015	cik_init_cg(rdev);
6016
6017	WREG32(RLC_LB_CNTR_INIT, 0);
6018	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6019
6020	mutex_lock(&rdev->grbm_idx_mutex);
6021	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6022	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6023	WREG32(RLC_LB_PARAMS, 0x00600408);
6024	WREG32(RLC_LB_CNTL, 0x80000004);
6025	mutex_unlock(&rdev->grbm_idx_mutex);
6026
6027	WREG32(RLC_MC_CNTL, 0);
6028	WREG32(RLC_UCODE_CNTL, 0);
6029
6030	if (rdev->new_fw) {
6031		const struct rlc_firmware_header_v1_0 *hdr =
6032			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6033		const __le32 *fw_data = (const __le32 *)
6034			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6035
6036		radeon_ucode_print_rlc_hdr(&hdr->header);
6037
6038		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6039		WREG32(RLC_GPM_UCODE_ADDR, 0);
6040		for (i = 0; i < size; i++)
6041			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6042		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6043	} else {
6044		const __be32 *fw_data;
6045
6046		switch (rdev->family) {
6047		case CHIP_BONAIRE:
6048		case CHIP_HAWAII:
6049		default:
6050			size = BONAIRE_RLC_UCODE_SIZE;
6051			break;
6052		case CHIP_KAVERI:
6053			size = KV_RLC_UCODE_SIZE;
6054			break;
6055		case CHIP_KABINI:
6056			size = KB_RLC_UCODE_SIZE;
6057			break;
6058		case CHIP_MULLINS:
6059			size = ML_RLC_UCODE_SIZE;
6060			break;
6061		}
6062
6063		fw_data = (const __be32 *)rdev->rlc_fw->data;
6064		WREG32(RLC_GPM_UCODE_ADDR, 0);
6065		for (i = 0; i < size; i++)
6066			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6067		WREG32(RLC_GPM_UCODE_ADDR, 0);
6068	}
6069
6070	/* XXX - find out what chips support lbpw */
6071	cik_enable_lbpw(rdev, false);
6072
6073	if (rdev->family == CHIP_BONAIRE)
6074		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6075
6076	cik_rlc_start(rdev);
6077
6078	return 0;
6079}
6080
6081static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6082{
6083	u32 data, orig, tmp, tmp2;
6084
6085	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6086
6087	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6088		cik_enable_gui_idle_interrupt(rdev, true);
6089
6090		tmp = cik_halt_rlc(rdev);
6091
6092		mutex_lock(&rdev->grbm_idx_mutex);
6093		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6094		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6095		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6096		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6097		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6098		mutex_unlock(&rdev->grbm_idx_mutex);
6099
6100		cik_update_rlc(rdev, tmp);
6101
6102		data |= CGCG_EN | CGLS_EN;
6103	} else {
6104		cik_enable_gui_idle_interrupt(rdev, false);
6105
6106		RREG32(CB_CGTT_SCLK_CTRL);
6107		RREG32(CB_CGTT_SCLK_CTRL);
6108		RREG32(CB_CGTT_SCLK_CTRL);
6109		RREG32(CB_CGTT_SCLK_CTRL);
6110
6111		data &= ~(CGCG_EN | CGLS_EN);
6112	}
6113
6114	if (orig != data)
6115		WREG32(RLC_CGCG_CGLS_CTRL, data);
6117}
6118
6119static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6120{
6121	u32 data, orig, tmp = 0;
6122
6123	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6124		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6125			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6126				orig = data = RREG32(CP_MEM_SLP_CNTL);
6127				data |= CP_MEM_LS_EN;
6128				if (orig != data)
6129					WREG32(CP_MEM_SLP_CNTL, data);
6130			}
6131		}
6132
6133		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6134		data |= 0x00000001;
6135		data &= 0xfffffffd;
6136		if (orig != data)
6137			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6138
6139		tmp = cik_halt_rlc(rdev);
6140
6141		mutex_lock(&rdev->grbm_idx_mutex);
6142		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6143		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6144		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6145		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6146		WREG32(RLC_SERDES_WR_CTRL, data);
6147		mutex_unlock(&rdev->grbm_idx_mutex);
6148
6149		cik_update_rlc(rdev, tmp);
6150
6151		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6152			orig = data = RREG32(CGTS_SM_CTRL_REG);
6153			data &= ~SM_MODE_MASK;
6154			data |= SM_MODE(0x2);
6155			data |= SM_MODE_ENABLE;
6156			data &= ~CGTS_OVERRIDE;
6157			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6158			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6159				data &= ~CGTS_LS_OVERRIDE;
6160			data &= ~ON_MONITOR_ADD_MASK;
6161			data |= ON_MONITOR_ADD_EN;
6162			data |= ON_MONITOR_ADD(0x96);
6163			if (orig != data)
6164				WREG32(CGTS_SM_CTRL_REG, data);
6165		}
6166	} else {
6167		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6168		data |= 0x00000003;
6169		if (orig != data)
6170			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6171
6172		data = RREG32(RLC_MEM_SLP_CNTL);
6173		if (data & RLC_MEM_LS_EN) {
6174			data &= ~RLC_MEM_LS_EN;
6175			WREG32(RLC_MEM_SLP_CNTL, data);
6176		}
6177
6178		data = RREG32(CP_MEM_SLP_CNTL);
6179		if (data & CP_MEM_LS_EN) {
6180			data &= ~CP_MEM_LS_EN;
6181			WREG32(CP_MEM_SLP_CNTL, data);
6182		}
6183
6184		orig = data = RREG32(CGTS_SM_CTRL_REG);
6185		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6186		if (orig != data)
6187			WREG32(CGTS_SM_CTRL_REG, data);
6188
6189		tmp = cik_halt_rlc(rdev);
6190
6191		mutex_lock(&rdev->grbm_idx_mutex);
6192		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6193		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6194		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6195		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6196		WREG32(RLC_SERDES_WR_CTRL, data);
6197		mutex_unlock(&rdev->grbm_idx_mutex);
6198
6199		cik_update_rlc(rdev, tmp);
6200	}
6201}
6202
6203static const u32 mc_cg_registers[] = {
6205	MC_HUB_MISC_HUB_CG,
6206	MC_HUB_MISC_SIP_CG,
6207	MC_HUB_MISC_VM_CG,
6208	MC_XPB_CLK_GAT,
6209	ATC_MISC_CG,
6210	MC_CITF_MISC_WR_CG,
6211	MC_CITF_MISC_RD_CG,
6212	MC_CITF_MISC_VM_CG,
6213	VM_L2_CG,
6214};
6215
6216static void cik_enable_mc_ls(struct radeon_device *rdev,
6217			     bool enable)
6218{
6219	int i;
6220	u32 orig, data;
6221
6222	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6223		orig = data = RREG32(mc_cg_registers[i]);
6224		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6225			data |= MC_LS_ENABLE;
6226		else
6227			data &= ~MC_LS_ENABLE;
6228		if (data != orig)
6229			WREG32(mc_cg_registers[i], data);
6230	}
6231}
6232
6233static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6234			       bool enable)
6235{
6236	int i;
6237	u32 orig, data;
6238
6239	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6240		orig = data = RREG32(mc_cg_registers[i]);
6241		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6242			data |= MC_CG_ENABLE;
6243		else
6244			data &= ~MC_CG_ENABLE;
6245		if (data != orig)
6246			WREG32(mc_cg_registers[i], data);
6247	}
6248}
6249
6250static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6251				 bool enable)
6252{
6253	u32 orig, data;
6254
6255	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6256		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6257		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6258	} else {
6259		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6260		data |= 0xff000000;
6261		if (data != orig)
6262			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6263
6264		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6265		data |= 0xff000000;
6266		if (data != orig)
6267			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6268	}
6269}
6270
6271static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6272				 bool enable)
6273{
6274	u32 orig, data;
6275
6276	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6277		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6278		data |= 0x100;
6279		if (orig != data)
6280			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6281
6282		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6283		data |= 0x100;
6284		if (orig != data)
6285			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6286	} else {
6287		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6288		data &= ~0x100;
6289		if (orig != data)
6290			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6291
6292		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6293		data &= ~0x100;
6294		if (orig != data)
6295			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6296	}
6297}
6298
6299static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6300				bool enable)
6301{
6302	u32 orig, data;
6303
6304	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6305		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6306		data = 0xfff;
6307		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6308
6309		orig = data = RREG32(UVD_CGC_CTRL);
6310		data |= DCM;
6311		if (orig != data)
6312			WREG32(UVD_CGC_CTRL, data);
6313	} else {
6314		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6315		data &= ~0xfff;
6316		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6317
6318		orig = data = RREG32(UVD_CGC_CTRL);
6319		data &= ~DCM;
6320		if (orig != data)
6321			WREG32(UVD_CGC_CTRL, data);
6322	}
6323}
6324
6325static void cik_enable_bif_mgls(struct radeon_device *rdev,
6326			       bool enable)
6327{
6328	u32 orig, data;
6329
6330	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6331
6332	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6333		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6334			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6335	else
6336		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6337			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6338
6339	if (orig != data)
6340		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6341}
6342
6343static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6344				bool enable)
6345{
6346	u32 orig, data;
6347
6348	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6349
6350	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6351		data &= ~CLOCK_GATING_DIS;
6352	else
6353		data |= CLOCK_GATING_DIS;
6354
6355	if (orig != data)
6356		WREG32(HDP_HOST_PATH_CNTL, data);
6357}
6358
6359static void cik_enable_hdp_ls(struct radeon_device *rdev,
6360			      bool enable)
6361{
6362	u32 orig, data;
6363
6364	orig = data = RREG32(HDP_MEM_POWER_LS);
6365
6366	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6367		data |= HDP_LS_ENABLE;
6368	else
6369		data &= ~HDP_LS_ENABLE;
6370
6371	if (orig != data)
6372		WREG32(HDP_MEM_POWER_LS, data);
6373}
6374
6375void cik_update_cg(struct radeon_device *rdev,
6376		   u32 block, bool enable)
6377{
6379	if (block & RADEON_CG_BLOCK_GFX) {
6380		cik_enable_gui_idle_interrupt(rdev, false);
6381		/* order matters! */
6382		if (enable) {
6383			cik_enable_mgcg(rdev, true);
6384			cik_enable_cgcg(rdev, true);
6385		} else {
6386			cik_enable_cgcg(rdev, false);
6387			cik_enable_mgcg(rdev, false);
6388		}
6389		cik_enable_gui_idle_interrupt(rdev, true);
6390	}
6391
6392	if (block & RADEON_CG_BLOCK_MC) {
6393		if (!(rdev->flags & RADEON_IS_IGP)) {
6394			cik_enable_mc_mgcg(rdev, enable);
6395			cik_enable_mc_ls(rdev, enable);
6396		}
6397	}
6398
6399	if (block & RADEON_CG_BLOCK_SDMA) {
6400		cik_enable_sdma_mgcg(rdev, enable);
6401		cik_enable_sdma_mgls(rdev, enable);
6402	}
6403
6404	if (block & RADEON_CG_BLOCK_BIF) {
6405		cik_enable_bif_mgls(rdev, enable);
6406	}
6407
6408	if (block & RADEON_CG_BLOCK_UVD) {
6409		if (rdev->has_uvd)
6410			cik_enable_uvd_mgcg(rdev, enable);
6411	}
6412
6413	if (block & RADEON_CG_BLOCK_HDP) {
6414		cik_enable_hdp_mgcg(rdev, enable);
6415		cik_enable_hdp_ls(rdev, enable);
6416	}
6417
6418	if (block & RADEON_CG_BLOCK_VCE) {
6419		vce_v2_0_enable_mgcg(rdev, enable);
6420	}
6421}
6422
6423static void cik_init_cg(struct radeon_device *rdev)
6424{
6426	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6427
6428	if (rdev->has_uvd)
6429		si_init_uvd_internal_cg(rdev);
6430
6431	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6432			     RADEON_CG_BLOCK_SDMA |
6433			     RADEON_CG_BLOCK_BIF |
6434			     RADEON_CG_BLOCK_UVD |
6435			     RADEON_CG_BLOCK_HDP), true);
6436}
6437
6438static void cik_fini_cg(struct radeon_device *rdev)
6439{
6440	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6441			     RADEON_CG_BLOCK_SDMA |
6442			     RADEON_CG_BLOCK_BIF |
6443			     RADEON_CG_BLOCK_UVD |
6444			     RADEON_CG_BLOCK_HDP), false);
6445
6446	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6447}
6448
6449static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6450					  bool enable)
6451{
6452	u32 data, orig;
6453
6454	orig = data = RREG32(RLC_PG_CNTL);
6455	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6456		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6457	else
6458		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6459	if (orig != data)
6460		WREG32(RLC_PG_CNTL, data);
6461}
6462
6463static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6464					  bool enable)
6465{
6466	u32 data, orig;
6467
6468	orig = data = RREG32(RLC_PG_CNTL);
6469	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6470		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6471	else
6472		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6473	if (orig != data)
6474		WREG32(RLC_PG_CNTL, data);
6475}
6476
6477static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6478{
6479	u32 data, orig;
6480
6481	orig = data = RREG32(RLC_PG_CNTL);
6482	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6483		data &= ~DISABLE_CP_PG;
6484	else
6485		data |= DISABLE_CP_PG;
6486	if (orig != data)
6487		WREG32(RLC_PG_CNTL, data);
6488}
6489
6490static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6491{
6492	u32 data, orig;
6493
6494	orig = data = RREG32(RLC_PG_CNTL);
6495	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6496		data &= ~DISABLE_GDS_PG;
6497	else
6498		data |= DISABLE_GDS_PG;
6499	if (orig != data)
6500		WREG32(RLC_PG_CNTL, data);
6501}
6502
6503#define CP_ME_TABLE_SIZE    96
6504#define CP_ME_TABLE_OFFSET  2048
6505#define CP_MEC_TABLE_OFFSET 4096
6506
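/**
 * cik_init_cp_pg_table - populate the CP power gating jump table
 *
 * @rdev: radeon_device pointer
 *
 * Copy the jump tables from the CE, PFP, ME and MEC microcode
 * images into the CP table buffer used by the RLC when power
 * gating the CP (CIK).
 */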
6507void cik_init_cp_pg_table(struct radeon_device *rdev)
6508{
6509	volatile u32 *dst_ptr;
6510	int me, i, max_me = 4;
6511	u32 bo_offset = 0;
6512	u32 table_offset, table_size;
6513
6514	if (rdev->family == CHIP_KAVERI)
6515		max_me = 5;
6516
6517	if (rdev->rlc.cp_table_ptr == NULL)
6518		return;
6519
6520	/* write the cp table buffer */
6521	dst_ptr = rdev->rlc.cp_table_ptr;
6522	for (me = 0; me < max_me; me++) {
6523		if (rdev->new_fw) {
6524			const __le32 *fw_data;
6525			const struct gfx_firmware_header_v1_0 *hdr;
6526
6527			if (me == 0) {
6528				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6529				fw_data = (const __le32 *)
6530					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6531				table_offset = le32_to_cpu(hdr->jt_offset);
6532				table_size = le32_to_cpu(hdr->jt_size);
6533			} else if (me == 1) {
6534				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6535				fw_data = (const __le32 *)
6536					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6537				table_offset = le32_to_cpu(hdr->jt_offset);
6538				table_size = le32_to_cpu(hdr->jt_size);
6539			} else if (me == 2) {
6540				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6541				fw_data = (const __le32 *)
6542					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6543				table_offset = le32_to_cpu(hdr->jt_offset);
6544				table_size = le32_to_cpu(hdr->jt_size);
6545			} else if (me == 3) {
6546				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6547				fw_data = (const __le32 *)
6548					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6549				table_offset = le32_to_cpu(hdr->jt_offset);
6550				table_size = le32_to_cpu(hdr->jt_size);
6551			} else {
6552				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6553				fw_data = (const __le32 *)
6554					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6555				table_offset = le32_to_cpu(hdr->jt_offset);
6556				table_size = le32_to_cpu(hdr->jt_size);
6557			}
6558
6559			for (i = 0; i < table_size; i++) {
6560				dst_ptr[bo_offset + i] =
6561					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6562			}
6563			bo_offset += table_size;
6564		} else {
6565			const __be32 *fw_data;
6566			table_size = CP_ME_TABLE_SIZE;
6567
6568			if (me == 0) {
6569				fw_data = (const __be32 *)rdev->ce_fw->data;
6570				table_offset = CP_ME_TABLE_OFFSET;
6571			} else if (me == 1) {
6572				fw_data = (const __be32 *)rdev->pfp_fw->data;
6573				table_offset = CP_ME_TABLE_OFFSET;
6574			} else if (me == 2) {
6575				fw_data = (const __be32 *)rdev->me_fw->data;
6576				table_offset = CP_ME_TABLE_OFFSET;
6577			} else {
6578				fw_data = (const __be32 *)rdev->mec_fw->data;
6579				table_offset = CP_MEC_TABLE_OFFSET;
6580			}
6581
6582			for (i = 0; i < table_size; i++) {
6583				dst_ptr[bo_offset + i] =
6584					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6585			}
6586			bo_offset += table_size;
6587		}
6588	}
6589}
6590
6591static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6592				bool enable)
6593{
6594	u32 data, orig;
6595
6596	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6597		orig = data = RREG32(RLC_PG_CNTL);
6598		data |= GFX_PG_ENABLE;
6599		if (orig != data)
6600			WREG32(RLC_PG_CNTL, data);
6601
6602		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6603		data |= AUTO_PG_EN;
6604		if (orig != data)
6605			WREG32(RLC_AUTO_PG_CTRL, data);
6606	} else {
6607		orig = data = RREG32(RLC_PG_CNTL);
6608		data &= ~GFX_PG_ENABLE;
6609		if (orig != data)
6610			WREG32(RLC_PG_CNTL, data);
6611
6612		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6613		data &= ~AUTO_PG_EN;
6614		if (orig != data)
6615			WREG32(RLC_AUTO_PG_CTRL, data);
6616
6617		data = RREG32(DB_RENDER_CONTROL);
6618	}
6619}
6620
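/**
 * cik_get_cu_active_bitmap - get the bitmap of active CUs
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to check
 * @sh: shader array to check
 *
 * Return a bitmap of the active CUs in the given SE/SH, based
 * on the CC and GC_USER shader array config registers (CIK).
 */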
6621static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6622{
6623	u32 mask = 0, tmp, tmp1;
6624	int i;
6625
6626	mutex_lock(&rdev->grbm_idx_mutex);
6627	cik_select_se_sh(rdev, se, sh);
6628	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6629	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6630	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6631	mutex_unlock(&rdev->grbm_idx_mutex);
6632
6633	tmp &= 0xffff0000;
6634
6635	tmp |= tmp1;
6636	tmp >>= 16;
6637
6638	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6639		mask <<= 1;
6640		mask |= 1;
6641	}
6642
6643	return (~tmp) & mask;
6644}
6645
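/**
 * cik_init_ao_cu_mask - program the always-on CU mask
 *
 * @rdev: radeon_device pointer
 *
 * Mark up to two CUs per SE/SH as always-on in RLC_PG_AO_CU_MASK
 * and program RLC_MAX_PG_CU with the active CU count (CIK).
 */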
6646static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6647{
6648	u32 i, j, k, active_cu_number = 0;
6649	u32 mask, counter, cu_bitmap;
6650	u32 tmp = 0;
6651
6652	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6653		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6654			mask = 1;
6655			cu_bitmap = 0;
6656			counter = 0;
6657			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6658				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6659					if (counter < 2)
6660						cu_bitmap |= mask;
6661					counter++;
6662				}
6663				mask <<= 1;
6664			}
6665
6666			active_cu_number += counter;
6667			tmp |= (cu_bitmap << (i * 16 + j * 8));
6668		}
6669	}
6670
6671	WREG32(RLC_PG_AO_CU_MASK, tmp);
6672
6673	tmp = RREG32(RLC_MAX_PG_CU);
6674	tmp &= ~MAX_PU_CU_MASK;
6675	tmp |= MAX_PU_CU(active_cu_number);
6676	WREG32(RLC_MAX_PG_CU, tmp);
6677}
6678
6679static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6680				       bool enable)
6681{
6682	u32 data, orig;
6683
6684	orig = data = RREG32(RLC_PG_CNTL);
6685	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6686		data |= STATIC_PER_CU_PG_ENABLE;
6687	else
6688		data &= ~STATIC_PER_CU_PG_ENABLE;
6689	if (orig != data)
6690		WREG32(RLC_PG_CNTL, data);
6691}
6692
6693static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6694					bool enable)
6695{
6696	u32 data, orig;
6697
6698	orig = data = RREG32(RLC_PG_CNTL);
6699	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6700		data |= DYN_PER_CU_PG_ENABLE;
6701	else
6702		data &= ~DYN_PER_CU_PG_ENABLE;
6703	if (orig != data)
6704		WREG32(RLC_PG_CNTL, data);
6705}
6706
6707#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6708#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6709
6710static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6711{
6712	u32 data, orig;
6713	u32 i;
6714
6715	if (rdev->rlc.cs_data) {
6716		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6717		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6718		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6719		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6720	} else {
6721		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6722		for (i = 0; i < 3; i++)
6723			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6724	}
6725	if (rdev->rlc.reg_list) {
6726		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6727		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6728			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6729	}
6730
6731	orig = data = RREG32(RLC_PG_CNTL);
6732	data |= GFX_PG_SRC;
6733	if (orig != data)
6734		WREG32(RLC_PG_CNTL, data);
6735
6736	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6737	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6738
6739	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6740	data &= ~IDLE_POLL_COUNT_MASK;
6741	data |= IDLE_POLL_COUNT(0x60);
6742	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6743
6744	data = 0x10101010;
6745	WREG32(RLC_PG_DELAY, data);
6746
6747	data = RREG32(RLC_PG_DELAY_2);
6748	data &= ~0xff;
6749	data |= 0x3;
6750	WREG32(RLC_PG_DELAY_2, data);
6751
6752	data = RREG32(RLC_AUTO_PG_CTRL);
6753	data &= ~GRBM_REG_SGIT_MASK;
6754	data |= GRBM_REG_SGIT(0x700);
6755	WREG32(RLC_AUTO_PG_CTRL, data);
6757}
6758
6759static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6760{
6761	cik_enable_gfx_cgpg(rdev, enable);
6762	cik_enable_gfx_static_mgpg(rdev, enable);
6763	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6764}
6765
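/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the clear state buffer in dwords,
 * covering the preamble, context control, context register,
 * raster config and clear state packets (CIK).
 */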
6766u32 cik_get_csb_size(struct radeon_device *rdev)
6767{
6768	u32 count = 0;
6769	const struct cs_section_def *sect = NULL;
6770	const struct cs_extent_def *ext = NULL;
6771
6772	if (rdev->rlc.cs_data == NULL)
6773		return 0;
6774
6775	/* begin clear state */
6776	count += 2;
6777	/* context control state */
6778	count += 3;
6779
6780	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6781		for (ext = sect->section; ext->extent != NULL; ++ext) {
6782			if (sect->id == SECT_CONTEXT)
6783				count += 2 + ext->reg_count;
6784			else
6785				return 0;
6786		}
6787	}
6788	/* pa_sc_raster_config/pa_sc_raster_config1 */
6789	count += 4;
6790	/* end clear state */
6791	count += 2;
6792	/* clear state */
6793	count += 2;
6794
6795	return count;
6796}
6797
6798void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6799{
6800	u32 count = 0, i;
6801	const struct cs_section_def *sect = NULL;
6802	const struct cs_extent_def *ext = NULL;
6803
6804	if (rdev->rlc.cs_data == NULL)
6805		return;
6806	if (buffer == NULL)
6807		return;
6808
6809	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6810	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6811
6812	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6813	buffer[count++] = cpu_to_le32(0x80000000);
6814	buffer[count++] = cpu_to_le32(0x80000000);
6815
6816	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6817		for (ext = sect->section; ext->extent != NULL; ++ext) {
6818			if (sect->id == SECT_CONTEXT) {
6819				buffer[count++] =
6820					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6821				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6822				for (i = 0; i < ext->reg_count; i++)
6823					buffer[count++] = cpu_to_le32(ext->extent[i]);
6824			} else {
6825				return;
6826			}
6827		}
6828	}
6829
6830	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6831	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6832	switch (rdev->family) {
6833	case CHIP_BONAIRE:
6834		buffer[count++] = cpu_to_le32(0x16000012);
6835		buffer[count++] = cpu_to_le32(0x00000000);
6836		break;
6837	case CHIP_KAVERI:
6838		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6839		buffer[count++] = cpu_to_le32(0x00000000);
6840		break;
6841	case CHIP_KABINI:
6842	case CHIP_MULLINS:
6843		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6844		buffer[count++] = cpu_to_le32(0x00000000);
6845		break;
6846	case CHIP_HAWAII:
6847		buffer[count++] = cpu_to_le32(0x3a00161a);
6848		buffer[count++] = cpu_to_le32(0x0000002e);
6849		break;
6850	default:
6851		buffer[count++] = cpu_to_le32(0x00000000);
6852		buffer[count++] = cpu_to_le32(0x00000000);
6853		break;
6854	}
6855
6856	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6857	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6858
6859	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6860	buffer[count++] = cpu_to_le32(0);
6861}
6862
6863static void cik_init_pg(struct radeon_device *rdev)
6864{
6865	if (rdev->pg_flags) {
6866		cik_enable_sck_slowdown_on_pu(rdev, true);
6867		cik_enable_sck_slowdown_on_pd(rdev, true);
6868		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6869			cik_init_gfx_cgpg(rdev);
6870			cik_enable_cp_pg(rdev, true);
6871			cik_enable_gds_pg(rdev, true);
6872		}
6873		cik_init_ao_cu_mask(rdev);
6874		cik_update_gfx_pg(rdev, true);
6875	}
6876}
6877
6878static void cik_fini_pg(struct radeon_device *rdev)
6879{
6880	if (rdev->pg_flags) {
6881		cik_update_gfx_pg(rdev, false);
6882		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6883			cik_enable_cp_pg(rdev, false);
6884			cik_enable_gds_pg(rdev, false);
6885		}
6886	}
6887}
6888
6889/*
6890 * Interrupts
6891 * Starting with r6xx, interrupts are handled via a ring buffer.
6892 * Ring buffers are areas of GPU accessible memory that the GPU
6893 * writes interrupt vectors into and the host reads vectors out of.
6894 * There is a rptr (read pointer) that determines where the
6895 * host is currently reading, and a wptr (write pointer)
6896 * which determines where the GPU has written.  When the
6897 * pointers are equal, the ring is idle.  When the GPU
6898 * writes vectors to the ring buffer, it increments the
6899 * wptr.  When there is an interrupt, the host then starts
6900 * fetching commands and processing them until the pointers are
6901 * equal again at which point it updates the rptr.
6902 */
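
/*
 * A minimal sketch of the host side described above (illustrative
 * only, not driver code; process_vector() is a hypothetical stand-in
 * for the per-vector handling done in cik_irq_process()):
 *
 *	u32 rptr = ih->rptr;
 *
 *	while (rptr != wptr) {
 *		process_vector(&ih->ring[rptr / 4]);
 *		rptr = (rptr + 16) & ih->ptr_mask;  (128-bit vectors)
 *	}
 *	ih->rptr = rptr;
 *	WREG32(IH_RB_RPTR, rptr);  (tell the GPU how far the host got)
 */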
6903
6904/**
6905 * cik_enable_interrupts - Enable the interrupt ring buffer
6906 *
6907 * @rdev: radeon_device pointer
6908 *
6909 * Enable the interrupt ring buffer (CIK).
6910 */
6911static void cik_enable_interrupts(struct radeon_device *rdev)
6912{
6913	u32 ih_cntl = RREG32(IH_CNTL);
6914	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6915
6916	ih_cntl |= ENABLE_INTR;
6917	ih_rb_cntl |= IH_RB_ENABLE;
6918	WREG32(IH_CNTL, ih_cntl);
6919	WREG32(IH_RB_CNTL, ih_rb_cntl);
6920	rdev->ih.enabled = true;
6921}
6922
6923/**
6924 * cik_disable_interrupts - Disable the interrupt ring buffer
6925 *
6926 * @rdev: radeon_device pointer
6927 *
6928 * Disable the interrupt ring buffer (CIK).
6929 */
6930static void cik_disable_interrupts(struct radeon_device *rdev)
6931{
6932	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6933	u32 ih_cntl = RREG32(IH_CNTL);
6934
6935	ih_rb_cntl &= ~IH_RB_ENABLE;
6936	ih_cntl &= ~ENABLE_INTR;
6937	WREG32(IH_RB_CNTL, ih_rb_cntl);
6938	WREG32(IH_CNTL, ih_cntl);
6939	/* set rptr, wptr to 0 */
6940	WREG32(IH_RB_RPTR, 0);
6941	WREG32(IH_RB_WPTR, 0);
6942	rdev->ih.enabled = false;
6943	rdev->ih.rptr = 0;
6944}
6945
6946/**
6947 * cik_disable_interrupt_state - Disable all interrupt sources
6948 *
6949 * @rdev: radeon_device pointer
6950 *
6951 * Clear all interrupt enable bits used by the driver (CIK).
6952 */
6953static void cik_disable_interrupt_state(struct radeon_device *rdev)
6954{
6955	u32 tmp;
6956
6957	/* gfx ring */
6958	tmp = RREG32(CP_INT_CNTL_RING0) &
6959		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6960	WREG32(CP_INT_CNTL_RING0, tmp);
6961	/* sdma */
6962	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6963	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6964	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6965	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6966	/* compute queues */
6967	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6968	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6969	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6970	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6971	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6972	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6973	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6974	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6975	/* grbm */
6976	WREG32(GRBM_INT_CNTL, 0);
6977	/* SRBM */
6978	WREG32(SRBM_INT_CNTL, 0);
6979	/* vline/vblank, etc. */
6980	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6981	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6982	if (rdev->num_crtc >= 4) {
6983		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6984		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6985	}
6986	if (rdev->num_crtc >= 6) {
6987		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6988		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6989	}
6990	/* pflip */
6991	if (rdev->num_crtc >= 2) {
6992		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6993		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6994	}
6995	if (rdev->num_crtc >= 4) {
6996		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6997		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6998	}
6999	if (rdev->num_crtc >= 6) {
7000		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7001		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7002	}
7003
7004	/* dac hotplug */
7005	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7006
7007	/* digital hotplug */
7008	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7009	WREG32(DC_HPD1_INT_CONTROL, tmp);
7010	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7011	WREG32(DC_HPD2_INT_CONTROL, tmp);
7012	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7013	WREG32(DC_HPD3_INT_CONTROL, tmp);
7014	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7015	WREG32(DC_HPD4_INT_CONTROL, tmp);
7016	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7017	WREG32(DC_HPD5_INT_CONTROL, tmp);
7018	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7019	WREG32(DC_HPD6_INT_CONTROL, tmp);
7020
7021}
7022
7023/**
7024 * cik_irq_init - init and enable the interrupt ring
7025 *
7026 * @rdev: radeon_device pointer
7027 *
7028 * Allocate a ring buffer for the interrupt controller,
7029 * enable the RLC, disable interrupts, enable the IH
7030 * ring buffer and enable it (CIK).
7031 * Called at device load and resume.
7032 * Returns 0 for success, errors for failure.
7033 */
7034static int cik_irq_init(struct radeon_device *rdev)
7035{
7036	int ret = 0;
7037	int rb_bufsz;
7038	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7039
7040	/* allocate ring */
7041	ret = r600_ih_ring_alloc(rdev);
7042	if (ret)
7043		return ret;
7044
7045	/* disable irqs */
7046	cik_disable_interrupts(rdev);
7047
7048	/* init rlc */
7049	ret = cik_rlc_resume(rdev);
7050	if (ret) {
7051		r600_ih_ring_fini(rdev);
7052		return ret;
7053	}
7054
7055	/* setup interrupt control */
7056	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7057	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7058	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7059	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7060	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7061	 */
7062	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7063	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7064	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7065	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7066
7067	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7068	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7069
7070	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7071		      IH_WPTR_OVERFLOW_CLEAR |
7072		      (rb_bufsz << 1));
7073
7074	if (rdev->wb.enabled)
7075		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7076
7077	/* set the writeback address whether it's enabled or not */
7078	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7079	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7080
7081	WREG32(IH_RB_CNTL, ih_rb_cntl);
7082
7083	/* set rptr, wptr to 0 */
7084	WREG32(IH_RB_RPTR, 0);
7085	WREG32(IH_RB_WPTR, 0);
7086
7087	/* Default settings for IH_CNTL (disabled at first) */
7088	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7089	/* RPTR_REARM only works if msi's are enabled */
7090	if (rdev->msi_enabled)
7091		ih_cntl |= RPTR_REARM;
7092	WREG32(IH_CNTL, ih_cntl);
7093
7094	/* force the active interrupt state to all disabled */
7095	cik_disable_interrupt_state(rdev);
7096
7097	pci_set_master(rdev->pdev);
7098
7099	/* enable irqs */
7100	cik_enable_interrupts(rdev);
7101
7102	return ret;
7103}
7104
7105/**
7106 * cik_irq_set - enable/disable interrupt sources
7107 *
7108 * @rdev: radeon_device pointer
7109 *
7110 * Enable interrupt sources on the GPU (vblanks, hpd,
7111 * etc.) (CIK).
7112 * Returns 0 for success, errors for failure.
7113 */
7114int cik_irq_set(struct radeon_device *rdev)
7115{
7116	u32 cp_int_cntl;
7117	u32 cp_m1p0;
7118	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7119	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7120	u32 grbm_int_cntl = 0;
7121	u32 dma_cntl, dma_cntl1;
7122
7123	if (!rdev->irq.installed) {
7124		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7125		return -EINVAL;
7126	}
7127	/* don't enable anything if the ih is disabled */
7128	if (!rdev->ih.enabled) {
7129		cik_disable_interrupts(rdev);
7130		/* force the active interrupt state to all disabled */
7131		cik_disable_interrupt_state(rdev);
7132		return 0;
7133	}
7134
7135	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7136		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7137	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7138
7139	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7140	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7141	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7142	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7143	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7144	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7145
7146	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7147	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7148
7149	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7150
7151	/* enable CP interrupts on all rings */
7152	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7153		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7154		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7155	}
7156	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7157		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7158		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7159		if (ring->me == 1) {
7160			switch (ring->pipe) {
7161			case 0:
7162				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7163				break;
7164			default:
7165				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7166				break;
7167			}
7168		} else {
7169			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7170		}
7171	}
7172	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7173		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7174		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7175		if (ring->me == 1) {
7176			switch (ring->pipe) {
7177			case 0:
7178				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7179				break;
7180			default:
7181				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7182				break;
7183			}
7184		} else {
7185			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7186		}
7187	}
7188
7189	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7190		DRM_DEBUG("cik_irq_set: sw int dma\n");
7191		dma_cntl |= TRAP_ENABLE;
7192	}
7193
7194	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7195		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7196		dma_cntl1 |= TRAP_ENABLE;
7197	}
7198
7199	if (rdev->irq.crtc_vblank_int[0] ||
7200	    atomic_read(&rdev->irq.pflip[0])) {
7201		DRM_DEBUG("cik_irq_set: vblank 0\n");
7202		crtc1 |= VBLANK_INTERRUPT_MASK;
7203	}
7204	if (rdev->irq.crtc_vblank_int[1] ||
7205	    atomic_read(&rdev->irq.pflip[1])) {
7206		DRM_DEBUG("cik_irq_set: vblank 1\n");
7207		crtc2 |= VBLANK_INTERRUPT_MASK;
7208	}
7209	if (rdev->irq.crtc_vblank_int[2] ||
7210	    atomic_read(&rdev->irq.pflip[2])) {
7211		DRM_DEBUG("cik_irq_set: vblank 2\n");
7212		crtc3 |= VBLANK_INTERRUPT_MASK;
7213	}
7214	if (rdev->irq.crtc_vblank_int[3] ||
7215	    atomic_read(&rdev->irq.pflip[3])) {
7216		DRM_DEBUG("cik_irq_set: vblank 3\n");
7217		crtc4 |= VBLANK_INTERRUPT_MASK;
7218	}
7219	if (rdev->irq.crtc_vblank_int[4] ||
7220	    atomic_read(&rdev->irq.pflip[4])) {
7221		DRM_DEBUG("cik_irq_set: vblank 4\n");
7222		crtc5 |= VBLANK_INTERRUPT_MASK;
7223	}
7224	if (rdev->irq.crtc_vblank_int[5] ||
7225	    atomic_read(&rdev->irq.pflip[5])) {
7226		DRM_DEBUG("cik_irq_set: vblank 5\n");
7227		crtc6 |= VBLANK_INTERRUPT_MASK;
7228	}
7229	if (rdev->irq.hpd[0]) {
7230		DRM_DEBUG("cik_irq_set: hpd 1\n");
7231		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7232	}
7233	if (rdev->irq.hpd[1]) {
7234		DRM_DEBUG("cik_irq_set: hpd 2\n");
7235		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7236	}
7237	if (rdev->irq.hpd[2]) {
7238		DRM_DEBUG("cik_irq_set: hpd 3\n");
7239		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7240	}
7241	if (rdev->irq.hpd[3]) {
7242		DRM_DEBUG("cik_irq_set: hpd 4\n");
7243		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7244	}
7245	if (rdev->irq.hpd[4]) {
7246		DRM_DEBUG("cik_irq_set: hpd 5\n");
7247		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7248	}
7249	if (rdev->irq.hpd[5]) {
7250		DRM_DEBUG("cik_irq_set: hpd 6\n");
7251		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7252	}
7253
7254	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7255
7256	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7257	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7258
7259	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7260
7261	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7262
7263	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7264	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7265	if (rdev->num_crtc >= 4) {
7266		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7267		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7268	}
7269	if (rdev->num_crtc >= 6) {
7270		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7271		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7272	}
7273
7274	if (rdev->num_crtc >= 2) {
7275		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7276		       GRPH_PFLIP_INT_MASK);
7277		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7278		       GRPH_PFLIP_INT_MASK);
7279	}
7280	if (rdev->num_crtc >= 4) {
7281		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7282		       GRPH_PFLIP_INT_MASK);
7283		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7284		       GRPH_PFLIP_INT_MASK);
7285	}
7286	if (rdev->num_crtc >= 6) {
7287		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7288		       GRPH_PFLIP_INT_MASK);
7289		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7290		       GRPH_PFLIP_INT_MASK);
7291	}
7292
7293	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7294	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7295	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7296	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7297	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7298	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7299
7300	/* posting read */
7301	RREG32(SRBM_STATUS);
7302
7303	return 0;
7304}
7305
7306/**
7307 * cik_irq_ack - ack interrupt sources
7308 *
7309 * @rdev: radeon_device pointer
7310 *
7311 * Ack interrupt sources on the GPU (vblanks, hpd,
7312 * etc.) (CIK).  Certain interrupt sources are sw
7313 * generated and do not require an explicit ack.
7314 */
7315static inline void cik_irq_ack(struct radeon_device *rdev)
7316{
7317	u32 tmp;
7318
7319	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7320	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7321	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7322	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7323	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7324	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7325	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7326
7327	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7328		EVERGREEN_CRTC0_REGISTER_OFFSET);
7329	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7330		EVERGREEN_CRTC1_REGISTER_OFFSET);
7331	if (rdev->num_crtc >= 4) {
7332		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7333			EVERGREEN_CRTC2_REGISTER_OFFSET);
7334		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7335			EVERGREEN_CRTC3_REGISTER_OFFSET);
7336	}
7337	if (rdev->num_crtc >= 6) {
7338		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7339			EVERGREEN_CRTC4_REGISTER_OFFSET);
7340		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7341			EVERGREEN_CRTC5_REGISTER_OFFSET);
7342	}
7343
7344	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7345		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7346		       GRPH_PFLIP_INT_CLEAR);
7347	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7348		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7349		       GRPH_PFLIP_INT_CLEAR);
7350	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7351		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7352	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7353		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7354	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7355		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7356	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7357		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7358
7359	if (rdev->num_crtc >= 4) {
7360		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7361			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7362			       GRPH_PFLIP_INT_CLEAR);
7363		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7364			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7365			       GRPH_PFLIP_INT_CLEAR);
7366		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7367			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7368		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7369			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7370		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7371			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7372		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7373			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7374	}
7375
7376	if (rdev->num_crtc >= 6) {
7377		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7378			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7379			       GRPH_PFLIP_INT_CLEAR);
7380		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7381			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7382			       GRPH_PFLIP_INT_CLEAR);
7383		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7384			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7385		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7386			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7387		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7388			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7389		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7390			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7391	}
7392
7393	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7394		tmp = RREG32(DC_HPD1_INT_CONTROL);
7395		tmp |= DC_HPDx_INT_ACK;
7396		WREG32(DC_HPD1_INT_CONTROL, tmp);
7397	}
7398	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7399		tmp = RREG32(DC_HPD2_INT_CONTROL);
7400		tmp |= DC_HPDx_INT_ACK;
7401		WREG32(DC_HPD2_INT_CONTROL, tmp);
7402	}
7403	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7404		tmp = RREG32(DC_HPD3_INT_CONTROL);
7405		tmp |= DC_HPDx_INT_ACK;
7406		WREG32(DC_HPD3_INT_CONTROL, tmp);
7407	}
7408	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7409		tmp = RREG32(DC_HPD4_INT_CONTROL);
7410		tmp |= DC_HPDx_INT_ACK;
7411		WREG32(DC_HPD4_INT_CONTROL, tmp);
7412	}
7413	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7414		tmp = RREG32(DC_HPD5_INT_CONTROL);
7415		tmp |= DC_HPDx_INT_ACK;
7416		WREG32(DC_HPD5_INT_CONTROL, tmp);
7417	}
7418	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7419		tmp = RREG32(DC_HPD6_INT_CONTROL);
7420		tmp |= DC_HPDx_INT_ACK;
7421		WREG32(DC_HPD6_INT_CONTROL, tmp);
7422	}
7423	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7424		tmp = RREG32(DC_HPD1_INT_CONTROL);
7425		tmp |= DC_HPDx_RX_INT_ACK;
7426		WREG32(DC_HPD1_INT_CONTROL, tmp);
7427	}
7428	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7429		tmp = RREG32(DC_HPD2_INT_CONTROL);
7430		tmp |= DC_HPDx_RX_INT_ACK;
7431		WREG32(DC_HPD2_INT_CONTROL, tmp);
7432	}
7433	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7434		tmp = RREG32(DC_HPD3_INT_CONTROL);
7435		tmp |= DC_HPDx_RX_INT_ACK;
7436		WREG32(DC_HPD3_INT_CONTROL, tmp);
7437	}
7438	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7439		tmp = RREG32(DC_HPD4_INT_CONTROL);
7440		tmp |= DC_HPDx_RX_INT_ACK;
7441		WREG32(DC_HPD4_INT_CONTROL, tmp);
7442	}
7443	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7444		tmp = RREG32(DC_HPD5_INT_CONTROL);
7445		tmp |= DC_HPDx_RX_INT_ACK;
7446		WREG32(DC_HPD5_INT_CONTROL, tmp);
7447	}
7448	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7449		tmp = RREG32(DC_HPD6_INT_CONTROL);
7450		tmp |= DC_HPDx_RX_INT_ACK;
7451		WREG32(DC_HPD6_INT_CONTROL, tmp);
7452	}
7453}
7454
7455/**
7456 * cik_irq_disable - disable interrupts
7457 *
7458 * @rdev: radeon_device pointer
7459 *
7460 * Disable interrupts on the hw (CIK).
7461 */
7462static void cik_irq_disable(struct radeon_device *rdev)
7463{
7464	cik_disable_interrupts(rdev);
7465	/* Wait and acknowledge irq */
7466	mdelay(1);
7467	cik_irq_ack(rdev);
7468	cik_disable_interrupt_state(rdev);
7469}
7470
7471/**
7472 * cik_irq_suspend - disable interrupts for suspend
7473 *
7474 * @rdev: radeon_device pointer
7475 *
7476 * Disable interrupts and stop the RLC (CIK).
7477 * Used for suspend.
7478 */
7479static void cik_irq_suspend(struct radeon_device *rdev)
7480{
7481	cik_irq_disable(rdev);
7482	cik_rlc_stop(rdev);
7483}
7484
7485/**
7486 * cik_irq_fini - tear down interrupt support
7487 *
7488 * @rdev: radeon_device pointer
7489 *
7490 * Disable interrupts on the hw and free the IH ring
7491 * buffer (CIK).
7492 * Used for driver unload.
7493 */
7494static void cik_irq_fini(struct radeon_device *rdev)
7495{
7496	cik_irq_suspend(rdev);
7497	r600_ih_ring_fini(rdev);
7498}
7499
7500/**
7501 * cik_get_ih_wptr - get the IH ring buffer wptr
7502 *
7503 * @rdev: radeon_device pointer
7504 *
7505 * Get the IH ring buffer wptr from either the register
7506 * or the writeback memory buffer (CIK).  Also check for
7507 * ring buffer overflow and deal with it.
7508 * Used by cik_irq_process().
7509 * Returns the value of the wptr.
7510 */
7511static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7512{
7513	u32 wptr, tmp;
7514
7515	if (rdev->wb.enabled)
7516		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7517	else
7518		wptr = RREG32(IH_RB_WPTR);
7519
7520	if (wptr & RB_OVERFLOW) {
7521		wptr &= ~RB_OVERFLOW;
7522		/* When a ring buffer overflow happens, start parsing interrupts
7523		 * from the last vector that has not been overwritten (wptr + 16).
7524		 * Hopefully this allows us to catch up.
7525		 */
7526		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7527			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7528		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7529		tmp = RREG32(IH_RB_CNTL);
7530		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7531		WREG32(IH_RB_CNTL, tmp);
7532	}
7533	return (wptr & rdev->ih.ptr_mask);
7534}
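
/*
 * Worked example of the overflow recovery above (illustrative only,
 * assuming the default 64KB IH ring, i.e. ptr_mask = 0xffff): if the
 * GPU reports wptr = 0xfff0 with RB_OVERFLOW set, the write pointer
 * has lapped the host, and processing resumes at
 * (wptr + 16) & ptr_mask = 0x0000, the oldest vector that has not
 * been overwritten yet.
 */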
7535
7536/* CIK IV Ring
7537 * Each IV ring entry is 128 bits:
7538 * [7:0]    - interrupt source id
7539 * [31:8]   - reserved
7540 * [59:32]  - interrupt source data
7541 * [63:60]  - reserved
7542 * [71:64]  - RINGID
7543 *            CP:
7544 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7545 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7546 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7547 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7548 *            PIPE_ID - ME0 0=3D
7549 *                    - ME1&2 compute dispatcher (4 pipes each)
7550 *            SDMA:
7551 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7552 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7553 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7554 * [79:72]  - VMID
7555 * [95:80]  - PASID
7556 * [127:96] - reserved
7557 */
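/*
 * Decoding sketch for one such 128-bit entry (illustrative only;
 * src_id/src_data/ring_id extraction mirrors cik_irq_process() below,
 * vm_id/pasid follow from the layout above):
 *
 *	src_id   = le32_to_cpu(ring[0]) & 0xff;
 *	src_data = le32_to_cpu(ring[1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(ring[2]) & 0xff;
 *	vm_id    = (le32_to_cpu(ring[2]) >> 8) & 0xff;
 *	pasid    = (le32_to_cpu(ring[2]) >> 16) & 0xffff;
 */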
7558/**
7559 * cik_irq_process - interrupt handler
7560 *
7561 * @rdev: radeon_device pointer
7562 *
7563 * Interrupt handler (CIK).  Walk the IH ring,
7564 * ack interrupts and schedule work to handle
7565 * interrupt events.
7566 * Returns irq process return code.
7567 */
7568int cik_irq_process(struct radeon_device *rdev)
7569{
7570	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7571	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7572	u32 wptr;
7573	u32 rptr;
7574	u32 src_id, src_data, ring_id;
7575	u8 me_id, pipe_id, queue_id;
7576	u32 ring_index;
7577	bool queue_hotplug = false;
7578	bool queue_dp = false;
7579	bool queue_reset = false;
7580	u32 addr, status, mc_client;
7581	bool queue_thermal = false;
7582
7583	if (!rdev->ih.enabled || rdev->shutdown)
7584		return IRQ_NONE;
7585
7586	wptr = cik_get_ih_wptr(rdev);
7587
7588restart_ih:
7589	/* is somebody else already processing irqs? */
7590	if (atomic_xchg(&rdev->ih.lock, 1))
7591		return IRQ_NONE;
7592
7593	rptr = rdev->ih.rptr;
7594	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7595
7596	/* Order reading of wptr vs. reading of IH ring data */
7597	rmb();
7598
7599	/* display interrupts */
7600	cik_irq_ack(rdev);
7601
7602	while (rptr != wptr) {
7603		/* wptr/rptr are in bytes! */
7604		ring_index = rptr / 4;
7605
7606		radeon_kfd_interrupt(rdev,
7607				(const void *) &rdev->ih.ring[ring_index]);
7608
7609		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7610		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7611		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7612
7613		switch (src_id) {
7614		case 1: /* D1 vblank/vline */
7615			switch (src_data) {
7616			case 0: /* D1 vblank */
7617				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7618					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7619
7620				if (rdev->irq.crtc_vblank_int[0]) {
7621					drm_handle_vblank(rdev->ddev, 0);
7622					rdev->pm.vblank_sync = true;
7623					wake_up(&rdev->irq.vblank_queue);
7624				}
7625				if (atomic_read(&rdev->irq.pflip[0]))
7626					radeon_crtc_handle_vblank(rdev, 0);
7627				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7628				DRM_DEBUG("IH: D1 vblank\n");
7629
7630				break;
7631			case 1: /* D1 vline */
7632				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7633					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7634
7635				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7636				DRM_DEBUG("IH: D1 vline\n");
7637
7638				break;
7639			default:
7640				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7641				break;
7642			}
7643			break;
7644		case 2: /* D2 vblank/vline */
7645			switch (src_data) {
7646			case 0: /* D2 vblank */
7647				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7648					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7649
7650				if (rdev->irq.crtc_vblank_int[1]) {
7651					drm_handle_vblank(rdev->ddev, 1);
7652					rdev->pm.vblank_sync = true;
7653					wake_up(&rdev->irq.vblank_queue);
7654				}
7655				if (atomic_read(&rdev->irq.pflip[1]))
7656					radeon_crtc_handle_vblank(rdev, 1);
7657				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7658				DRM_DEBUG("IH: D2 vblank\n");
7659
7660				break;
7661			case 1: /* D2 vline */
7662				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7663					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7664
7665				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7666				DRM_DEBUG("IH: D2 vline\n");
7667
7668				break;
7669			default:
7670				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7671				break;
7672			}
7673			break;
7674		case 3: /* D3 vblank/vline */
7675			switch (src_data) {
7676			case 0: /* D3 vblank */
7677				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7678					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7679
7680				if (rdev->irq.crtc_vblank_int[2]) {
7681					drm_handle_vblank(rdev->ddev, 2);
7682					rdev->pm.vblank_sync = true;
7683					wake_up(&rdev->irq.vblank_queue);
7684				}
7685				if (atomic_read(&rdev->irq.pflip[2]))
7686					radeon_crtc_handle_vblank(rdev, 2);
7687				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7688				DRM_DEBUG("IH: D3 vblank\n");
7689
7690				break;
7691			case 1: /* D3 vline */
7692				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7693					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7694
7695				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7696				DRM_DEBUG("IH: D3 vline\n");
7697
7698				break;
7699			default:
7700				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7701				break;
7702			}
7703			break;
7704		case 4: /* D4 vblank/vline */
7705			switch (src_data) {
7706			case 0: /* D4 vblank */
7707				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7708					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7709
7710				if (rdev->irq.crtc_vblank_int[3]) {
7711					drm_handle_vblank(rdev->ddev, 3);
7712					rdev->pm.vblank_sync = true;
7713					wake_up(&rdev->irq.vblank_queue);
7714				}
7715				if (atomic_read(&rdev->irq.pflip[3]))
7716					radeon_crtc_handle_vblank(rdev, 3);
7717				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7718				DRM_DEBUG("IH: D4 vblank\n");
7719
7720				break;
7721			case 1: /* D4 vline */
7722				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7723					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724
7725				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7726				DRM_DEBUG("IH: D4 vline\n");
7727
7728				break;
7729			default:
7730				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7731				break;
7732			}
7733			break;
7734		case 5: /* D5 vblank/vline */
7735			switch (src_data) {
7736			case 0: /* D5 vblank */
7737				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7738					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7739
7740				if (rdev->irq.crtc_vblank_int[4]) {
7741					drm_handle_vblank(rdev->ddev, 4);
7742					rdev->pm.vblank_sync = true;
7743					wake_up(&rdev->irq.vblank_queue);
7744				}
7745				if (atomic_read(&rdev->irq.pflip[4]))
7746					radeon_crtc_handle_vblank(rdev, 4);
7747				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7748				DRM_DEBUG("IH: D5 vblank\n");
7749
7750				break;
7751			case 1: /* D5 vline */
7752				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7753					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7754
7755				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7756				DRM_DEBUG("IH: D5 vline\n");
7757
7758				break;
7759			default:
7760				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7761				break;
7762			}
7763			break;
7764		case 6: /* D6 vblank/vline */
7765			switch (src_data) {
7766			case 0: /* D6 vblank */
7767				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7768					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769
7770				if (rdev->irq.crtc_vblank_int[5]) {
7771					drm_handle_vblank(rdev->ddev, 5);
7772					rdev->pm.vblank_sync = true;
7773					wake_up(&rdev->irq.vblank_queue);
7774				}
7775				if (atomic_read(&rdev->irq.pflip[5]))
7776					radeon_crtc_handle_vblank(rdev, 5);
7777				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7778				DRM_DEBUG("IH: D6 vblank\n");
7779
7780				break;
7781			case 1: /* D6 vline */
7782				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7783					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7784
7785				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7786				DRM_DEBUG("IH: D6 vline\n");
7787
7788				break;
7789			default:
7790				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7791				break;
7792			}
7793			break;
7794		case 8: /* D1 page flip */
7795		case 10: /* D2 page flip */
7796		case 12: /* D3 page flip */
7797		case 14: /* D4 page flip */
7798		case 16: /* D5 page flip */
7799		case 18: /* D6 page flip */
7800			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7801			if (radeon_use_pflipirq > 0)
7802				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7803			break;
7804		case 42: /* HPD hotplug */
7805			switch (src_data) {
7806			case 0:
7807				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7808					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809
7810				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7811				queue_hotplug = true;
7812				DRM_DEBUG("IH: HPD1\n");
7813
7814				break;
7815			case 1:
7816				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7817					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818
7819				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7820				queue_hotplug = true;
7821				DRM_DEBUG("IH: HPD2\n");
7822
7823				break;
7824			case 2:
7825				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7826					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827
7828				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7829				queue_hotplug = true;
7830				DRM_DEBUG("IH: HPD3\n");
7831
7832				break;
7833			case 3:
7834				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7835					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836
7837				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7838				queue_hotplug = true;
7839				DRM_DEBUG("IH: HPD4\n");
7840
7841				break;
7842			case 4:
7843				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7844					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845
7846				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7847				queue_hotplug = true;
7848				DRM_DEBUG("IH: HPD5\n");
7849
7850				break;
7851			case 5:
7852				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7853					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854
7855				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7856				queue_hotplug = true;
7857				DRM_DEBUG("IH: HPD6\n");
7858
7859				break;
7860			case 6:
7861				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7862					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863
7864				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7865				queue_dp = true;
7866				DRM_DEBUG("IH: HPD_RX 1\n");
7867
7868				break;
7869			case 7:
7870				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7871					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872
7873				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7874				queue_dp = true;
7875				DRM_DEBUG("IH: HPD_RX 2\n");
7876
7877				break;
7878			case 8:
7879				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7880					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881
7882				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7883				queue_dp = true;
7884				DRM_DEBUG("IH: HPD_RX 3\n");
7885
7886				break;
7887			case 9:
7888				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7889					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890
7891				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7892				queue_dp = true;
7893				DRM_DEBUG("IH: HPD_RX 4\n");
7894
7895				break;
7896			case 10:
7897				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7898					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7899
7900				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7901				queue_dp = true;
7902				DRM_DEBUG("IH: HPD_RX 5\n");
7903
7904				break;
7905			case 11:
7906				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7907					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7908
7909				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7910				queue_dp = true;
7911				DRM_DEBUG("IH: HPD_RX 6\n");
7912
7913				break;
7914			default:
7915				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7916				break;
7917			}
7918			break;
7919		case 96:
7920			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7921			WREG32(SRBM_INT_ACK, 0x1);
7922			break;
7923		case 124: /* UVD */
7924			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7925			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7926			break;
7927		case 146:
7928		case 147:
7929			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7930			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7931			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7932			/* reset addr and status */
7933			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7934			if (addr == 0x0 && status == 0x0)
7935				break;
7936			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7937			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7938				addr);
7939			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7940				status);
7941			cik_vm_decode_fault(rdev, status, addr, mc_client);
7942			break;
7943		case 167: /* VCE */
7944			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7945			switch (src_data) {
7946			case 0:
7947				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7948				break;
7949			case 1:
7950				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7951				break;
7952			default:
7953				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7954				break;
7955			}
7956			break;
7957		case 176: /* GFX RB CP_INT */
7958		case 177: /* GFX IB CP_INT */
7959			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7960			break;
7961		case 181: /* CP EOP event */
7962			DRM_DEBUG("IH: CP EOP\n");
7963			/* XXX check the bitfield order! */
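			/* assumed decode, per the IV ring layout documented above:
			 * ring_id bits [6:5] = ME_ID, [4:3] = PIPE_ID, [2:0] = QUEUE_ID
			 */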
7964			me_id = (ring_id & 0x60) >> 5;
7965			pipe_id = (ring_id & 0x18) >> 3;
7966			queue_id = (ring_id & 0x7) >> 0;
7967			switch (me_id) {
7968			case 0:
7969				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7970				break;
7971			case 1:
7972			case 2:
7973				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7974					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7975				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7976					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7977				break;
7978			}
7979			break;
7980		case 184: /* CP Privileged reg access */
7981			DRM_ERROR("Illegal register access in command stream\n");
7982			/* XXX check the bitfield order! */
7983			me_id = (ring_id & 0x60) >> 5;
7984			pipe_id = (ring_id & 0x18) >> 3;
7985			queue_id = (ring_id & 0x7) >> 0;
7986			switch (me_id) {
7987			case 0:
7988				/* This triggers a full GPU reset even though a
7989				 * soft reset of the gfx CP would suffice
7990				 */
7991				queue_reset = true;
7992				break;
7993			case 1:
7994				/* XXX compute */
7995				queue_reset = true;
7996				break;
7997			case 2:
7998				/* XXX compute */
7999				queue_reset = true;
8000				break;
8001			}
8002			break;
8003		case 185: /* CP Privileged inst */
8004			DRM_ERROR("Illegal instruction in command stream\n");
8005			/* XXX check the bitfield order! */
8006			me_id = (ring_id & 0x60) >> 5;
8007			pipe_id = (ring_id & 0x18) >> 3;
8008			queue_id = (ring_id & 0x7) >> 0;
8009			switch (me_id) {
8010			case 0:
8011				/* This triggers a full GPU reset even though a
8012				 * soft reset of the gfx CP would suffice
8013				 */
8014				queue_reset = true;
8015				break;
8016			case 1:
8017				/* XXX compute */
8018				queue_reset = true;
8019				break;
8020			case 2:
8021				/* XXX compute */
8022				queue_reset = true;
8023				break;
8024			}
8025			break;
8026		case 224: /* SDMA trap event */
8027			/* XXX check the bitfield order! */
8028			me_id = (ring_id & 0x3) >> 0;
8029			queue_id = (ring_id & 0xc) >> 2;
8030			DRM_DEBUG("IH: SDMA trap\n");
8031			switch (me_id) {
8032			case 0:
8033				switch (queue_id) {
8034				case 0:
8035					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8036					break;
8037				case 1:
8038					/* XXX compute */
8039					break;
8040				case 2:
8041					/* XXX compute */
8042					break;
8043				}
8044				break;
8045			case 1:
8046				switch (queue_id) {
8047				case 0:
8048					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8049					break;
8050				case 1:
8051					/* XXX compute */
8052					break;
8053				case 2:
8054					/* XXX compute */
8055					break;
8056				}
8057				break;
8058			}
8059			break;
8060		case 230: /* thermal low to high */
8061			DRM_DEBUG("IH: thermal low to high\n");
8062			rdev->pm.dpm.thermal.high_to_low = false;
8063			queue_thermal = true;
8064			break;
8065		case 231: /* thermal high to low */
8066			DRM_DEBUG("IH: thermal high to low\n");
8067			rdev->pm.dpm.thermal.high_to_low = true;
8068			queue_thermal = true;
8069			break;
8070		case 233: /* GUI IDLE */
8071			DRM_DEBUG("IH: GUI idle\n");
8072			break;
8073		case 241: /* SDMA Privileged inst */
8074		case 247: /* SDMA Privileged inst */
8075			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8076			/* XXX check the bitfield order! */
8077			me_id = (ring_id & 0x3) >> 0;
8078			queue_id = (ring_id & 0xc) >> 2;
8079			switch (me_id) {
8080			case 0:
8081				switch (queue_id) {
8082				case 0:
8083					queue_reset = true;
8084					break;
8085				case 1:
8086					/* XXX compute */
8087					queue_reset = true;
8088					break;
8089				case 2:
8090					/* XXX compute */
8091					queue_reset = true;
8092					break;
8093				}
8094				break;
8095			case 1:
8096				switch (queue_id) {
8097				case 0:
8098					queue_reset = true;
8099					break;
8100				case 1:
8101					/* XXX compute */
8102					queue_reset = true;
8103					break;
8104				case 2:
8105					/* XXX compute */
8106					queue_reset = true;
8107					break;
8108				}
8109				break;
8110			}
8111			break;
8112		default:
8113			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8114			break;
8115		}
8116
8117		/* wptr/rptr are in bytes! */
8118		rptr += 16;
8119		rptr &= rdev->ih.ptr_mask;
8120		WREG32(IH_RB_RPTR, rptr);
8121	}
8122	if (queue_dp)
8123		schedule_work(&rdev->dp_work);
8124	if (queue_hotplug)
8125		schedule_delayed_work(&rdev->hotplug_work, 0);
8126	if (queue_reset) {
8127		rdev->needs_reset = true;
8128		wake_up_all(&rdev->fence_queue);
8129	}
8130	if (queue_thermal)
8131		schedule_work(&rdev->pm.dpm.thermal.work);
8132	rdev->ih.rptr = rptr;
8133	atomic_set(&rdev->ih.lock, 0);
8134
8135	/* make sure wptr hasn't changed while processing */
8136	wptr = cik_get_ih_wptr(rdev);
8137	if (wptr != rptr)
8138		goto restart_ih;
8139
8140	return IRQ_HANDLED;
8141}
8142
8143/*
8144 * startup/shutdown callbacks
8145 */
8146static void cik_uvd_init(struct radeon_device *rdev)
8147{
8148	int r;
8149
8150	if (!rdev->has_uvd)
8151		return;
8152
8153	r = radeon_uvd_init(rdev);
8154	if (r) {
8155		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8156		/*
8157		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8158		 * cik_uvd_start() fail early, so nothing happens there.
8159		 * Going through that code is therefore pointless, which
8160		 * is why we disable uvd here.
8161		 */
8162		rdev->has_uvd = false;
8163		return;
8164	}
8165	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8166	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8167}
8168
8169static void cik_uvd_start(struct radeon_device *rdev)
8170{
8171	int r;
8172
8173	if (!rdev->has_uvd)
8174		return;
8175
8176	r = radeon_uvd_resume(rdev);
8177	if (r) {
8178		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8179		goto error;
8180	}
8181	r = uvd_v4_2_resume(rdev);
8182	if (r) {
8183		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8184		goto error;
8185	}
8186	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8187	if (r) {
8188		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8189		goto error;
8190	}
8191	return;
8192
8193error:
8194	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8195}
8196
8197static void cik_uvd_resume(struct radeon_device *rdev)
8198{
8199	struct radeon_ring *ring;
8200	int r;
8201
8202	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8203		return;
8204
8205	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8206	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8207	if (r) {
8208		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8209		return;
8210	}
8211	r = uvd_v1_0_init(rdev);
8212	if (r) {
8213		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8214		return;
8215	}
8216}
8217
8218static void cik_vce_init(struct radeon_device *rdev)
8219{
8220	int r;
8221
8222	if (!rdev->has_vce)
8223		return;
8224
8225	r = radeon_vce_init(rdev);
8226	if (r) {
8227		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8228		/*
8229		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8230		 * cik_vce_start() fail early, so nothing happens there.
8231		 * Going through that code is therefore pointless, which
8232		 * is why we disable vce here.
8233		 */
8234		rdev->has_vce = false;
8235		return;
8236	}
8237	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8238	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8239	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8240	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8241}
8242
8243static void cik_vce_start(struct radeon_device *rdev)
8244{
8245	int r;
8246
8247	if (!rdev->has_vce)
8248		return;
8249
8250	r = radeon_vce_resume(rdev);
8251	if (r) {
8252		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8253		goto error;
8254	}
8255	r = vce_v2_0_resume(rdev);
8256	if (r) {
8257		dev_err(rdev->dev, "failed VCE 2.0 resume (%d).\n", r);
8258		goto error;
8259	}
8260	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8261	if (r) {
8262		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8263		goto error;
8264	}
8265	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8266	if (r) {
8267		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8268		goto error;
8269	}
8270	return;
8271
8272error:
8273	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8274	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8275}
8276
8277static void cik_vce_resume(struct radeon_device *rdev)
8278{
8279	struct radeon_ring *ring;
8280	int r;
8281
8282	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8283		return;
8284
8285	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8286	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8287	if (r) {
8288		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8289		return;
8290	}
8291	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8292	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8293	if (r) {
8294		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
8295		return;
8296	}
8297	r = vce_v1_0_init(rdev);
8298	if (r) {
8299		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8300		return;
8301	}
8302}
8303
8304/**
8305 * cik_startup - program the asic to a functional state
8306 *
8307 * @rdev: radeon_device pointer
8308 *
8309 * Programs the asic to a functional state (CIK).
8310 * Called by cik_init() and cik_resume().
8311 * Returns 0 for success, error for failure.
8312 */
8313static int cik_startup(struct radeon_device *rdev)
8314{
8315	struct radeon_ring *ring;
8316	u32 nop;
8317	int r;
8318
8319	/* enable pcie gen2/3 link */
8320	cik_pcie_gen3_enable(rdev);
8321	/* enable aspm */
8322	cik_program_aspm(rdev);
8323
8324	/* scratch needs to be initialized before MC */
8325	r = r600_vram_scratch_init(rdev);
8326	if (r)
8327		return r;
8328
8329	cik_mc_program(rdev);
8330
8331	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8332		r = ci_mc_load_microcode(rdev);
8333		if (r) {
8334			DRM_ERROR("Failed to load MC firmware!\n");
8335			return r;
8336		}
8337	}
8338
8339	r = cik_pcie_gart_enable(rdev);
8340	if (r)
8341		return r;
8342	cik_gpu_init(rdev);
8343
8344	/* allocate rlc buffers */
8345	if (rdev->flags & RADEON_IS_IGP) {
8346		if (rdev->family == CHIP_KAVERI) {
8347			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8348			rdev->rlc.reg_list_size =
8349				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8350		} else {
8351			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8352			rdev->rlc.reg_list_size =
8353				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8354		}
8355	}
8356	rdev->rlc.cs_data = ci_cs_data;
8357	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8358	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8359	r = sumo_rlc_init(rdev);
8360	if (r) {
8361		DRM_ERROR("Failed to init rlc BOs!\n");
8362		return r;
8363	}
8364
8365	/* allocate wb buffer */
8366	r = radeon_wb_init(rdev);
8367	if (r)
8368		return r;
8369
8370	/* allocate mec buffers */
8371	r = cik_mec_init(rdev);
8372	if (r) {
8373		DRM_ERROR("Failed to init MEC BOs!\n");
8374		return r;
8375	}
8376
8377	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8378	if (r) {
8379		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8380		return r;
8381	}
8382
8383	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8384	if (r) {
8385		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8386		return r;
8387	}
8388
8389	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8390	if (r) {
8391		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8392		return r;
8393	}
8394
8395	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8396	if (r) {
8397		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8398		return r;
8399	}
8400
8401	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8402	if (r) {
8403		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8404		return r;
8405	}
8406
8407	cik_uvd_start(rdev);
8408	cik_vce_start(rdev);
8409
8410	/* Enable IRQ */
8411	if (!rdev->irq.installed) {
8412		r = radeon_irq_kms_init(rdev);
8413		if (r)
8414			return r;
8415	}
8416
8417	r = cik_irq_init(rdev);
8418	if (r) {
8419		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8420		radeon_irq_kms_fini(rdev);
8421		return r;
8422	}
8423	cik_irq_set(rdev);
8424
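	/* Pad the rings with type-3 NOPs where possible (a count of
	 * 0x3FFF presumably makes the CP treat it as a single-dword
	 * NOP); only old Hawaii firmware still needs the legacy
	 * type-2 RADEON_CP_PACKET2.
	 */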
8425	if (rdev->family == CHIP_HAWAII) {
8426		if (rdev->new_fw)
8427			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8428		else
8429			nop = RADEON_CP_PACKET2;
8430	} else {
8431		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8432	}
8433
8434	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8435	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8436			     nop);
8437	if (r)
8438		return r;
8439
8440	/* set up the compute queues */
8441	/* type-2 packets are deprecated on MEC, use type-3 instead */
8442	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8443	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8444			     nop);
8445	if (r)
8446		return r;
8447	ring->me = 1; /* first MEC */
8448	ring->pipe = 0; /* first pipe */
8449	ring->queue = 0; /* first queue */
8450	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8451
8452	/* type-2 packets are deprecated on MEC, use type-3 instead */
8453	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8454	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8455			     nop);
8456	if (r)
8457		return r;
8458	/* dGPUs only have 1 MEC */
8459	ring->me = 1; /* first MEC */
8460	ring->pipe = 0; /* first pipe */
8461	ring->queue = 1; /* second queue */
8462	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8463
8464	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8465	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8466			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8467	if (r)
8468		return r;
8469
8470	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8471	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8472			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8473	if (r)
8474		return r;
8475
8476	r = cik_cp_resume(rdev);
8477	if (r)
8478		return r;
8479
8480	r = cik_sdma_resume(rdev);
8481	if (r)
8482		return r;
8483
8484	cik_uvd_resume(rdev);
8485	cik_vce_resume(rdev);
8486
8487	r = radeon_ib_pool_init(rdev);
8488	if (r) {
8489		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8490		return r;
8491	}
8492
8493	r = radeon_vm_manager_init(rdev);
8494	if (r) {
8495		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8496		return r;
8497	}
8498
8499	r = radeon_audio_init(rdev);
8500	if (r)
8501		return r;
8502
8503	r = radeon_kfd_resume(rdev);
8504	if (r)
8505		return r;
8506
8507	return 0;
8508}
8509
8510/**
8511 * cik_resume - resume the asic to a functional state
8512 *
8513 * @rdev: radeon_device pointer
8514 *
8515 * Programs the asic to a functional state (CIK).
8516 * Called at resume.
8517 * Returns 0 for success, error for failure.
8518 */
8519int cik_resume(struct radeon_device *rdev)
8520{
8521	int r;
8522
8523	/* post card */
8524	atom_asic_init(rdev->mode_info.atom_context);
8525
8526	/* init golden registers */
8527	cik_init_golden_registers(rdev);
8528
8529	if (rdev->pm.pm_method == PM_METHOD_DPM)
8530		radeon_pm_resume(rdev);
8531
8532	rdev->accel_working = true;
8533	r = cik_startup(rdev);
8534	if (r) {
8535		DRM_ERROR("cik startup failed on resume\n");
8536		rdev->accel_working = false;
8537		return r;
8538	}
8539
8540	return r;
8541
8542}
8543
8544/**
8545 * cik_suspend - suspend the asic
8546 *
8547 * @rdev: radeon_device pointer
8548 *
8549 * Bring the chip into a state suitable for suspend (CIK).
8550 * Called at suspend.
8551 * Returns 0 for success.
8552 */
8553int cik_suspend(struct radeon_device *rdev)
8554{
8555	radeon_kfd_suspend(rdev);
8556	radeon_pm_suspend(rdev);
8557	radeon_audio_fini(rdev);
8558	radeon_vm_manager_fini(rdev);
8559	cik_cp_enable(rdev, false);
8560	cik_sdma_enable(rdev, false);
8561	if (rdev->has_uvd) {
8562		uvd_v1_0_fini(rdev);
8563		radeon_uvd_suspend(rdev);
8564	}
8565	if (rdev->has_vce)
8566		radeon_vce_suspend(rdev);
8567	cik_fini_pg(rdev);
8568	cik_fini_cg(rdev);
8569	cik_irq_suspend(rdev);
8570	radeon_wb_disable(rdev);
8571	cik_pcie_gart_disable(rdev);
8572	return 0;
8573}
8574
8575/* The plan is to move initialization into this function and to use
8576 * helper functions so that radeon_device_init does little more than
8577 * call asic-specific functions. This should also allow us to remove
8578 * a bunch of callbacks like vram_info.
8579 */
8581/**
8582 * cik_init - asic specific driver and hw init
8583 *
8584 * @rdev: radeon_device pointer
8585 *
8586 * Setup asic specific driver variables and program the hw
8587 * to a functional state (CIK).
8588 * Called at driver startup.
8589 * Returns 0 for success, errors for failure.
8590 */
8591int cik_init(struct radeon_device *rdev)
8592{
8593	struct radeon_ring *ring;
8594	int r;
8595
8596	/* Read BIOS */
8597	if (!radeon_get_bios(rdev)) {
8598		if (ASIC_IS_AVIVO(rdev))
8599			return -EINVAL;
8600	}
8601	/* Must be an ATOMBIOS */
8602	if (!rdev->is_atom_bios) {
8603		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8604		return -EINVAL;
8605	}
8606	r = radeon_atombios_init(rdev);
8607	if (r)
8608		return r;
8609
8610	/* Post card if necessary */
8611	if (!radeon_card_posted(rdev)) {
8612		if (!rdev->bios) {
8613			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8614			return -EINVAL;
8615		}
8616		DRM_INFO("GPU not posted. posting now...\n");
8617		atom_asic_init(rdev->mode_info.atom_context);
8618	}
8619	/* init golden registers */
8620	cik_init_golden_registers(rdev);
8621	/* Initialize scratch registers */
8622	cik_scratch_init(rdev);
8623	/* Initialize surface registers */
8624	radeon_surface_init(rdev);
8625	/* Initialize clocks */
8626	radeon_get_clock_info(rdev->ddev);
8627
8628	/* Fence driver */
8629	r = radeon_fence_driver_init(rdev);
8630	if (r)
8631		return r;
8632
8633	/* initialize memory controller */
8634	r = cik_mc_init(rdev);
8635	if (r)
8636		return r;
8637	/* Memory manager */
8638	r = radeon_bo_init(rdev);
8639	if (r)
8640		return r;
8641
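	/*
	 * IGPs share the system memory controller and so get by without
	 * MC ucode; dGPUs additionally need mc_fw before their dedicated
	 * memory controller can be programmed for full-speed operation.
	 */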
8642	if (rdev->flags & RADEON_IS_IGP) {
8643		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8644		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8645			r = cik_init_microcode(rdev);
8646			if (r) {
8647				DRM_ERROR("Failed to load firmware!\n");
8648				return r;
8649			}
8650		}
8651	} else {
8652		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8653		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8654		    !rdev->mc_fw) {
8655			r = cik_init_microcode(rdev);
8656			if (r) {
8657				DRM_ERROR("Failed to load firmware!\n");
8658				return r;
8659			}
8660		}
8661	}
8662
8663	/* Initialize power management */
8664	radeon_pm_init(rdev);
8665
8666	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8667	ring->ring_obj = NULL;
8668	r600_ring_init(rdev, ring, 1024 * 1024);
8669
8670	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8671	ring->ring_obj = NULL;
8672	r600_ring_init(rdev, ring, 1024 * 1024);
8673	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8674	if (r)
8675		return r;
8676
8677	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8678	ring->ring_obj = NULL;
8679	r600_ring_init(rdev, ring, 1024 * 1024);
8680	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8681	if (r)
8682		return r;
8683
8684	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8685	ring->ring_obj = NULL;
8686	r600_ring_init(rdev, ring, 256 * 1024);
8687
8688	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8689	ring->ring_obj = NULL;
8690	r600_ring_init(rdev, ring, 256 * 1024);
8691
8692	cik_uvd_init(rdev);
8693	cik_vce_init(rdev);
8694
8695	rdev->ih.ring_obj = NULL;
8696	r600_ih_ring_init(rdev, 64 * 1024);
8697
8698	r = r600_pcie_gart_init(rdev);
8699	if (r)
8700		return r;
8701
8702	rdev->accel_working = true;
8703	r = cik_startup(rdev);
8704	if (r) {
8705		dev_err(rdev->dev, "disabling GPU acceleration\n");
8706		cik_cp_fini(rdev);
8707		cik_sdma_fini(rdev);
8708		cik_irq_fini(rdev);
8709		sumo_rlc_fini(rdev);
8710		cik_mec_fini(rdev);
8711		radeon_wb_fini(rdev);
8712		radeon_ib_pool_fini(rdev);
8713		radeon_vm_manager_fini(rdev);
8714		radeon_irq_kms_fini(rdev);
8715		cik_pcie_gart_fini(rdev);
8716		rdev->accel_working = false;
8717	}
8718
8719	/* Don't start up if the MC ucode is missing.
8720	 * The default clocks and voltages before the MC ucode
8721	 * is loaded are not sufficient for advanced operations.
8722	 */
8723	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8724		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8725		return -EINVAL;
8726	}
8727
8728	return 0;
8729}
8730
8731/**
8732 * cik_fini - asic specific driver and hw fini
8733 *
8734 * @rdev: radeon_device pointer
8735 *
8736 * Tear down the asic specific driver variables and program the hw
8737 * to an idle state (CIK).
8738 * Called at driver unload.
8739 */
8740void cik_fini(struct radeon_device *rdev)
8741{
8742	radeon_pm_fini(rdev);
8743	cik_cp_fini(rdev);
8744	cik_sdma_fini(rdev);
8745	cik_fini_pg(rdev);
8746	cik_fini_cg(rdev);
8747	cik_irq_fini(rdev);
8748	sumo_rlc_fini(rdev);
8749	cik_mec_fini(rdev);
8750	radeon_wb_fini(rdev);
8751	radeon_vm_manager_fini(rdev);
8752	radeon_ib_pool_fini(rdev);
8753	radeon_irq_kms_fini(rdev);
8754	uvd_v1_0_fini(rdev);
8755	radeon_uvd_fini(rdev);
8756	radeon_vce_fini(rdev);
8757	cik_pcie_gart_fini(rdev);
8758	r600_vram_scratch_fini(rdev);
8759	radeon_gem_fini(rdev);
8760	radeon_fence_driver_fini(rdev);
8761	radeon_bo_fini(rdev);
8762	radeon_atombios_fini(rdev);
8763	kfree(rdev->bios);
8764	rdev->bios = NULL;
8765}
8766
8767void dce8_program_fmt(struct drm_encoder *encoder)
8768{
8769	struct drm_device *dev = encoder->dev;
8770	struct radeon_device *rdev = dev->dev_private;
8771	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8772	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8773	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8774	int bpc = 0;
8775	u32 tmp = 0;
8776	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8777
8778	if (connector) {
8779		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8780		bpc = radeon_get_monitor_bpc(connector);
8781		dither = radeon_connector->dither;
8782	}
8783
8784	/* LVDS/eDP FMT is set up by atom */
8785	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8786		return;
8787
8788	/* not needed for analog */
8789	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8790	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8791		return;
8792
8793	if (bpc == 0)
8794		return;
8795
8796	switch (bpc) {
8797	case 6:
8798		if (dither == RADEON_FMT_DITHER_ENABLE)
8799			/* XXX sort out optimal dither settings */
8800			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8801				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8802		else
8803			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8804		break;
8805	case 8:
8806		if (dither == RADEON_FMT_DITHER_ENABLE)
8807			/* XXX sort out optimal dither settings */
8808			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8809				FMT_RGB_RANDOM_ENABLE |
8810				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8811		else
8812			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8813		break;
8814	case 10:
8815		if (dither == RADEON_FMT_DITHER_ENABLE)
8816			/* XXX sort out optimal dither settings */
8817			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8818				FMT_RGB_RANDOM_ENABLE |
8819				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8820		else
8821			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8822		break;
8823	default:
8824		/* not needed */
8825		break;
8826	}
8827
8828	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8829}
8830
8831/* display watermark setup */
8832/**
8833 * dce8_line_buffer_adjust - Set up the line buffer
8834 *
8835 * @rdev: radeon_device pointer
8836 * @radeon_crtc: the selected display controller
8837 * @mode: the current display mode on the selected display
8838 * controller
8839 *
8840 * Set up the line buffer allocation for
8841 * the selected display controller (CIK).
8842 * Returns the line buffer size in pixels.
8843 */
8844static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8845				   struct radeon_crtc *radeon_crtc,
8846				   struct drm_display_mode *mode)
8847{
8848	u32 tmp, buffer_alloc, i;
8849	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8850	/*
8851	 * Line Buffer Setup
8852	 * There are 6 line buffers, one for each display controller.
8853	 * There are 3 partitions per LB. Select the number of partitions
8854	 * to enable based on the display width.  For display widths larger
8855	 * than 4096, you need to use 2 display controllers and combine
8856	 * them using the stereo blender.
8857	 */
8858	if (radeon_crtc->base.enabled && mode) {
8859		if (mode->crtc_hdisplay < 1920) {
8860			tmp = 1;
8861			buffer_alloc = 2;
8862		} else if (mode->crtc_hdisplay < 2560) {
8863			tmp = 2;
8864			buffer_alloc = 2;
8865		} else if (mode->crtc_hdisplay < 4096) {
8866			tmp = 0;
8867			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8868		} else {
8869			DRM_DEBUG_KMS("Mode too big for LB!\n");
8870			tmp = 0;
8871			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8872		}
8873	} else {
8874		tmp = 1;
8875		buffer_alloc = 0;
8876	}
8877
8878	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8879	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8880
8881	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8882	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8883	for (i = 0; i < rdev->usec_timeout; i++) {
8884		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8885		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8886			break;
8887		udelay(1);
8888	}
8889
8890	if (radeon_crtc->base.enabled && mode) {
8891		switch (tmp) {
8892		case 0:
8893		default:
8894			return 4096 * 2;
8895		case 1:
8896			return 1920 * 2;
8897		case 2:
8898			return 2560 * 2;
8899		}
8900	}
8901
8902	/* controller not enabled, so no lb used */
8903	return 0;
8904}
8905
8906/**
8907 * cik_get_number_of_dram_channels - get the number of dram channels
8908 *
8909 * @rdev: radeon_device pointer
8910 *
8911 * Look up the number of video ram channels (CIK).
8912 * Used for display watermark bandwidth calculations
8913 * Returns the number of dram channels
8914 */
8915static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8916{
8917	u32 tmp = RREG32(MC_SHARED_CHMAP);
8918
8919	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8920	case 0:
8921	default:
8922		return 1;
8923	case 1:
8924		return 2;
8925	case 2:
8926		return 4;
8927	case 3:
8928		return 8;
8929	case 4:
8930		return 3;
8931	case 5:
8932		return 6;
8933	case 6:
8934		return 10;
8935	case 7:
8936		return 12;
8937	case 8:
8938		return 16;
8939	}
8940}
8941
8942struct dce8_wm_params {
8943	u32 dram_channels; /* number of dram channels */
8944	u32 yclk;          /* bandwidth per dram data pin in kHz */
8945	u32 sclk;          /* engine clock in kHz */
8946	u32 disp_clk;      /* display clock in kHz */
8947	u32 src_width;     /* viewport width */
8948	u32 active_time;   /* active display time in ns */
8949	u32 blank_time;    /* blank time in ns */
8950	bool interlaced;    /* mode is interlaced */
8951	fixed20_12 vsc;    /* vertical scale ratio */
8952	u32 num_heads;     /* number of active crtcs */
8953	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8954	u32 lb_size;       /* line buffer allocated to pipe */
8955	u32 vtaps;         /* vertical scaler taps */
8956};
8957
8958/**
8959 * dce8_dram_bandwidth - get the dram bandwidth
8960 *
8961 * @wm: watermark calculation data
8962 *
8963 * Calculate the raw dram bandwidth (CIK).
8964 * Used for display watermark bandwidth calculations
8965 * Returns the dram bandwidth in MBytes/s
8966 */
8967static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8968{
8969	/* Calculate raw DRAM Bandwidth */
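	/* Roughly: bandwidth (MB/s) = yclk (MHz) * dram_channels * 4 bytes
	 * * 0.7 efficiency; yclk arrives in kHz, hence the divide by 1000.
	 */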
8970	fixed20_12 dram_efficiency; /* 0.7 */
8971	fixed20_12 yclk, dram_channels, bandwidth;
8972	fixed20_12 a;
8973
8974	a.full = dfixed_const(1000);
8975	yclk.full = dfixed_const(wm->yclk);
8976	yclk.full = dfixed_div(yclk, a);
8977	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8978	a.full = dfixed_const(10);
8979	dram_efficiency.full = dfixed_const(7);
8980	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8981	bandwidth.full = dfixed_mul(dram_channels, yclk);
8982	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8983
8984	return dfixed_trunc(bandwidth);
8985}
8986
8987/**
8988 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8989 *
8990 * @wm: watermark calculation data
8991 *
8992 * Calculate the dram bandwidth used for display (CIK).
8993 * Used for display watermark bandwidth calculations
8994 * Returns the dram bandwidth for display in MBytes/s
8995 */
8996static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8997{
8998	/* Calculate DRAM Bandwidth and the part allocated to display. */
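	/* Same formula as dce8_dram_bandwidth(), but only a worst-case 0.3
	 * share of the raw bandwidth is assumed available to the display.
	 */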
8999	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9000	fixed20_12 yclk, dram_channels, bandwidth;
9001	fixed20_12 a;
9002
9003	a.full = dfixed_const(1000);
9004	yclk.full = dfixed_const(wm->yclk);
9005	yclk.full = dfixed_div(yclk, a);
9006	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9007	a.full = dfixed_const(10);
9008	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
9009	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9010	bandwidth.full = dfixed_mul(dram_channels, yclk);
9011	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9012
9013	return dfixed_trunc(bandwidth);
9014}
9015
9016/**
9017 * dce8_data_return_bandwidth - get the data return bandwidth
9018 *
9019 * @wm: watermark calculation data
9020 *
9021 * Calculate the data return bandwidth used for display (CIK).
9022 * Used for display watermark bandwidth calculations
9023 * Returns the data return bandwidth in MBytes/s
9024 */
9025static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9026{
9027	/* Calculate the display Data return Bandwidth */
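	/* Roughly: bandwidth (MB/s) = sclk (MHz) * 32 bytes * 0.8 efficiency */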
9028	fixed20_12 return_efficiency; /* 0.8 */
9029	fixed20_12 sclk, bandwidth;
9030	fixed20_12 a;
9031
9032	a.full = dfixed_const(1000);
9033	sclk.full = dfixed_const(wm->sclk);
9034	sclk.full = dfixed_div(sclk, a);
9035	a.full = dfixed_const(10);
9036	return_efficiency.full = dfixed_const(8);
9037	return_efficiency.full = dfixed_div(return_efficiency, a);
9038	a.full = dfixed_const(32);
9039	bandwidth.full = dfixed_mul(a, sclk);
9040	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9041
9042	return dfixed_trunc(bandwidth);
9043}
9044
9045/**
9046 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9047 *
9048 * @wm: watermark calculation data
9049 *
9050 * Calculate the dmif bandwidth used for display (CIK).
9051 * Used for display watermark bandwidth calculations
9052 * Returns the dmif bandwidth in MBytes/s
9053 */
9054static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9055{
9056	/* Calculate the DMIF Request Bandwidth */
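	/* Roughly: bandwidth (MB/s) = disp_clk (MHz) * 32 bytes
	 * * 0.8 request efficiency
	 */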
9057	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9058	fixed20_12 disp_clk, bandwidth;
9059	fixed20_12 a, b;
9060
9061	a.full = dfixed_const(1000);
9062	disp_clk.full = dfixed_const(wm->disp_clk);
9063	disp_clk.full = dfixed_div(disp_clk, a);
9064	a.full = dfixed_const(32);
9065	b.full = dfixed_mul(a, disp_clk);
9066
9067	a.full = dfixed_const(10);
9068	disp_clk_request_efficiency.full = dfixed_const(8);
9069	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9070
9071	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9072
9073	return dfixed_trunc(bandwidth);
9074}
9075
9076/**
9077 * dce8_available_bandwidth - get the min available bandwidth
9078 *
9079 * @wm: watermark calculation data
9080 *
9081 * Calculate the min available bandwidth used for display (CIK).
9082 * Used for display watermark bandwidth calculations
9083 * Returns the min available bandwidth in MBytes/s
9084 */
9085static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9086{
9087	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9088	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9089	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9090	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9091
9092	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9093}
9094
9095/**
9096 * dce8_average_bandwidth - get the average available bandwidth
9097 *
9098 * @wm: watermark calculation data
9099 *
9100 * Calculate the average available bandwidth used for display (CIK).
9101 * Used for display watermark bandwidth calculations
9102 * Returns the average available bandwidth in MBytes/s
9103 */
9104static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9105{
9106	/* Calculate the display mode Average Bandwidth
9107	 * DisplayMode should contain the source and destination dimensions,
9108	 * timing, etc.
9109	 */
9110	fixed20_12 bpp;
9111	fixed20_12 line_time;
9112	fixed20_12 src_width;
9113	fixed20_12 bandwidth;
9114	fixed20_12 a;
9115
9116	a.full = dfixed_const(1000);
9117	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9118	line_time.full = dfixed_div(line_time, a);
9119	bpp.full = dfixed_const(wm->bytes_per_pixel);
9120	src_width.full = dfixed_const(wm->src_width);
9121	bandwidth.full = dfixed_mul(src_width, bpp);
9122	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9123	bandwidth.full = dfixed_div(bandwidth, line_time);
9124
9125	return dfixed_trunc(bandwidth);
9126}
9127
9128/**
9129 * dce8_latency_watermark - get the latency watermark
9130 *
9131 * @wm: watermark calculation data
9132 *
9133 * Calculate the latency watermark (CIK).
9134 * Used for display watermark bandwidth calculations
9135 * Returns the latency watermark in ns
9136 */
9137static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9138{
9139	/* First calculate the latency in ns */
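	/* Rough model: the MC needs mc_latency to respond, every other
	 * head may have a worst-case chunk (512 * 8 bytes) plus a cursor
	 * line pair (128 * 4 bytes) queued ahead of us, and the display
	 * pipe adds its own dc_latency.  If the line buffer cannot refill
	 * within the active period, the shortfall is added on top.
	 */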
9140	u32 mc_latency = 2000; /* 2000 ns. */
9141	u32 available_bandwidth = dce8_available_bandwidth(wm);
9142	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9143	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9144	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9145	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9146		(wm->num_heads * cursor_line_pair_return_time);
9147	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9148	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9149	u32 tmp, dmif_size = 12288;
9150	fixed20_12 a, b, c;
9151
9152	if (wm->num_heads == 0)
9153		return 0;
9154
9155	a.full = dfixed_const(2);
9156	b.full = dfixed_const(1);
9157	if ((wm->vsc.full > a.full) ||
9158	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9159	    (wm->vtaps >= 5) ||
9160	    ((wm->vsc.full >= a.full) && wm->interlaced))
9161		max_src_lines_per_dst_line = 4;
9162	else
9163		max_src_lines_per_dst_line = 2;
9164
9165	a.full = dfixed_const(available_bandwidth);
9166	b.full = dfixed_const(wm->num_heads);
9167	a.full = dfixed_div(a, b);
9168
9169	b.full = dfixed_const(mc_latency + 512);
9170	c.full = dfixed_const(wm->disp_clk);
9171	b.full = dfixed_div(b, c);
9172
9173	c.full = dfixed_const(dmif_size);
9174	b.full = dfixed_div(c, b);
9175
9176	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9177
9178	b.full = dfixed_const(1000);
9179	c.full = dfixed_const(wm->disp_clk);
9180	b.full = dfixed_div(c, b);
9181	c.full = dfixed_const(wm->bytes_per_pixel);
9182	b.full = dfixed_mul(b, c);
9183
9184	lb_fill_bw = min(tmp, dfixed_trunc(b));
9185
9186	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9187	b.full = dfixed_const(1000);
9188	c.full = dfixed_const(lb_fill_bw);
9189	b.full = dfixed_div(c, b);
9190	a.full = dfixed_div(a, b);
9191	line_fill_time = dfixed_trunc(a);
9192
9193	if (line_fill_time < wm->active_time)
9194		return latency;
9195	else
9196		return latency + (line_fill_time - wm->active_time);
9198}
9199
9200/**
9201 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9202 * average and available dram bandwidth
9203 *
9204 * @wm: watermark calculation data
9205 *
9206 * Check if the display average bandwidth fits in the display
9207 * dram bandwidth (CIK).
9208 * Used for display watermark bandwidth calculations
9209 * Returns true if the display fits, false if not.
9210 */
9211static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9212{
9213	return dce8_average_bandwidth(wm) <=
9214		(dce8_dram_bandwidth_for_display(wm) / wm->num_heads);
9218}
9219
9220/**
9221 * dce8_average_bandwidth_vs_available_bandwidth - check
9222 * average and available bandwidth
9223 *
9224 * @wm: watermark calculation data
9225 *
9226 * Check if the display average bandwidth fits in the display
9227 * available bandwidth (CIK).
9228 * Used for display watermark bandwidth calculations
9229 * Returns true if the display fits, false if not.
9230 */
9231static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9232{
9233	return dce8_average_bandwidth(wm) <=
9234		(dce8_available_bandwidth(wm) / wm->num_heads);
9238}
9239
9240/**
9241 * dce8_check_latency_hiding - check latency hiding
9242 *
9243 * @wm: watermark calculation data
9244 *
9245 * Check latency hiding (CIK).
9246 * Used for display watermark bandwidth calculations
9247 * Returns true if the display fits, false if not.
9248 */
9249static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9250{
9251	u32 lb_partitions = wm->lb_size / wm->src_width;
9252	u32 line_time = wm->active_time + wm->blank_time;
9253	u32 latency_tolerant_lines;
9254	u32 latency_hiding;
9255	fixed20_12 a;
9256
9257	a.full = dfixed_const(1);
9258	if (wm->vsc.full > a.full)
9259		latency_tolerant_lines = 1;
9260	else {
9261		if (lb_partitions <= (wm->vtaps + 1))
9262			latency_tolerant_lines = 1;
9263		else
9264			latency_tolerant_lines = 2;
9265	}
9266
9267	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9268
9269	return dce8_latency_watermark(wm) <= latency_hiding;
9273}
9274
9275/**
9276 * dce8_program_watermarks - program display watermarks
9277 *
9278 * @rdev: radeon_device pointer
9279 * @radeon_crtc: the selected display controller
9280 * @lb_size: line buffer size
9281 * @num_heads: number of display controllers in use
9282 *
9283 * Calculate and program the display watermarks for the
9284 * selected display controller (CIK).
9285 */
9286static void dce8_program_watermarks(struct radeon_device *rdev,
9287				    struct radeon_crtc *radeon_crtc,
9288				    u32 lb_size, u32 num_heads)
9289{
9290	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9291	struct dce8_wm_params wm_low, wm_high;
9292	u32 pixel_period;
9293	u32 line_time = 0;
9294	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9295	u32 tmp, wm_mask;
9296
9297	if (radeon_crtc->base.enabled && num_heads && mode) {
9298		pixel_period = 1000000 / (u32)mode->clock;
9299		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9300
9301		/* watermark for high clocks */
9302		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9303		    rdev->pm.dpm_enabled) {
9304			wm_high.yclk =
9305				radeon_dpm_get_mclk(rdev, false) * 10;
9306			wm_high.sclk =
9307				radeon_dpm_get_sclk(rdev, false) * 10;
9308		} else {
9309			wm_high.yclk = rdev->pm.current_mclk * 10;
9310			wm_high.sclk = rdev->pm.current_sclk * 10;
9311		}
9312
9313		wm_high.disp_clk = mode->clock;
9314		wm_high.src_width = mode->crtc_hdisplay;
9315		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9316		wm_high.blank_time = line_time - wm_high.active_time;
9317		wm_high.interlaced = false;
9318		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9319			wm_high.interlaced = true;
9320		wm_high.vsc = radeon_crtc->vsc;
9321		wm_high.vtaps = 1;
9322		if (radeon_crtc->rmx_type != RMX_OFF)
9323			wm_high.vtaps = 2;
9324		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9325		wm_high.lb_size = lb_size;
9326		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9327		wm_high.num_heads = num_heads;
9328
9329		/* set for high clocks */
9330		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9331
9332		/* possibly force display priority to high */
9333		/* should really do this at mode validation time... */
9334		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9335		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9336		    !dce8_check_latency_hiding(&wm_high) ||
9337		    (rdev->disp_priority == 2)) {
9338			DRM_DEBUG_KMS("force priority to high\n");
9339		}
9340
9341		/* watermark for low clocks */
9342		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9343		    rdev->pm.dpm_enabled) {
9344			wm_low.yclk =
9345				radeon_dpm_get_mclk(rdev, true) * 10;
9346			wm_low.sclk =
9347				radeon_dpm_get_sclk(rdev, true) * 10;
9348		} else {
9349			wm_low.yclk = rdev->pm.current_mclk * 10;
9350			wm_low.sclk = rdev->pm.current_sclk * 10;
9351		}
9352
9353		wm_low.disp_clk = mode->clock;
9354		wm_low.src_width = mode->crtc_hdisplay;
9355		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9356		wm_low.blank_time = line_time - wm_low.active_time;
9357		wm_low.interlaced = false;
9358		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9359			wm_low.interlaced = true;
9360		wm_low.vsc = radeon_crtc->vsc;
9361		wm_low.vtaps = 1;
9362		if (radeon_crtc->rmx_type != RMX_OFF)
9363			wm_low.vtaps = 2;
9364		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9365		wm_low.lb_size = lb_size;
9366		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9367		wm_low.num_heads = num_heads;
9368
9369		/* set for low clocks */
9370		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9371
9372		/* possibly force display priority to high */
9373		/* should really do this at mode validation time... */
9374		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9375		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9376		    !dce8_check_latency_hiding(&wm_low) ||
9377		    (rdev->disp_priority == 2)) {
9378			DRM_DEBUG_KMS("force priority to high\n");
9379		}
9380
9381		/* Save number of lines the linebuffer leads before the scanout */
9382		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9383	}
9384
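	/* The DPG block holds multiple watermark sets; LATENCY_WATERMARK_MASK
	 * selects which set the DPG_PIPE_LATENCY_CONTROL write below lands
	 * in.  Set A carries the high-clock values and set B the low-clock
	 * ones, so the power management code can later flip between them.
	 */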
9385	/* select wm A */
9386	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9387	tmp = wm_mask;
9388	tmp &= ~LATENCY_WATERMARK_MASK(3);
9389	tmp |= LATENCY_WATERMARK_MASK(1);
9390	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9391	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9392	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9393		LATENCY_HIGH_WATERMARK(line_time)));
9394	/* select wm B */
9395	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9396	tmp &= ~LATENCY_WATERMARK_MASK(3);
9397	tmp |= LATENCY_WATERMARK_MASK(2);
9398	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9399	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9400	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9401		LATENCY_HIGH_WATERMARK(line_time)));
9402	/* restore original selection */
9403	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9404
9405	/* save values for DPM */
9406	radeon_crtc->line_time = line_time;
9407	radeon_crtc->wm_high = latency_watermark_a;
9408	radeon_crtc->wm_low = latency_watermark_b;
9409}
9410
9411/**
9412 * dce8_bandwidth_update - program display watermarks
9413 *
9414 * @rdev: radeon_device pointer
9415 *
9416 * Calculate and program the display watermarks and line
9417 * buffer allocation (CIK).
9418 */
9419void dce8_bandwidth_update(struct radeon_device *rdev)
9420{
9421	struct drm_display_mode *mode = NULL;
9422	u32 num_heads = 0, lb_size;
9423	int i;
9424
9425	if (!rdev->mode_info.mode_config_initialized)
9426		return;
9427
9428	radeon_update_display_priority(rdev);
9429
9430	for (i = 0; i < rdev->num_crtc; i++) {
9431		if (rdev->mode_info.crtcs[i]->base.enabled)
9432			num_heads++;
9433	}
9434	for (i = 0; i < rdev->num_crtc; i++) {
9435		mode = &rdev->mode_info.crtcs[i]->base.mode;
9436		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9437		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9438	}
9439}
9440
9441/**
9442 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9443 *
9444 * @rdev: radeon_device pointer
9445 *
9446 * Fetches a GPU clock counter snapshot (CIK).
9447 * Returns the 64 bit clock counter snapshot.
9448 */
9449uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9450{
9451	uint64_t clock;
9452
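	/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running
	 * counter so the LSB/MSB halves read back as one consistent
	 * sample; the mutex serializes concurrent captures.
	 */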
9453	mutex_lock(&rdev->gpu_clock_mutex);
9454	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9455	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9456		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9457	mutex_unlock(&rdev->gpu_clock_mutex);
9458	return clock;
9459}
9460
9461static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9462			     u32 cntl_reg, u32 status_reg)
9463{
9464	int r, i;
9465	struct atom_clock_dividers dividers;
9466	uint32_t tmp;
9467
9468	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9469					   clock, false, &dividers);
9470	if (r)
9471		return r;
9472
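	/* Program the post divider computed above, then poll (up to ~1s)
	 * for the status bit that signals the divider switch completed.
	 */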
9473	tmp = RREG32_SMC(cntl_reg);
9474	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9475	tmp |= dividers.post_divider;
9476	WREG32_SMC(cntl_reg, tmp);
9477
9478	for (i = 0; i < 100; i++) {
9479		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9480			break;
9481		mdelay(10);
9482	}
9483	if (i == 100)
9484		return -ETIMEDOUT;
9485
9486	return 0;
9487}
9488
9489int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9490{
9491	int r = 0;
9492
9493	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9494	if (r)
9495		return r;
9496
9497	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9498	return r;
9499}
9500
9501int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9502{
9503	int r, i;
9504	struct atom_clock_dividers dividers;
9505	u32 tmp;
9506
9507	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9508					   ecclk, false, &dividers);
9509	if (r)
9510		return r;
9511
9512	for (i = 0; i < 100; i++) {
9513		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9514			break;
9515		mdelay(10);
9516	}
9517	if (i == 100)
9518		return -ETIMEDOUT;
9519
9520	tmp = RREG32_SMC(CG_ECLK_CNTL);
9521	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9522	tmp |= dividers.post_divider;
9523	WREG32_SMC(CG_ECLK_CNTL, tmp);
9524
9525	for (i = 0; i < 100; i++) {
9526		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9527			break;
9528		mdelay(10);
9529	}
9530	if (i == 100)
9531		return -ETIMEDOUT;
9532
9533	return 0;
9534}
9535
9536static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9537{
9538	struct pci_dev *root = rdev->pdev->bus->self;
9539	int bridge_pos, gpu_pos;
9540	u32 speed_cntl, mask, current_data_rate;
9541	int ret, i;
9542	u16 tmp16;
9543
9544	if (pci_is_root_bus(rdev->pdev->bus))
9545		return;
9546
9547	if (radeon_pcie_gen2 == 0)
9548		return;
9549
9550	if (rdev->flags & RADEON_IS_IGP)
9551		return;
9552
9553	if (!(rdev->flags & RADEON_IS_PCIE))
9554		return;
9555
9556	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9557	if (ret != 0)
9558		return;
9559
9560	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9561		return;
9562
9563	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9564	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9565		LC_CURRENT_DATA_RATE_SHIFT;
9566	if (mask & DRM_PCIE_SPEED_80) {
9567		if (current_data_rate == 2) {
9568			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9569			return;
9570		}
9571		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9572	} else if (mask & DRM_PCIE_SPEED_50) {
9573		if (current_data_rate == 1) {
9574			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9575			return;
9576		}
9577		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9578	}
9579
9580	bridge_pos = pci_pcie_cap(root);
9581	if (!bridge_pos)
9582		return;
9583
9584	gpu_pos = pci_pcie_cap(rdev->pdev);
9585	if (!gpu_pos)
9586		return;
9587
9588	if (mask & DRM_PCIE_SPEED_80) {
9589		/* re-try equalization if gen3 is not already enabled */
9590		if (current_data_rate != 2) {
9591			u16 bridge_cfg, gpu_cfg;
9592			u16 bridge_cfg2, gpu_cfg2;
9593			u32 max_lw, current_lw, tmp;
9594
9595			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9596			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9597
9598			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9599			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9600
9601			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9602			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9603
9604			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9605			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9606			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9607
9608			if (current_lw < max_lw) {
9609				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9610				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9611					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9612					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9613					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9614					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9615				}
9616			}
9617
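			/* Gen3 equalization retry: quiesce the link, redo
			 * equalization, then restore the link control state
			 * saved on both ends.  Retried up to 10 times; the
			 * loop exits early once the GPU reports a pending
			 * transaction.
			 */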
9618			for (i = 0; i < 10; i++) {
9619				/* check status */
9620				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9621				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9622					break;
9623
9624				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9625				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9626
9627				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9628				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9629
9630				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9631				tmp |= LC_SET_QUIESCE;
9632				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9633
9634				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9635				tmp |= LC_REDO_EQ;
9636				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9637
9638				mdelay(100);
9639
9640				/* linkctl */
9641				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9642				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9643				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9644				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9645
9646				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9647				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9648				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9649				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9650
9651				/* linkctl2 */
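				/* clear, then restore from the saved config,
				 * the Enter Compliance (bit 4) and Transmit
				 * Margin (bits 9:7) fields
				 */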
9652				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9653				tmp16 &= ~((1 << 4) | (7 << 7));
9654				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 7)));
9655				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9656
9657				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9658				tmp16 &= ~((1 << 4) | (7 << 7));
9659				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 7)));
9660				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9661
9662				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9663				tmp &= ~LC_SET_QUIESCE;
9664				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9665			}
9666		}
9667	}
9668
9669	/* set the link speed */
9670	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9671	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9672	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9673
9674	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9675	tmp16 &= ~0xf;
9676	if (mask & DRM_PCIE_SPEED_80)
9677		tmp16 |= 3; /* gen3 */
9678	else if (mask & DRM_PCIE_SPEED_50)
9679		tmp16 |= 2; /* gen2 */
9680	else
9681		tmp16 |= 1; /* gen1 */
9682	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9683
9684	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9685	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9686	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9687
9688	for (i = 0; i < rdev->usec_timeout; i++) {
9689		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9690		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9691			break;
9692		udelay(1);
9693	}
9694}
9695
9696static void cik_program_aspm(struct radeon_device *rdev)
9697{
9698	u32 data, orig;
9699	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9700	bool disable_clkreq = false;
9701
9702	if (radeon_aspm == 0)
9703		return;
9704
9705	/* XXX double check IGPs */
9706	if (rdev->flags & RADEON_IS_IGP)
9707		return;
9708
9709	if (!(rdev->flags & RADEON_IS_PCIE))
9710		return;
9711
9712	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9713	data &= ~LC_XMIT_N_FTS_MASK;
9714	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9715	if (orig != data)
9716		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9717
9718	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9719	data |= LC_GO_TO_RECOVERY;
9720	if (orig != data)
9721		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9722
9723	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9724	data |= P_IGNORE_EDB_ERR;
9725	if (orig != data)
9726		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9727
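	/* LC_L0S_INACTIVITY / LC_L1_INACTIVITY set how long the link may
	 * sit idle before entering L0s / L1 respectively; both fields are
	 * written below unless the disable flags above are set.
	 */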
9728	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9729	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9730	data |= LC_PMI_TO_L1_DIS;
9731	if (!disable_l0s)
9732		data |= LC_L0S_INACTIVITY(7);
9733
9734	if (!disable_l1) {
9735		data |= LC_L1_INACTIVITY(7);
9736		data &= ~LC_PMI_TO_L1_DIS;
9737		if (orig != data)
9738			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9739
9740		if (!disable_plloff_in_l1) {
9741			bool clk_req_support;
9742
9743			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9744			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9745			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9746			if (orig != data)
9747				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9748
9749			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9750			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9751			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9752			if (orig != data)
9753				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9754
9755			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9756			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9757			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9758			if (orig != data)
9759				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9760
9761			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9762			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9763			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9764			if (orig != data)
9765				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9766
9767			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9768			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9769			data |= LC_DYN_LANES_PWR_STATE(3);
9770			if (orig != data)
9771				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9772
9773			if (!disable_clkreq &&
9774			    !pci_is_root_bus(rdev->pdev->bus)) {
9775				struct pci_dev *root = rdev->pdev->bus->self;
9776				u32 lnkcap;
9777
9778				clk_req_support = false;
9779				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9780				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9781					clk_req_support = true;
9782			} else {
9783				clk_req_support = false;
9784			}
9785
9786			if (clk_req_support) {
9787				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9788				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9789				if (orig != data)
9790					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9791
9792				orig = data = RREG32_SMC(THM_CLK_CNTL);
9793				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9794				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9795				if (orig != data)
9796					WREG32_SMC(THM_CLK_CNTL, data);
9797
9798				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9799				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9800				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9801				if (orig != data)
9802					WREG32_SMC(MISC_CLK_CTRL, data);
9803
9804				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9805				data &= ~BCLK_AS_XCLK;
9806				if (orig != data)
9807					WREG32_SMC(CG_CLKPIN_CNTL, data);
9808
9809				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9810				data &= ~FORCE_BIF_REFCLK_EN;
9811				if (orig != data)
9812					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9813
9814				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9815				data &= ~MPLL_CLKOUT_SEL_MASK;
9816				data |= MPLL_CLKOUT_SEL(4);
9817				if (orig != data)
9818					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9819			}
9820		}
9821	} else {
9822		if (orig != data)
9823			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9824	}
9825
9826	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9827	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9828	if (orig != data)
9829		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9830
9831	if (!disable_l0s) {
9832		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9833		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9834			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9835			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9836				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9837				data &= ~LC_L0S_INACTIVITY_MASK;
9838				if (orig != data)
9839					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9840			}
9841		}
9842	}
9843}