Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
  24#include <linux/firmware.h>
  25#include <linux/slab.h>
  26#include <linux/module.h>
  27#include "drmP.h"
  28#include "radeon.h"
  29#include "radeon_asic.h"
  30#include "radeon_audio.h"
  31#include "cikd.h"
  32#include "atom.h"
  33#include "cik_blit_shaders.h"
  34#include "radeon_ucode.h"
  35#include "clearstate_ci.h"
  36#include "radeon_kfd.h"
  37
/*
 * Firmware image declarations for the CIK-family ASICs handled by this
 * file.  Each family appears twice: once with the legacy UPPERCASE
 * names and once with the newer lowercase names — presumably so the
 * driver can fall back from one naming scheme to the other; confirm
 * against the firmware-request code later in this file.
 */

/* Bonaire */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

/* Hawaii */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

/* Kaveri (APU — no MC/SMC images) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
 118
/* Helpers shared with other ASIC files (defined in r600.c, evergreen.c,
 * si.c, cik_sdma.c, vce_v2_0.c, ...). */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
/* Forward declaration for a helper defined later in this file. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
/* Forward declarations for local helpers defined later in this file. */
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
 143
 144/**
 145 * cik_get_allowed_info_register - fetch the register for the info ioctl
 146 *
 147 * @rdev: radeon_device pointer
 148 * @reg: register offset in bytes
 149 * @val: register value
 150 *
 151 * Returns 0 for success or -EINVAL for an invalid register
 152 *
 153 */
 154int cik_get_allowed_info_register(struct radeon_device *rdev,
 155				  u32 reg, u32 *val)
 156{
 157	switch (reg) {
 158	case GRBM_STATUS:
 159	case GRBM_STATUS2:
 160	case GRBM_STATUS_SE0:
 161	case GRBM_STATUS_SE1:
 162	case GRBM_STATUS_SE2:
 163	case GRBM_STATUS_SE3:
 164	case SRBM_STATUS:
 165	case SRBM_STATUS2:
 166	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
 167	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
 168	case UVD_STATUS:
 169	/* TODO VCE */
 170		*val = RREG32(reg);
 171		return 0;
 172	default:
 173		return -EINVAL;
 174	}
 175}
 176
 177/*
 178 * Indirect registers accessor
 179 */
 180u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
 181{
 182	unsigned long flags;
 183	u32 r;
 184
 185	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 186	WREG32(CIK_DIDT_IND_INDEX, (reg));
 187	r = RREG32(CIK_DIDT_IND_DATA);
 188	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 189	return r;
 190}
 191
 192void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 193{
 194	unsigned long flags;
 195
 196	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 197	WREG32(CIK_DIDT_IND_INDEX, (reg));
 198	WREG32(CIK_DIDT_IND_DATA, (v));
 199	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 200}
 201
 202/* get temperature in millidegrees */
 203int ci_get_temp(struct radeon_device *rdev)
 204{
 205	u32 temp;
 206	int actual_temp = 0;
 207
 208	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
 209		CTF_TEMP_SHIFT;
 210
 211	if (temp & 0x200)
 212		actual_temp = 255;
 213	else
 214		actual_temp = temp & 0x1ff;
 215
 216	actual_temp = actual_temp * 1000;
 217
 218	return actual_temp;
 219}
 220
 221/* get temperature in millidegrees */
 222int kv_get_temp(struct radeon_device *rdev)
 223{
 224	u32 temp;
 225	int actual_temp = 0;
 226
 227	temp = RREG32_SMC(0xC0300E0C);
 228
 229	if (temp)
 230		actual_temp = (temp / 8) - 49;
 231	else
 232		actual_temp = 0;
 233
 234	actual_temp = actual_temp * 1000;
 235
 236	return actual_temp;
 237}
 238
 239/*
 240 * Indirect registers accessor
 241 */
 242u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
 243{
 244	unsigned long flags;
 245	u32 r;
 246
 247	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 248	WREG32(PCIE_INDEX, reg);
 249	(void)RREG32(PCIE_INDEX);
 250	r = RREG32(PCIE_DATA);
 251	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 252	return r;
 253}
 254
 255void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 256{
 257	unsigned long flags;
 258
 259	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 260	WREG32(PCIE_INDEX, reg);
 261	(void)RREG32(PCIE_INDEX);
 262	WREG32(PCIE_DATA, v);
 263	(void)RREG32(PCIE_DATA);
 264	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 265}
 266
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 *
 * Entries come in pairs: ((target << 16) | (register byte offset >> 2))
 * followed by a placeholder 0x00000000.  The bare small constants (0x3,
 * 0x5) appear to delimit sections of the list, and the final section's
 * entries carry no placeholder — format is consumed by the RLC
 * save/restore setup code later in this file; TODO confirm the exact
 * meaning of the target field and section markers against that code.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
 713
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins GFX).
 * Same encoding as spectre_rlc_save_restore_register_list above:
 * ((target << 16) | (register byte offset >> 2)) followed by a
 * placeholder 0x00000000, with bare constants (0x3, 0x5) apparently
 * delimiting sections; TODO confirm against the RLC setup code.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1038
/* Bonaire "golden" SPM register settings — appear to be
 * {offset, mask, value} triples applied at init; confirm against the
 * golden-register programming code.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1043
/* Bonaire "golden" common register settings — appear to be
 * {offset, mask, value} triples applied at init; confirm against the
 * golden-register programming code.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1051
/* Bonaire "golden" register settings — appear to be
 * {offset, mask, value} triples applied at init; confirm against the
 * golden-register programming code.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1096
1097static const u32 bonaire_mgcg_cgcg_init[] =
1098{
1099	0xc420, 0xffffffff, 0xfffffffc,
1100	0x30800, 0xffffffff, 0xe0000000,
1101	0x3c2a0, 0xffffffff, 0x00000100,
1102	0x3c208, 0xffffffff, 0x00000100,
1103	0x3c2c0, 0xffffffff, 0xc0000100,
1104	0x3c2c8, 0xffffffff, 0xc0000100,
1105	0x3c2c4, 0xffffffff, 0xc0000100,
1106	0x55e4, 0xffffffff, 0x00600100,
1107	0x3c280, 0xffffffff, 0x00000100,
1108	0x3c214, 0xffffffff, 0x06000100,
1109	0x3c220, 0xffffffff, 0x00000100,
1110	0x3c218, 0xffffffff, 0x06000100,
1111	0x3c204, 0xffffffff, 0x00000100,
1112	0x3c2e0, 0xffffffff, 0x00000100,
1113	0x3c224, 0xffffffff, 0x00000100,
1114	0x3c200, 0xffffffff, 0x00000100,
1115	0x3c230, 0xffffffff, 0x00000100,
1116	0x3c234, 0xffffffff, 0x00000100,
1117	0x3c250, 0xffffffff, 0x00000100,
1118	0x3c254, 0xffffffff, 0x00000100,
1119	0x3c258, 0xffffffff, 0x00000100,
1120	0x3c25c, 0xffffffff, 0x00000100,
1121	0x3c260, 0xffffffff, 0x00000100,
1122	0x3c27c, 0xffffffff, 0x00000100,
1123	0x3c278, 0xffffffff, 0x00000100,
1124	0x3c210, 0xffffffff, 0x06000100,
1125	0x3c290, 0xffffffff, 0x00000100,
1126	0x3c274, 0xffffffff, 0x00000100,
1127	0x3c2b4, 0xffffffff, 0x00000100,
1128	0x3c2b0, 0xffffffff, 0x00000100,
1129	0x3c270, 0xffffffff, 0x00000100,
1130	0x30800, 0xffffffff, 0xe0000000,
1131	0x3c020, 0xffffffff, 0x00010000,
1132	0x3c024, 0xffffffff, 0x00030002,
1133	0x3c028, 0xffffffff, 0x00040007,
1134	0x3c02c, 0xffffffff, 0x00060005,
1135	0x3c030, 0xffffffff, 0x00090008,
1136	0x3c034, 0xffffffff, 0x00010000,
1137	0x3c038, 0xffffffff, 0x00030002,
1138	0x3c03c, 0xffffffff, 0x00040007,
1139	0x3c040, 0xffffffff, 0x00060005,
1140	0x3c044, 0xffffffff, 0x00090008,
1141	0x3c048, 0xffffffff, 0x00010000,
1142	0x3c04c, 0xffffffff, 0x00030002,
1143	0x3c050, 0xffffffff, 0x00040007,
1144	0x3c054, 0xffffffff, 0x00060005,
1145	0x3c058, 0xffffffff, 0x00090008,
1146	0x3c05c, 0xffffffff, 0x00010000,
1147	0x3c060, 0xffffffff, 0x00030002,
1148	0x3c064, 0xffffffff, 0x00040007,
1149	0x3c068, 0xffffffff, 0x00060005,
1150	0x3c06c, 0xffffffff, 0x00090008,
1151	0x3c070, 0xffffffff, 0x00010000,
1152	0x3c074, 0xffffffff, 0x00030002,
1153	0x3c078, 0xffffffff, 0x00040007,
1154	0x3c07c, 0xffffffff, 0x00060005,
1155	0x3c080, 0xffffffff, 0x00090008,
1156	0x3c084, 0xffffffff, 0x00010000,
1157	0x3c088, 0xffffffff, 0x00030002,
1158	0x3c08c, 0xffffffff, 0x00040007,
1159	0x3c090, 0xffffffff, 0x00060005,
1160	0x3c094, 0xffffffff, 0x00090008,
1161	0x3c098, 0xffffffff, 0x00010000,
1162	0x3c09c, 0xffffffff, 0x00030002,
1163	0x3c0a0, 0xffffffff, 0x00040007,
1164	0x3c0a4, 0xffffffff, 0x00060005,
1165	0x3c0a8, 0xffffffff, 0x00090008,
1166	0x3c000, 0xffffffff, 0x96e00200,
1167	0x8708, 0xffffffff, 0x00900100,
1168	0xc424, 0xffffffff, 0x0020003f,
1169	0x38, 0xffffffff, 0x0140001c,
1170	0x3c, 0x000f0000, 0x000f0000,
1171	0x220, 0xffffffff, 0xC060000C,
1172	0x224, 0xc0000fff, 0x00000100,
1173	0xf90, 0xffffffff, 0x00000100,
1174	0xf98, 0x00000101, 0x00000000,
1175	0x20a8, 0xffffffff, 0x00000104,
1176	0x55e4, 0xff000fff, 0x00000100,
1177	0x30cc, 0xc0000fff, 0x00000104,
1178	0xc1e4, 0x00000001, 0x00000001,
1179	0xd00c, 0xff000ff0, 0x00000100,
1180	0xd80c, 0xff000ff0, 0x00000100
1181};
1182
1183static const u32 spectre_golden_spm_registers[] =
1184{
1185	0x30800, 0xe0ffffff, 0xe0000000
1186};
1187
1188static const u32 spectre_golden_common_registers[] =
1189{
1190	0xc770, 0xffffffff, 0x00000800,
1191	0xc774, 0xffffffff, 0x00000800,
1192	0xc798, 0xffffffff, 0x00007fbf,
1193	0xc79c, 0xffffffff, 0x00007faf
1194};
1195
1196static const u32 spectre_golden_registers[] =
1197{
1198	0x3c000, 0xffff1fff, 0x96940200,
1199	0x3c00c, 0xffff0001, 0xff000000,
1200	0x3c200, 0xfffc0fff, 0x00000100,
1201	0x6ed8, 0x00010101, 0x00010000,
1202	0x9834, 0xf00fffff, 0x00000400,
1203	0x9838, 0xfffffffc, 0x00020200,
1204	0x5bb0, 0x000000f0, 0x00000070,
1205	0x5bc0, 0xf0311fff, 0x80300000,
1206	0x98f8, 0x73773777, 0x12010001,
1207	0x9b7c, 0x00ff0000, 0x00fc0000,
1208	0x2f48, 0x73773777, 0x12010001,
1209	0x8a14, 0xf000003f, 0x00000007,
1210	0x8b24, 0xffffffff, 0x00ffffff,
1211	0x28350, 0x3f3f3fff, 0x00000082,
1212	0x28354, 0x0000003f, 0x00000000,
1213	0x3e78, 0x00000001, 0x00000002,
1214	0x913c, 0xffff03df, 0x00000004,
1215	0xc768, 0x00000008, 0x00000008,
1216	0x8c00, 0x000008ff, 0x00000800,
1217	0x9508, 0x00010000, 0x00010000,
1218	0xac0c, 0xffffffff, 0x54763210,
1219	0x214f8, 0x01ff01ff, 0x00000002,
1220	0x21498, 0x007ff800, 0x00200000,
1221	0x2015c, 0xffffffff, 0x00000f40,
1222	0x30934, 0xffffffff, 0x00000001
1223};
1224
1225static const u32 spectre_mgcg_cgcg_init[] =
1226{
1227	0xc420, 0xffffffff, 0xfffffffc,
1228	0x30800, 0xffffffff, 0xe0000000,
1229	0x3c2a0, 0xffffffff, 0x00000100,
1230	0x3c208, 0xffffffff, 0x00000100,
1231	0x3c2c0, 0xffffffff, 0x00000100,
1232	0x3c2c8, 0xffffffff, 0x00000100,
1233	0x3c2c4, 0xffffffff, 0x00000100,
1234	0x55e4, 0xffffffff, 0x00600100,
1235	0x3c280, 0xffffffff, 0x00000100,
1236	0x3c214, 0xffffffff, 0x06000100,
1237	0x3c220, 0xffffffff, 0x00000100,
1238	0x3c218, 0xffffffff, 0x06000100,
1239	0x3c204, 0xffffffff, 0x00000100,
1240	0x3c2e0, 0xffffffff, 0x00000100,
1241	0x3c224, 0xffffffff, 0x00000100,
1242	0x3c200, 0xffffffff, 0x00000100,
1243	0x3c230, 0xffffffff, 0x00000100,
1244	0x3c234, 0xffffffff, 0x00000100,
1245	0x3c250, 0xffffffff, 0x00000100,
1246	0x3c254, 0xffffffff, 0x00000100,
1247	0x3c258, 0xffffffff, 0x00000100,
1248	0x3c25c, 0xffffffff, 0x00000100,
1249	0x3c260, 0xffffffff, 0x00000100,
1250	0x3c27c, 0xffffffff, 0x00000100,
1251	0x3c278, 0xffffffff, 0x00000100,
1252	0x3c210, 0xffffffff, 0x06000100,
1253	0x3c290, 0xffffffff, 0x00000100,
1254	0x3c274, 0xffffffff, 0x00000100,
1255	0x3c2b4, 0xffffffff, 0x00000100,
1256	0x3c2b0, 0xffffffff, 0x00000100,
1257	0x3c270, 0xffffffff, 0x00000100,
1258	0x30800, 0xffffffff, 0xe0000000,
1259	0x3c020, 0xffffffff, 0x00010000,
1260	0x3c024, 0xffffffff, 0x00030002,
1261	0x3c028, 0xffffffff, 0x00040007,
1262	0x3c02c, 0xffffffff, 0x00060005,
1263	0x3c030, 0xffffffff, 0x00090008,
1264	0x3c034, 0xffffffff, 0x00010000,
1265	0x3c038, 0xffffffff, 0x00030002,
1266	0x3c03c, 0xffffffff, 0x00040007,
1267	0x3c040, 0xffffffff, 0x00060005,
1268	0x3c044, 0xffffffff, 0x00090008,
1269	0x3c048, 0xffffffff, 0x00010000,
1270	0x3c04c, 0xffffffff, 0x00030002,
1271	0x3c050, 0xffffffff, 0x00040007,
1272	0x3c054, 0xffffffff, 0x00060005,
1273	0x3c058, 0xffffffff, 0x00090008,
1274	0x3c05c, 0xffffffff, 0x00010000,
1275	0x3c060, 0xffffffff, 0x00030002,
1276	0x3c064, 0xffffffff, 0x00040007,
1277	0x3c068, 0xffffffff, 0x00060005,
1278	0x3c06c, 0xffffffff, 0x00090008,
1279	0x3c070, 0xffffffff, 0x00010000,
1280	0x3c074, 0xffffffff, 0x00030002,
1281	0x3c078, 0xffffffff, 0x00040007,
1282	0x3c07c, 0xffffffff, 0x00060005,
1283	0x3c080, 0xffffffff, 0x00090008,
1284	0x3c084, 0xffffffff, 0x00010000,
1285	0x3c088, 0xffffffff, 0x00030002,
1286	0x3c08c, 0xffffffff, 0x00040007,
1287	0x3c090, 0xffffffff, 0x00060005,
1288	0x3c094, 0xffffffff, 0x00090008,
1289	0x3c098, 0xffffffff, 0x00010000,
1290	0x3c09c, 0xffffffff, 0x00030002,
1291	0x3c0a0, 0xffffffff, 0x00040007,
1292	0x3c0a4, 0xffffffff, 0x00060005,
1293	0x3c0a8, 0xffffffff, 0x00090008,
1294	0x3c0ac, 0xffffffff, 0x00010000,
1295	0x3c0b0, 0xffffffff, 0x00030002,
1296	0x3c0b4, 0xffffffff, 0x00040007,
1297	0x3c0b8, 0xffffffff, 0x00060005,
1298	0x3c0bc, 0xffffffff, 0x00090008,
1299	0x3c000, 0xffffffff, 0x96e00200,
1300	0x8708, 0xffffffff, 0x00900100,
1301	0xc424, 0xffffffff, 0x0020003f,
1302	0x38, 0xffffffff, 0x0140001c,
1303	0x3c, 0x000f0000, 0x000f0000,
1304	0x220, 0xffffffff, 0xC060000C,
1305	0x224, 0xc0000fff, 0x00000100,
1306	0xf90, 0xffffffff, 0x00000100,
1307	0xf98, 0x00000101, 0x00000000,
1308	0x20a8, 0xffffffff, 0x00000104,
1309	0x55e4, 0xff000fff, 0x00000100,
1310	0x30cc, 0xc0000fff, 0x00000104,
1311	0xc1e4, 0x00000001, 0x00000001,
1312	0xd00c, 0xff000ff0, 0x00000100,
1313	0xd80c, 0xff000ff0, 0x00000100
1314};
1315
1316static const u32 kalindi_golden_spm_registers[] =
1317{
1318	0x30800, 0xe0ffffff, 0xe0000000
1319};
1320
1321static const u32 kalindi_golden_common_registers[] =
1322{
1323	0xc770, 0xffffffff, 0x00000800,
1324	0xc774, 0xffffffff, 0x00000800,
1325	0xc798, 0xffffffff, 0x00007fbf,
1326	0xc79c, 0xffffffff, 0x00007faf
1327};
1328
1329static const u32 kalindi_golden_registers[] =
1330{
1331	0x3c000, 0xffffdfff, 0x6e944040,
1332	0x55e4, 0xff607fff, 0xfc000100,
1333	0x3c220, 0xff000fff, 0x00000100,
1334	0x3c224, 0xff000fff, 0x00000100,
1335	0x3c200, 0xfffc0fff, 0x00000100,
1336	0x6ed8, 0x00010101, 0x00010000,
1337	0x9830, 0xffffffff, 0x00000000,
1338	0x9834, 0xf00fffff, 0x00000400,
1339	0x5bb0, 0x000000f0, 0x00000070,
1340	0x5bc0, 0xf0311fff, 0x80300000,
1341	0x98f8, 0x73773777, 0x12010001,
1342	0x98fc, 0xffffffff, 0x00000010,
1343	0x9b7c, 0x00ff0000, 0x00fc0000,
1344	0x8030, 0x00001f0f, 0x0000100a,
1345	0x2f48, 0x73773777, 0x12010001,
1346	0x2408, 0x000fffff, 0x000c007f,
1347	0x8a14, 0xf000003f, 0x00000007,
1348	0x8b24, 0x3fff3fff, 0x00ffcfff,
1349	0x30a04, 0x0000ff0f, 0x00000000,
1350	0x28a4c, 0x07ffffff, 0x06000000,
1351	0x4d8, 0x00000fff, 0x00000100,
1352	0x3e78, 0x00000001, 0x00000002,
1353	0xc768, 0x00000008, 0x00000008,
1354	0x8c00, 0x000000ff, 0x00000003,
1355	0x214f8, 0x01ff01ff, 0x00000002,
1356	0x21498, 0x007ff800, 0x00200000,
1357	0x2015c, 0xffffffff, 0x00000f40,
1358	0x88c4, 0x001f3ae3, 0x00000082,
1359	0x88d4, 0x0000001f, 0x00000010,
1360	0x30934, 0xffffffff, 0x00000000
1361};
1362
1363static const u32 kalindi_mgcg_cgcg_init[] =
1364{
1365	0xc420, 0xffffffff, 0xfffffffc,
1366	0x30800, 0xffffffff, 0xe0000000,
1367	0x3c2a0, 0xffffffff, 0x00000100,
1368	0x3c208, 0xffffffff, 0x00000100,
1369	0x3c2c0, 0xffffffff, 0x00000100,
1370	0x3c2c8, 0xffffffff, 0x00000100,
1371	0x3c2c4, 0xffffffff, 0x00000100,
1372	0x55e4, 0xffffffff, 0x00600100,
1373	0x3c280, 0xffffffff, 0x00000100,
1374	0x3c214, 0xffffffff, 0x06000100,
1375	0x3c220, 0xffffffff, 0x00000100,
1376	0x3c218, 0xffffffff, 0x06000100,
1377	0x3c204, 0xffffffff, 0x00000100,
1378	0x3c2e0, 0xffffffff, 0x00000100,
1379	0x3c224, 0xffffffff, 0x00000100,
1380	0x3c200, 0xffffffff, 0x00000100,
1381	0x3c230, 0xffffffff, 0x00000100,
1382	0x3c234, 0xffffffff, 0x00000100,
1383	0x3c250, 0xffffffff, 0x00000100,
1384	0x3c254, 0xffffffff, 0x00000100,
1385	0x3c258, 0xffffffff, 0x00000100,
1386	0x3c25c, 0xffffffff, 0x00000100,
1387	0x3c260, 0xffffffff, 0x00000100,
1388	0x3c27c, 0xffffffff, 0x00000100,
1389	0x3c278, 0xffffffff, 0x00000100,
1390	0x3c210, 0xffffffff, 0x06000100,
1391	0x3c290, 0xffffffff, 0x00000100,
1392	0x3c274, 0xffffffff, 0x00000100,
1393	0x3c2b4, 0xffffffff, 0x00000100,
1394	0x3c2b0, 0xffffffff, 0x00000100,
1395	0x3c270, 0xffffffff, 0x00000100,
1396	0x30800, 0xffffffff, 0xe0000000,
1397	0x3c020, 0xffffffff, 0x00010000,
1398	0x3c024, 0xffffffff, 0x00030002,
1399	0x3c028, 0xffffffff, 0x00040007,
1400	0x3c02c, 0xffffffff, 0x00060005,
1401	0x3c030, 0xffffffff, 0x00090008,
1402	0x3c034, 0xffffffff, 0x00010000,
1403	0x3c038, 0xffffffff, 0x00030002,
1404	0x3c03c, 0xffffffff, 0x00040007,
1405	0x3c040, 0xffffffff, 0x00060005,
1406	0x3c044, 0xffffffff, 0x00090008,
1407	0x3c000, 0xffffffff, 0x96e00200,
1408	0x8708, 0xffffffff, 0x00900100,
1409	0xc424, 0xffffffff, 0x0020003f,
1410	0x38, 0xffffffff, 0x0140001c,
1411	0x3c, 0x000f0000, 0x000f0000,
1412	0x220, 0xffffffff, 0xC060000C,
1413	0x224, 0xc0000fff, 0x00000100,
1414	0x20a8, 0xffffffff, 0x00000104,
1415	0x55e4, 0xff000fff, 0x00000100,
1416	0x30cc, 0xc0000fff, 0x00000104,
1417	0xc1e4, 0x00000001, 0x00000001,
1418	0xd00c, 0xff000ff0, 0x00000100,
1419	0xd80c, 0xff000ff0, 0x00000100
1420};
1421
1422static const u32 hawaii_golden_spm_registers[] =
1423{
1424	0x30800, 0xe0ffffff, 0xe0000000
1425};
1426
1427static const u32 hawaii_golden_common_registers[] =
1428{
1429	0x30800, 0xffffffff, 0xe0000000,
1430	0x28350, 0xffffffff, 0x3a00161a,
1431	0x28354, 0xffffffff, 0x0000002e,
1432	0x9a10, 0xffffffff, 0x00018208,
1433	0x98f8, 0xffffffff, 0x12011003
1434};
1435
1436static const u32 hawaii_golden_registers[] =
1437{
1438	0x3354, 0x00000333, 0x00000333,
1439	0x9a10, 0x00010000, 0x00058208,
1440	0x9830, 0xffffffff, 0x00000000,
1441	0x9834, 0xf00fffff, 0x00000400,
1442	0x9838, 0x0002021c, 0x00020200,
1443	0xc78, 0x00000080, 0x00000000,
1444	0x5bb0, 0x000000f0, 0x00000070,
1445	0x5bc0, 0xf0311fff, 0x80300000,
1446	0x350c, 0x00810000, 0x408af000,
1447	0x7030, 0x31000111, 0x00000011,
1448	0x2f48, 0x73773777, 0x12010001,
1449	0x2120, 0x0000007f, 0x0000001b,
1450	0x21dc, 0x00007fb6, 0x00002191,
1451	0x3628, 0x0000003f, 0x0000000a,
1452	0x362c, 0x0000003f, 0x0000000a,
1453	0x2ae4, 0x00073ffe, 0x000022a2,
1454	0x240c, 0x000007ff, 0x00000000,
1455	0x8bf0, 0x00002001, 0x00000001,
1456	0x8b24, 0xffffffff, 0x00ffffff,
1457	0x30a04, 0x0000ff0f, 0x00000000,
1458	0x28a4c, 0x07ffffff, 0x06000000,
1459	0x3e78, 0x00000001, 0x00000002,
1460	0xc768, 0x00000008, 0x00000008,
1461	0xc770, 0x00000f00, 0x00000800,
1462	0xc774, 0x00000f00, 0x00000800,
1463	0xc798, 0x00ffffff, 0x00ff7fbf,
1464	0xc79c, 0x00ffffff, 0x00ff7faf,
1465	0x8c00, 0x000000ff, 0x00000800,
1466	0xe40, 0x00001fff, 0x00001fff,
1467	0x9060, 0x0000007f, 0x00000020,
1468	0x9508, 0x00010000, 0x00010000,
1469	0xae00, 0x00100000, 0x000ff07c,
1470	0xac14, 0x000003ff, 0x0000000f,
1471	0xac10, 0xffffffff, 0x7564fdec,
1472	0xac0c, 0xffffffff, 0x3120b9a8,
1473	0xac08, 0x20000000, 0x0f9c0000
1474};
1475
1476static const u32 hawaii_mgcg_cgcg_init[] =
1477{
1478	0xc420, 0xffffffff, 0xfffffffd,
1479	0x30800, 0xffffffff, 0xe0000000,
1480	0x3c2a0, 0xffffffff, 0x00000100,
1481	0x3c208, 0xffffffff, 0x00000100,
1482	0x3c2c0, 0xffffffff, 0x00000100,
1483	0x3c2c8, 0xffffffff, 0x00000100,
1484	0x3c2c4, 0xffffffff, 0x00000100,
1485	0x55e4, 0xffffffff, 0x00200100,
1486	0x3c280, 0xffffffff, 0x00000100,
1487	0x3c214, 0xffffffff, 0x06000100,
1488	0x3c220, 0xffffffff, 0x00000100,
1489	0x3c218, 0xffffffff, 0x06000100,
1490	0x3c204, 0xffffffff, 0x00000100,
1491	0x3c2e0, 0xffffffff, 0x00000100,
1492	0x3c224, 0xffffffff, 0x00000100,
1493	0x3c200, 0xffffffff, 0x00000100,
1494	0x3c230, 0xffffffff, 0x00000100,
1495	0x3c234, 0xffffffff, 0x00000100,
1496	0x3c250, 0xffffffff, 0x00000100,
1497	0x3c254, 0xffffffff, 0x00000100,
1498	0x3c258, 0xffffffff, 0x00000100,
1499	0x3c25c, 0xffffffff, 0x00000100,
1500	0x3c260, 0xffffffff, 0x00000100,
1501	0x3c27c, 0xffffffff, 0x00000100,
1502	0x3c278, 0xffffffff, 0x00000100,
1503	0x3c210, 0xffffffff, 0x06000100,
1504	0x3c290, 0xffffffff, 0x00000100,
1505	0x3c274, 0xffffffff, 0x00000100,
1506	0x3c2b4, 0xffffffff, 0x00000100,
1507	0x3c2b0, 0xffffffff, 0x00000100,
1508	0x3c270, 0xffffffff, 0x00000100,
1509	0x30800, 0xffffffff, 0xe0000000,
1510	0x3c020, 0xffffffff, 0x00010000,
1511	0x3c024, 0xffffffff, 0x00030002,
1512	0x3c028, 0xffffffff, 0x00040007,
1513	0x3c02c, 0xffffffff, 0x00060005,
1514	0x3c030, 0xffffffff, 0x00090008,
1515	0x3c034, 0xffffffff, 0x00010000,
1516	0x3c038, 0xffffffff, 0x00030002,
1517	0x3c03c, 0xffffffff, 0x00040007,
1518	0x3c040, 0xffffffff, 0x00060005,
1519	0x3c044, 0xffffffff, 0x00090008,
1520	0x3c048, 0xffffffff, 0x00010000,
1521	0x3c04c, 0xffffffff, 0x00030002,
1522	0x3c050, 0xffffffff, 0x00040007,
1523	0x3c054, 0xffffffff, 0x00060005,
1524	0x3c058, 0xffffffff, 0x00090008,
1525	0x3c05c, 0xffffffff, 0x00010000,
1526	0x3c060, 0xffffffff, 0x00030002,
1527	0x3c064, 0xffffffff, 0x00040007,
1528	0x3c068, 0xffffffff, 0x00060005,
1529	0x3c06c, 0xffffffff, 0x00090008,
1530	0x3c070, 0xffffffff, 0x00010000,
1531	0x3c074, 0xffffffff, 0x00030002,
1532	0x3c078, 0xffffffff, 0x00040007,
1533	0x3c07c, 0xffffffff, 0x00060005,
1534	0x3c080, 0xffffffff, 0x00090008,
1535	0x3c084, 0xffffffff, 0x00010000,
1536	0x3c088, 0xffffffff, 0x00030002,
1537	0x3c08c, 0xffffffff, 0x00040007,
1538	0x3c090, 0xffffffff, 0x00060005,
1539	0x3c094, 0xffffffff, 0x00090008,
1540	0x3c098, 0xffffffff, 0x00010000,
1541	0x3c09c, 0xffffffff, 0x00030002,
1542	0x3c0a0, 0xffffffff, 0x00040007,
1543	0x3c0a4, 0xffffffff, 0x00060005,
1544	0x3c0a8, 0xffffffff, 0x00090008,
1545	0x3c0ac, 0xffffffff, 0x00010000,
1546	0x3c0b0, 0xffffffff, 0x00030002,
1547	0x3c0b4, 0xffffffff, 0x00040007,
1548	0x3c0b8, 0xffffffff, 0x00060005,
1549	0x3c0bc, 0xffffffff, 0x00090008,
1550	0x3c0c0, 0xffffffff, 0x00010000,
1551	0x3c0c4, 0xffffffff, 0x00030002,
1552	0x3c0c8, 0xffffffff, 0x00040007,
1553	0x3c0cc, 0xffffffff, 0x00060005,
1554	0x3c0d0, 0xffffffff, 0x00090008,
1555	0x3c0d4, 0xffffffff, 0x00010000,
1556	0x3c0d8, 0xffffffff, 0x00030002,
1557	0x3c0dc, 0xffffffff, 0x00040007,
1558	0x3c0e0, 0xffffffff, 0x00060005,
1559	0x3c0e4, 0xffffffff, 0x00090008,
1560	0x3c0e8, 0xffffffff, 0x00010000,
1561	0x3c0ec, 0xffffffff, 0x00030002,
1562	0x3c0f0, 0xffffffff, 0x00040007,
1563	0x3c0f4, 0xffffffff, 0x00060005,
1564	0x3c0f8, 0xffffffff, 0x00090008,
1565	0xc318, 0xffffffff, 0x00020200,
1566	0x3350, 0xffffffff, 0x00000200,
1567	0x15c0, 0xffffffff, 0x00000400,
1568	0x55e8, 0xffffffff, 0x00000000,
1569	0x2f50, 0xffffffff, 0x00000902,
1570	0x3c000, 0xffffffff, 0x96940200,
1571	0x8708, 0xffffffff, 0x00900100,
1572	0xc424, 0xffffffff, 0x0020003f,
1573	0x38, 0xffffffff, 0x0140001c,
1574	0x3c, 0x000f0000, 0x000f0000,
1575	0x220, 0xffffffff, 0xc060000c,
1576	0x224, 0xc0000fff, 0x00000100,
1577	0xf90, 0xffffffff, 0x00000100,
1578	0xf98, 0x00000101, 0x00000000,
1579	0x20a8, 0xffffffff, 0x00000104,
1580	0x55e4, 0xff000fff, 0x00000100,
1581	0x30cc, 0xc0000fff, 0x00000104,
1582	0xc1e4, 0x00000001, 0x00000001,
1583	0xd00c, 0xff000ff0, 0x00000100,
1584	0xd80c, 0xff000ff0, 0x00000100
1585};
1586
1587static const u32 godavari_golden_registers[] =
1588{
1589	0x55e4, 0xff607fff, 0xfc000100,
1590	0x6ed8, 0x00010101, 0x00010000,
1591	0x9830, 0xffffffff, 0x00000000,
1592	0x98302, 0xf00fffff, 0x00000400,
1593	0x6130, 0xffffffff, 0x00010000,
1594	0x5bb0, 0x000000f0, 0x00000070,
1595	0x5bc0, 0xf0311fff, 0x80300000,
1596	0x98f8, 0x73773777, 0x12010001,
1597	0x98fc, 0xffffffff, 0x00000010,
1598	0x8030, 0x00001f0f, 0x0000100a,
1599	0x2f48, 0x73773777, 0x12010001,
1600	0x2408, 0x000fffff, 0x000c007f,
1601	0x8a14, 0xf000003f, 0x00000007,
1602	0x8b24, 0xffffffff, 0x00ff0fff,
1603	0x30a04, 0x0000ff0f, 0x00000000,
1604	0x28a4c, 0x07ffffff, 0x06000000,
1605	0x4d8, 0x00000fff, 0x00000100,
1606	0xd014, 0x00010000, 0x00810001,
1607	0xd814, 0x00010000, 0x00810001,
1608	0x3e78, 0x00000001, 0x00000002,
1609	0xc768, 0x00000008, 0x00000008,
1610	0xc770, 0x00000f00, 0x00000800,
1611	0xc774, 0x00000f00, 0x00000800,
1612	0xc798, 0x00ffffff, 0x00ff7fbf,
1613	0xc79c, 0x00ffffff, 0x00ff7faf,
1614	0x8c00, 0x000000ff, 0x00000001,
1615	0x214f8, 0x01ff01ff, 0x00000002,
1616	0x21498, 0x007ff800, 0x00200000,
1617	0x2015c, 0xffffffff, 0x00000f40,
1618	0x88c4, 0x001f3ae3, 0x00000082,
1619	0x88d4, 0x0000001f, 0x00000010,
1620	0x30934, 0xffffffff, 0x00000000
1621};
1622
1623
/**
 * cik_init_golden_registers - program the per-ASIC golden register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the golden register tables defined above (clockgating init,
 * golden, common and SPM sequences, in that order) for the detected
 * family via radeon_program_register_sequence().  Unknown families are
 * left untouched.  Mullins reuses the kalindi tables except for the
 * godavari golden register list.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		/* same as KABINI except for the golden register list */
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}
1704
1705/**
1706 * cik_get_xclk - get the xclk
1707 *
1708 * @rdev: radeon_device pointer
1709 *
1710 * Returns the reference clock used by the gfx engine
1711 * (CIK).
1712 */
1713u32 cik_get_xclk(struct radeon_device *rdev)
1714{
1715	u32 reference_clock = rdev->clock.spll.reference_freq;
1716
1717	if (rdev->flags & RADEON_IS_IGP) {
1718		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719			return reference_clock / 2;
1720	} else {
1721		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722			return reference_clock / 4;
1723	}
1724	return reference_clock;
1725}
1726
1727/**
1728 * cik_mm_rdoorbell - read a doorbell dword
1729 *
1730 * @rdev: radeon_device pointer
1731 * @index: doorbell index
1732 *
1733 * Returns the value in the doorbell aperture at the
1734 * requested doorbell index (CIK).
1735 */
1736u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737{
1738	if (index < rdev->doorbell.num_doorbells) {
1739		return readl(rdev->doorbell.ptr + index);
1740	} else {
1741		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742		return 0;
1743	}
1744}
1745
1746/**
1747 * cik_mm_wdoorbell - write a doorbell dword
1748 *
1749 * @rdev: radeon_device pointer
1750 * @index: doorbell index
1751 * @v: value to write
1752 *
1753 * Writes @v to the doorbell aperture at the
1754 * requested doorbell index (CIK).
1755 */
1756void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757{
1758	if (index < rdev->doorbell.num_doorbells) {
1759		writel(v, rdev->doorbell.ptr + index);
1760	} else {
1761		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762	}
1763}
1764
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * MC io-debug setup tables for the legacy (non-"new_fw") firmware path.
 * Each entry is a {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair
 * written verbatim by ci_mc_load_microcode() before the MC ucode upload.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1834
1835
1836/**
1837 * cik_srbm_select - select specific register instances
1838 *
1839 * @rdev: radeon_device pointer
1840 * @me: selected ME (micro engine)
1841 * @pipe: pipe
1842 * @queue: queue
1843 * @vmid: VMID
1844 *
1845 * Switches the currently active registers instances.  Some
1846 * registers are instanced per VMID, others are instanced per
1847 * me/pipe/queue combination.
1848 */
1849static void cik_srbm_select(struct radeon_device *rdev,
1850			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851{
1852	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853			     MEID(me & 0x3) |
1854			     VMID(vmid & 0xf) |
1855			     QUEUEID(queue & 0x7));
1856	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857}
1858
1859/* ucode loading */
1860/**
1861 * ci_mc_load_microcode - load MC ucode into the hw
1862 *
1863 * @rdev: radeon_device pointer
1864 *
1865 * Load the GDDR MC ucode into the hw (CIK).
1866 * Returns 0 on success, error on failure.
1867 */
1868int ci_mc_load_microcode(struct radeon_device *rdev)
1869{
1870	const __be32 *fw_data = NULL;
1871	const __le32 *new_fw_data = NULL;
1872	u32 running, blackout = 0, tmp;
1873	u32 *io_mc_regs = NULL;
1874	const __le32 *new_io_mc_regs = NULL;
1875	int i, regs_size, ucode_size;
1876
1877	if (!rdev->mc_fw)
1878		return -EINVAL;
1879
1880	if (rdev->new_fw) {
1881		const struct mc_firmware_header_v1_0 *hdr =
1882			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883
1884		radeon_ucode_print_mc_hdr(&hdr->header);
1885
1886		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887		new_io_mc_regs = (const __le32 *)
1888			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890		new_fw_data = (const __le32 *)
1891			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892	} else {
1893		ucode_size = rdev->mc_fw->size / 4;
1894
1895		switch (rdev->family) {
1896		case CHIP_BONAIRE:
1897			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899			break;
1900		case CHIP_HAWAII:
1901			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903			break;
1904		default:
1905			return -EINVAL;
1906		}
1907		fw_data = (const __be32 *)rdev->mc_fw->data;
1908	}
1909
1910	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911
1912	if (running == 0) {
1913		if (running) {
1914			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916		}
1917
1918		/* reset the engine and set to writable */
1919		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922		/* load mc io regs */
1923		for (i = 0; i < regs_size; i++) {
1924			if (rdev->new_fw) {
1925				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927			} else {
1928				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930			}
1931		}
1932
1933		tmp = RREG32(MC_SEQ_MISC0);
1934		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939		}
1940
1941		/* load the MC ucode */
1942		for (i = 0; i < ucode_size; i++) {
1943			if (rdev->new_fw)
1944				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945			else
1946				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947		}
1948
1949		/* put the engine back into the active state */
1950		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954		/* wait for training to complete */
1955		for (i = 0; i < rdev->usec_timeout; i++) {
1956			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957				break;
1958			udelay(1);
1959		}
1960		for (i = 0; i < rdev->usec_timeout; i++) {
1961			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962				break;
1963			udelay(1);
1964		}
1965
1966		if (running)
1967			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968	}
1969
1970	return 0;
1971}
1972
1973/**
1974 * cik_init_microcode - load ucode images from disk
1975 *
1976 * @rdev: radeon_device pointer
1977 *
1978 * Use the firmware interface to load the ucode images into
1979 * the driver (not loaded into hw).
1980 * Returns 0 on success, error on failure.
1981 */
1982static int cik_init_microcode(struct radeon_device *rdev)
1983{
1984	const char *chip_name;
1985	const char *new_chip_name;
1986	size_t pfp_req_size, me_req_size, ce_req_size,
1987		mec_req_size, rlc_req_size, mc_req_size = 0,
1988		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989	char fw_name[30];
1990	int new_fw = 0;
1991	int err;
1992	int num_fw;
1993
1994	DRM_DEBUG("\n");
1995
1996	switch (rdev->family) {
1997	case CHIP_BONAIRE:
1998		chip_name = "BONAIRE";
1999		new_chip_name = "bonaire";
2000		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009		num_fw = 8;
2010		break;
2011	case CHIP_HAWAII:
2012		chip_name = "HAWAII";
2013		new_chip_name = "hawaii";
2014		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023		num_fw = 8;
2024		break;
2025	case CHIP_KAVERI:
2026		chip_name = "KAVERI";
2027		new_chip_name = "kaveri";
2028		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034		num_fw = 7;
2035		break;
2036	case CHIP_KABINI:
2037		chip_name = "KABINI";
2038		new_chip_name = "kabini";
2039		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045		num_fw = 6;
2046		break;
2047	case CHIP_MULLINS:
2048		chip_name = "MULLINS";
2049		new_chip_name = "mullins";
2050		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056		num_fw = 6;
2057		break;
2058	default: BUG();
2059	}
2060
2061	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062
2063	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065	if (err) {
2066		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068		if (err)
2069			goto out;
2070		if (rdev->pfp_fw->size != pfp_req_size) {
2071			printk(KERN_ERR
2072			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073			       rdev->pfp_fw->size, fw_name);
2074			err = -EINVAL;
2075			goto out;
2076		}
2077	} else {
2078		err = radeon_ucode_validate(rdev->pfp_fw);
2079		if (err) {
2080			printk(KERN_ERR
2081			       "cik_fw: validation failed for firmware \"%s\"\n",
2082			       fw_name);
2083			goto out;
2084		} else {
2085			new_fw++;
2086		}
2087	}
2088
2089	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091	if (err) {
2092		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094		if (err)
2095			goto out;
2096		if (rdev->me_fw->size != me_req_size) {
2097			printk(KERN_ERR
2098			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099			       rdev->me_fw->size, fw_name);
2100			err = -EINVAL;
2101		}
2102	} else {
2103		err = radeon_ucode_validate(rdev->me_fw);
2104		if (err) {
2105			printk(KERN_ERR
2106			       "cik_fw: validation failed for firmware \"%s\"\n",
2107			       fw_name);
2108			goto out;
2109		} else {
2110			new_fw++;
2111		}
2112	}
2113
2114	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116	if (err) {
2117		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119		if (err)
2120			goto out;
2121		if (rdev->ce_fw->size != ce_req_size) {
2122			printk(KERN_ERR
2123			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124			       rdev->ce_fw->size, fw_name);
2125			err = -EINVAL;
2126		}
2127	} else {
2128		err = radeon_ucode_validate(rdev->ce_fw);
2129		if (err) {
2130			printk(KERN_ERR
2131			       "cik_fw: validation failed for firmware \"%s\"\n",
2132			       fw_name);
2133			goto out;
2134		} else {
2135			new_fw++;
2136		}
2137	}
2138
2139	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141	if (err) {
2142		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144		if (err)
2145			goto out;
2146		if (rdev->mec_fw->size != mec_req_size) {
2147			printk(KERN_ERR
2148			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149			       rdev->mec_fw->size, fw_name);
2150			err = -EINVAL;
2151		}
2152	} else {
2153		err = radeon_ucode_validate(rdev->mec_fw);
2154		if (err) {
2155			printk(KERN_ERR
2156			       "cik_fw: validation failed for firmware \"%s\"\n",
2157			       fw_name);
2158			goto out;
2159		} else {
2160			new_fw++;
2161		}
2162	}
2163
2164	if (rdev->family == CHIP_KAVERI) {
2165		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167		if (err) {
2168			goto out;
2169		} else {
2170			err = radeon_ucode_validate(rdev->mec2_fw);
2171			if (err) {
2172				goto out;
2173			} else {
2174				new_fw++;
2175			}
2176		}
2177	}
2178
2179	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181	if (err) {
2182		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184		if (err)
2185			goto out;
2186		if (rdev->rlc_fw->size != rlc_req_size) {
2187			printk(KERN_ERR
2188			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189			       rdev->rlc_fw->size, fw_name);
2190			err = -EINVAL;
2191		}
2192	} else {
2193		err = radeon_ucode_validate(rdev->rlc_fw);
2194		if (err) {
2195			printk(KERN_ERR
2196			       "cik_fw: validation failed for firmware \"%s\"\n",
2197			       fw_name);
2198			goto out;
2199		} else {
2200			new_fw++;
2201		}
2202	}
2203
2204	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206	if (err) {
2207		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209		if (err)
2210			goto out;
2211		if (rdev->sdma_fw->size != sdma_req_size) {
2212			printk(KERN_ERR
2213			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214			       rdev->sdma_fw->size, fw_name);
2215			err = -EINVAL;
2216		}
2217	} else {
2218		err = radeon_ucode_validate(rdev->sdma_fw);
2219		if (err) {
2220			printk(KERN_ERR
2221			       "cik_fw: validation failed for firmware \"%s\"\n",
2222			       fw_name);
2223			goto out;
2224		} else {
2225			new_fw++;
2226		}
2227	}
2228
2229	/* No SMC, MC ucode on APUs */
2230	if (!(rdev->flags & RADEON_IS_IGP)) {
2231		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233		if (err) {
2234			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236			if (err) {
2237				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239				if (err)
2240					goto out;
2241			}
2242			if ((rdev->mc_fw->size != mc_req_size) &&
2243			    (rdev->mc_fw->size != mc2_req_size)){
2244				printk(KERN_ERR
2245				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246				       rdev->mc_fw->size, fw_name);
2247				err = -EINVAL;
2248			}
2249			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250		} else {
2251			err = radeon_ucode_validate(rdev->mc_fw);
2252			if (err) {
2253				printk(KERN_ERR
2254				       "cik_fw: validation failed for firmware \"%s\"\n",
2255				       fw_name);
2256				goto out;
2257			} else {
2258				new_fw++;
2259			}
2260		}
2261
2262		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264		if (err) {
2265			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267			if (err) {
2268				printk(KERN_ERR
2269				       "smc: error loading firmware \"%s\"\n",
2270				       fw_name);
2271				release_firmware(rdev->smc_fw);
2272				rdev->smc_fw = NULL;
2273				err = 0;
2274			} else if (rdev->smc_fw->size != smc_req_size) {
2275				printk(KERN_ERR
2276				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277				       rdev->smc_fw->size, fw_name);
2278				err = -EINVAL;
2279			}
2280		} else {
2281			err = radeon_ucode_validate(rdev->smc_fw);
2282			if (err) {
2283				printk(KERN_ERR
2284				       "cik_fw: validation failed for firmware \"%s\"\n",
2285				       fw_name);
2286				goto out;
2287			} else {
2288				new_fw++;
2289			}
2290		}
2291	}
2292
2293	if (new_fw == 0) {
2294		rdev->new_fw = false;
2295	} else if (new_fw < num_fw) {
2296		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297		err = -EINVAL;
2298	} else {
2299		rdev->new_fw = true;
2300	}
2301
2302out:
2303	if (err) {
2304		if (err != -EINVAL)
2305			printk(KERN_ERR
2306			       "cik_cp: Failed to load firmware \"%s\"\n",
2307			       fw_name);
2308		release_firmware(rdev->pfp_fw);
2309		rdev->pfp_fw = NULL;
2310		release_firmware(rdev->me_fw);
2311		rdev->me_fw = NULL;
2312		release_firmware(rdev->ce_fw);
2313		rdev->ce_fw = NULL;
2314		release_firmware(rdev->mec_fw);
2315		rdev->mec_fw = NULL;
2316		release_firmware(rdev->mec2_fw);
2317		rdev->mec2_fw = NULL;
2318		release_firmware(rdev->rlc_fw);
2319		rdev->rlc_fw = NULL;
2320		release_firmware(rdev->sdma_fw);
2321		rdev->sdma_fw = NULL;
2322		release_firmware(rdev->mc_fw);
2323		rdev->mc_fw = NULL;
2324		release_firmware(rdev->smc_fw);
2325		rdev->smc_fw = NULL;
2326	}
2327	return err;
2328}
2329
2330/*
2331 * Core functions
2332 */
2333/**
2334 * cik_tiling_mode_table_init - init the hw tiling table
2335 *
2336 * @rdev: radeon_device pointer
2337 *
2338 * Starting with SI, the tiling setup is done globally in a
2339 * set of 32 tiling modes.  Rather than selecting each set of
2340 * parameters per surface as on older asics, we just select
2341 * which index in the tiling table we want to use, and the
2342 * surface uses those parameters (CIK).
2343 */
2344static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345{
2346	u32 *tile = rdev->config.cik.tile_mode_array;
2347	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2348	const u32 num_tile_mode_states =
2349			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2350	const u32 num_secondary_tile_mode_states =
2351			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2352	u32 reg_offset, split_equal_to_row_size;
2353	u32 num_pipe_configs;
2354	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2355		rdev->config.cik.max_shader_engines;
2356
2357	switch (rdev->config.cik.mem_row_size_in_kb) {
2358	case 1:
2359		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2360		break;
2361	case 2:
2362	default:
2363		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2364		break;
2365	case 4:
2366		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2367		break;
2368	}
2369
2370	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2371	if (num_pipe_configs > 8)
2372		num_pipe_configs = 16;
2373
2374	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2375		tile[reg_offset] = 0;
2376	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2377		macrotile[reg_offset] = 0;
2378
2379	switch(num_pipe_configs) {
2380	case 16:
2381		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2383			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2385		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2389		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2393		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2397		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400			   TILE_SPLIT(split_equal_to_row_size));
2401		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2402			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2405			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2406			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2408		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411			   TILE_SPLIT(split_equal_to_row_size));
2412		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2413			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2414		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2417		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2419			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2424			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2432		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2434			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2441			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2445			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2446			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2447		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2448			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2449			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2454			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2456			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2458			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459
2460		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2463			   NUM_BANKS(ADDR_SURF_16_BANK));
2464		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2466			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467			   NUM_BANKS(ADDR_SURF_16_BANK));
2468		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471			   NUM_BANKS(ADDR_SURF_16_BANK));
2472		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475			   NUM_BANKS(ADDR_SURF_16_BANK));
2476		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479			   NUM_BANKS(ADDR_SURF_8_BANK));
2480		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483			   NUM_BANKS(ADDR_SURF_4_BANK));
2484		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487			   NUM_BANKS(ADDR_SURF_2_BANK));
2488		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491			   NUM_BANKS(ADDR_SURF_16_BANK));
2492		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495			   NUM_BANKS(ADDR_SURF_16_BANK));
2496		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499			    NUM_BANKS(ADDR_SURF_16_BANK));
2500		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503			    NUM_BANKS(ADDR_SURF_8_BANK));
2504		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507			    NUM_BANKS(ADDR_SURF_4_BANK));
2508		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511			    NUM_BANKS(ADDR_SURF_2_BANK));
2512		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515			    NUM_BANKS(ADDR_SURF_2_BANK));
2516
2517		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2518			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2519		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2520			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2521		break;
2522
2523	case 8:
2524		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2528		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2532		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2536		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2540		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543			   TILE_SPLIT(split_equal_to_row_size));
2544		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2547		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2548			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2551		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554			   TILE_SPLIT(split_equal_to_row_size));
2555		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2556			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2557		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2560		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2562			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2569			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2573			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2575		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2584			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2590		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2595			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2597			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602
2603		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606				NUM_BANKS(ADDR_SURF_16_BANK));
2607		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2609				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2610				NUM_BANKS(ADDR_SURF_16_BANK));
2611		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2613				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614				NUM_BANKS(ADDR_SURF_16_BANK));
2615		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618				NUM_BANKS(ADDR_SURF_16_BANK));
2619		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2622				NUM_BANKS(ADDR_SURF_8_BANK));
2623		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626				NUM_BANKS(ADDR_SURF_4_BANK));
2627		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630				NUM_BANKS(ADDR_SURF_2_BANK));
2631		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634				NUM_BANKS(ADDR_SURF_16_BANK));
2635		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638				NUM_BANKS(ADDR_SURF_16_BANK));
2639		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2641				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2642				NUM_BANKS(ADDR_SURF_16_BANK));
2643		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646				NUM_BANKS(ADDR_SURF_16_BANK));
2647		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650				NUM_BANKS(ADDR_SURF_8_BANK));
2651		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654				NUM_BANKS(ADDR_SURF_4_BANK));
2655		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658				NUM_BANKS(ADDR_SURF_2_BANK));
2659
2660		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2661			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2662		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2663			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2664		break;
2665
2666	case 4:
2667		if (num_rbs == 4) {
2668		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2670			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2672		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2676		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2684		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687			   TILE_SPLIT(split_equal_to_row_size));
2688		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2689			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2690			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2691		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2692			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2693			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2695		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698			   TILE_SPLIT(split_equal_to_row_size));
2699		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2700			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2701		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2704		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2711			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2713			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2719		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2720			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2721			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2726			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2728			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2732			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2734		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2735			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2736			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2743			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746
2747		} else if (num_rbs < 4) {
2748		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2750			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2752		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2756		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2760		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2764		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767			   TILE_SPLIT(split_equal_to_row_size));
2768		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2769			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2771		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2772			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2773			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2775		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778			   TILE_SPLIT(split_equal_to_row_size));
2779		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2780			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2781		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2784		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2786			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2788		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2793			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2797			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2798			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2799		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2801			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2804			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2808			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2814		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2815			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2819			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2823			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826		}
2827
2828		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831				NUM_BANKS(ADDR_SURF_16_BANK));
2832		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835				NUM_BANKS(ADDR_SURF_16_BANK));
2836		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839				NUM_BANKS(ADDR_SURF_16_BANK));
2840		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843				NUM_BANKS(ADDR_SURF_16_BANK));
2844		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847				NUM_BANKS(ADDR_SURF_16_BANK));
2848		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851				NUM_BANKS(ADDR_SURF_8_BANK));
2852		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2855				NUM_BANKS(ADDR_SURF_4_BANK));
2856		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859				NUM_BANKS(ADDR_SURF_16_BANK));
2860		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2862				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863				NUM_BANKS(ADDR_SURF_16_BANK));
2864		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867				NUM_BANKS(ADDR_SURF_16_BANK));
2868		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2870				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871				NUM_BANKS(ADDR_SURF_16_BANK));
2872		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2874				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2875				NUM_BANKS(ADDR_SURF_16_BANK));
2876		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879				NUM_BANKS(ADDR_SURF_8_BANK));
2880		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2883				NUM_BANKS(ADDR_SURF_4_BANK));
2884
2885		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2886			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2887		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2888			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2889		break;
2890
2891	case 2:
2892		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894			   PIPE_CONFIG(ADDR_SURF_P2) |
2895			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898			   PIPE_CONFIG(ADDR_SURF_P2) |
2899			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2900		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902			   PIPE_CONFIG(ADDR_SURF_P2) |
2903			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906			   PIPE_CONFIG(ADDR_SURF_P2) |
2907			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2908		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910			   PIPE_CONFIG(ADDR_SURF_P2) |
2911			   TILE_SPLIT(split_equal_to_row_size));
2912		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913			   PIPE_CONFIG(ADDR_SURF_P2) |
2914			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2916			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2917			   PIPE_CONFIG(ADDR_SURF_P2) |
2918			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2919		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921			   PIPE_CONFIG(ADDR_SURF_P2) |
2922			   TILE_SPLIT(split_equal_to_row_size));
2923		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2924			   PIPE_CONFIG(ADDR_SURF_P2);
2925		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927			   PIPE_CONFIG(ADDR_SURF_P2));
2928		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2929			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930			    PIPE_CONFIG(ADDR_SURF_P2) |
2931			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2932		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934			    PIPE_CONFIG(ADDR_SURF_P2) |
2935			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2937			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938			    PIPE_CONFIG(ADDR_SURF_P2) |
2939			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941			    PIPE_CONFIG(ADDR_SURF_P2) |
2942			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2943		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2945			    PIPE_CONFIG(ADDR_SURF_P2) |
2946			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949			    PIPE_CONFIG(ADDR_SURF_P2) |
2950			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2952			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953			    PIPE_CONFIG(ADDR_SURF_P2) |
2954			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957			    PIPE_CONFIG(ADDR_SURF_P2));
2958		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2959			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2960			    PIPE_CONFIG(ADDR_SURF_P2) |
2961			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964			    PIPE_CONFIG(ADDR_SURF_P2) |
2965			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2967			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968			    PIPE_CONFIG(ADDR_SURF_P2) |
2969			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970
2971		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2972				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2973				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2974				NUM_BANKS(ADDR_SURF_16_BANK));
2975		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2977				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978				NUM_BANKS(ADDR_SURF_16_BANK));
2979		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982				NUM_BANKS(ADDR_SURF_16_BANK));
2983		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986				NUM_BANKS(ADDR_SURF_16_BANK));
2987		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990				NUM_BANKS(ADDR_SURF_16_BANK));
2991		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994				NUM_BANKS(ADDR_SURF_16_BANK));
2995		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2998				NUM_BANKS(ADDR_SURF_8_BANK));
2999		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3002				NUM_BANKS(ADDR_SURF_16_BANK));
3003		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3005				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006				NUM_BANKS(ADDR_SURF_16_BANK));
3007		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3008				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010				NUM_BANKS(ADDR_SURF_16_BANK));
3011		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3013				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014				NUM_BANKS(ADDR_SURF_16_BANK));
3015		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018				NUM_BANKS(ADDR_SURF_16_BANK));
3019		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3021				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022				NUM_BANKS(ADDR_SURF_16_BANK));
3023		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3026				NUM_BANKS(ADDR_SURF_8_BANK));
3027
3028		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3029			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3030		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3031			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3032		break;
3033
3034	default:
3035		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3036	}
3037}
3038
3039/**
3040 * cik_select_se_sh - select which SE, SH to address
3041 *
3042 * @rdev: radeon_device pointer
3043 * @se_num: shader engine to address
3044 * @sh_num: sh block to address
3045 *
3046 * Select which SE, SH combinations to address. Certain
3047 * registers are instanced per SE or SH.  0xffffffff means
3048 * broadcast to all SEs or SHs (CIK).
3049 */
3050static void cik_select_se_sh(struct radeon_device *rdev,
3051			     u32 se_num, u32 sh_num)
3052{
3053	u32 data = INSTANCE_BROADCAST_WRITES;
3054
3055	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3056		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3057	else if (se_num == 0xffffffff)
3058		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3059	else if (sh_num == 0xffffffff)
3060		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3061	else
3062		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3063	WREG32(GRBM_GFX_INDEX, data);
3064}
3065
3066/**
3067 * cik_create_bitmask - create a bitmask
3068 *
3069 * @bit_width: length of the mask
3070 *
3071 * create a variable length bit mask (CIK).
3072 * Returns the bitmask.
3073 */
3074static u32 cik_create_bitmask(u32 bit_width)
3075{
3076	u32 i, mask = 0;
3077
3078	for (i = 0; i < bit_width; i++) {
3079		mask <<= 1;
3080		mask |= 1;
3081	}
3082	return mask;
3083}
3084
3085/**
3086 * cik_get_rb_disabled - computes the mask of disabled RBs
3087 *
3088 * @rdev: radeon_device pointer
3089 * @max_rb_num: max RBs (render backends) for the asic
3090 * @se_num: number of SEs (shader engines) for the asic
3091 * @sh_per_se: number of SH blocks per SE for the asic
3092 *
3093 * Calculates the bitmask of disabled RBs (CIK).
3094 * Returns the disabled RB bitmask.
3095 */
3096static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3097			      u32 max_rb_num_per_se,
3098			      u32 sh_per_se)
3099{
3100	u32 data, mask;
3101
3102	data = RREG32(CC_RB_BACKEND_DISABLE);
3103	if (data & 1)
3104		data &= BACKEND_DISABLE_MASK;
3105	else
3106		data = 0;
3107	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3108
3109	data >>= BACKEND_DISABLE_SHIFT;
3110
3111	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3112
3113	return data & mask;
3114}
3115
3116/**
3117 * cik_setup_rb - setup the RBs on the asic
3118 *
3119 * @rdev: radeon_device pointer
3120 * @se_num: number of SEs (shader engines) for the asic
3121 * @sh_per_se: number of SH blocks per SE for the asic
3122 * @max_rb_num: max RBs (render backends) for the asic
3123 *
3124 * Configures per-SE/SH RB registers (CIK).
3125 */
3126static void cik_setup_rb(struct radeon_device *rdev,
3127			 u32 se_num, u32 sh_per_se,
3128			 u32 max_rb_num_per_se)
3129{
3130	int i, j;
3131	u32 data, mask;
3132	u32 disabled_rbs = 0;
3133	u32 enabled_rbs = 0;
3134
3135	mutex_lock(&rdev->grbm_idx_mutex);
3136	for (i = 0; i < se_num; i++) {
3137		for (j = 0; j < sh_per_se; j++) {
3138			cik_select_se_sh(rdev, i, j);
3139			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3140			if (rdev->family == CHIP_HAWAII)
3141				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3142			else
3143				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3144		}
3145	}
3146	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3147	mutex_unlock(&rdev->grbm_idx_mutex);
3148
3149	mask = 1;
3150	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3151		if (!(disabled_rbs & mask))
3152			enabled_rbs |= mask;
3153		mask <<= 1;
3154	}
3155
3156	rdev->config.cik.backend_enable_mask = enabled_rbs;
3157
3158	mutex_lock(&rdev->grbm_idx_mutex);
3159	for (i = 0; i < se_num; i++) {
3160		cik_select_se_sh(rdev, i, 0xffffffff);
3161		data = 0;
3162		for (j = 0; j < sh_per_se; j++) {
3163			switch (enabled_rbs & 3) {
3164			case 0:
3165				if (j == 0)
3166					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3167				else
3168					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3169				break;
3170			case 1:
3171				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3172				break;
3173			case 2:
3174				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3175				break;
3176			case 3:
3177			default:
3178				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3179				break;
3180			}
3181			enabled_rbs >>= 2;
3182		}
3183		WREG32(PA_SC_RASTER_CONFIG, data);
3184	}
3185	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3186	mutex_unlock(&rdev->grbm_idx_mutex);
3187}
3188
3189/**
3190 * cik_gpu_init - setup the 3D engine
3191 *
3192 * @rdev: radeon_device pointer
3193 *
3194 * Configures the 3D engine and tiling configuration
3195 * registers so that the 3D engine is usable.
3196 */
3197static void cik_gpu_init(struct radeon_device *rdev)
3198{
3199	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3200	u32 mc_shared_chmap, mc_arb_ramcfg;
3201	u32 hdp_host_path_cntl;
3202	u32 tmp;
3203	int i, j;
3204
3205	switch (rdev->family) {
3206	case CHIP_BONAIRE:
3207		rdev->config.cik.max_shader_engines = 2;
3208		rdev->config.cik.max_tile_pipes = 4;
3209		rdev->config.cik.max_cu_per_sh = 7;
3210		rdev->config.cik.max_sh_per_se = 1;
3211		rdev->config.cik.max_backends_per_se = 2;
3212		rdev->config.cik.max_texture_channel_caches = 4;
3213		rdev->config.cik.max_gprs = 256;
3214		rdev->config.cik.max_gs_threads = 32;
3215		rdev->config.cik.max_hw_contexts = 8;
3216
3217		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3218		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3219		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3220		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3221		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3222		break;
3223	case CHIP_HAWAII:
3224		rdev->config.cik.max_shader_engines = 4;
3225		rdev->config.cik.max_tile_pipes = 16;
3226		rdev->config.cik.max_cu_per_sh = 11;
3227		rdev->config.cik.max_sh_per_se = 1;
3228		rdev->config.cik.max_backends_per_se = 4;
3229		rdev->config.cik.max_texture_channel_caches = 16;
3230		rdev->config.cik.max_gprs = 256;
3231		rdev->config.cik.max_gs_threads = 32;
3232		rdev->config.cik.max_hw_contexts = 8;
3233
3234		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3235		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3236		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3237		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3238		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3239		break;
3240	case CHIP_KAVERI:
3241		rdev->config.cik.max_shader_engines = 1;
3242		rdev->config.cik.max_tile_pipes = 4;
3243		if ((rdev->pdev->device == 0x1304) ||
3244		    (rdev->pdev->device == 0x1305) ||
3245		    (rdev->pdev->device == 0x130C) ||
3246		    (rdev->pdev->device == 0x130F) ||
3247		    (rdev->pdev->device == 0x1310) ||
3248		    (rdev->pdev->device == 0x1311) ||
3249		    (rdev->pdev->device == 0x131C)) {
3250			rdev->config.cik.max_cu_per_sh = 8;
3251			rdev->config.cik.max_backends_per_se = 2;
3252		} else if ((rdev->pdev->device == 0x1309) ||
3253			   (rdev->pdev->device == 0x130A) ||
3254			   (rdev->pdev->device == 0x130D) ||
3255			   (rdev->pdev->device == 0x1313) ||
3256			   (rdev->pdev->device == 0x131D)) {
3257			rdev->config.cik.max_cu_per_sh = 6;
3258			rdev->config.cik.max_backends_per_se = 2;
3259		} else if ((rdev->pdev->device == 0x1306) ||
3260			   (rdev->pdev->device == 0x1307) ||
3261			   (rdev->pdev->device == 0x130B) ||
3262			   (rdev->pdev->device == 0x130E) ||
3263			   (rdev->pdev->device == 0x1315) ||
3264			   (rdev->pdev->device == 0x1318) ||
3265			   (rdev->pdev->device == 0x131B)) {
3266			rdev->config.cik.max_cu_per_sh = 4;
3267			rdev->config.cik.max_backends_per_se = 1;
3268		} else {
3269			rdev->config.cik.max_cu_per_sh = 3;
3270			rdev->config.cik.max_backends_per_se = 1;
3271		}
3272		rdev->config.cik.max_sh_per_se = 1;
3273		rdev->config.cik.max_texture_channel_caches = 4;
3274		rdev->config.cik.max_gprs = 256;
3275		rdev->config.cik.max_gs_threads = 16;
3276		rdev->config.cik.max_hw_contexts = 8;
3277
3278		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3279		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3280		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3281		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3282		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3283		break;
3284	case CHIP_KABINI:
3285	case CHIP_MULLINS:
3286	default:
3287		rdev->config.cik.max_shader_engines = 1;
3288		rdev->config.cik.max_tile_pipes = 2;
3289		rdev->config.cik.max_cu_per_sh = 2;
3290		rdev->config.cik.max_sh_per_se = 1;
3291		rdev->config.cik.max_backends_per_se = 1;
3292		rdev->config.cik.max_texture_channel_caches = 2;
3293		rdev->config.cik.max_gprs = 256;
3294		rdev->config.cik.max_gs_threads = 16;
3295		rdev->config.cik.max_hw_contexts = 8;
3296
3297		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3298		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3299		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3300		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3301		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3302		break;
3303	}
3304
3305	/* Initialize HDP */
3306	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3307		WREG32((0x2c14 + j), 0x00000000);
3308		WREG32((0x2c18 + j), 0x00000000);
3309		WREG32((0x2c1c + j), 0x00000000);
3310		WREG32((0x2c20 + j), 0x00000000);
3311		WREG32((0x2c24 + j), 0x00000000);
3312	}
3313
3314	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3315	WREG32(SRBM_INT_CNTL, 0x1);
3316	WREG32(SRBM_INT_ACK, 0x1);
3317
3318	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3319
3320	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3321	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3322
3323	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3324	rdev->config.cik.mem_max_burst_length_bytes = 256;
3325	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3326	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3327	if (rdev->config.cik.mem_row_size_in_kb > 4)
3328		rdev->config.cik.mem_row_size_in_kb = 4;
3329	/* XXX use MC settings? */
3330	rdev->config.cik.shader_engine_tile_size = 32;
3331	rdev->config.cik.num_gpus = 1;
3332	rdev->config.cik.multi_gpu_tile_size = 64;
3333
3334	/* fix up row size */
3335	gb_addr_config &= ~ROW_SIZE_MASK;
3336	switch (rdev->config.cik.mem_row_size_in_kb) {
3337	case 1:
3338	default:
3339		gb_addr_config |= ROW_SIZE(0);
3340		break;
3341	case 2:
3342		gb_addr_config |= ROW_SIZE(1);
3343		break;
3344	case 4:
3345		gb_addr_config |= ROW_SIZE(2);
3346		break;
3347	}
3348
3349	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3350	 * not have bank info, so create a custom tiling dword.
3351	 * bits 3:0   num_pipes
3352	 * bits 7:4   num_banks
3353	 * bits 11:8  group_size
3354	 * bits 15:12 row_size
3355	 */
3356	rdev->config.cik.tile_config = 0;
3357	switch (rdev->config.cik.num_tile_pipes) {
3358	case 1:
3359		rdev->config.cik.tile_config |= (0 << 0);
3360		break;
3361	case 2:
3362		rdev->config.cik.tile_config |= (1 << 0);
3363		break;
3364	case 4:
3365		rdev->config.cik.tile_config |= (2 << 0);
3366		break;
3367	case 8:
3368	default:
3369		/* XXX what about 12? */
3370		rdev->config.cik.tile_config |= (3 << 0);
3371		break;
3372	}
3373	rdev->config.cik.tile_config |=
3374		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3375	rdev->config.cik.tile_config |=
3376		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3377	rdev->config.cik.tile_config |=
3378		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3379
3380	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3381	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3382	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3383	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3384	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3385	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3386	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3387	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3388
3389	cik_tiling_mode_table_init(rdev);
3390
3391	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3392		     rdev->config.cik.max_sh_per_se,
3393		     rdev->config.cik.max_backends_per_se);
3394
3395	rdev->config.cik.active_cus = 0;
3396	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3397		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3398			rdev->config.cik.active_cus +=
3399				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3400		}
3401	}
3402
3403	/* set HW defaults for 3D engine */
3404	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3405
3406	mutex_lock(&rdev->grbm_idx_mutex);
3407	/*
3408	 * making sure that the following register writes will be broadcasted
3409	 * to all the shaders
3410	 */
3411	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3412	WREG32(SX_DEBUG_1, 0x20);
3413
3414	WREG32(TA_CNTL_AUX, 0x00010000);
3415
3416	tmp = RREG32(SPI_CONFIG_CNTL);
3417	tmp |= 0x03000000;
3418	WREG32(SPI_CONFIG_CNTL, tmp);
3419
3420	WREG32(SQ_CONFIG, 1);
3421
3422	WREG32(DB_DEBUG, 0);
3423
3424	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3425	tmp |= 0x00000400;
3426	WREG32(DB_DEBUG2, tmp);
3427
3428	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3429	tmp |= 0x00020200;
3430	WREG32(DB_DEBUG3, tmp);
3431
3432	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3433	tmp |= 0x00018208;
3434	WREG32(CB_HW_CONTROL, tmp);
3435
3436	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3437
3438	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3439				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3440				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3441				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3442
3443	WREG32(VGT_NUM_INSTANCES, 1);
3444
3445	WREG32(CP_PERFMON_CNTL, 0);
3446
3447	WREG32(SQ_CONFIG, 0);
3448
3449	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3450					  FORCE_EOV_MAX_REZ_CNT(255)));
3451
3452	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3453	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3454
3455	WREG32(VGT_GS_VERTEX_REUSE, 16);
3456	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3457
3458	tmp = RREG32(HDP_MISC_CNTL);
3459	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3460	WREG32(HDP_MISC_CNTL, tmp);
3461
3462	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3463	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3464
3465	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3466	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3467	mutex_unlock(&rdev->grbm_idx_mutex);
3468
3469	udelay(50);
3470}
3471
3472/*
3473 * GPU scratch registers helpers function.
3474 */
3475/**
3476 * cik_scratch_init - setup driver info for CP scratch regs
3477 *
3478 * @rdev: radeon_device pointer
3479 *
3480 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3482 * is not used by default on newer asics (r6xx+).  On newer asics,
3483 * memory buffers are used for fences rather than scratch regs.
3484 */
3485static void cik_scratch_init(struct radeon_device *rdev)
3486{
3487	int i;
3488
3489	rdev->scratch.num_reg = 7;
3490	rdev->scratch.reg_base = SCRATCH_REG0;
3491	for (i = 0; i < rdev->scratch.num_reg; i++) {
3492		rdev->scratch.free[i] = true;
3493		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3494	}
3495}
3496
3497/**
3498 * cik_ring_test - basic gfx ring test
3499 *
3500 * @rdev: radeon_device pointer
3501 * @ring: radeon_ring structure holding ring information
3502 *
3503 * Allocate a scratch register and write to it using the gfx ring (CIK).
3504 * Provides a basic gfx ring test to verify that the ring is working.
3505 * Used by cik_cp_gfx_resume();
3506 * Returns 0 on success, error on failure.
3507 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch reg with a known value, then ask the CP to
	 * overwrite it via a SET_UCONFIG_REG packet
	 */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll until the CP has written the new value, or we time out */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3548
3549/**
3550 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3551 *
3552 * @rdev: radeon_device pointer
3553 * @ridx: radeon ring index
3554 *
3555 * Emits an hdp flush on the cp.
3556 */
3557static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3558				       int ridx)
3559{
3560	struct radeon_ring *ring = &rdev->ring[ridx];
3561	u32 ref_and_mask;
3562
3563	switch (ring->idx) {
3564	case CAYMAN_RING_TYPE_CP1_INDEX:
3565	case CAYMAN_RING_TYPE_CP2_INDEX:
3566	default:
3567		switch (ring->me) {
3568		case 0:
3569			ref_and_mask = CP2 << ring->pipe;
3570			break;
3571		case 1:
3572			ref_and_mask = CP6 << ring->pipe;
3573			break;
3574		default:
3575			return;
3576		}
3577		break;
3578	case RADEON_RING_TYPE_GFX_INDEX:
3579		ref_and_mask = CP0;
3580		break;
3581	}
3582
3583	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3584	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3585				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3586				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3587	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3588	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3589	radeon_ring_write(ring, ref_and_mask);
3590	radeon_ring_write(ring, ref_and_mask);
3591	radeon_ring_write(ring, 0x20); /* poll interval */
3592}
3593
3594/**
3595 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3596 *
3597 * @rdev: radeon_device pointer
3598 * @fence: radeon fence object
3599 *
 * Emits a fence sequence number on the gfx ring and flushes
3601 * GPU caches.
3602 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc); /* low addr bits, dword-aligned */
	/* dummy uses INT_SEL(0): write the seq but raise no interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* real fence uses INT_SEL(2), unlike the dummy's INT_SEL(0) */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3634
3635/**
3636 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3637 *
3638 * @rdev: radeon_device pointer
3639 * @fence: radeon fence object
3640 *
 * Emits a fence sequence number on the compute ring and flushes
3642 * GPU caches.
3643 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write the seq value; INT_SEL(2): raise an interrupt */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3662
3663/**
3664 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3665 *
3666 * @rdev: radeon_device pointer
3667 * @ring: radeon ring buffer object
3668 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3670 *
3671 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3672 * from running ahead of semaphore waits.
3673 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	/* MEM_SEMAPHORE: 64-bit semaphore address, with the signal/wait
	 * select merged into the upper-address dword. */
	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	return true;
}
3694
3695/**
3696 * cik_copy_cpdma - copy pages using the CP DMA engine
3697 *
3698 * @rdev: radeon_device pointer
3699 * @src_offset: src GPU address
3700 * @dst_offset: dst GPU address
3701 * @num_gpu_pages: number of GPU pages to xfer
3702 * @resv: reservation object to sync to
3703 *
3704 * Copy GPU paging using the CP DMA engine (CIK+).
3705 * Used by the radeon ttm implementation to move pages if
3706 * registered as the asic copy callback.
3707 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* DMA_DATA moves at most 0x1fffff bytes per packet; each packet
	 * below is 7 dwords, plus up to 18 dwords of sync/fence overhead.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait on the reservation object's fences before copying */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC; /* only on the last packet */
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3766
3767/*
3768 * IB stuff
3769 */
3770/**
3771 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3772 *
3773 * @rdev: radeon_device pointer
3774 * @ib: radeon indirect buffer object
3775 *
3776 * Emits a DE (drawing engine) or CE (constant engine) IB
3777 * on the gfx ring.  IBs are usually generated by userspace
3778 * acceleration drivers and submitted to the kernel for
3779 * scheduling on the ring.  This function schedules the IB
3780 * on the gfx ring for execution by the GPU.
3781 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* Record the rptr value expected after this IB, either via
		 * the rptr save register or the writeback buffer.  The
		 * +3/+5 accounts for this packet, +4 for the IB packet.
		 */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id it runs under */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3821
3822/**
3823 * cik_ib_test - basic gfx ring IB test
3824 *
3825 * @rdev: radeon_device pointer
3826 * @ring: radeon_ring structure holding ring information
3827 *
3828 * Allocate an IB and execute it on the gfx ring (CIK).
3829 * Provides a basic gfx ring test to verify that IBs are working.
3830 * Returns 0 on success, error on failure.
3831 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the scratch register; the IB should overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword IB: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence to signal before checking the result */
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return -ETIMEDOUT;
	}
	r = 0;
	/* poll the scratch register for the value written by the IB */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3894
3895/*
3896 * CP.
 * On CIK, gfx and compute now have independent command processors.
3898 *
3899 * GFX
3900 * Gfx consists of a single ring and can process both gfx jobs and
3901 * compute jobs.  The gfx CP consists of three microengines (ME):
3902 * PFP - Pre-Fetch Parser
3903 * ME - Micro Engine
3904 * CE - Constant Engine
3905 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3907 * used by the DE so that they can be loaded into cache in parallel
3908 * while the DE is processing state update packets.
3909 *
3910 * Compute
3911 * The compute CP consists of two microengines (ME):
3912 * MEC1 - Compute MicroEngine 1
3913 * MEC2 - Compute MicroEngine 2
3914 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3915 * The queues are exposed to userspace and are programmed directly
3916 * by the compute runtime.
3917 */
3918/**
3919 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3920 *
3921 * @rdev: radeon_device pointer
3922 * @enable: enable or disable the MEs
3923 *
3924 * Halts or unhalts the gfx MEs.
3925 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		/* shrink the active VRAM window while the gfx copy ring is down */
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		/* halt all three gfx microengines: ME, PFP and CE */
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}
3938
3939/**
3940 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3941 *
3942 * @rdev: radeon_device pointer
3943 *
3944 * Loads the gfx PFP, ME, and CE ucode.
3945 * Returns 0 for success, -EINVAL if the ucode is not available.
3946 */
3947static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3948{
3949	int i;
3950
3951	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3952		return -EINVAL;
3953
3954	cik_cp_gfx_enable(rdev, false);
3955
3956	if (rdev->new_fw) {
3957		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3958			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3959		const struct gfx_firmware_header_v1_0 *ce_hdr =
3960			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3961		const struct gfx_firmware_header_v1_0 *me_hdr =
3962			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3963		const __le32 *fw_data;
3964		u32 fw_size;
3965
3966		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3967		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3968		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3969
3970		/* PFP */
3971		fw_data = (const __le32 *)
3972			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3973		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3974		WREG32(CP_PFP_UCODE_ADDR, 0);
3975		for (i = 0; i < fw_size; i++)
3976			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3977		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3978
3979		/* CE */
3980		fw_data = (const __le32 *)
3981			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3982		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3983		WREG32(CP_CE_UCODE_ADDR, 0);
3984		for (i = 0; i < fw_size; i++)
3985			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3986		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3987
3988		/* ME */
3989		fw_data = (const __be32 *)
3990			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3991		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3992		WREG32(CP_ME_RAM_WADDR, 0);
3993		for (i = 0; i < fw_size; i++)
3994			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3995		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3996		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3997	} else {
3998		const __be32 *fw_data;
3999
4000		/* PFP */
4001		fw_data = (const __be32 *)rdev->pfp_fw->data;
4002		WREG32(CP_PFP_UCODE_ADDR, 0);
4003		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4004			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4005		WREG32(CP_PFP_UCODE_ADDR, 0);
4006
4007		/* CE */
4008		fw_data = (const __be32 *)rdev->ce_fw->data;
4009		WREG32(CP_CE_UCODE_ADDR, 0);
4010		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4011			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4012		WREG32(CP_CE_UCODE_ADDR, 0);
4013
4014		/* ME */
4015		fw_data = (const __be32 *)rdev->me_fw->data;
4016		WREG32(CP_ME_RAM_WADDR, 0);
4017		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4018			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4019		WREG32(CP_ME_RAM_WADDR, 0);
4020	}
4021
4022	return 0;
4023}
4024
4025/**
4026 * cik_cp_gfx_start - start the gfx ring
4027 *
4028 * @rdev: radeon_device pointer
4029 *
4030 * Enables the ring and loads the clear state context and other
4031 * packets required to init the ring.
4032 * Returns 0 for success, error for failure.
4033 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state dwords plus 17 dwords of setup packets emitted below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the default state (from clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4085
4086/**
4087 * cik_cp_gfx_fini - stop the gfx ring
4088 *
4089 * @rdev: radeon_device pointer
4090 *
4091 * Stop the gfx ring and tear down the driver ring
4092 * info.
4093 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before tearing down the ring memory */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4099
4100/**
4101 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4102 *
4103 * @rdev: radeon_device pointer
4104 *
4105 * Program the location and size of the gfx ring buffer
4106 * and test it to make sure it's working.
4107 * Returns 0 for success, error for failure.
4108 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* program the ring buffer base address (256-byte aligned) */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* restore the full VRAM size now that the copy ring is usable */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4175
4176u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4177		     struct radeon_ring *ring)
4178{
4179	u32 rptr;
4180
4181	if (rdev->wb.enabled)
4182		rptr = rdev->wb.wb[ring->rptr_offs/4];
4183	else
4184		rptr = RREG32(CP_RB0_RPTR);
4185
4186	return rptr;
4187}
4188
4189u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4190		     struct radeon_ring *ring)
4191{
4192	u32 wptr;
4193
4194	wptr = RREG32(CP_RB0_WPTR);
4195
4196	return wptr;
4197}
4198
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR); /* read back to flush the write (posting read) */
}
4205
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* no writeback: select this queue via SRBM (under the
		 * srbm_mutex) and read the HQD register directly */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4223
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* no writeback: select this queue via SRBM (under the
		 * srbm_mutex) and read the HQD register directly */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4242
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* mirror the new wptr into the writeback buffer, then ring the
	 * queue's doorbell */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4250
/* Tear down one compute queue's HQD state.
 * NOTE: caller must hold srbm_mutex (see cik_cp_compute_enable). */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait for the queue to go inactive before clearing it */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4275
4276/**
4277 * cik_cp_compute_enable - enable/disable the compute CP MEs
4278 *
4279 * @rdev: radeon_device pointer
4280 * @enable: enable or disable the MEs
4281 *
4282 * Halts or unhalts the compute MEs.
4283 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt both compute microengines */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	udelay(50);
}
4304
4305/**
4306 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4307 *
4308 * @rdev: radeon_device pointer
4309 *
4310 * Loads the compute MEC1&2 ucode.
4311 * Returns 0 for success, -EINVAL if the ucode is not available.
4312 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* the MECs must be halted while their ucode is rewritten */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian payload behind a header */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* only Kaveri loads MEC2, from its own mec2 image */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* old-style firmware: raw big-endian dwords, fixed size */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			/* old-style images: MEC2 is loaded from the same
			 * mec_fw image as MEC1 */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4375
4376/**
4377 * cik_cp_compute_start - start the compute queues
4378 *
4379 * @rdev: radeon_device pointer
4380 *
4381 * Enable the compute queues.
4382 * Returns 0 for success, error for failure.
4383 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* currently just un-halts the MECs; per-queue HQD programming is
	 * done separately (cik_cp_compute_resume) */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4390
4391/**
4392 * cik_cp_compute_fini - stop the compute queues
4393 *
4394 * @rdev: radeon_device pointer
4395 *
4396 * Stop the compute queues and tear down the driver queue
4397 * info.
4398 */
4399static void cik_cp_compute_fini(struct radeon_device *rdev)
4400{
4401	int i, idx, r;
4402
4403	cik_cp_compute_enable(rdev, false);
4404
4405	for (i = 0; i < 2; i++) {
4406		if (i == 0)
4407			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4408		else
4409			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4410
4411		if (rdev->ring[idx].mqd_obj) {
4412			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4413			if (unlikely(r != 0))
4414				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4415
4416			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4417			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4418
4419			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4420			rdev->ring[idx].mqd_obj = NULL;
4421		}
4422	}
4423}
4424
4425static void cik_mec_fini(struct radeon_device *rdev)
4426{
4427	int r;
4428
4429	if (rdev->mec.hpd_eop_obj) {
4430		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4431		if (unlikely(r != 0))
4432			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4433		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4434		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4435
4436		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4437		rdev->mec.hpd_eop_obj = NULL;
4438	}
4439}
4440
4441#define MEC_HPD_SIZE 2048
4442
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* allocate the HPD EOP buffer: 2 * MEC_HPD_SIZE bytes per pipe,
	 * pinned in GTT and zeroed below */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4497
/* Per-queue CP HQD register state; embedded in struct bonaire_mqd as
 * queue_state. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4536
/* Memory Queue Descriptor (MQD) layout for CIK compute queues; one BO of
 * this size is allocated per compute ring (see cik_cp_compute_resume). */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* saved HQD register block */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4564
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Allocates (on first use), pins and maps a per-ring MQD buffer object,
 * fills it in, and mirrors its contents into the HQD registers selected
 * through the SRBM.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the MQD bo lazily; it is reused on subsequent resumes */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* route the CP_HQD_* accesses below to this queue's
		 * me/pipe/queue via the SRBM */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active; wait (bounded by
		 * usec_timeout) for the dequeue request to take effect */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size is log2, like CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4805
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4811
/* Load the gfx CP microcode, then the compute (MEC) microcode.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret == 0)
		ret = cik_cp_compute_load_microcode(rdev);

	return ret;
}
4825
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4831
4832static int cik_cp_resume(struct radeon_device *rdev)
4833{
4834	int r;
4835
4836	cik_enable_gui_idle_interrupt(rdev, false);
4837
4838	r = cik_cp_load_microcode(rdev);
4839	if (r)
4840		return r;
4841
4842	r = cik_cp_gfx_resume(rdev);
4843	if (r)
4844		return r;
4845	r = cik_cp_compute_resume(rdev);
4846	if (r)
4847		return r;
4848
4849	cik_enable_gui_idle_interrupt(rdev, true);
4850
4851	return 0;
4852}
4853
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log.
 * Used by the reset paths to aid debugging of GPU hangs.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4893
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4974
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 * The engines are halted and memory access is stopped before the
 * GRBM/SRBM soft reset bits are pulsed; MC client state is saved
 * around the reset and restored afterwards.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, read back, delay, clear, read back */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5105
/* GMCON register state saved across a pci config reset on KV (IGP) parts;
 * see kv_save_regs_for_reset()/kv_restore_regs_for_reset().
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5111
/* Save the GMCON registers before a pci config reset and stop the
 * RENG execute/stutter features so they don't run during the reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5123
5124static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5125				      struct kv_reset_save_regs *save)
5126{
5127	int i;
5128
5129	WREG32(GMCON_PGFSM_WRITE, 0);
5130	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5131
5132	for (i = 0; i < 5; i++)
5133		WREG32(GMCON_PGFSM_WRITE, 0);
5134
5135	WREG32(GMCON_PGFSM_WRITE, 0);
5136	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5137
5138	for (i = 0; i < 5; i++)
5139		WREG32(GMCON_PGFSM_WRITE, 0);
5140
5141	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5142	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5143
5144	for (i = 0; i < 5; i++)
5145		WREG32(GMCON_PGFSM_WRITE, 0);
5146
5147	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5148	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5149
5150	for (i = 0; i < 5; i++)
5151		WREG32(GMCON_PGFSM_WRITE, 0);
5152
5153	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5154	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5155
5156	for (i = 0; i < 5; i++)
5157		WREG32(GMCON_PGFSM_WRITE, 0);
5158
5159	WREG32(GMCON_PGFSM_WRITE, 0);
5160	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5161
5162	for (i = 0; i < 5; i++)
5163		WREG32(GMCON_PGFSM_WRITE, 0);
5164
5165	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5166	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5167
5168	for (i = 0; i < 5; i++)
5169		WREG32(GMCON_PGFSM_WRITE, 0);
5170
5171	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5172	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5173
5174	for (i = 0; i < 5; i++)
5175		WREG32(GMCON_PGFSM_WRITE, 0);
5176
5177	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5178	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5179
5180	for (i = 0; i < 5; i++)
5181		WREG32(GMCON_PGFSM_WRITE, 0);
5182
5183	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5184	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5185
5186	for (i = 0; i < 5; i++)
5187		WREG32(GMCON_PGFSM_WRITE, 0);
5188
5189	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5190	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5191
5192	WREG32(GMCON_MISC3, save->gmcon_misc3);
5193	WREG32(GMCON_MISC, save->gmcon_misc);
5194	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5195}
5196
/* Perform a full pci config reset of the GPU: halt all engines, stop
 * memory access, save GMCON state on IGP parts, trigger the reset via
 * pci config space, then wait for the asic to come back (CONFIG_MEMSIZE
 * reads 0xffffffff while the device is in reset).
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP parts need their GMCON state preserved across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5259
5260/**
5261 * cik_asic_reset - soft reset GPU
5262 *
5263 * @rdev: radeon_device pointer
5264 *
5265 * Look up which blocks are hung and attempt
5266 * to reset them.
5267 * Returns 0 for success.
5268 */
5269int cik_asic_reset(struct radeon_device *rdev)
5270{
5271	u32 reset_mask;
5272
5273	reset_mask = cik_gpu_check_soft_reset(rdev);
5274
5275	if (reset_mask)
5276		r600_set_bios_scratch_engine_hung(rdev, true);
5277
5278	/* try soft reset */
5279	cik_gpu_soft_reset(rdev, reset_mask);
5280
5281	reset_mask = cik_gpu_check_soft_reset(rdev);
5282
5283	/* try pci config reset */
5284	if (reset_mask && radeon_hard_reset)
5285		cik_gpu_pci_config_reset(rdev);
5286
5287	reset_mask = cik_gpu_check_soft_reset(rdev);
5288
5289	if (!reset_mask)
5290		r600_set_bios_scratch_engine_hung(rdev, false);
5291
5292	return 0;
5293}
5294
5295/**
5296 * cik_gfx_is_lockup - check if the 3D engine is locked up
5297 *
5298 * @rdev: radeon_device pointer
5299 * @ring: radeon_ring structure holding ring information
5300 *
5301 * Check if the 3D engine is locked up (CIK).
5302 * Returns true if the engine is locked, false if not.
5303 */
5304bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5305{
5306	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5307
5308	if (!(reset_mask & (RADEON_RESET_GFX |
5309			    RADEON_RESET_COMPUTE |
5310			    RADEON_RESET_CP))) {
5311		radeon_ring_lockup_update(rdev, ring);
5312		return false;
5313	}
5314	return radeon_ring_test_lockup(rdev, ring);
5315}
5316
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop display/MC clients while we reprogram the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16 bits,
	 * both in units of 16MB (>> 24) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5373
5374/**
5375 * cik_mc_init - initialize the memory controller driver params
5376 *
5377 * @rdev: radeon_device pointer
5378 *
5379 * Look up the amount of vram, vram width, and decide how to place
5380 * vram and gart within the GPU's physical address space (CIK).
5381 * Returns 0 for success.
5382 */
5383static int cik_mc_init(struct radeon_device *rdev)
5384{
5385	u32 tmp;
5386	int chansize, numchan;
5387
5388	/* Get VRAM informations */
5389	rdev->mc.vram_is_ddr = true;
5390	tmp = RREG32(MC_ARB_RAMCFG);
5391	if (tmp & CHANSIZE_MASK) {
5392		chansize = 64;
5393	} else {
5394		chansize = 32;
5395	}
5396	tmp = RREG32(MC_SHARED_CHMAP);
5397	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5398	case 0:
5399	default:
5400		numchan = 1;
5401		break;
5402	case 1:
5403		numchan = 2;
5404		break;
5405	case 2:
5406		numchan = 4;
5407		break;
5408	case 3:
5409		numchan = 8;
5410		break;
5411	case 4:
5412		numchan = 3;
5413		break;
5414	case 5:
5415		numchan = 6;
5416		break;
5417	case 6:
5418		numchan = 10;
5419		break;
5420	case 7:
5421		numchan = 12;
5422		break;
5423	case 8:
5424		numchan = 16;
5425		break;
5426	}
5427	rdev->mc.vram_width = numchan * chansize;
5428	/* Could aper size report 0 ? */
5429	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5430	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5431	/* size in MB on si */
5432	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5433	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5434	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5435	si_vram_gtt_location(rdev, &rdev->mc);
5436	radeon_update_bandwidth_info(rdev);
5437
5438	return 0;
5439}
5440
5441/*
5442 * GART
5443 * VMID 0 is the physical GPU addresses as used by the kernel.
5444 * VMIDs 1-15 are used for userspace clients and are handled
5445 * by the radeon vm/hsa code.
5446 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 * The HDP cache is flushed first so pending writes reach memory
 * before the TLB invalidate.
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5462
/* Program the SH_MEM_* registers for VMIDs 8-15, which are used by
 * compute clients.  Each VMID is selected through the SRBM before its
 * registers are written; VMID 0 is reselected on the way out.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	/* private/shared aperture bases packed into one register */
	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5484
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 (kernel GART mappings) */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, cleared here — purpose unknown */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5607
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 * The per-context page table base addresses are saved first so that
 * cik_pcie_gart_enable() can restore them on resume.
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the page table base for contexts 1-15 before disabling */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5646
5647/**
5648 * cik_pcie_gart_fini - vm fini callback
5649 *
5650 * @rdev: radeon_device pointer
5651 *
5652 * Tears down the driver GART/VM setup (CIK).
5653 */
5654static void cik_pcie_gart_fini(struct radeon_device *rdev)
5655{
5656	cik_pcie_gart_disable(rdev);
5657	radeon_gart_table_vram_free(rdev);
5658	radeon_gart_fini(rdev);
5659}
5660
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (success).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5674
5675/*
5676 * vm
5677 * VMID 0 is the physical GPU addresses as used by the kernel.
5678 * VMIDs 1-15 are used for userspace clients and are handled
5679 * by the radeon vm/hsa code.
5680 */
5681/**
5682 * cik_vm_init - cik vm init callback
5683 *
5684 * @rdev: radeon_device pointer
5685 *
5686 * Inits cik specific vm parameters (number of VMs, base of vram for
5687 * VMIDs 1-15) (CIK).
5688 * Returns 0 for success.
5689 */
5690int cik_vm_init(struct radeon_device *rdev)
5691{
5692	/*
5693	 * number of VMs
5694	 * VMID 0 is reserved for System
5695	 * radeon graphics/compute will use VMIDs 1-7
5696	 * amdkfd will use VMIDs 8-15
5697	 */
5698	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5699	/* base offset of vram pages */
5700	if (rdev->flags & RADEON_IS_IGP) {
5701		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5702		tmp <<= 22;
5703		rdev->vm_manager.vram_base_offset = tmp;
5704	} else
5705		rdev->vm_manager.vram_base_offset = 0;
5706
5707	return 0;
5708}
5709
5710/**
5711 * cik_vm_fini - cik vm fini callback
5712 *
5713 * @rdev: radeon_device pointer
5714 *
5715 * Tear down any asic specific VM setup (CIK).
5716 */
5717void cik_vm_fini(struct radeon_device *rdev)
5718{
5719}
5720
5721/**
5722 * cik_vm_decode_fault - print human readable fault info
5723 *
5724 * @rdev: radeon_device pointer
5725 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5726 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5727 *
5728 * Print human readable fault information (CIK).
5729 */
5730static void cik_vm_decode_fault(struct radeon_device *rdev,
5731				u32 status, u32 addr, u32 mc_client)
5732{
5733	u32 mc_id;
5734	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5735	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5736	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5737		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5738
5739	if (rdev->family == CHIP_HAWAII)
5740		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5741	else
5742		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5743
5744	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5745	       protections, vmid, addr,
5746	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5747	       block, mc_client, mc_id);
5748}
5749
5750/**
5751 * cik_vm_flush - cik vm flush using the CP
5752 *
5753 * @rdev: radeon_device pointer
5754 *
5755 * Update the page table base and flush the VM TLB
5756 * using the CP (CIK).
5757 */
5758void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5759		  unsigned vm_id, uint64_t pd_addr)
5760{
5761	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5762
5763	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5764	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5765				 WRITE_DATA_DST_SEL(0)));
5766	if (vm_id < 8) {
5767		radeon_ring_write(ring,
5768				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5769	} else {
5770		radeon_ring_write(ring,
5771				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5772	}
5773	radeon_ring_write(ring, 0);
5774	radeon_ring_write(ring, pd_addr >> 12);
5775
5776	/* update SH_MEM_* regs */
5777	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5778	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5779				 WRITE_DATA_DST_SEL(0)));
5780	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5781	radeon_ring_write(ring, 0);
5782	radeon_ring_write(ring, VMID(vm_id));
5783
5784	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5785	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5786				 WRITE_DATA_DST_SEL(0)));
5787	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5788	radeon_ring_write(ring, 0);
5789
5790	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5791	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5792	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5793	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5794
5795	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5796	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5797				 WRITE_DATA_DST_SEL(0)));
5798	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5799	radeon_ring_write(ring, 0);
5800	radeon_ring_write(ring, VMID(0));
5801
5802	/* HDP flush */
5803	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5804
5805	/* bits 0-15 are the VM contexts0-15 */
5806	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5807	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5808				 WRITE_DATA_DST_SEL(0)));
5809	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5810	radeon_ring_write(ring, 0);
5811	radeon_ring_write(ring, 1 << vm_id);
5812
5813	/* wait for the invalidate to complete */
5814	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5815	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5816				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5817				 WAIT_REG_MEM_ENGINE(0))); /* me */
5818	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5819	radeon_ring_write(ring, 0);
5820	radeon_ring_write(ring, 0); /* ref */
5821	radeon_ring_write(ring, 0); /* mask */
5822	radeon_ring_write(ring, 0x20); /* poll interval */
5823
5824	/* compute doesn't have PFP */
5825	if (usepfp) {
5826		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5827		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5828		radeon_ring_write(ring, 0x0);
5829	}
5830}
5831
5832/*
5833 * RLC
5834 * The RLC is a multi-purpose microengine that handles a
5835 * variety of functions, the most important of which is
5836 * the interrupt controller.
5837 */
5838static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5839					  bool enable)
5840{
5841	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5842
5843	if (enable)
5844		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5845	else
5846		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5847	WREG32(CP_INT_CNTL_RING0, tmp);
5848}
5849
5850static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5851{
5852	u32 tmp;
5853
5854	tmp = RREG32(RLC_LB_CNTL);
5855	if (enable)
5856		tmp |= LOAD_BALANCE_ENABLE;
5857	else
5858		tmp &= ~LOAD_BALANCE_ENABLE;
5859	WREG32(RLC_LB_CNTL, tmp);
5860}
5861
/* Poll until the RLC serdes masters report idle.
 * First waits for the per-CU masters on every SE/SH (selected via
 * GRBM under grbm_idx_mutex), then for the non-CU masters.  Each
 * poll loop gives up after rdev->usec_timeout microseconds.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the lock */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5888
5889static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5890{
5891	u32 tmp;
5892
5893	tmp = RREG32(RLC_CNTL);
5894	if (tmp != rlc)
5895		WREG32(RLC_CNTL, rlc);
5896}
5897
/* Halt the RLC if it is currently enabled.
 * Clears RLC_ENABLE, waits for the RLC GPM to go idle and for the
 * serdes masters to settle.  Returns the original RLC_CNTL value so
 * the caller can restore it afterwards via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5921
/* Ask the RLC to enter safe mode.
 * Posts the MSG_ENTER_RLC_SAFE_MODE request through RLC_GPR_REG2,
 * then polls (up to usec_timeout each) until the gfx power/clock
 * status bits are set and the RLC has acknowledged the request by
 * clearing the REQ bit.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5942
5943void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5944{
5945	u32 tmp;
5946
5947	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5948	WREG32(RLC_GPR_REG2, tmp);
5949}
5950
5951/**
5952 * cik_rlc_stop - stop the RLC ME
5953 *
5954 * @rdev: radeon_device pointer
5955 *
5956 * Halt the RLC ME (MicroEngine) (CIK).
5957 */
5958static void cik_rlc_stop(struct radeon_device *rdev)
5959{
5960	WREG32(RLC_CNTL, 0);
5961
5962	cik_enable_gui_idle_interrupt(rdev, false);
5963
5964	cik_wait_for_rlc_serdes(rdev);
5965}
5966
5967/**
5968 * cik_rlc_start - start the RLC ME
5969 *
5970 * @rdev: radeon_device pointer
5971 *
5972 * Unhalt the RLC ME (MicroEngine) (CIK).
5973 */
5974static void cik_rlc_start(struct radeon_device *rdev)
5975{
5976	WREG32(RLC_CNTL, RLC_ENABLE);
5977
5978	cik_enable_gui_idle_interrupt(rdev, true);
5979
5980	udelay(50);
5981}
5982
5983/**
5984 * cik_rlc_resume - setup the RLC hw
5985 *
5986 * @rdev: radeon_device pointer
5987 *
5988 * Initialize the RLC registers, load the ucode,
5989 * and start the RLC (CIK).
5990 * Returns 0 for success, -EINVAL if the ucode is not available.
5991 */
5992static int cik_rlc_resume(struct radeon_device *rdev)
5993{
5994	u32 i, size, tmp;
5995
5996	if (!rdev->rlc_fw)
5997		return -EINVAL;
5998
5999	cik_rlc_stop(rdev);
6000
6001	/* disable CG */
6002	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6003	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6004
6005	si_rlc_reset(rdev);
6006
6007	cik_init_pg(rdev);
6008
6009	cik_init_cg(rdev);
6010
6011	WREG32(RLC_LB_CNTR_INIT, 0);
6012	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6013
6014	mutex_lock(&rdev->grbm_idx_mutex);
6015	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6016	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6017	WREG32(RLC_LB_PARAMS, 0x00600408);
6018	WREG32(RLC_LB_CNTL, 0x80000004);
6019	mutex_unlock(&rdev->grbm_idx_mutex);
6020
6021	WREG32(RLC_MC_CNTL, 0);
6022	WREG32(RLC_UCODE_CNTL, 0);
6023
6024	if (rdev->new_fw) {
6025		const struct rlc_firmware_header_v1_0 *hdr =
6026			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6027		const __le32 *fw_data = (const __le32 *)
6028			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6029
6030		radeon_ucode_print_rlc_hdr(&hdr->header);
6031
6032		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6033		WREG32(RLC_GPM_UCODE_ADDR, 0);
6034		for (i = 0; i < size; i++)
6035			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6036		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6037	} else {
6038		const __be32 *fw_data;
6039
6040		switch (rdev->family) {
6041		case CHIP_BONAIRE:
6042		case CHIP_HAWAII:
6043		default:
6044			size = BONAIRE_RLC_UCODE_SIZE;
6045			break;
6046		case CHIP_KAVERI:
6047			size = KV_RLC_UCODE_SIZE;
6048			break;
6049		case CHIP_KABINI:
6050			size = KB_RLC_UCODE_SIZE;
6051			break;
6052		case CHIP_MULLINS:
6053			size = ML_RLC_UCODE_SIZE;
6054			break;
6055		}
6056
6057		fw_data = (const __be32 *)rdev->rlc_fw->data;
6058		WREG32(RLC_GPM_UCODE_ADDR, 0);
6059		for (i = 0; i < size; i++)
6060			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6061		WREG32(RLC_GPM_UCODE_ADDR, 0);
6062	}
6063
6064	/* XXX - find out what chips support lbpw */
6065	cik_enable_lbpw(rdev, false);
6066
6067	if (rdev->family == CHIP_BONAIRE)
6068		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6069
6070	cik_rlc_start(rdev);
6071
6072	return 0;
6073}
6074
/* Enable/disable coarse-grain clock gating (CGCG) and clock/light
 * sleep (CGLS).  Enabling requires the RLC to be halted while the
 * serdes override is programmed; the previous RLC state is restored
 * afterwards via cik_update_rlc().
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads of CB_CGTT_SCLK_CTRL; NOTE(review): these
		 * look like posting/settle reads — confirm against hw docs */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6112
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep features (CP/RLC MGLS, CGTS).  Both paths halt
 * the RLC while programming the serdes override and restore it via
 * cik_update_rlc() afterwards.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force the RLC and CP memory light sleep off */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6196
/* MC/ATC/VM clock-gating control registers that all take the same
 * MC_LS_ENABLE / MC_CG_ENABLE bits (see cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() below).
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6209
6210static void cik_enable_mc_ls(struct radeon_device *rdev,
6211			     bool enable)
6212{
6213	int i;
6214	u32 orig, data;
6215
6216	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6217		orig = data = RREG32(mc_cg_registers[i]);
6218		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6219			data |= MC_LS_ENABLE;
6220		else
6221			data &= ~MC_LS_ENABLE;
6222		if (data != orig)
6223			WREG32(mc_cg_registers[i], data);
6224	}
6225}
6226
6227static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6228			       bool enable)
6229{
6230	int i;
6231	u32 orig, data;
6232
6233	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6234		orig = data = RREG32(mc_cg_registers[i]);
6235		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6236			data |= MC_CG_ENABLE;
6237		else
6238			data &= ~MC_CG_ENABLE;
6239		if (data != orig)
6240			WREG32(mc_cg_registers[i], data);
6241	}
6242}
6243
6244static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6245				 bool enable)
6246{
6247	u32 orig, data;
6248
6249	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6250		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6251		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6252	} else {
6253		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6254		data |= 0xff000000;
6255		if (data != orig)
6256			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6257
6258		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6259		data |= 0xff000000;
6260		if (data != orig)
6261			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6262	}
6263}
6264
6265static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6266				 bool enable)
6267{
6268	u32 orig, data;
6269
6270	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6271		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6272		data |= 0x100;
6273		if (orig != data)
6274			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6275
6276		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6277		data |= 0x100;
6278		if (orig != data)
6279			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6280	} else {
6281		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6282		data &= ~0x100;
6283		if (orig != data)
6284			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6285
6286		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6287		data &= ~0x100;
6288		if (orig != data)
6289			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6290	}
6291}
6292
/* Toggle UVD medium-grain clock gating: the memory gating field in
 * UVD_CGC_MEM_CTRL (via the UVD context bus) plus the DCM bit in
 * UVD_CGC_CTRL.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten with 0xfff — presumably the read is only
		 * needed to latch the indirect register; confirm */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6318
6319static void cik_enable_bif_mgls(struct radeon_device *rdev,
6320			       bool enable)
6321{
6322	u32 orig, data;
6323
6324	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6325
6326	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6327		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6328			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6329	else
6330		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6331			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6332
6333	if (orig != data)
6334		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6335}
6336
6337static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6338				bool enable)
6339{
6340	u32 orig, data;
6341
6342	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6343
6344	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6345		data &= ~CLOCK_GATING_DIS;
6346	else
6347		data |= CLOCK_GATING_DIS;
6348
6349	if (orig != data)
6350		WREG32(HDP_HOST_PATH_CNTL, data);
6351}
6352
6353static void cik_enable_hdp_ls(struct radeon_device *rdev,
6354			      bool enable)
6355{
6356	u32 orig, data;
6357
6358	orig = data = RREG32(HDP_MEM_POWER_LS);
6359
6360	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6361		data |= HDP_LS_ENABLE;
6362	else
6363		data &= ~HDP_LS_ENABLE;
6364
6365	if (orig != data)
6366		WREG32(HDP_MEM_POWER_LS, data);
6367}
6368
/* Enable/disable clock gating for the IP blocks selected by @block
 * (a mask of RADEON_CG_BLOCK_* flags).  GUI idle interrupts are
 * masked around the gfx CG reprogramming.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only touched on dGPUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6416
/* Enable clock gating on all supported blocks at init time; gfx
 * first, then the remaining blocks in one batch. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6431
/* Disable clock gating on all blocks; reverse of cik_init_cg()
 * (non-gfx blocks first, gfx last). */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6442
6443static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6444					  bool enable)
6445{
6446	u32 data, orig;
6447
6448	orig = data = RREG32(RLC_PG_CNTL);
6449	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6450		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6451	else
6452		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6453	if (orig != data)
6454		WREG32(RLC_PG_CNTL, data);
6455}
6456
6457static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6458					  bool enable)
6459{
6460	u32 data, orig;
6461
6462	orig = data = RREG32(RLC_PG_CNTL);
6463	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6464		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6465	else
6466		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6467	if (orig != data)
6468		WREG32(RLC_PG_CNTL, data);
6469}
6470
6471static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6472{
6473	u32 data, orig;
6474
6475	orig = data = RREG32(RLC_PG_CNTL);
6476	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6477		data &= ~DISABLE_CP_PG;
6478	else
6479		data |= DISABLE_CP_PG;
6480	if (orig != data)
6481		WREG32(RLC_PG_CNTL, data);
6482}
6483
6484static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6485{
6486	u32 data, orig;
6487
6488	orig = data = RREG32(RLC_PG_CNTL);
6489	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6490		data &= ~DISABLE_GDS_PG;
6491	else
6492		data |= DISABLE_GDS_PG;
6493	if (orig != data)
6494		WREG32(RLC_PG_CNTL, data);
6495}
6496
/* Legacy (pre-new_fw) CP jump table layout: each ME's table is
 * CP_ME_TABLE_SIZE dwords, located at a fixed dword offset inside
 * the firmware image (see cik_init_cp_pg_table() below). */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6500
/* Copy the CP jump tables for each micro engine (CE, PFP, ME, MEC,
 * and MEC2 on Kaveri) from the firmware images into the RLC CP table
 * BO, packed back to back in little-endian dwords.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri additionally has a MEC2 jump table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware carries the jump table offset
			 * and size in its header (little-endian payload) */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				/* me == 4 (MEC2, Kaveri only).
				 * NOTE(review): mec2_fw is dereferenced without a
				 * NULL check — presumably new_fw implies mec2_fw
				 * was loaded for Kaveri; confirm in the ucode
				 * init path */
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed table offsets, big-endian
			 * payload */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6584
/* Toggle coarse-grain gfx power gating: the GFX_PG_ENABLE bit in
 * RLC_PG_CNTL plus automatic PG via RLC_AUTO_PG_CTRL.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* result is discarded; NOTE(review): presumably a posting
		 * read to make sure PG is really off — confirm */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6614
6615static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6616{
6617	u32 mask = 0, tmp, tmp1;
6618	int i;
6619
6620	mutex_lock(&rdev->grbm_idx_mutex);
6621	cik_select_se_sh(rdev, se, sh);
6622	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6623	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6624	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6625	mutex_unlock(&rdev->grbm_idx_mutex);
6626
6627	tmp &= 0xffff0000;
6628
6629	tmp |= tmp1;
6630	tmp >>= 16;
6631
6632	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6633		mask <<= 1;
6634		mask |= 1;
6635	}
6636
6637	return (~tmp) & mask;
6638}
6639
6640static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6641{
6642	u32 i, j, k, active_cu_number = 0;
6643	u32 mask, counter, cu_bitmap;
6644	u32 tmp = 0;
6645
6646	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6647		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6648			mask = 1;
6649			cu_bitmap = 0;
6650			counter = 0;
6651			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6652				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6653					if (counter < 2)
6654						cu_bitmap |= mask;
6655					counter ++;
6656				}
6657				mask <<= 1;
6658			}
6659
6660			active_cu_number += counter;
6661			tmp |= (cu_bitmap << (i * 16 + j * 8));
6662		}
6663	}
6664
6665	WREG32(RLC_PG_AO_CU_MASK, tmp);
6666
6667	tmp = RREG32(RLC_MAX_PG_CU);
6668	tmp &= ~MAX_PU_CU_MASK;
6669	tmp |= MAX_PU_CU(active_cu_number);
6670	WREG32(RLC_MAX_PG_CU, tmp);
6671}
6672
6673static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6674				       bool enable)
6675{
6676	u32 data, orig;
6677
6678	orig = data = RREG32(RLC_PG_CNTL);
6679	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6680		data |= STATIC_PER_CU_PG_ENABLE;
6681	else
6682		data &= ~STATIC_PER_CU_PG_ENABLE;
6683	if (orig != data)
6684		WREG32(RLC_PG_CNTL, data);
6685}
6686
6687static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6688					bool enable)
6689{
6690	u32 data, orig;
6691
6692	orig = data = RREG32(RLC_PG_CNTL);
6693	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6694		data |= DYN_PER_CU_PG_ENABLE;
6695	else
6696		data &= ~DYN_PER_CU_PG_ENABLE;
6697	if (orig != data)
6698		WREG32(RLC_PG_CNTL, data);
6699}
6700
/* RLC GPM scratch dword offsets used by cik_init_gfx_cgpg() below */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6703
/* Program the RLC state needed for gfx power gating: the clear-state
 * descriptor and save/restore register list in RLC GPM scratch, the
 * save/restore and CP table base addresses, and the PG timing
 * parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi/lo address then size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out all three dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6752
/* Enable or disable all gfx power gating features in one call:
 * coarse grain plus static and dynamic medium grain. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6759
6760u32 cik_get_csb_size(struct radeon_device *rdev)
6761{
6762	u32 count = 0;
6763	const struct cs_section_def *sect = NULL;
6764	const struct cs_extent_def *ext = NULL;
6765
6766	if (rdev->rlc.cs_data == NULL)
6767		return 0;
6768
6769	/* begin clear state */
6770	count += 2;
6771	/* context control state */
6772	count += 3;
6773
6774	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6775		for (ext = sect->section; ext->extent != NULL; ++ext) {
6776			if (sect->id == SECT_CONTEXT)
6777				count += 2 + ext->reg_count;
6778			else
6779				return 0;
6780		}
6781	}
6782	/* pa_sc_raster_config/pa_sc_raster_config1 */
6783	count += 4;
6784	/* end clear state */
6785	count += 2;
6786	/* clear state */
6787	count += 2;
6788
6789	return count;
6790}
6791
6792void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6793{
6794	u32 count = 0, i;
6795	const struct cs_section_def *sect = NULL;
6796	const struct cs_extent_def *ext = NULL;
6797
6798	if (rdev->rlc.cs_data == NULL)
6799		return;
6800	if (buffer == NULL)
6801		return;
6802
6803	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6804	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6805
6806	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6807	buffer[count++] = cpu_to_le32(0x80000000);
6808	buffer[count++] = cpu_to_le32(0x80000000);
6809
6810	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6811		for (ext = sect->section; ext->extent != NULL; ++ext) {
6812			if (sect->id == SECT_CONTEXT) {
6813				buffer[count++] =
6814					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6815				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6816				for (i = 0; i < ext->reg_count; i++)
6817					buffer[count++] = cpu_to_le32(ext->extent[i]);
6818			} else {
6819				return;
6820			}
6821		}
6822	}
6823
6824	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6825	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6826	switch (rdev->family) {
6827	case CHIP_BONAIRE:
6828		buffer[count++] = cpu_to_le32(0x16000012);
6829		buffer[count++] = cpu_to_le32(0x00000000);
6830		break;
6831	case CHIP_KAVERI:
6832		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6833		buffer[count++] = cpu_to_le32(0x00000000);
6834		break;
6835	case CHIP_KABINI:
6836	case CHIP_MULLINS:
6837		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6838		buffer[count++] = cpu_to_le32(0x00000000);
6839		break;
6840	case CHIP_HAWAII:
6841		buffer[count++] = cpu_to_le32(0x3a00161a);
6842		buffer[count++] = cpu_to_le32(0x0000002e);
6843		break;
6844	default:
6845		buffer[count++] = cpu_to_le32(0x00000000);
6846		buffer[count++] = cpu_to_le32(0x00000000);
6847		break;
6848	}
6849
6850	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6851	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6852
6853	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6854	buffer[count++] = cpu_to_le32(0);
6855}
6856
/* Initialize and enable powergating if any PG feature is supported.
 * Gfx PG additionally requires the cgpg scratch setup plus CP and GDS
 * powergating before the AO CU mask is programmed and PG turned on. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6871
/* Tear down powergating: disable gfx PG first, then CP and GDS PG
 * when gfx PG was in use.  Mirrors cik_init_pg(). */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6882
6883/*
6884 * Interrupts
6885 * Starting with r6xx, interrupts are handled via a ring buffer.
6886 * Ring buffers are areas of GPU accessible memory that the GPU
6887 * writes interrupt vectors into and the host reads vectors out of.
6888 * There is a rptr (read pointer) that determines where the
6889 * host is currently reading, and a wptr (write pointer)
6890 * which determines where the GPU has written.  When the
6891 * pointers are equal, the ring is idle.  When the GPU
6892 * writes vectors to the ring buffer, it increments the
6893 * wptr.  When there is an interrupt, the host then starts
6894 * fetching commands and processing them until the pointers are
6895 * equal again at which point it updates the rptr.
6896 */
6897
6898/**
6899 * cik_enable_interrupts - Enable the interrupt ring buffer
6900 *
6901 * @rdev: radeon_device pointer
6902 *
6903 * Enable the interrupt ring buffer (CIK).
6904 */
6905static void cik_enable_interrupts(struct radeon_device *rdev)
6906{
6907	u32 ih_cntl = RREG32(IH_CNTL);
6908	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6909
6910	ih_cntl |= ENABLE_INTR;
6911	ih_rb_cntl |= IH_RB_ENABLE;
6912	WREG32(IH_CNTL, ih_cntl);
6913	WREG32(IH_RB_CNTL, ih_rb_cntl);
6914	rdev->ih.enabled = true;
6915}
6916
6917/**
6918 * cik_disable_interrupts - Disable the interrupt ring buffer
6919 *
6920 * @rdev: radeon_device pointer
6921 *
6922 * Disable the interrupt ring buffer (CIK).
6923 */
6924static void cik_disable_interrupts(struct radeon_device *rdev)
6925{
6926	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6927	u32 ih_cntl = RREG32(IH_CNTL);
6928
6929	ih_rb_cntl &= ~IH_RB_ENABLE;
6930	ih_cntl &= ~ENABLE_INTR;
6931	WREG32(IH_RB_CNTL, ih_rb_cntl);
6932	WREG32(IH_CNTL, ih_cntl);
6933	/* set rptr, wptr to 0 */
6934	WREG32(IH_RB_RPTR, 0);
6935	WREG32(IH_RB_WPTR, 0);
6936	rdev->ih.enabled = false;
6937	rdev->ih.rptr = 0;
6938}
6939
6940/**
6941 * cik_disable_interrupt_state - Disable all interrupt sources
6942 *
6943 * @rdev: radeon_device pointer
6944 *
6945 * Clear all interrupt enable bits used by the driver (CIK).
6946 */
6947static void cik_disable_interrupt_state(struct radeon_device *rdev)
6948{
6949	u32 tmp;
6950
6951	/* gfx ring */
6952	tmp = RREG32(CP_INT_CNTL_RING0) &
6953		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6954	WREG32(CP_INT_CNTL_RING0, tmp);
6955	/* sdma */
6956	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6957	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6958	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6959	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6960	/* compute queues */
6961	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6962	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6963	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6964	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6965	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6966	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6967	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6968	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6969	/* grbm */
6970	WREG32(GRBM_INT_CNTL, 0);
6971	/* SRBM */
6972	WREG32(SRBM_INT_CNTL, 0);
6973	/* vline/vblank, etc. */
6974	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6975	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6976	if (rdev->num_crtc >= 4) {
6977		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6978		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6979	}
6980	if (rdev->num_crtc >= 6) {
6981		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6982		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6983	}
6984	/* pflip */
6985	if (rdev->num_crtc >= 2) {
6986		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6987		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6988	}
6989	if (rdev->num_crtc >= 4) {
6990		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6991		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6992	}
6993	if (rdev->num_crtc >= 6) {
6994		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6995		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6996	}
6997
6998	/* dac hotplug */
6999	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7000
7001	/* digital hotplug */
7002	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7003	WREG32(DC_HPD1_INT_CONTROL, tmp);
7004	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7005	WREG32(DC_HPD2_INT_CONTROL, tmp);
7006	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7007	WREG32(DC_HPD3_INT_CONTROL, tmp);
7008	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7009	WREG32(DC_HPD4_INT_CONTROL, tmp);
7010	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7011	WREG32(DC_HPD5_INT_CONTROL, tmp);
7012	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7013	WREG32(DC_HPD6_INT_CONTROL, tmp);
7014
7015}
7016
7017/**
7018 * cik_irq_init - init and enable the interrupt ring
7019 *
7020 * @rdev: radeon_device pointer
7021 *
7022 * Allocate a ring buffer for the interrupt controller,
7023 * enable the RLC, disable interrupts, enable the IH
7024 * ring buffer and enable it (CIK).
7025 * Called at device load and reume.
7026 * Returns 0 for success, errors for failure.
7027 */
7028static int cik_irq_init(struct radeon_device *rdev)
7029{
7030	int ret = 0;
7031	int rb_bufsz;
7032	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7033
7034	/* allocate ring */
7035	ret = r600_ih_ring_alloc(rdev);
7036	if (ret)
7037		return ret;
7038
7039	/* disable irqs */
7040	cik_disable_interrupts(rdev);
7041
7042	/* init rlc */
7043	ret = cik_rlc_resume(rdev);
7044	if (ret) {
7045		r600_ih_ring_fini(rdev);
7046		return ret;
7047	}
7048
7049	/* setup interrupt control */
7050	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7051	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7052	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7053	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7054	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7055	 */
7056	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7057	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7058	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7059	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7060
7061	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7062	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7063
7064	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7065		      IH_WPTR_OVERFLOW_CLEAR |
7066		      (rb_bufsz << 1));
7067
7068	if (rdev->wb.enabled)
7069		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7070
7071	/* set the writeback address whether it's enabled or not */
7072	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7073	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7074
7075	WREG32(IH_RB_CNTL, ih_rb_cntl);
7076
7077	/* set rptr, wptr to 0 */
7078	WREG32(IH_RB_RPTR, 0);
7079	WREG32(IH_RB_WPTR, 0);
7080
7081	/* Default settings for IH_CNTL (disabled at first) */
7082	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7083	/* RPTR_REARM only works if msi's are enabled */
7084	if (rdev->msi_enabled)
7085		ih_cntl |= RPTR_REARM;
7086	WREG32(IH_CNTL, ih_cntl);
7087
7088	/* force the active interrupt state to all disabled */
7089	cik_disable_interrupt_state(rdev);
7090
7091	pci_set_master(rdev->pdev);
7092
7093	/* enable irqs */
7094	cik_enable_interrupts(rdev);
7095
7096	return ret;
7097}
7098
7099/**
7100 * cik_irq_set - enable/disable interrupt sources
7101 *
7102 * @rdev: radeon_device pointer
7103 *
7104 * Enable interrupt sources on the GPU (vblanks, hpd,
7105 * etc.) (CIK).
7106 * Returns 0 for success, errors for failure.
7107 */
7108int cik_irq_set(struct radeon_device *rdev)
7109{
7110	u32 cp_int_cntl;
7111	u32 cp_m1p0;
7112	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7113	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7114	u32 grbm_int_cntl = 0;
7115	u32 dma_cntl, dma_cntl1;
7116
7117	if (!rdev->irq.installed) {
7118		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7119		return -EINVAL;
7120	}
7121	/* don't enable anything if the ih is disabled */
7122	if (!rdev->ih.enabled) {
7123		cik_disable_interrupts(rdev);
7124		/* force the active interrupt state to all disabled */
7125		cik_disable_interrupt_state(rdev);
7126		return 0;
7127	}
7128
7129	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7130		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7131	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7132
7133	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7134	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7135	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7136	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7137	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7138	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7139
7140	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7141	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7142
7143	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7144
7145	/* enable CP interrupts on all rings */
7146	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7147		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7148		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7149	}
7150	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7151		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7152		DRM_DEBUG("si_irq_set: sw int cp1\n");
7153		if (ring->me == 1) {
7154			switch (ring->pipe) {
7155			case 0:
7156				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7157				break;
7158			default:
7159				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7160				break;
7161			}
7162		} else {
7163			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7164		}
7165	}
7166	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7167		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7168		DRM_DEBUG("si_irq_set: sw int cp2\n");
7169		if (ring->me == 1) {
7170			switch (ring->pipe) {
7171			case 0:
7172				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7173				break;
7174			default:
7175				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7176				break;
7177			}
7178		} else {
7179			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7180		}
7181	}
7182
7183	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7184		DRM_DEBUG("cik_irq_set: sw int dma\n");
7185		dma_cntl |= TRAP_ENABLE;
7186	}
7187
7188	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7189		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7190		dma_cntl1 |= TRAP_ENABLE;
7191	}
7192
7193	if (rdev->irq.crtc_vblank_int[0] ||
7194	    atomic_read(&rdev->irq.pflip[0])) {
7195		DRM_DEBUG("cik_irq_set: vblank 0\n");
7196		crtc1 |= VBLANK_INTERRUPT_MASK;
7197	}
7198	if (rdev->irq.crtc_vblank_int[1] ||
7199	    atomic_read(&rdev->irq.pflip[1])) {
7200		DRM_DEBUG("cik_irq_set: vblank 1\n");
7201		crtc2 |= VBLANK_INTERRUPT_MASK;
7202	}
7203	if (rdev->irq.crtc_vblank_int[2] ||
7204	    atomic_read(&rdev->irq.pflip[2])) {
7205		DRM_DEBUG("cik_irq_set: vblank 2\n");
7206		crtc3 |= VBLANK_INTERRUPT_MASK;
7207	}
7208	if (rdev->irq.crtc_vblank_int[3] ||
7209	    atomic_read(&rdev->irq.pflip[3])) {
7210		DRM_DEBUG("cik_irq_set: vblank 3\n");
7211		crtc4 |= VBLANK_INTERRUPT_MASK;
7212	}
7213	if (rdev->irq.crtc_vblank_int[4] ||
7214	    atomic_read(&rdev->irq.pflip[4])) {
7215		DRM_DEBUG("cik_irq_set: vblank 4\n");
7216		crtc5 |= VBLANK_INTERRUPT_MASK;
7217	}
7218	if (rdev->irq.crtc_vblank_int[5] ||
7219	    atomic_read(&rdev->irq.pflip[5])) {
7220		DRM_DEBUG("cik_irq_set: vblank 5\n");
7221		crtc6 |= VBLANK_INTERRUPT_MASK;
7222	}
7223	if (rdev->irq.hpd[0]) {
7224		DRM_DEBUG("cik_irq_set: hpd 1\n");
7225		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226	}
7227	if (rdev->irq.hpd[1]) {
7228		DRM_DEBUG("cik_irq_set: hpd 2\n");
7229		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230	}
7231	if (rdev->irq.hpd[2]) {
7232		DRM_DEBUG("cik_irq_set: hpd 3\n");
7233		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7234	}
7235	if (rdev->irq.hpd[3]) {
7236		DRM_DEBUG("cik_irq_set: hpd 4\n");
7237		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7238	}
7239	if (rdev->irq.hpd[4]) {
7240		DRM_DEBUG("cik_irq_set: hpd 5\n");
7241		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7242	}
7243	if (rdev->irq.hpd[5]) {
7244		DRM_DEBUG("cik_irq_set: hpd 6\n");
7245		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7246	}
7247
7248	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7249
7250	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7251	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7252
7253	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7254
7255	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7256
7257	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7258	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7259	if (rdev->num_crtc >= 4) {
7260		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7261		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7262	}
7263	if (rdev->num_crtc >= 6) {
7264		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7265		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7266	}
7267
7268	if (rdev->num_crtc >= 2) {
7269		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7270		       GRPH_PFLIP_INT_MASK);
7271		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7272		       GRPH_PFLIP_INT_MASK);
7273	}
7274	if (rdev->num_crtc >= 4) {
7275		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7276		       GRPH_PFLIP_INT_MASK);
7277		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7278		       GRPH_PFLIP_INT_MASK);
7279	}
7280	if (rdev->num_crtc >= 6) {
7281		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7282		       GRPH_PFLIP_INT_MASK);
7283		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7284		       GRPH_PFLIP_INT_MASK);
7285	}
7286
7287	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7288	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7289	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7290	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7291	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7292	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7293
7294	/* posting read */
7295	RREG32(SRBM_STATUS);
7296
7297	return 0;
7298}
7299
7300/**
7301 * cik_irq_ack - ack interrupt sources
7302 *
7303 * @rdev: radeon_device pointer
7304 *
7305 * Ack interrupt sources on the GPU (vblanks, hpd,
7306 * etc.) (CIK).  Certain interrupts sources are sw
7307 * generated and do not require an explicit ack.
7308 */
7309static inline void cik_irq_ack(struct radeon_device *rdev)
7310{
7311	u32 tmp;
7312
7313	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7314	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7315	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7316	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7317	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7318	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7319	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7320
7321	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7322		EVERGREEN_CRTC0_REGISTER_OFFSET);
7323	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7324		EVERGREEN_CRTC1_REGISTER_OFFSET);
7325	if (rdev->num_crtc >= 4) {
7326		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7327			EVERGREEN_CRTC2_REGISTER_OFFSET);
7328		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7329			EVERGREEN_CRTC3_REGISTER_OFFSET);
7330	}
7331	if (rdev->num_crtc >= 6) {
7332		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7333			EVERGREEN_CRTC4_REGISTER_OFFSET);
7334		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7335			EVERGREEN_CRTC5_REGISTER_OFFSET);
7336	}
7337
7338	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7339		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7340		       GRPH_PFLIP_INT_CLEAR);
7341	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7342		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7343		       GRPH_PFLIP_INT_CLEAR);
7344	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7345		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7346	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7347		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7348	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7349		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7350	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7351		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7352
7353	if (rdev->num_crtc >= 4) {
7354		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7355			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7356			       GRPH_PFLIP_INT_CLEAR);
7357		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7358			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7359			       GRPH_PFLIP_INT_CLEAR);
7360		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7361			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7362		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7363			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7364		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7365			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7366		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7367			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7368	}
7369
7370	if (rdev->num_crtc >= 6) {
7371		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7372			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7373			       GRPH_PFLIP_INT_CLEAR);
7374		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7375			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7376			       GRPH_PFLIP_INT_CLEAR);
7377		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7378			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7379		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7380			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7381		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7382			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7383		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7384			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7385	}
7386
7387	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7388		tmp = RREG32(DC_HPD1_INT_CONTROL);
7389		tmp |= DC_HPDx_INT_ACK;
7390		WREG32(DC_HPD1_INT_CONTROL, tmp);
7391	}
7392	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7393		tmp = RREG32(DC_HPD2_INT_CONTROL);
7394		tmp |= DC_HPDx_INT_ACK;
7395		WREG32(DC_HPD2_INT_CONTROL, tmp);
7396	}
7397	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7398		tmp = RREG32(DC_HPD3_INT_CONTROL);
7399		tmp |= DC_HPDx_INT_ACK;
7400		WREG32(DC_HPD3_INT_CONTROL, tmp);
7401	}
7402	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7403		tmp = RREG32(DC_HPD4_INT_CONTROL);
7404		tmp |= DC_HPDx_INT_ACK;
7405		WREG32(DC_HPD4_INT_CONTROL, tmp);
7406	}
7407	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7408		tmp = RREG32(DC_HPD5_INT_CONTROL);
7409		tmp |= DC_HPDx_INT_ACK;
7410		WREG32(DC_HPD5_INT_CONTROL, tmp);
7411	}
7412	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7413		tmp = RREG32(DC_HPD5_INT_CONTROL);
7414		tmp |= DC_HPDx_INT_ACK;
7415		WREG32(DC_HPD6_INT_CONTROL, tmp);
7416	}
7417	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7418		tmp = RREG32(DC_HPD1_INT_CONTROL);
7419		tmp |= DC_HPDx_RX_INT_ACK;
7420		WREG32(DC_HPD1_INT_CONTROL, tmp);
7421	}
7422	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7423		tmp = RREG32(DC_HPD2_INT_CONTROL);
7424		tmp |= DC_HPDx_RX_INT_ACK;
7425		WREG32(DC_HPD2_INT_CONTROL, tmp);
7426	}
7427	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7428		tmp = RREG32(DC_HPD3_INT_CONTROL);
7429		tmp |= DC_HPDx_RX_INT_ACK;
7430		WREG32(DC_HPD3_INT_CONTROL, tmp);
7431	}
7432	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7433		tmp = RREG32(DC_HPD4_INT_CONTROL);
7434		tmp |= DC_HPDx_RX_INT_ACK;
7435		WREG32(DC_HPD4_INT_CONTROL, tmp);
7436	}
7437	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7438		tmp = RREG32(DC_HPD5_INT_CONTROL);
7439		tmp |= DC_HPDx_RX_INT_ACK;
7440		WREG32(DC_HPD5_INT_CONTROL, tmp);
7441	}
7442	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7443		tmp = RREG32(DC_HPD5_INT_CONTROL);
7444		tmp |= DC_HPDx_RX_INT_ACK;
7445		WREG32(DC_HPD6_INT_CONTROL, tmp);
7446	}
7447}
7448
7449/**
7450 * cik_irq_disable - disable interrupts
7451 *
7452 * @rdev: radeon_device pointer
7453 *
7454 * Disable interrupts on the hw (CIK).
7455 */
7456static void cik_irq_disable(struct radeon_device *rdev)
7457{
7458	cik_disable_interrupts(rdev);
7459	/* Wait and acknowledge irq */
7460	mdelay(1);
7461	cik_irq_ack(rdev);
7462	cik_disable_interrupt_state(rdev);
7463}
7464
7465/**
7466 * cik_irq_disable - disable interrupts for suspend
7467 *
7468 * @rdev: radeon_device pointer
7469 *
7470 * Disable interrupts and stop the RLC (CIK).
7471 * Used for suspend.
7472 */
7473static void cik_irq_suspend(struct radeon_device *rdev)
7474{
7475	cik_irq_disable(rdev);
7476	cik_rlc_stop(rdev);
7477}
7478
7479/**
7480 * cik_irq_fini - tear down interrupt support
7481 *
7482 * @rdev: radeon_device pointer
7483 *
7484 * Disable interrupts on the hw and free the IH ring
7485 * buffer (CIK).
7486 * Used for driver unload.
7487 */
7488static void cik_irq_fini(struct radeon_device *rdev)
7489{
7490	cik_irq_suspend(rdev);
7491	r600_ih_ring_fini(rdev);
7492}
7493
7494/**
7495 * cik_get_ih_wptr - get the IH ring buffer wptr
7496 *
7497 * @rdev: radeon_device pointer
7498 *
7499 * Get the IH ring buffer wptr from either the register
7500 * or the writeback memory buffer (CIK).  Also check for
7501 * ring buffer overflow and deal with it.
7502 * Used by cik_irq_process().
7503 * Returns the value of the wptr.
7504 */
7505static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7506{
7507	u32 wptr, tmp;
7508
7509	if (rdev->wb.enabled)
7510		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7511	else
7512		wptr = RREG32(IH_RB_WPTR);
7513
7514	if (wptr & RB_OVERFLOW) {
7515		wptr &= ~RB_OVERFLOW;
7516		/* When a ring buffer overflow happen start parsing interrupt
7517		 * from the last not overwritten vector (wptr + 16). Hopefully
7518		 * this should allow us to catchup.
7519		 */
7520		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7521			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7522		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7523		tmp = RREG32(IH_RB_CNTL);
7524		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7525		WREG32(IH_RB_CNTL, tmp);
7526	}
7527	return (wptr & rdev->ih.ptr_mask);
7528}
7529
7530/*        CIK IV Ring
7531 * Each IV ring entry is 128 bits:
7532 * [7:0]    - interrupt source id
7533 * [31:8]   - reserved
7534 * [59:32]  - interrupt source data
7535 * [63:60]  - reserved
7536 * [71:64]  - RINGID
7537 *            CP:
7538 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7539 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7540 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7541 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7542 *            PIPE_ID - ME0 0=3D
7543 *                    - ME1&2 compute dispatcher (4 pipes each)
7544 *            SDMA:
7545 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7546 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7547 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7548 * [79:72]  - VMID
7549 * [95:80]  - PASID
7550 * [127:96] - reserved
7551 */
7552/**
7553 * cik_irq_process - interrupt handler
7554 *
7555 * @rdev: radeon_device pointer
7556 *
7557 * Interrupt hander (CIK).  Walk the IH ring,
7558 * ack interrupts and schedule work to handle
7559 * interrupt events.
7560 * Returns irq process return code.
7561 */
7562int cik_irq_process(struct radeon_device *rdev)
7563{
7564	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7565	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7566	u32 wptr;
7567	u32 rptr;
7568	u32 src_id, src_data, ring_id;
7569	u8 me_id, pipe_id, queue_id;
7570	u32 ring_index;
7571	bool queue_hotplug = false;
7572	bool queue_dp = false;
7573	bool queue_reset = false;
7574	u32 addr, status, mc_client;
7575	bool queue_thermal = false;
7576
7577	if (!rdev->ih.enabled || rdev->shutdown)
7578		return IRQ_NONE;
7579
7580	wptr = cik_get_ih_wptr(rdev);
7581
7582restart_ih:
7583	/* is somebody else already processing irqs? */
7584	if (atomic_xchg(&rdev->ih.lock, 1))
7585		return IRQ_NONE;
7586
7587	rptr = rdev->ih.rptr;
7588	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7589
7590	/* Order reading of wptr vs. reading of IH ring data */
7591	rmb();
7592
7593	/* display interrupts */
7594	cik_irq_ack(rdev);
7595
7596	while (rptr != wptr) {
7597		/* wptr/rptr are in bytes! */
7598		ring_index = rptr / 4;
7599
7600		radeon_kfd_interrupt(rdev,
7601				(const void *) &rdev->ih.ring[ring_index]);
7602
7603		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7604		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7605		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7606
7607		switch (src_id) {
7608		case 1: /* D1 vblank/vline */
7609			switch (src_data) {
7610			case 0: /* D1 vblank */
7611				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7612					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7613
7614				if (rdev->irq.crtc_vblank_int[0]) {
7615					drm_handle_vblank(rdev->ddev, 0);
7616					rdev->pm.vblank_sync = true;
7617					wake_up(&rdev->irq.vblank_queue);
7618				}
7619				if (atomic_read(&rdev->irq.pflip[0]))
7620					radeon_crtc_handle_vblank(rdev, 0);
7621				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7622				DRM_DEBUG("IH: D1 vblank\n");
7623
7624				break;
7625			case 1: /* D1 vline */
7626				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7627					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7628
7629				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7630				DRM_DEBUG("IH: D1 vline\n");
7631
7632				break;
7633			default:
7634				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7635				break;
7636			}
7637			break;
7638		case 2: /* D2 vblank/vline */
7639			switch (src_data) {
7640			case 0: /* D2 vblank */
7641				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7642					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7643
7644				if (rdev->irq.crtc_vblank_int[1]) {
7645					drm_handle_vblank(rdev->ddev, 1);
7646					rdev->pm.vblank_sync = true;
7647					wake_up(&rdev->irq.vblank_queue);
7648				}
7649				if (atomic_read(&rdev->irq.pflip[1]))
7650					radeon_crtc_handle_vblank(rdev, 1);
7651				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7652				DRM_DEBUG("IH: D2 vblank\n");
7653
7654				break;
7655			case 1: /* D2 vline */
7656				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7657					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7658
7659				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7660				DRM_DEBUG("IH: D2 vline\n");
7661
7662				break;
7663			default:
7664				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7665				break;
7666			}
7667			break;
7668		case 3: /* D3 vblank/vline */
7669			switch (src_data) {
7670			case 0: /* D3 vblank */
7671				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7672					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7673
7674				if (rdev->irq.crtc_vblank_int[2]) {
7675					drm_handle_vblank(rdev->ddev, 2);
7676					rdev->pm.vblank_sync = true;
7677					wake_up(&rdev->irq.vblank_queue);
7678				}
7679				if (atomic_read(&rdev->irq.pflip[2]))
7680					radeon_crtc_handle_vblank(rdev, 2);
7681				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7682				DRM_DEBUG("IH: D3 vblank\n");
7683
7684				break;
7685			case 1: /* D3 vline */
7686				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7687					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7688
7689				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7690				DRM_DEBUG("IH: D3 vline\n");
7691
7692				break;
7693			default:
7694				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7695				break;
7696			}
7697			break;
7698		case 4: /* D4 vblank/vline */
7699			switch (src_data) {
7700			case 0: /* D4 vblank */
7701				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7702					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7703
7704				if (rdev->irq.crtc_vblank_int[3]) {
7705					drm_handle_vblank(rdev->ddev, 3);
7706					rdev->pm.vblank_sync = true;
7707					wake_up(&rdev->irq.vblank_queue);
7708				}
7709				if (atomic_read(&rdev->irq.pflip[3]))
7710					radeon_crtc_handle_vblank(rdev, 3);
7711				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7712				DRM_DEBUG("IH: D4 vblank\n");
7713
7714				break;
7715			case 1: /* D4 vline */
7716				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7717					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7718
7719				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7720				DRM_DEBUG("IH: D4 vline\n");
7721
7722				break;
7723			default:
7724				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7725				break;
7726			}
7727			break;
7728		case 5: /* D5 vblank/vline */
7729			switch (src_data) {
7730			case 0: /* D5 vblank */
7731				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7732					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7733
7734				if (rdev->irq.crtc_vblank_int[4]) {
7735					drm_handle_vblank(rdev->ddev, 4);
7736					rdev->pm.vblank_sync = true;
7737					wake_up(&rdev->irq.vblank_queue);
7738				}
7739				if (atomic_read(&rdev->irq.pflip[4]))
7740					radeon_crtc_handle_vblank(rdev, 4);
7741				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7742				DRM_DEBUG("IH: D5 vblank\n");
7743
7744				break;
7745			case 1: /* D5 vline */
7746				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7747					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7748
7749				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7750				DRM_DEBUG("IH: D5 vline\n");
7751
7752				break;
7753			default:
7754				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7755				break;
7756			}
7757			break;
7758		case 6: /* D6 vblank/vline */
7759			switch (src_data) {
7760			case 0: /* D6 vblank */
7761				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7762					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7763
7764				if (rdev->irq.crtc_vblank_int[5]) {
7765					drm_handle_vblank(rdev->ddev, 5);
7766					rdev->pm.vblank_sync = true;
7767					wake_up(&rdev->irq.vblank_queue);
7768				}
7769				if (atomic_read(&rdev->irq.pflip[5]))
7770					radeon_crtc_handle_vblank(rdev, 5);
7771				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7772				DRM_DEBUG("IH: D6 vblank\n");
7773
7774				break;
7775			case 1: /* D6 vline */
7776				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7777					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7778
7779				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7780				DRM_DEBUG("IH: D6 vline\n");
7781
7782				break;
7783			default:
7784				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7785				break;
7786			}
7787			break;
7788		case 8: /* D1 page flip */
7789		case 10: /* D2 page flip */
7790		case 12: /* D3 page flip */
7791		case 14: /* D4 page flip */
7792		case 16: /* D5 page flip */
7793		case 18: /* D6 page flip */
7794			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7795			if (radeon_use_pflipirq > 0)
7796				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7797			break;
7798		case 42: /* HPD hotplug */
7799			switch (src_data) {
7800			case 0:
7801				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7802					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803
7804				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7805				queue_hotplug = true;
7806				DRM_DEBUG("IH: HPD1\n");
7807
7808				break;
7809			case 1:
7810				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7811					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812
7813				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7814				queue_hotplug = true;
7815				DRM_DEBUG("IH: HPD2\n");
7816
7817				break;
7818			case 2:
7819				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7820					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7823				queue_hotplug = true;
7824				DRM_DEBUG("IH: HPD3\n");
7825
7826				break;
7827			case 3:
7828				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7829					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7832				queue_hotplug = true;
7833				DRM_DEBUG("IH: HPD4\n");
7834
7835				break;
7836			case 4:
7837				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7838					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7841				queue_hotplug = true;
7842				DRM_DEBUG("IH: HPD5\n");
7843
7844				break;
7845			case 5:
7846				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7847					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7850				queue_hotplug = true;
7851				DRM_DEBUG("IH: HPD6\n");
7852
7853				break;
7854			case 6:
7855				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7856					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7859				queue_dp = true;
7860				DRM_DEBUG("IH: HPD_RX 1\n");
7861
7862				break;
7863			case 7:
7864				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7865					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7868				queue_dp = true;
7869				DRM_DEBUG("IH: HPD_RX 2\n");
7870
7871				break;
7872			case 8:
7873				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7874					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7877				queue_dp = true;
7878				DRM_DEBUG("IH: HPD_RX 3\n");
7879
7880				break;
7881			case 9:
7882				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7883					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884
7885				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7886				queue_dp = true;
7887				DRM_DEBUG("IH: HPD_RX 4\n");
7888
7889				break;
7890			case 10:
7891				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7892					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893
7894				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7895				queue_dp = true;
7896				DRM_DEBUG("IH: HPD_RX 5\n");
7897
7898				break;
7899			case 11:
7900				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7901					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7902
7903				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7904				queue_dp = true;
7905				DRM_DEBUG("IH: HPD_RX 6\n");
7906
7907				break;
7908			default:
7909				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7910				break;
7911			}
7912			break;
7913		case 96:
7914			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7915			WREG32(SRBM_INT_ACK, 0x1);
7916			break;
7917		case 124: /* UVD */
7918			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7919			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7920			break;
7921		case 146:
7922		case 147:
7923			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7924			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7925			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7926			/* reset addr and status */
7927			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7928			if (addr == 0x0 && status == 0x0)
7929				break;
7930			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7931			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7932				addr);
7933			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7934				status);
7935			cik_vm_decode_fault(rdev, status, addr, mc_client);
7936			break;
7937		case 167: /* VCE */
7938			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7939			switch (src_data) {
7940			case 0:
7941				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7942				break;
7943			case 1:
7944				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7945				break;
7946			default:
7947				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7948				break;
7949			}
7950			break;
7951		case 176: /* GFX RB CP_INT */
7952		case 177: /* GFX IB CP_INT */
7953			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7954			break;
7955		case 181: /* CP EOP event */
7956			DRM_DEBUG("IH: CP EOP\n");
7957			/* XXX check the bitfield order! */
7958			me_id = (ring_id & 0x60) >> 5;
7959			pipe_id = (ring_id & 0x18) >> 3;
7960			queue_id = (ring_id & 0x7) >> 0;
7961			switch (me_id) {
7962			case 0:
7963				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7964				break;
7965			case 1:
7966			case 2:
7967				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7968					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7969				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7970					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7971				break;
7972			}
7973			break;
7974		case 184: /* CP Privileged reg access */
7975			DRM_ERROR("Illegal register access in command stream\n");
7976			/* XXX check the bitfield order! */
7977			me_id = (ring_id & 0x60) >> 5;
7978			pipe_id = (ring_id & 0x18) >> 3;
7979			queue_id = (ring_id & 0x7) >> 0;
7980			switch (me_id) {
7981			case 0:
7982				/* This results in a full GPU reset, but all we need to do is soft
7983				 * reset the CP for gfx
7984				 */
7985				queue_reset = true;
7986				break;
7987			case 1:
7988				/* XXX compute */
7989				queue_reset = true;
7990				break;
7991			case 2:
7992				/* XXX compute */
7993				queue_reset = true;
7994				break;
7995			}
7996			break;
7997		case 185: /* CP Privileged inst */
7998			DRM_ERROR("Illegal instruction in command stream\n");
7999			/* XXX check the bitfield order! */
8000			me_id = (ring_id & 0x60) >> 5;
8001			pipe_id = (ring_id & 0x18) >> 3;
8002			queue_id = (ring_id & 0x7) >> 0;
8003			switch (me_id) {
8004			case 0:
8005				/* This results in a full GPU reset, but all we need to do is soft
8006				 * reset the CP for gfx
8007				 */
8008				queue_reset = true;
8009				break;
8010			case 1:
8011				/* XXX compute */
8012				queue_reset = true;
8013				break;
8014			case 2:
8015				/* XXX compute */
8016				queue_reset = true;
8017				break;
8018			}
8019			break;
8020		case 224: /* SDMA trap event */
8021			/* XXX check the bitfield order! */
8022			me_id = (ring_id & 0x3) >> 0;
8023			queue_id = (ring_id & 0xc) >> 2;
8024			DRM_DEBUG("IH: SDMA trap\n");
8025			switch (me_id) {
8026			case 0:
8027				switch (queue_id) {
8028				case 0:
8029					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8030					break;
8031				case 1:
8032					/* XXX compute */
8033					break;
8034				case 2:
8035					/* XXX compute */
8036					break;
8037				}
8038				break;
8039			case 1:
8040				switch (queue_id) {
8041				case 0:
8042					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8043					break;
8044				case 1:
8045					/* XXX compute */
8046					break;
8047				case 2:
8048					/* XXX compute */
8049					break;
8050				}
8051				break;
8052			}
8053			break;
8054		case 230: /* thermal low to high */
8055			DRM_DEBUG("IH: thermal low to high\n");
8056			rdev->pm.dpm.thermal.high_to_low = false;
8057			queue_thermal = true;
8058			break;
8059		case 231: /* thermal high to low */
8060			DRM_DEBUG("IH: thermal high to low\n");
8061			rdev->pm.dpm.thermal.high_to_low = true;
8062			queue_thermal = true;
8063			break;
8064		case 233: /* GUI IDLE */
8065			DRM_DEBUG("IH: GUI idle\n");
8066			break;
8067		case 241: /* SDMA Privileged inst */
8068		case 247: /* SDMA Privileged inst */
8069			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8070			/* XXX check the bitfield order! */
8071			me_id = (ring_id & 0x3) >> 0;
8072			queue_id = (ring_id & 0xc) >> 2;
8073			switch (me_id) {
8074			case 0:
8075				switch (queue_id) {
8076				case 0:
8077					queue_reset = true;
8078					break;
8079				case 1:
8080					/* XXX compute */
8081					queue_reset = true;
8082					break;
8083				case 2:
8084					/* XXX compute */
8085					queue_reset = true;
8086					break;
8087				}
8088				break;
8089			case 1:
8090				switch (queue_id) {
8091				case 0:
8092					queue_reset = true;
8093					break;
8094				case 1:
8095					/* XXX compute */
8096					queue_reset = true;
8097					break;
8098				case 2:
8099					/* XXX compute */
8100					queue_reset = true;
8101					break;
8102				}
8103				break;
8104			}
8105			break;
8106		default:
8107			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8108			break;
8109		}
8110
8111		/* wptr/rptr are in bytes! */
8112		rptr += 16;
8113		rptr &= rdev->ih.ptr_mask;
8114		WREG32(IH_RB_RPTR, rptr);
8115	}
8116	if (queue_dp)
8117		schedule_work(&rdev->dp_work);
8118	if (queue_hotplug)
8119		schedule_delayed_work(&rdev->hotplug_work, 0);
8120	if (queue_reset) {
8121		rdev->needs_reset = true;
8122		wake_up_all(&rdev->fence_queue);
8123	}
8124	if (queue_thermal)
8125		schedule_work(&rdev->pm.dpm.thermal.work);
8126	rdev->ih.rptr = rptr;
8127	atomic_set(&rdev->ih.lock, 0);
8128
8129	/* make sure wptr hasn't changed while processing */
8130	wptr = cik_get_ih_wptr(rdev);
8131	if (wptr != rptr)
8132		goto restart_ih;
8133
8134	return IRQ_HANDLED;
8135}
8136
8137/*
8138 * startup/shutdown callbacks
8139 */
8140/**
8141 * cik_startup - program the asic to a functional state
8142 *
8143 * @rdev: radeon_device pointer
8144 *
8145 * Programs the asic to a functional state (CIK).
8146 * Called by cik_init() and cik_resume().
8147 * Returns 0 for success, error for failure.
8148 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPUs need the MC ucode loaded here unless DPM already did it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs use a per-family RLC save/restore register list:
		 * Kaveri (spectre) vs. Kabini/Mullins (kalindi)
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on every ring we will use */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD bringup is best-effort: on any failure the ring size is
	 * zeroed below so the ring-init section later skips it
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE is likewise best-effort; both VCE rings are disabled on failure */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with old firmware gets the legacy type-2 nop packet;
	 * all other cases use a type-3 NOP
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD resume failed above; skip it */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT marks "VCE rings not present"; it is not reported as an error */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8394
8395/**
8396 * cik_resume - resume the asic to a functional state
8397 *
8398 * @rdev: radeon_device pointer
8399 *
8400 * Programs the asic to a functional state (CIK).
8401 * Called at resume.
8402 * Returns 0 for success, error for failure.
8403 */
8404int cik_resume(struct radeon_device *rdev)
8405{
8406	int r;
8407
8408	/* post card */
8409	atom_asic_init(rdev->mode_info.atom_context);
8410
8411	/* init golden registers */
8412	cik_init_golden_registers(rdev);
8413
8414	if (rdev->pm.pm_method == PM_METHOD_DPM)
8415		radeon_pm_resume(rdev);
8416
8417	rdev->accel_working = true;
8418	r = cik_startup(rdev);
8419	if (r) {
8420		DRM_ERROR("cik startup failed on resume\n");
8421		rdev->accel_working = false;
8422		return r;
8423	}
8424
8425	return r;
8426
8427}
8428
8429/**
8430 * cik_suspend - suspend the asic
8431 *
8432 * @rdev: radeon_device pointer
8433 *
8434 * Bring the chip into a state suitable for suspend (CIK).
8435 * Called at suspend.
8436 * Returns 0 for success.
8437 */
int cik_suspend(struct radeon_device *rdev)
{
	/* quiesce users of the hw first: KFD, power management, audio, VM */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop the command processors and DMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* disable powergating/clockgating, then irqs, writeback and GART */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8456
8457/* Plan is to move initialization in that function and use
8458 * helper function so that radeon_device_init pretty much
8459 * do nothing more than calling asic specific function. This
8460 * should also allow to remove a bunch of callback function
8461 * like vram_info.
8462 */
8463/**
8464 * cik_init - asic specific driver and hw init
8465 *
8466 * @rdev: radeon_device pointer
8467 *
8468 * Setup asic specific driver variables and program the hw
8469 * to a functional state (CIK).
8470 * Called at driver startup.
8471 * Returns 0 for success, errors for failure.
8472 */
8473int cik_init(struct radeon_device *rdev)
8474{
8475	struct radeon_ring *ring;
8476	int r;
8477
8478	/* Read BIOS */
8479	if (!radeon_get_bios(rdev)) {
8480		if (ASIC_IS_AVIVO(rdev))
8481			return -EINVAL;
8482	}
8483	/* Must be an ATOMBIOS */
8484	if (!rdev->is_atom_bios) {
8485		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8486		return -EINVAL;
8487	}
8488	r = radeon_atombios_init(rdev);
8489	if (r)
8490		return r;
8491
8492	/* Post card if necessary */
8493	if (!radeon_card_posted(rdev)) {
8494		if (!rdev->bios) {
8495			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8496			return -EINVAL;
8497		}
8498		DRM_INFO("GPU not posted. posting now...\n");
8499		atom_asic_init(rdev->mode_info.atom_context);
8500	}
8501	/* init golden registers */
8502	cik_init_golden_registers(rdev);
8503	/* Initialize scratch registers */
8504	cik_scratch_init(rdev);
8505	/* Initialize surface registers */
8506	radeon_surface_init(rdev);
8507	/* Initialize clocks */
8508	radeon_get_clock_info(rdev->ddev);
8509
8510	/* Fence driver */
8511	r = radeon_fence_driver_init(rdev);
8512	if (r)
8513		return r;
8514
8515	/* initialize memory controller */
8516	r = cik_mc_init(rdev);
8517	if (r)
8518		return r;
8519	/* Memory manager */
8520	r = radeon_bo_init(rdev);
8521	if (r)
8522		return r;
8523
8524	if (rdev->flags & RADEON_IS_IGP) {
8525		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8526		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8527			r = cik_init_microcode(rdev);
8528			if (r) {
8529				DRM_ERROR("Failed to load firmware!\n");
8530				return r;
8531			}
8532		}
8533	} else {
8534		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8535		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8536		    !rdev->mc_fw) {
8537			r = cik_init_microcode(rdev);
8538			if (r) {
8539				DRM_ERROR("Failed to load firmware!\n");
8540				return r;
8541			}
8542		}
8543	}
8544
8545	/* Initialize power management */
8546	radeon_pm_init(rdev);
8547
8548	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8549	ring->ring_obj = NULL;
8550	r600_ring_init(rdev, ring, 1024 * 1024);
8551
8552	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8553	ring->ring_obj = NULL;
8554	r600_ring_init(rdev, ring, 1024 * 1024);
8555	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8556	if (r)
8557		return r;
8558
8559	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8560	ring->ring_obj = NULL;
8561	r600_ring_init(rdev, ring, 1024 * 1024);
8562	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8563	if (r)
8564		return r;
8565
8566	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8567	ring->ring_obj = NULL;
8568	r600_ring_init(rdev, ring, 256 * 1024);
8569
8570	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8571	ring->ring_obj = NULL;
8572	r600_ring_init(rdev, ring, 256 * 1024);
8573
8574	r = radeon_uvd_init(rdev);
8575	if (!r) {
8576		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8577		ring->ring_obj = NULL;
8578		r600_ring_init(rdev, ring, 4096);
8579	}
8580
8581	r = radeon_vce_init(rdev);
8582	if (!r) {
8583		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8584		ring->ring_obj = NULL;
8585		r600_ring_init(rdev, ring, 4096);
8586
8587		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8588		ring->ring_obj = NULL;
8589		r600_ring_init(rdev, ring, 4096);
8590	}
8591
8592	rdev->ih.ring_obj = NULL;
8593	r600_ih_ring_init(rdev, 64 * 1024);
8594
8595	r = r600_pcie_gart_init(rdev);
8596	if (r)
8597		return r;
8598
8599	rdev->accel_working = true;
8600	r = cik_startup(rdev);
8601	if (r) {
8602		dev_err(rdev->dev, "disabling GPU acceleration\n");
8603		cik_cp_fini(rdev);
8604		cik_sdma_fini(rdev);
8605		cik_irq_fini(rdev);
8606		sumo_rlc_fini(rdev);
8607		cik_mec_fini(rdev);
8608		radeon_wb_fini(rdev);
8609		radeon_ib_pool_fini(rdev);
8610		radeon_vm_manager_fini(rdev);
8611		radeon_irq_kms_fini(rdev);
8612		cik_pcie_gart_fini(rdev);
8613		rdev->accel_working = false;
8614	}
8615
8616	/* Don't start up if the MC ucode is missing.
8617	 * The default clocks and voltages before the MC ucode
8618	 * is loaded are not suffient for advanced operations.
8619	 */
8620	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8621		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8622		return -EINVAL;
8623	}
8624
8625	return 0;
8626}
8627
8628/**
8629 * cik_fini - asic specific driver and hw fini
8630 *
8631 * @rdev: radeon_device pointer
8632 *
8633 * Tear down the asic specific driver variables and program the hw
8634 * to an idle state (CIK).
8635 * Called at driver unload.
8636 */
void cik_fini(struct radeon_device *rdev)
{
	/* stop power management before quiescing the engines */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* media blocks (UVD/VCE), then GART and memory management */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* free the BIOS copy read at init time */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8663
8664void dce8_program_fmt(struct drm_encoder *encoder)
8665{
8666	struct drm_device *dev = encoder->dev;
8667	struct radeon_device *rdev = dev->dev_private;
8668	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8669	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8670	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8671	int bpc = 0;
8672	u32 tmp = 0;
8673	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8674
8675	if (connector) {
8676		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8677		bpc = radeon_get_monitor_bpc(connector);
8678		dither = radeon_connector->dither;
8679	}
8680
8681	/* LVDS/eDP FMT is set up by atom */
8682	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8683		return;
8684
8685	/* not needed for analog */
8686	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8687	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8688		return;
8689
8690	if (bpc == 0)
8691		return;
8692
8693	switch (bpc) {
8694	case 6:
8695		if (dither == RADEON_FMT_DITHER_ENABLE)
8696			/* XXX sort out optimal dither settings */
8697			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8698				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8699		else
8700			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8701		break;
8702	case 8:
8703		if (dither == RADEON_FMT_DITHER_ENABLE)
8704			/* XXX sort out optimal dither settings */
8705			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8706				FMT_RGB_RANDOM_ENABLE |
8707				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8708		else
8709			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8710		break;
8711	case 10:
8712		if (dither == RADEON_FMT_DITHER_ENABLE)
8713			/* XXX sort out optimal dither settings */
8714			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8715				FMT_RGB_RANDOM_ENABLE |
8716				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8717		else
8718			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8719		break;
8720	default:
8721		/* not needed */
8722		break;
8723	}
8724
8725	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8726}
8727
/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Setup up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	/* tmp: LB_MEMORY_CONFIG partition selector;
	 * buffer_alloc: number of DMIF buffers requested for this pipe */
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controllers.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need use to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		/* crtc disabled: minimal config, no DMIF buffers */
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* wait for the hw to acknowledge the DMIF buffer allocation;
	 * falls through silently on timeout */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* usable LB size in pixels: 2 lines at the width limit
		 * of the selected partition mode */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
8802
8803/**
8804 * cik_get_number_of_dram_channels - get the number of dram channels
8805 *
8806 * @rdev: radeon_device pointer
8807 *
8808 * Look up the number of video ram channels (CIK).
8809 * Used for display watermark bandwidth calculations
8810 * Returns the number of dram channels
8811 */
8812static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8813{
8814	u32 tmp = RREG32(MC_SHARED_CHMAP);
8815
8816	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8817	case 0:
8818	default:
8819		return 1;
8820	case 1:
8821		return 2;
8822	case 2:
8823		return 4;
8824	case 3:
8825		return 8;
8826	case 4:
8827		return 3;
8828	case 5:
8829		return 6;
8830	case 6:
8831		return 10;
8832	case 7:
8833		return 12;
8834	case 8:
8835		return 16;
8836	}
8837}
8838
/* inputs to the dce8 display watermark calculations for one head */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio (fixed point) */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8854
8855/**
8856 * dce8_dram_bandwidth - get the dram bandwidth
8857 *
8858 * @wm: watermark calculation data
8859 *
8860 * Calculate the raw dram bandwidth (CIK).
8861 * Used for display watermark bandwidth calculations
8862 * Returns the dram bandwidth in MBytes/s
8863 */
8864static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8865{
8866	/* Calculate raw DRAM Bandwidth */
8867	fixed20_12 dram_efficiency; /* 0.7 */
8868	fixed20_12 yclk, dram_channels, bandwidth;
8869	fixed20_12 a;
8870
8871	a.full = dfixed_const(1000);
8872	yclk.full = dfixed_const(wm->yclk);
8873	yclk.full = dfixed_div(yclk, a);
8874	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8875	a.full = dfixed_const(10);
8876	dram_efficiency.full = dfixed_const(7);
8877	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8878	bandwidth.full = dfixed_mul(dram_channels, yclk);
8879	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8880
8881	return dfixed_trunc(bandwidth);
8882}
8883
8884/**
8885 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8886 *
8887 * @wm: watermark calculation data
8888 *
8889 * Calculate the dram bandwidth used for display (CIK).
8890 * Used for display watermark bandwidth calculations
8891 * Returns the dram bandwidth for display in MBytes/s
8892 */
8893static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8894{
8895	/* Calculate DRAM Bandwidth and the part allocated to display. */
8896	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8897	fixed20_12 yclk, dram_channels, bandwidth;
8898	fixed20_12 a;
8899
8900	a.full = dfixed_const(1000);
8901	yclk.full = dfixed_const(wm->yclk);
8902	yclk.full = dfixed_div(yclk, a);
8903	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8904	a.full = dfixed_const(10);
8905	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8906	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8907	bandwidth.full = dfixed_mul(dram_channels, yclk);
8908	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8909
8910	return dfixed_trunc(bandwidth);
8911}
8912
8913/**
8914 * dce8_data_return_bandwidth - get the data return bandwidth
8915 *
8916 * @wm: watermark calculation data
8917 *
8918 * Calculate the data return bandwidth used for display (CIK).
8919 * Used for display watermark bandwidth calculations
8920 * Returns the data return bandwidth in MBytes/s
8921 */
8922static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8923{
8924	/* Calculate the display Data return Bandwidth */
8925	fixed20_12 return_efficiency; /* 0.8 */
8926	fixed20_12 sclk, bandwidth;
8927	fixed20_12 a;
8928
8929	a.full = dfixed_const(1000);
8930	sclk.full = dfixed_const(wm->sclk);
8931	sclk.full = dfixed_div(sclk, a);
8932	a.full = dfixed_const(10);
8933	return_efficiency.full = dfixed_const(8);
8934	return_efficiency.full = dfixed_div(return_efficiency, a);
8935	a.full = dfixed_const(32);
8936	bandwidth.full = dfixed_mul(a, sclk);
8937	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8938
8939	return dfixed_trunc(bandwidth);
8940}
8941
8942/**
8943 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8944 *
8945 * @wm: watermark calculation data
8946 *
8947 * Calculate the dmif bandwidth used for display (CIK).
8948 * Used for display watermark bandwidth calculations
8949 * Returns the dmif bandwidth in MBytes/s
8950 */
8951static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8952{
8953	/* Calculate the DMIF Request Bandwidth */
8954	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8955	fixed20_12 disp_clk, bandwidth;
8956	fixed20_12 a, b;
8957
8958	a.full = dfixed_const(1000);
8959	disp_clk.full = dfixed_const(wm->disp_clk);
8960	disp_clk.full = dfixed_div(disp_clk, a);
8961	a.full = dfixed_const(32);
8962	b.full = dfixed_mul(a, disp_clk);
8963
8964	a.full = dfixed_const(10);
8965	disp_clk_request_efficiency.full = dfixed_const(8);
8966	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8967
8968	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8969
8970	return dfixed_trunc(bandwidth);
8971}
8972
8973/**
8974 * dce8_available_bandwidth - get the min available bandwidth
8975 *
8976 * @wm: watermark calculation data
8977 *
8978 * Calculate the min available bandwidth used for display (CIK).
8979 * Used for display watermark bandwidth calculations
8980 * Returns the min available bandwidth in MBytes/s
8981 */
8982static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8983{
8984	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8985	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8986	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8987	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8988
8989	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8990}
8991
8992/**
8993 * dce8_average_bandwidth - get the average available bandwidth
8994 *
8995 * @wm: watermark calculation data
8996 *
8997 * Calculate the average available bandwidth used for display (CIK).
8998 * Used for display watermark bandwidth calculations
8999 * Returns the average available bandwidth in MBytes/s
9000 */
9001static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9002{
9003	/* Calculate the display mode Average Bandwidth
9004	 * DisplayMode should contain the source and destination dimensions,
9005	 * timing, etc.
9006	 */
9007	fixed20_12 bpp;
9008	fixed20_12 line_time;
9009	fixed20_12 src_width;
9010	fixed20_12 bandwidth;
9011	fixed20_12 a;
9012
9013	a.full = dfixed_const(1000);
9014	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9015	line_time.full = dfixed_div(line_time, a);
9016	bpp.full = dfixed_const(wm->bytes_per_pixel);
9017	src_width.full = dfixed_const(wm->src_width);
9018	bandwidth.full = dfixed_mul(src_width, bpp);
9019	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9020	bandwidth.full = dfixed_div(bandwidth, line_time);
9021
9022	return dfixed_trunc(bandwidth);
9023}
9024
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): the two divisions below run before the
	 * num_heads == 0 early-out and assume available_bandwidth != 0 */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vertical taps or interlace require
	 * more source lines buffered per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif-limited fill rate: dmif_size over the
	 * (mc_latency + 512) / disp_clk return time */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = bytes the display clock consumes per unit time */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is the lower of the two limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		/* pad the watermark by the amount the fill overruns a line */
		return latency + (line_fill_time - wm->active_time);

}
9096
9097/**
9098 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9099 * average and available dram bandwidth
9100 *
9101 * @wm: watermark calculation data
9102 *
9103 * Check if the display average bandwidth fits in the display
9104 * dram bandwidth (CIK).
9105 * Used for display watermark bandwidth calculations
9106 * Returns true if the display fits, false if not.
9107 */
9108static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9109{
9110	if (dce8_average_bandwidth(wm) <=
9111	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9112		return true;
9113	else
9114		return false;
9115}
9116
9117/**
9118 * dce8_average_bandwidth_vs_available_bandwidth - check
9119 * average and available bandwidth
9120 *
9121 * @wm: watermark calculation data
9122 *
9123 * Check if the display average bandwidth fits in the display
9124 * available bandwidth (CIK).
9125 * Used for display watermark bandwidth calculations
9126 * Returns true if the display fits, false if not.
9127 */
9128static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9129{
9130	if (dce8_average_bandwidth(wm) <=
9131	    (dce8_available_bandwidth(wm) / wm->num_heads))
9132		return true;
9133	else
9134		return false;
9135}
9136
9137/**
9138 * dce8_check_latency_hiding - check latency hiding
9139 *
9140 * @wm: watermark calculation data
9141 *
9142 * Check latency hiding (CIK).
9143 * Used for display watermark bandwidth calculations
9144 * Returns true if the display fits, false if not.
9145 */
9146static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9147{
9148	u32 lb_partitions = wm->lb_size / wm->src_width;
9149	u32 line_time = wm->active_time + wm->blank_time;
9150	u32 latency_tolerant_lines;
9151	u32 latency_hiding;
9152	fixed20_12 a;
9153
9154	a.full = dfixed_const(1);
9155	if (wm->vsc.full > a.full)
9156		latency_tolerant_lines = 1;
9157	else {
9158		if (lb_partitions <= (wm->vtaps + 1))
9159			latency_tolerant_lines = 1;
9160		else
9161			latency_tolerant_lines = 2;
9162	}
9163
9164	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9165
9166	if (dce8_latency_watermark(wm) <= latency_hiding)
9167		return true;
9168	else
9169		return false;
9170}
9171
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is in kHz, so pixel_period is in ns;
		 * line_time is clamped to the 16-bit hw field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		/* NOTE(review): only a debug message is emitted here; no
		 * priority register is actually touched in this function */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9307
9308/**
9309 * dce8_bandwidth_update - program display watermarks
9310 *
9311 * @rdev: radeon_device pointer
9312 *
9313 * Calculate and program the display watermarks and line
9314 * buffer allocation (CIK).
9315 */
9316void dce8_bandwidth_update(struct radeon_device *rdev)
9317{
9318	struct drm_display_mode *mode = NULL;
9319	u32 num_heads = 0, lb_size;
9320	int i;
9321
9322	if (!rdev->mode_info.mode_config_initialized)
9323		return;
9324
9325	radeon_update_display_priority(rdev);
9326
9327	for (i = 0; i < rdev->num_crtc; i++) {
9328		if (rdev->mode_info.crtcs[i]->base.enabled)
9329			num_heads++;
9330	}
9331	for (i = 0; i < rdev->num_crtc; i++) {
9332		mode = &rdev->mode_info.crtcs[i]->base.mode;
9333		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9334		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9335	}
9336}
9337
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	/* serialize the capture/read sequence: the write latches the
	 * counter so the two 32-bit halves are read consistently */
	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
9357
9358static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9359			     u32 cntl_reg, u32 status_reg)
9360{
9361	int r, i;
9362	struct atom_clock_dividers dividers;
9363	uint32_t tmp;
9364
9365	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9366					   clock, false, &dividers);
9367	if (r)
9368		return r;
9369
9370	tmp = RREG32_SMC(cntl_reg);
9371	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9372	tmp |= dividers.post_divider;
9373	WREG32_SMC(cntl_reg, tmp);
9374
9375	for (i = 0; i < 100; i++) {
9376		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9377			break;
9378		mdelay(10);
9379	}
9380	if (i == 100)
9381		return -ETIMEDOUT;
9382
9383	return 0;
9384}
9385
9386int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9387{
9388	int r = 0;
9389
9390	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9391	if (r)
9392		return r;
9393
9394	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9395	return r;
9396}
9397
9398int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9399{
9400	int r, i;
9401	struct atom_clock_dividers dividers;
9402	u32 tmp;
9403
9404	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9405					   ecclk, false, &dividers);
9406	if (r)
9407		return r;
9408
9409	for (i = 0; i < 100; i++) {
9410		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9411			break;
9412		mdelay(10);
9413	}
9414	if (i == 100)
9415		return -ETIMEDOUT;
9416
9417	tmp = RREG32_SMC(CG_ECLK_CNTL);
9418	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9419	tmp |= dividers.post_divider;
9420	WREG32_SMC(CG_ECLK_CNTL, tmp);
9421
9422	for (i = 0; i < 100; i++) {
9423		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9424			break;
9425		mdelay(10);
9426	}
9427	if (i == 100)
9428		return -ETIMEDOUT;
9429
9430	return 0;
9431}
9432
/*
 * cik_pcie_gen3_enable - try to bring the PCIe link up to gen2/gen3
 *
 * @rdev: radeon_device pointer
 *
 * If both the root port and the GPU support a higher link speed than
 * is currently trained, retrain the link (including the gen3
 * equalization retry dance) and request the highest supported speed.
 * Skipped for IGPs, non-PCIE parts, and when radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* module parameter disables all speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* NOTE(review): these LNKCTL read-modify-writes are
			 * not atomic w.r.t. concurrent config-space users */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its maximum before retraining */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* retry the equalization up to 10 times, or until no
			 * transactions are pending on the gpu */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, redo equalization, then release */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore the saved compliance-related
				 * bits (bit 4 and bits 11:9) from before the retrain */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed field (bits 3:0 of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the speed-change request bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9592
/**
 * cik_program_aspm - program PCIE ASPM (Active State Power Management) on CIK
 *
 * @rdev: radeon_device pointer
 *
 * Configures the PCIE link power-saving states: L0s/L1 inactivity entry,
 * PLL power-down while in L1, and (if the root port advertises clock power
 * management) CLKREQ#-based reference-clock handling.  All register updates
 * are read-modify-write and only written back when the value actually
 * changed.  Does nothing when the radeon_aspm module parameter is 0, on
 * IGPs, or on non-PCIE parts.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* policy knobs; all features currently enabled (nothing disabled) */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* honor the radeon_aspm=0 module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* ignore end-of-data-block errors */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; written back below,
	 * either inside the !disable_l1 branch or in the else branch */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* enable L1 entry on inactivity and allow PMI-initiated L1 */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* let the PCIE PLLs power off in the OFF/TXS2 link
			 * states (both PIFs, both pad blocks) */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			/* dynamic lane power state */
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# is only usable if the upstream root port
			 * advertises clock power management in LNKCAP */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				/* allow power-down in L1/L2-L3 */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* re-parent the thermal monitor, deep-sleep,
				 * zclk and MPLL bypass clocks and stop forcing
				 * the BIF refclk — presumably so the PCIE
				 * reference clock can be gated while CLKREQ#
				 * is deasserted; confirm against CIK hw docs */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit the L0s-only configuration from above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the PCIE slave/master/replay memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the received N_FTS field is saturated and the link is
		 * lane-reversed in both directions, turn L0s back off by
		 * clearing its inactivity threshold */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}