   1/*
   2 * Copyright 2012 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24
  25#include <linux/firmware.h>
  26#include <linux/module.h>
  27#include <linux/pci.h>
  28#include <linux/slab.h>
 
  29
 
  30#include <drm/drm_vblank.h>
  31
  32#include "atom.h"
  33#include "evergreen.h"
  34#include "cik_blit_shaders.h"
  35#include "cik.h"
  36#include "cikd.h"
  37#include "clearstate_ci.h"
  38#include "r600.h"
  39#include "radeon.h"
  40#include "radeon_asic.h"
  41#include "radeon_audio.h"
  42#include "radeon_ucode.h"
  43#include "si.h"
  44#include "vce.h"
  45
/* Default SH_MEM_CONFIG for GFX: unaligned shader memory access mode. */
#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)

/*
 * Firmware images the driver may request at runtime.
 * NOTE(review): both upper- and lower-case file names are declared per ASIC;
 * presumably the old vs. new firmware naming schemes — verify against the
 * firmware-fetch code before removing either set.
 */

/* Bonaire */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

/* Hawaii */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

/* Kaveri (no MC firmware: integrated APU) */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
 131
 
 
 
 
 
 
 
 
 
 
 132static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 
 
 
 
 133static void cik_rlc_stop(struct radeon_device *rdev);
 134static void cik_pcie_gen3_enable(struct radeon_device *rdev);
 135static void cik_program_aspm(struct radeon_device *rdev);
 136static void cik_init_pg(struct radeon_device *rdev);
 137static void cik_init_cg(struct radeon_device *rdev);
 138static void cik_fini_pg(struct radeon_device *rdev);
 139static void cik_fini_cg(struct radeon_device *rdev);
 140static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
 141					  bool enable);
 142
 143/**
 144 * cik_get_allowed_info_register - fetch the register for the info ioctl
 145 *
 146 * @rdev: radeon_device pointer
 147 * @reg: register offset in bytes
 148 * @val: register value
 149 *
 150 * Returns 0 for success or -EINVAL for an invalid register
 151 *
 152 */
 153int cik_get_allowed_info_register(struct radeon_device *rdev,
 154				  u32 reg, u32 *val)
 155{
 156	switch (reg) {
 157	case GRBM_STATUS:
 158	case GRBM_STATUS2:
 159	case GRBM_STATUS_SE0:
 160	case GRBM_STATUS_SE1:
 161	case GRBM_STATUS_SE2:
 162	case GRBM_STATUS_SE3:
 163	case SRBM_STATUS:
 164	case SRBM_STATUS2:
 165	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
 166	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
 167	case UVD_STATUS:
 168	/* TODO VCE */
 169		*val = RREG32(reg);
 170		return 0;
 171	default:
 172		return -EINVAL;
 173	}
 174}
 175
 176/*
 177 * Indirect registers accessor
 178 */
 179u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
 180{
 181	unsigned long flags;
 182	u32 r;
 183
 184	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 185	WREG32(CIK_DIDT_IND_INDEX, (reg));
 186	r = RREG32(CIK_DIDT_IND_DATA);
 187	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 188	return r;
 189}
 190
 191void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 192{
 193	unsigned long flags;
 194
 195	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
 196	WREG32(CIK_DIDT_IND_INDEX, (reg));
 197	WREG32(CIK_DIDT_IND_DATA, (v));
 198	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
 199}
 200
 201/* get temperature in millidegrees */
 202int ci_get_temp(struct radeon_device *rdev)
 203{
 204	u32 temp;
 205	int actual_temp = 0;
 206
 207	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
 208		CTF_TEMP_SHIFT;
 209
 210	if (temp & 0x200)
 211		actual_temp = 255;
 212	else
 213		actual_temp = temp & 0x1ff;
 214
 215	return actual_temp * 1000;
 
 
 216}
 217
 218/* get temperature in millidegrees */
 219int kv_get_temp(struct radeon_device *rdev)
 220{
 221	u32 temp;
 222	int actual_temp = 0;
 223
 224	temp = RREG32_SMC(0xC0300E0C);
 225
 226	if (temp)
 227		actual_temp = (temp / 8) - 49;
 228	else
 229		actual_temp = 0;
 230
 231	return actual_temp * 1000;
 
 
 232}
 233
 234/*
 235 * Indirect registers accessor
 236 */
 237u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
 238{
 239	unsigned long flags;
 240	u32 r;
 241
 242	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 243	WREG32(PCIE_INDEX, reg);
 244	(void)RREG32(PCIE_INDEX);
 245	r = RREG32(PCIE_DATA);
 246	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 247	return r;
 248}
 249
 250void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 251{
 252	unsigned long flags;
 253
 254	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
 255	WREG32(PCIE_INDEX, reg);
 256	(void)RREG32(PCIE_INDEX);
 257	WREG32(PCIE_DATA, v);
 258	(void)RREG32(PCIE_DATA);
 259	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
 260}
 261
/*
 * RLC save/restore register list for Spectre (Kaveri-class) parts.
 * NOTE(review): entries appear to be (broadcast/instance select << 16) |
 * (register byte offset >> 2) words, each followed by a placeholder data
 * word; the bare 0x3 / 0x5 words look like section markers for the RLC
 * firmware — confirm against the RLC interface spec before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* NOTE(review): looks like a section marker — confirm */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* NOTE(review): looks like a section marker — confirm */
	/* final section: register selects only, no placeholder data words */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
 708
/*
 * RLC save/restore register list for Kalindi (Kabini-class) parts.
 * NOTE(review): same apparent layout as spectre_rlc_save_restore_register_list
 * — (select << 16) | (reg offset >> 2) words each followed by a placeholder
 * data word, with bare 0x3 / 0x5 words that look like section markers;
 * confirm against the RLC firmware interface before editing.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* NOTE(review): looks like a section marker — confirm */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* NOTE(review): looks like a section marker — confirm */
	/* final section: register selects only, no placeholder data words */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1033
/*
 * Bonaire golden SPM register settings.
 * NOTE(review): rows look like {offset, and-mask, or-value} triples applied
 * at init time (presumably via radeon_program_register_sequence()) — confirm.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1038
/*
 * Bonaire golden common register settings.
 * NOTE(review): rows look like {offset, and-mask, or-value} triples applied
 * at init time (presumably via radeon_program_register_sequence()) — confirm.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1046
/*
 * Bonaire golden register settings.
 * NOTE(review): rows look like {offset, and-mask, or-value} triples applied
 * at init time (presumably via radeon_program_register_sequence()) — confirm.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1091
/*
 * Medium/coarse grain clock gating init sequence for Bonaire;
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1177
/*
 * SPM golden register fix-up for Spectre (Kaveri GFX);
 * {offset, mask, value} triple for radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1182
/*
 * Common golden register fix-ups for Spectre (Kaveri GFX);
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1190
/*
 * Golden register fix-ups for Spectre (Kaveri GFX);
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1219
/*
 * Medium/coarse grain clock gating init sequence for Spectre (Kaveri);
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1310
/*
 * SPM golden register fix-up for Kalindi (Kabini GFX);
 * {offset, mask, value} triple for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1315
/*
 * Common golden register fix-ups for Kalindi (Kabini GFX);
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1323
/*
 * Golden register fix-ups for Kalindi (Kabini GFX);
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1357
/*
 * Medium/coarse grain clock gating init sequence for Kalindi (Kabini);
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1416
/*
 * SPM golden register fix-up for Hawaii;
 * {offset, mask, value} triple for radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1421
/*
 * Common golden register fix-ups for Hawaii;
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1430
/*
 * Golden register fix-ups for Hawaii;
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1470
/*
 * Medium/coarse grain clock gating init sequence for Hawaii;
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1581
/*
 * Golden register fix-ups for Godavari (Mullins GFX);
 * {offset, mask, value} triples for radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): offset 0x98302 looks like a typo for 0x9834 (the
	 * kalindi table uses 0x9834 with the same mask/value) -- confirm
	 * against the register spec before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1617
1618
1619static void cik_init_golden_registers(struct radeon_device *rdev)
1620{
1621	switch (rdev->family) {
1622	case CHIP_BONAIRE:
1623		radeon_program_register_sequence(rdev,
1624						 bonaire_mgcg_cgcg_init,
1625						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1626		radeon_program_register_sequence(rdev,
1627						 bonaire_golden_registers,
1628						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1629		radeon_program_register_sequence(rdev,
1630						 bonaire_golden_common_registers,
1631						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1632		radeon_program_register_sequence(rdev,
1633						 bonaire_golden_spm_registers,
1634						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1635		break;
1636	case CHIP_KABINI:
1637		radeon_program_register_sequence(rdev,
1638						 kalindi_mgcg_cgcg_init,
1639						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1640		radeon_program_register_sequence(rdev,
1641						 kalindi_golden_registers,
1642						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1643		radeon_program_register_sequence(rdev,
1644						 kalindi_golden_common_registers,
1645						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1646		radeon_program_register_sequence(rdev,
1647						 kalindi_golden_spm_registers,
1648						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1649		break;
1650	case CHIP_MULLINS:
1651		radeon_program_register_sequence(rdev,
1652						 kalindi_mgcg_cgcg_init,
1653						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1654		radeon_program_register_sequence(rdev,
1655						 godavari_golden_registers,
1656						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1657		radeon_program_register_sequence(rdev,
1658						 kalindi_golden_common_registers,
1659						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1660		radeon_program_register_sequence(rdev,
1661						 kalindi_golden_spm_registers,
1662						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1663		break;
1664	case CHIP_KAVERI:
1665		radeon_program_register_sequence(rdev,
1666						 spectre_mgcg_cgcg_init,
1667						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1668		radeon_program_register_sequence(rdev,
1669						 spectre_golden_registers,
1670						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1671		radeon_program_register_sequence(rdev,
1672						 spectre_golden_common_registers,
1673						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1674		radeon_program_register_sequence(rdev,
1675						 spectre_golden_spm_registers,
1676						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1677		break;
1678	case CHIP_HAWAII:
1679		radeon_program_register_sequence(rdev,
1680						 hawaii_mgcg_cgcg_init,
1681						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1682		radeon_program_register_sequence(rdev,
1683						 hawaii_golden_registers,
1684						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1685		radeon_program_register_sequence(rdev,
1686						 hawaii_golden_common_registers,
1687						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1688		radeon_program_register_sequence(rdev,
1689						 hawaii_golden_spm_registers,
1690						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1691		break;
1692	default:
1693		break;
1694	}
1695}
1696
1697/**
1698 * cik_get_xclk - get the xclk
1699 *
1700 * @rdev: radeon_device pointer
1701 *
1702 * Returns the reference clock used by the gfx engine
1703 * (CIK).
1704 */
1705u32 cik_get_xclk(struct radeon_device *rdev)
1706{
1707	u32 reference_clock = rdev->clock.spll.reference_freq;
1708
1709	if (rdev->flags & RADEON_IS_IGP) {
1710		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1711			return reference_clock / 2;
1712	} else {
1713		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1714			return reference_clock / 4;
1715	}
1716	return reference_clock;
1717}
1718
1719/**
1720 * cik_mm_rdoorbell - read a doorbell dword
1721 *
1722 * @rdev: radeon_device pointer
1723 * @index: doorbell index
1724 *
1725 * Returns the value in the doorbell aperture at the
1726 * requested doorbell index (CIK).
1727 */
1728u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1729{
1730	if (index < rdev->doorbell.num_doorbells) {
1731		return readl(rdev->doorbell.ptr + index);
1732	} else {
1733		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1734		return 0;
1735	}
1736}
1737
1738/**
1739 * cik_mm_wdoorbell - write a doorbell dword
1740 *
1741 * @rdev: radeon_device pointer
1742 * @index: doorbell index
1743 * @v: value to write
1744 *
1745 * Writes @v to the doorbell aperture at the
1746 * requested doorbell index (CIK).
1747 */
1748void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1749{
1750	if (index < rdev->doorbell.num_doorbells) {
1751		writel(v, rdev->doorbell.ptr + index);
1752	} else {
1753		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1754	}
1755}
1756
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * MC IO debug {index, data} pairs for Bonaire, written to
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when using
 * the legacy (non-unified) MC firmware.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1798
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * MC IO debug {index, data} pairs for Hawaii, written to
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when using
 * the legacy (non-unified) MC firmware.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1826
1827
1828/**
1829 * cik_srbm_select - select specific register instances
1830 *
1831 * @rdev: radeon_device pointer
1832 * @me: selected ME (micro engine)
1833 * @pipe: pipe
1834 * @queue: queue
1835 * @vmid: VMID
1836 *
1837 * Switches the currently active registers instances.  Some
1838 * registers are instanced per VMID, others are instanced per
1839 * me/pipe/queue combination.
1840 */
1841static void cik_srbm_select(struct radeon_device *rdev,
1842			    u32 me, u32 pipe, u32 queue, u32 vmid)
1843{
1844	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1845			     MEID(me & 0x3) |
1846			     VMID(vmid & 0xf) |
1847			     QUEUEID(queue & 0x7));
1848	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1849}
1850
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).  Supports both the
 * unified firmware image (rdev->new_fw, sizes/offsets taken from the
 * firmware header) and the legacy split image (driver-side IO register
 * tables, ucode size derived from the blob size).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* unified image: header gives the io_debug table and ucode
		 * locations; io_debug entries are {index, data} dword pairs */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy image: whole blob is big-endian ucode dwords and the
		 * IO register pairs come from the per-ASIC tables above */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only (re)program the MC when its sequencer is not running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* extra IO debug tweaks for PCI device 0x6649 with a matching
		 * MC_SEQ_MISC0 revision -- board-specific quirk, presumably;
		 * TODO confirm which board this targets */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1956
1957/**
1958 * cik_init_microcode - load ucode images from disk
1959 *
1960 * @rdev: radeon_device pointer
1961 *
1962 * Use the firmware interface to load the ucode images into
1963 * the driver (not loaded into hw).
1964 * Returns 0 on success, error on failure.
1965 */
1966static int cik_init_microcode(struct radeon_device *rdev)
1967{
1968	const char *chip_name;
1969	const char *new_chip_name;
1970	size_t pfp_req_size, me_req_size, ce_req_size,
1971		mec_req_size, rlc_req_size, mc_req_size = 0,
1972		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973	char fw_name[30];
1974	int new_fw = 0;
1975	int err;
1976	int num_fw;
1977	bool new_smc = false;
1978
1979	DRM_DEBUG("\n");
1980
1981	switch (rdev->family) {
1982	case CHIP_BONAIRE:
1983		chip_name = "BONAIRE";
1984		if ((rdev->pdev->revision == 0x80) ||
1985		    (rdev->pdev->revision == 0x81) ||
1986		    (rdev->pdev->device == 0x665f))
1987			new_smc = true;
1988		new_chip_name = "bonaire";
1989		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990		me_req_size = CIK_ME_UCODE_SIZE * 4;
1991		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998		num_fw = 8;
1999		break;
2000	case CHIP_HAWAII:
2001		chip_name = "HAWAII";
2002		if (rdev->pdev->revision == 0x80)
2003			new_smc = true;
2004		new_chip_name = "hawaii";
2005		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006		me_req_size = CIK_ME_UCODE_SIZE * 4;
2007		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014		num_fw = 8;
2015		break;
2016	case CHIP_KAVERI:
2017		chip_name = "KAVERI";
2018		new_chip_name = "kaveri";
2019		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020		me_req_size = CIK_ME_UCODE_SIZE * 4;
2021		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025		num_fw = 7;
2026		break;
2027	case CHIP_KABINI:
2028		chip_name = "KABINI";
2029		new_chip_name = "kabini";
2030		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031		me_req_size = CIK_ME_UCODE_SIZE * 4;
2032		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036		num_fw = 6;
2037		break;
2038	case CHIP_MULLINS:
2039		chip_name = "MULLINS";
2040		new_chip_name = "mullins";
2041		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042		me_req_size = CIK_ME_UCODE_SIZE * 4;
2043		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047		num_fw = 6;
2048		break;
2049	default: BUG();
2050	}
2051
2052	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053
2054	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056	if (err) {
2057		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059		if (err)
2060			goto out;
2061		if (rdev->pfp_fw->size != pfp_req_size) {
2062			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063			       rdev->pfp_fw->size, fw_name);
2064			err = -EINVAL;
2065			goto out;
2066		}
2067	} else {
2068		err = radeon_ucode_validate(rdev->pfp_fw);
2069		if (err) {
2070			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071			       fw_name);
2072			goto out;
2073		} else {
2074			new_fw++;
2075		}
2076	}
2077
2078	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080	if (err) {
2081		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083		if (err)
2084			goto out;
2085		if (rdev->me_fw->size != me_req_size) {
2086			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2087			       rdev->me_fw->size, fw_name);
2088			err = -EINVAL;
2089		}
2090	} else {
2091		err = radeon_ucode_validate(rdev->me_fw);
2092		if (err) {
2093			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094			       fw_name);
2095			goto out;
2096		} else {
2097			new_fw++;
2098		}
2099	}
2100
2101	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103	if (err) {
2104		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106		if (err)
2107			goto out;
2108		if (rdev->ce_fw->size != ce_req_size) {
2109			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2110			       rdev->ce_fw->size, fw_name);
2111			err = -EINVAL;
2112		}
2113	} else {
2114		err = radeon_ucode_validate(rdev->ce_fw);
2115		if (err) {
2116			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117			       fw_name);
2118			goto out;
2119		} else {
2120			new_fw++;
2121		}
2122	}
2123
2124	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126	if (err) {
2127		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129		if (err)
2130			goto out;
2131		if (rdev->mec_fw->size != mec_req_size) {
2132			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133			       rdev->mec_fw->size, fw_name);
2134			err = -EINVAL;
2135		}
2136	} else {
2137		err = radeon_ucode_validate(rdev->mec_fw);
2138		if (err) {
2139			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140			       fw_name);
2141			goto out;
2142		} else {
2143			new_fw++;
2144		}
2145	}
2146
2147	if (rdev->family == CHIP_KAVERI) {
2148		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2149		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150		if (err) {
2151			goto out;
2152		} else {
2153			err = radeon_ucode_validate(rdev->mec2_fw);
2154			if (err) {
2155				goto out;
2156			} else {
2157				new_fw++;
2158			}
2159		}
2160	}
2161
2162	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164	if (err) {
2165		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167		if (err)
2168			goto out;
2169		if (rdev->rlc_fw->size != rlc_req_size) {
2170			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171			       rdev->rlc_fw->size, fw_name);
2172			err = -EINVAL;
2173		}
2174	} else {
2175		err = radeon_ucode_validate(rdev->rlc_fw);
2176		if (err) {
2177			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178			       fw_name);
2179			goto out;
2180		} else {
2181			new_fw++;
2182		}
2183	}
2184
2185	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187	if (err) {
2188		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190		if (err)
2191			goto out;
2192		if (rdev->sdma_fw->size != sdma_req_size) {
2193			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194			       rdev->sdma_fw->size, fw_name);
2195			err = -EINVAL;
2196		}
2197	} else {
2198		err = radeon_ucode_validate(rdev->sdma_fw);
2199		if (err) {
2200			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201			       fw_name);
2202			goto out;
2203		} else {
2204			new_fw++;
2205		}
2206	}
2207
2208	/* No SMC, MC ucode on APUs */
2209	if (!(rdev->flags & RADEON_IS_IGP)) {
2210		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212		if (err) {
2213			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215			if (err) {
2216				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218				if (err)
2219					goto out;
2220			}
2221			if ((rdev->mc_fw->size != mc_req_size) &&
2222			    (rdev->mc_fw->size != mc2_req_size)){
2223				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2224				       rdev->mc_fw->size, fw_name);
2225				err = -EINVAL;
2226			}
2227			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228		} else {
2229			err = radeon_ucode_validate(rdev->mc_fw);
2230			if (err) {
2231				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232				       fw_name);
2233				goto out;
2234			} else {
2235				new_fw++;
2236			}
2237		}
2238
2239		if (new_smc)
2240			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241		else
2242			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244		if (err) {
2245			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247			if (err) {
2248				pr_err("smc: error loading firmware \"%s\"\n",
2249				       fw_name);
2250				release_firmware(rdev->smc_fw);
2251				rdev->smc_fw = NULL;
2252				err = 0;
2253			} else if (rdev->smc_fw->size != smc_req_size) {
2254				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255				       rdev->smc_fw->size, fw_name);
2256				err = -EINVAL;
2257			}
2258		} else {
2259			err = radeon_ucode_validate(rdev->smc_fw);
2260			if (err) {
2261				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262				       fw_name);
2263				goto out;
2264			} else {
2265				new_fw++;
2266			}
2267		}
2268	}
2269
2270	if (new_fw == 0) {
2271		rdev->new_fw = false;
2272	} else if (new_fw < num_fw) {
2273		pr_err("ci_fw: mixing new and old firmware!\n");
2274		err = -EINVAL;
2275	} else {
2276		rdev->new_fw = true;
2277	}
2278
2279out:
2280	if (err) {
2281		if (err != -EINVAL)
2282			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283			       fw_name);
2284		release_firmware(rdev->pfp_fw);
2285		rdev->pfp_fw = NULL;
2286		release_firmware(rdev->me_fw);
2287		rdev->me_fw = NULL;
2288		release_firmware(rdev->ce_fw);
2289		rdev->ce_fw = NULL;
2290		release_firmware(rdev->mec_fw);
2291		rdev->mec_fw = NULL;
2292		release_firmware(rdev->mec2_fw);
2293		rdev->mec2_fw = NULL;
2294		release_firmware(rdev->rlc_fw);
2295		rdev->rlc_fw = NULL;
2296		release_firmware(rdev->sdma_fw);
2297		rdev->sdma_fw = NULL;
2298		release_firmware(rdev->mc_fw);
2299		rdev->mc_fw = NULL;
2300		release_firmware(rdev->smc_fw);
2301		rdev->smc_fw = NULL;
2302	}
2303	return err;
2304}
2305
2306/*
2307 * Core functions
2308 */
2309/**
2310 * cik_tiling_mode_table_init - init the hw tiling table
2311 *
2312 * @rdev: radeon_device pointer
2313 *
2314 * Starting with SI, the tiling setup is done globally in a
2315 * set of 32 tiling modes.  Rather than selecting each set of
2316 * parameters per surface as on older asics, we just select
2317 * which index in the tiling table we want to use, and the
2318 * surface uses those parameters (CIK).
2319 */
2320static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321{
2322	u32 *tile = rdev->config.cik.tile_mode_array;
2323	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324	const u32 num_tile_mode_states =
2325			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326	const u32 num_secondary_tile_mode_states =
2327			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328	u32 reg_offset, split_equal_to_row_size;
2329	u32 num_pipe_configs;
2330	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331		rdev->config.cik.max_shader_engines;
2332
2333	switch (rdev->config.cik.mem_row_size_in_kb) {
2334	case 1:
2335		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336		break;
2337	case 2:
2338	default:
2339		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340		break;
2341	case 4:
2342		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343		break;
2344	}
2345
2346	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2347	if (num_pipe_configs > 8)
2348		num_pipe_configs = 16;
2349
2350	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351		tile[reg_offset] = 0;
2352	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353		macrotile[reg_offset] = 0;
2354
2355	switch(num_pipe_configs) {
2356	case 16:
2357		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376			   TILE_SPLIT(split_equal_to_row_size));
2377		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387			   TILE_SPLIT(split_equal_to_row_size));
2388		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435
2436		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439			   NUM_BANKS(ADDR_SURF_16_BANK));
2440		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443			   NUM_BANKS(ADDR_SURF_16_BANK));
2444		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447			   NUM_BANKS(ADDR_SURF_16_BANK));
2448		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451			   NUM_BANKS(ADDR_SURF_16_BANK));
2452		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455			   NUM_BANKS(ADDR_SURF_8_BANK));
2456		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459			   NUM_BANKS(ADDR_SURF_4_BANK));
2460		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463			   NUM_BANKS(ADDR_SURF_2_BANK));
2464		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467			   NUM_BANKS(ADDR_SURF_16_BANK));
2468		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471			   NUM_BANKS(ADDR_SURF_16_BANK));
2472		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475			    NUM_BANKS(ADDR_SURF_16_BANK));
2476		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479			    NUM_BANKS(ADDR_SURF_8_BANK));
2480		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483			    NUM_BANKS(ADDR_SURF_4_BANK));
2484		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487			    NUM_BANKS(ADDR_SURF_2_BANK));
2488		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491			    NUM_BANKS(ADDR_SURF_2_BANK));
2492
2493		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497		break;
2498
2499	case 8:
2500		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519			   TILE_SPLIT(split_equal_to_row_size));
2520		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530			   TILE_SPLIT(split_equal_to_row_size));
2531		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578
2579		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582				NUM_BANKS(ADDR_SURF_16_BANK));
2583		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586				NUM_BANKS(ADDR_SURF_16_BANK));
2587		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590				NUM_BANKS(ADDR_SURF_16_BANK));
2591		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594				NUM_BANKS(ADDR_SURF_16_BANK));
2595		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598				NUM_BANKS(ADDR_SURF_8_BANK));
2599		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602				NUM_BANKS(ADDR_SURF_4_BANK));
2603		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606				NUM_BANKS(ADDR_SURF_2_BANK));
2607		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610				NUM_BANKS(ADDR_SURF_16_BANK));
2611		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614				NUM_BANKS(ADDR_SURF_16_BANK));
2615		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618				NUM_BANKS(ADDR_SURF_16_BANK));
2619		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622				NUM_BANKS(ADDR_SURF_16_BANK));
2623		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626				NUM_BANKS(ADDR_SURF_8_BANK));
2627		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630				NUM_BANKS(ADDR_SURF_4_BANK));
2631		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634				NUM_BANKS(ADDR_SURF_2_BANK));
2635
2636		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640		break;
2641
2642	case 4:
2643		if (num_rbs == 4) {
2644		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663			   TILE_SPLIT(split_equal_to_row_size));
2664		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674			   TILE_SPLIT(split_equal_to_row_size));
2675		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722
2723		} else if (num_rbs < 4) {
2724		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743			   TILE_SPLIT(split_equal_to_row_size));
2744		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754			   TILE_SPLIT(split_equal_to_row_size));
2755		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802		}
2803
2804		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807				NUM_BANKS(ADDR_SURF_16_BANK));
2808		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811				NUM_BANKS(ADDR_SURF_16_BANK));
2812		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815				NUM_BANKS(ADDR_SURF_16_BANK));
2816		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819				NUM_BANKS(ADDR_SURF_16_BANK));
2820		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823				NUM_BANKS(ADDR_SURF_16_BANK));
2824		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827				NUM_BANKS(ADDR_SURF_8_BANK));
2828		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831				NUM_BANKS(ADDR_SURF_4_BANK));
2832		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835				NUM_BANKS(ADDR_SURF_16_BANK));
2836		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839				NUM_BANKS(ADDR_SURF_16_BANK));
2840		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843				NUM_BANKS(ADDR_SURF_16_BANK));
2844		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847				NUM_BANKS(ADDR_SURF_16_BANK));
2848		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851				NUM_BANKS(ADDR_SURF_16_BANK));
2852		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855				NUM_BANKS(ADDR_SURF_8_BANK));
2856		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859				NUM_BANKS(ADDR_SURF_4_BANK));
2860
2861		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865		break;
2866
2867	case 2:
2868		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870			   PIPE_CONFIG(ADDR_SURF_P2) |
2871			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874			   PIPE_CONFIG(ADDR_SURF_P2) |
2875			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878			   PIPE_CONFIG(ADDR_SURF_P2) |
2879			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882			   PIPE_CONFIG(ADDR_SURF_P2) |
2883			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886			   PIPE_CONFIG(ADDR_SURF_P2) |
2887			   TILE_SPLIT(split_equal_to_row_size));
2888		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889			   PIPE_CONFIG(ADDR_SURF_P2) |
2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893			   PIPE_CONFIG(ADDR_SURF_P2) |
2894			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897			   PIPE_CONFIG(ADDR_SURF_P2) |
2898			   TILE_SPLIT(split_equal_to_row_size));
2899		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2900			   PIPE_CONFIG(ADDR_SURF_P2);
2901		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903			   PIPE_CONFIG(ADDR_SURF_P2));
2904		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906			    PIPE_CONFIG(ADDR_SURF_P2) |
2907			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910			    PIPE_CONFIG(ADDR_SURF_P2) |
2911			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914			    PIPE_CONFIG(ADDR_SURF_P2) |
2915			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917			    PIPE_CONFIG(ADDR_SURF_P2) |
2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921			    PIPE_CONFIG(ADDR_SURF_P2) |
2922			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925			    PIPE_CONFIG(ADDR_SURF_P2) |
2926			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929			    PIPE_CONFIG(ADDR_SURF_P2) |
2930			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933			    PIPE_CONFIG(ADDR_SURF_P2));
2934		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936			    PIPE_CONFIG(ADDR_SURF_P2) |
2937			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940			    PIPE_CONFIG(ADDR_SURF_P2) |
2941			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944			    PIPE_CONFIG(ADDR_SURF_P2) |
2945			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946
2947		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950				NUM_BANKS(ADDR_SURF_16_BANK));
2951		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954				NUM_BANKS(ADDR_SURF_16_BANK));
2955		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958				NUM_BANKS(ADDR_SURF_16_BANK));
2959		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962				NUM_BANKS(ADDR_SURF_16_BANK));
2963		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966				NUM_BANKS(ADDR_SURF_16_BANK));
2967		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970				NUM_BANKS(ADDR_SURF_16_BANK));
2971		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974				NUM_BANKS(ADDR_SURF_8_BANK));
2975		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978				NUM_BANKS(ADDR_SURF_16_BANK));
2979		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982				NUM_BANKS(ADDR_SURF_16_BANK));
2983		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986				NUM_BANKS(ADDR_SURF_16_BANK));
2987		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990				NUM_BANKS(ADDR_SURF_16_BANK));
2991		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994				NUM_BANKS(ADDR_SURF_16_BANK));
2995		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998				NUM_BANKS(ADDR_SURF_16_BANK));
2999		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002				NUM_BANKS(ADDR_SURF_8_BANK));
3003
3004		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008		break;
3009
3010	default:
3011		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012	}
3013}
3014
3015/**
3016 * cik_select_se_sh - select which SE, SH to address
3017 *
3018 * @rdev: radeon_device pointer
3019 * @se_num: shader engine to address
3020 * @sh_num: sh block to address
3021 *
3022 * Select which SE, SH combinations to address. Certain
3023 * registers are instanced per SE or SH.  0xffffffff means
3024 * broadcast to all SEs or SHs (CIK).
3025 */
3026static void cik_select_se_sh(struct radeon_device *rdev,
3027			     u32 se_num, u32 sh_num)
3028{
3029	u32 data = INSTANCE_BROADCAST_WRITES;
3030
3031	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033	else if (se_num == 0xffffffff)
3034		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035	else if (sh_num == 0xffffffff)
3036		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037	else
3038		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039	WREG32(GRBM_GFX_INDEX, data);
3040}
3041
3042/**
3043 * cik_create_bitmask - create a bitmask
3044 *
3045 * @bit_width: length of the mask
3046 *
3047 * create a variable length bit mask (CIK).
3048 * Returns the bitmask.
3049 */
3050static u32 cik_create_bitmask(u32 bit_width)
3051{
3052	u32 i, mask = 0;
3053
3054	for (i = 0; i < bit_width; i++) {
3055		mask <<= 1;
3056		mask |= 1;
3057	}
3058	return mask;
3059}
3060
3061/**
3062 * cik_get_rb_disabled - computes the mask of disabled RBs
3063 *
3064 * @rdev: radeon_device pointer
3065 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
 
3066 * @sh_per_se: number of SH blocks per SE for the asic
3067 *
3068 * Calculates the bitmask of disabled RBs (CIK).
3069 * Returns the disabled RB bitmask.
3070 */
3071static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072			      u32 max_rb_num_per_se,
3073			      u32 sh_per_se)
3074{
3075	u32 data, mask;
3076
3077	data = RREG32(CC_RB_BACKEND_DISABLE);
3078	if (data & 1)
3079		data &= BACKEND_DISABLE_MASK;
3080	else
3081		data = 0;
3082	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083
3084	data >>= BACKEND_DISABLE_SHIFT;
3085
3086	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087
3088	return data & mask;
3089}
3090
3091/**
3092 * cik_setup_rb - setup the RBs on the asic
3093 *
3094 * @rdev: radeon_device pointer
3095 * @se_num: number of SEs (shader engines) for the asic
3096 * @sh_per_se: number of SH blocks per SE for the asic
3097 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3098 *
3099 * Configures per-SE/SH RB registers (CIK).
3100 */
3101static void cik_setup_rb(struct radeon_device *rdev,
3102			 u32 se_num, u32 sh_per_se,
3103			 u32 max_rb_num_per_se)
3104{
3105	int i, j;
3106	u32 data, mask;
3107	u32 disabled_rbs = 0;
3108	u32 enabled_rbs = 0;
3109
3110	for (i = 0; i < se_num; i++) {
3111		for (j = 0; j < sh_per_se; j++) {
3112			cik_select_se_sh(rdev, i, j);
3113			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3114			if (rdev->family == CHIP_HAWAII)
3115				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3116			else
3117				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3118		}
3119	}
3120	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3121
3122	mask = 1;
3123	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3124		if (!(disabled_rbs & mask))
3125			enabled_rbs |= mask;
3126		mask <<= 1;
3127	}
3128
3129	rdev->config.cik.backend_enable_mask = enabled_rbs;
3130
3131	for (i = 0; i < se_num; i++) {
3132		cik_select_se_sh(rdev, i, 0xffffffff);
3133		data = 0;
3134		for (j = 0; j < sh_per_se; j++) {
3135			switch (enabled_rbs & 3) {
3136			case 0:
3137				if (j == 0)
3138					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3139				else
3140					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3141				break;
3142			case 1:
3143				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3144				break;
3145			case 2:
3146				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3147				break;
3148			case 3:
3149			default:
3150				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3151				break;
3152			}
3153			enabled_rbs >>= 2;
3154		}
3155		WREG32(PA_SC_RASTER_CONFIG, data);
3156	}
3157	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3158}
3159
3160/**
3161 * cik_gpu_init - setup the 3D engine
3162 *
3163 * @rdev: radeon_device pointer
3164 *
3165 * Configures the 3D engine and tiling configuration
3166 * registers so that the 3D engine is usable.
3167 */
3168static void cik_gpu_init(struct radeon_device *rdev)
3169{
3170	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3171	u32 mc_arb_ramcfg;
3172	u32 hdp_host_path_cntl;
3173	u32 tmp;
3174	int i, j;
3175
3176	switch (rdev->family) {
3177	case CHIP_BONAIRE:
3178		rdev->config.cik.max_shader_engines = 2;
3179		rdev->config.cik.max_tile_pipes = 4;
3180		rdev->config.cik.max_cu_per_sh = 7;
3181		rdev->config.cik.max_sh_per_se = 1;
3182		rdev->config.cik.max_backends_per_se = 2;
3183		rdev->config.cik.max_texture_channel_caches = 4;
3184		rdev->config.cik.max_gprs = 256;
3185		rdev->config.cik.max_gs_threads = 32;
3186		rdev->config.cik.max_hw_contexts = 8;
3187
3188		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3193		break;
3194	case CHIP_HAWAII:
3195		rdev->config.cik.max_shader_engines = 4;
3196		rdev->config.cik.max_tile_pipes = 16;
3197		rdev->config.cik.max_cu_per_sh = 11;
3198		rdev->config.cik.max_sh_per_se = 1;
3199		rdev->config.cik.max_backends_per_se = 4;
3200		rdev->config.cik.max_texture_channel_caches = 16;
3201		rdev->config.cik.max_gprs = 256;
3202		rdev->config.cik.max_gs_threads = 32;
3203		rdev->config.cik.max_hw_contexts = 8;
3204
3205		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3210		break;
3211	case CHIP_KAVERI:
3212		rdev->config.cik.max_shader_engines = 1;
3213		rdev->config.cik.max_tile_pipes = 4;
3214		rdev->config.cik.max_cu_per_sh = 8;
3215		rdev->config.cik.max_backends_per_se = 2;
3216		rdev->config.cik.max_sh_per_se = 1;
3217		rdev->config.cik.max_texture_channel_caches = 4;
3218		rdev->config.cik.max_gprs = 256;
3219		rdev->config.cik.max_gs_threads = 16;
3220		rdev->config.cik.max_hw_contexts = 8;
3221
3222		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3227		break;
3228	case CHIP_KABINI:
3229	case CHIP_MULLINS:
3230	default:
3231		rdev->config.cik.max_shader_engines = 1;
3232		rdev->config.cik.max_tile_pipes = 2;
3233		rdev->config.cik.max_cu_per_sh = 2;
3234		rdev->config.cik.max_sh_per_se = 1;
3235		rdev->config.cik.max_backends_per_se = 1;
3236		rdev->config.cik.max_texture_channel_caches = 2;
3237		rdev->config.cik.max_gprs = 256;
3238		rdev->config.cik.max_gs_threads = 16;
3239		rdev->config.cik.max_hw_contexts = 8;
3240
3241		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3242		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3243		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3244		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3245		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3246		break;
3247	}
3248
3249	/* Initialize HDP */
3250	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3251		WREG32((0x2c14 + j), 0x00000000);
3252		WREG32((0x2c18 + j), 0x00000000);
3253		WREG32((0x2c1c + j), 0x00000000);
3254		WREG32((0x2c20 + j), 0x00000000);
3255		WREG32((0x2c24 + j), 0x00000000);
3256	}
3257
3258	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3259	WREG32(SRBM_INT_CNTL, 0x1);
3260	WREG32(SRBM_INT_ACK, 0x1);
3261
3262	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3263
3264	RREG32(MC_SHARED_CHMAP);
3265	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3266
3267	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3268	rdev->config.cik.mem_max_burst_length_bytes = 256;
3269	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3270	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3271	if (rdev->config.cik.mem_row_size_in_kb > 4)
3272		rdev->config.cik.mem_row_size_in_kb = 4;
3273	/* XXX use MC settings? */
3274	rdev->config.cik.shader_engine_tile_size = 32;
3275	rdev->config.cik.num_gpus = 1;
3276	rdev->config.cik.multi_gpu_tile_size = 64;
3277
3278	/* fix up row size */
3279	gb_addr_config &= ~ROW_SIZE_MASK;
3280	switch (rdev->config.cik.mem_row_size_in_kb) {
3281	case 1:
3282	default:
3283		gb_addr_config |= ROW_SIZE(0);
3284		break;
3285	case 2:
3286		gb_addr_config |= ROW_SIZE(1);
3287		break;
3288	case 4:
3289		gb_addr_config |= ROW_SIZE(2);
3290		break;
3291	}
3292
3293	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3294	 * not have bank info, so create a custom tiling dword.
3295	 * bits 3:0   num_pipes
3296	 * bits 7:4   num_banks
3297	 * bits 11:8  group_size
3298	 * bits 15:12 row_size
3299	 */
3300	rdev->config.cik.tile_config = 0;
3301	switch (rdev->config.cik.num_tile_pipes) {
3302	case 1:
3303		rdev->config.cik.tile_config |= (0 << 0);
3304		break;
3305	case 2:
3306		rdev->config.cik.tile_config |= (1 << 0);
3307		break;
3308	case 4:
3309		rdev->config.cik.tile_config |= (2 << 0);
3310		break;
3311	case 8:
3312	default:
3313		/* XXX what about 12? */
3314		rdev->config.cik.tile_config |= (3 << 0);
3315		break;
3316	}
3317	rdev->config.cik.tile_config |=
3318		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3319	rdev->config.cik.tile_config |=
3320		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3321	rdev->config.cik.tile_config |=
3322		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3323
3324	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3325	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3326	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3327	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3328	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3329	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3330	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3331	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3332
3333	cik_tiling_mode_table_init(rdev);
3334
3335	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3336		     rdev->config.cik.max_sh_per_se,
3337		     rdev->config.cik.max_backends_per_se);
3338
3339	rdev->config.cik.active_cus = 0;
3340	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3341		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3342			rdev->config.cik.active_cus +=
3343				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3344		}
3345	}
3346
3347	/* set HW defaults for 3D engine */
3348	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3349
3350	WREG32(SX_DEBUG_1, 0x20);
3351
3352	WREG32(TA_CNTL_AUX, 0x00010000);
3353
3354	tmp = RREG32(SPI_CONFIG_CNTL);
3355	tmp |= 0x03000000;
3356	WREG32(SPI_CONFIG_CNTL, tmp);
3357
3358	WREG32(SQ_CONFIG, 1);
3359
3360	WREG32(DB_DEBUG, 0);
3361
3362	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3363	tmp |= 0x00000400;
3364	WREG32(DB_DEBUG2, tmp);
3365
3366	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3367	tmp |= 0x00020200;
3368	WREG32(DB_DEBUG3, tmp);
3369
3370	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3371	tmp |= 0x00018208;
3372	WREG32(CB_HW_CONTROL, tmp);
3373
3374	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3375
3376	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3377				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3378				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3379				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3380
3381	WREG32(VGT_NUM_INSTANCES, 1);
3382
3383	WREG32(CP_PERFMON_CNTL, 0);
3384
3385	WREG32(SQ_CONFIG, 0);
3386
3387	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3388					  FORCE_EOV_MAX_REZ_CNT(255)));
3389
3390	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3391	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3392
3393	WREG32(VGT_GS_VERTEX_REUSE, 16);
3394	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3395
3396	tmp = RREG32(HDP_MISC_CNTL);
3397	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3398	WREG32(HDP_MISC_CNTL, tmp);
3399
3400	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3401	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3402
3403	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3404	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3405
3406	udelay(50);
3407}
3408
3409/*
3410 * GPU scratch registers helpers function.
3411 */
3412/**
3413 * cik_scratch_init - setup driver info for CP scratch regs
3414 *
3415 * @rdev: radeon_device pointer
3416 *
3417 * Set up the number and offset of the CP scratch registers.
3418 * NOTE: use of CP scratch registers is a legacy inferface and
3419 * is not used by default on newer asics (r6xx+).  On newer asics,
3420 * memory buffers are used for fences rather than scratch regs.
3421 */
3422static void cik_scratch_init(struct radeon_device *rdev)
3423{
3424	int i;
3425
3426	rdev->scratch.num_reg = 7;
3427	rdev->scratch.reg_base = SCRATCH_REG0;
3428	for (i = 0; i < rdev->scratch.num_reg; i++) {
3429		rdev->scratch.free[i] = true;
3430		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431	}
3432}
3433
3434/**
3435 * cik_ring_test - basic gfx ring test
3436 *
3437 * @rdev: radeon_device pointer
3438 * @ring: radeon_ring structure holding ring information
3439 *
3440 * Allocate a scratch register and write to it using the gfx ring (CIK).
3441 * Provides a basic gfx ring test to verify that the ring is working.
3442 * Used by cik_cp_gfx_resume();
3443 * Returns 0 on success, error on failure.
3444 */
3445int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446{
3447	uint32_t scratch;
3448	uint32_t tmp = 0;
3449	unsigned i;
3450	int r;
3451
3452	r = radeon_scratch_get(rdev, &scratch);
3453	if (r) {
3454		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455		return r;
3456	}
3457	WREG32(scratch, 0xCAFEDEAD);
3458	r = radeon_ring_lock(rdev, ring, 3);
3459	if (r) {
3460		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461		radeon_scratch_free(rdev, scratch);
3462		return r;
3463	}
3464	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466	radeon_ring_write(ring, 0xDEADBEEF);
3467	radeon_ring_unlock_commit(rdev, ring, false);
3468
3469	for (i = 0; i < rdev->usec_timeout; i++) {
3470		tmp = RREG32(scratch);
3471		if (tmp == 0xDEADBEEF)
3472			break;
3473		udelay(1);
3474	}
3475	if (i < rdev->usec_timeout) {
3476		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477	} else {
3478		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479			  ring->idx, scratch, tmp);
3480		r = -EINVAL;
3481	}
3482	radeon_scratch_free(rdev, scratch);
3483	return r;
3484}
3485
3486/**
3487 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488 *
3489 * @rdev: radeon_device pointer
3490 * @ridx: radeon ring index
3491 *
3492 * Emits an hdp flush on the cp.
3493 */
3494static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495				       int ridx)
3496{
3497	struct radeon_ring *ring = &rdev->ring[ridx];
3498	u32 ref_and_mask;
3499
3500	switch (ring->idx) {
3501	case CAYMAN_RING_TYPE_CP1_INDEX:
3502	case CAYMAN_RING_TYPE_CP2_INDEX:
3503	default:
3504		switch (ring->me) {
3505		case 0:
3506			ref_and_mask = CP2 << ring->pipe;
3507			break;
3508		case 1:
3509			ref_and_mask = CP6 << ring->pipe;
3510			break;
3511		default:
3512			return;
3513		}
3514		break;
3515	case RADEON_RING_TYPE_GFX_INDEX:
3516		ref_and_mask = CP0;
3517		break;
3518	}
3519
3520	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3523				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3524	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526	radeon_ring_write(ring, ref_and_mask);
3527	radeon_ring_write(ring, ref_and_mask);
3528	radeon_ring_write(ring, 0x20); /* poll interval */
3529}
3530
3531/**
3532 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533 *
3534 * @rdev: radeon_device pointer
3535 * @fence: radeon fence object
3536 *
3537 * Emits a fence sequnce number on the gfx ring and flushes
3538 * GPU caches.
3539 */
3540void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541			     struct radeon_fence *fence)
3542{
3543	struct radeon_ring *ring = &rdev->ring[fence->ring];
3544	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545
3546	/* Workaround for cache flush problems. First send a dummy EOP
3547	 * event down the pipe with seq one below.
3548	 */
3549	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551				 EOP_TC_ACTION_EN |
3552				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553				 EVENT_INDEX(5)));
3554	radeon_ring_write(ring, addr & 0xfffffffc);
3555	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556				DATA_SEL(1) | INT_SEL(0));
3557	radeon_ring_write(ring, fence->seq - 1);
3558	radeon_ring_write(ring, 0);
3559
3560	/* Then send the real EOP event down the pipe. */
3561	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563				 EOP_TC_ACTION_EN |
3564				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565				 EVENT_INDEX(5)));
3566	radeon_ring_write(ring, addr & 0xfffffffc);
3567	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568	radeon_ring_write(ring, fence->seq);
3569	radeon_ring_write(ring, 0);
3570}
3571
3572/**
3573 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574 *
3575 * @rdev: radeon_device pointer
3576 * @fence: radeon fence object
3577 *
3578 * Emits a fence sequnce number on the compute ring and flushes
3579 * GPU caches.
3580 */
3581void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582				 struct radeon_fence *fence)
3583{
3584	struct radeon_ring *ring = &rdev->ring[fence->ring];
3585	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586
3587	/* RELEASE_MEM - flush caches, send int */
3588	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590				 EOP_TC_ACTION_EN |
3591				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592				 EVENT_INDEX(5)));
3593	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594	radeon_ring_write(ring, addr & 0xfffffffc);
3595	radeon_ring_write(ring, upper_32_bits(addr));
3596	radeon_ring_write(ring, fence->seq);
3597	radeon_ring_write(ring, 0);
3598}
3599
3600/**
3601 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602 *
3603 * @rdev: radeon_device pointer
3604 * @ring: radeon ring buffer object
3605 * @semaphore: radeon semaphore object
3606 * @emit_wait: Is this a semaphore wait?
3607 *
3608 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609 * from running ahead of semaphore waits.
3610 */
3611bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612			     struct radeon_ring *ring,
3613			     struct radeon_semaphore *semaphore,
3614			     bool emit_wait)
3615{
3616	uint64_t addr = semaphore->gpu_addr;
3617	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618
3619	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620	radeon_ring_write(ring, lower_32_bits(addr));
3621	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622
3623	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624		/* Prevent the PFP from running ahead of the semaphore wait */
3625		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626		radeon_ring_write(ring, 0x0);
3627	}
3628
3629	return true;
3630}
3631
3632/**
3633 * cik_copy_cpdma - copy pages using the CP DMA engine
3634 *
3635 * @rdev: radeon_device pointer
3636 * @src_offset: src GPU address
3637 * @dst_offset: dst GPU address
3638 * @num_gpu_pages: number of GPU pages to xfer
3639 * @resv: reservation object to sync to
3640 *
3641 * Copy GPU paging using the CP DMA engine (CIK+).
3642 * Used by the radeon ttm implementation to move pages if
3643 * registered as the asic copy callback.
3644 */
3645struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646				    uint64_t src_offset, uint64_t dst_offset,
3647				    unsigned num_gpu_pages,
3648				    struct dma_resv *resv)
3649{
3650	struct radeon_fence *fence;
3651	struct radeon_sync sync;
3652	int ring_index = rdev->asic->copy.blit_ring_index;
3653	struct radeon_ring *ring = &rdev->ring[ring_index];
3654	u32 size_in_bytes, cur_size_in_bytes, control;
3655	int i, num_loops;
3656	int r = 0;
3657
3658	radeon_sync_create(&sync);
3659
3660	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663	if (r) {
3664		DRM_ERROR("radeon: moving bo (%d).\n", r);
3665		radeon_sync_free(rdev, &sync, NULL);
3666		return ERR_PTR(r);
3667	}
3668
3669	radeon_sync_resv(rdev, &sync, resv, false);
3670	radeon_sync_rings(rdev, &sync, ring->idx);
3671
3672	for (i = 0; i < num_loops; i++) {
3673		cur_size_in_bytes = size_in_bytes;
3674		if (cur_size_in_bytes > 0x1fffff)
3675			cur_size_in_bytes = 0x1fffff;
3676		size_in_bytes -= cur_size_in_bytes;
3677		control = 0;
3678		if (size_in_bytes == 0)
3679			control |= PACKET3_DMA_DATA_CP_SYNC;
3680		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681		radeon_ring_write(ring, control);
3682		radeon_ring_write(ring, lower_32_bits(src_offset));
3683		radeon_ring_write(ring, upper_32_bits(src_offset));
3684		radeon_ring_write(ring, lower_32_bits(dst_offset));
3685		radeon_ring_write(ring, upper_32_bits(dst_offset));
3686		radeon_ring_write(ring, cur_size_in_bytes);
3687		src_offset += cur_size_in_bytes;
3688		dst_offset += cur_size_in_bytes;
3689	}
3690
3691	r = radeon_fence_emit(rdev, &fence, ring->idx);
3692	if (r) {
3693		radeon_ring_unlock_undo(rdev, ring);
3694		radeon_sync_free(rdev, &sync, NULL);
3695		return ERR_PTR(r);
3696	}
3697
3698	radeon_ring_unlock_commit(rdev, ring, false);
3699	radeon_sync_free(rdev, &sync, fence);
3700
3701	return fence;
3702}
3703
3704/*
3705 * IB stuff
3706 */
3707/**
3708 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709 *
3710 * @rdev: radeon_device pointer
3711 * @ib: radeon indirect buffer object
3712 *
3713 * Emits a DE (drawing engine) or CE (constant engine) IB
3714 * on the gfx ring.  IBs are usually generated by userspace
3715 * acceleration drivers and submitted to the kernel for
3716 * scheduling on the ring.  This function schedules the IB
3717 * on the gfx ring for execution by the GPU.
3718 */
3719void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720{
3721	struct radeon_ring *ring = &rdev->ring[ib->ring];
3722	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723	u32 header, control = INDIRECT_BUFFER_VALID;
3724
3725	if (ib->is_const_ib) {
3726		/* set switch buffer packet before const IB */
3727		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728		radeon_ring_write(ring, 0);
3729
3730		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731	} else {
3732		u32 next_rptr;
3733		if (ring->rptr_save_reg) {
3734			next_rptr = ring->wptr + 3 + 4;
3735			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736			radeon_ring_write(ring, ((ring->rptr_save_reg -
3737						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3738			radeon_ring_write(ring, next_rptr);
3739		} else if (rdev->wb.enabled) {
3740			next_rptr = ring->wptr + 5 + 4;
3741			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745			radeon_ring_write(ring, next_rptr);
3746		}
3747
3748		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749	}
3750
3751	control |= ib->length_dw | (vm_id << 24);
3752
3753	radeon_ring_write(ring, header);
3754	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756	radeon_ring_write(ring, control);
3757}
3758
3759/**
3760 * cik_ib_test - basic gfx ring IB test
3761 *
3762 * @rdev: radeon_device pointer
3763 * @ring: radeon_ring structure holding ring information
3764 *
3765 * Allocate an IB and execute it on the gfx ring (CIK).
3766 * Provides a basic gfx ring test to verify that IBs are working.
3767 * Returns 0 on success, error on failure.
3768 */
3769int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770{
3771	struct radeon_ib ib;
3772	uint32_t scratch;
3773	uint32_t tmp = 0;
3774	unsigned i;
3775	int r;
3776
3777	r = radeon_scratch_get(rdev, &scratch);
3778	if (r) {
3779		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780		return r;
3781	}
3782	WREG32(scratch, 0xCAFEDEAD);
3783	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784	if (r) {
3785		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786		radeon_scratch_free(rdev, scratch);
3787		return r;
3788	}
3789	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791	ib.ptr[2] = 0xDEADBEEF;
3792	ib.length_dw = 3;
3793	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794	if (r) {
3795		radeon_scratch_free(rdev, scratch);
3796		radeon_ib_free(rdev, &ib);
3797		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798		return r;
3799	}
3800	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801		RADEON_USEC_IB_TEST_TIMEOUT));
3802	if (r < 0) {
3803		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804		radeon_scratch_free(rdev, scratch);
3805		radeon_ib_free(rdev, &ib);
3806		return r;
3807	} else if (r == 0) {
3808		DRM_ERROR("radeon: fence wait timed out.\n");
3809		radeon_scratch_free(rdev, scratch);
3810		radeon_ib_free(rdev, &ib);
3811		return -ETIMEDOUT;
3812	}
3813	r = 0;
3814	for (i = 0; i < rdev->usec_timeout; i++) {
3815		tmp = RREG32(scratch);
3816		if (tmp == 0xDEADBEEF)
3817			break;
3818		udelay(1);
3819	}
3820	if (i < rdev->usec_timeout) {
3821		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822	} else {
3823		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824			  scratch, tmp);
3825		r = -EINVAL;
3826	}
3827	radeon_scratch_free(rdev, scratch);
3828	radeon_ib_free(rdev, &ib);
3829	return r;
3830}
3831
3832/*
3833 * CP.
 * On CIK, gfx and compute now have independent command processors.
3835 *
3836 * GFX
3837 * Gfx consists of a single ring and can process both gfx jobs and
3838 * compute jobs.  The gfx CP consists of three microengines (ME):
3839 * PFP - Pre-Fetch Parser
3840 * ME - Micro Engine
3841 * CE - Constant Engine
3842 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3844 * used by the DE so that they can be loaded into cache in parallel
3845 * while the DE is processing state update packets.
3846 *
3847 * Compute
3848 * The compute CP consists of two microengines (ME):
3849 * MEC1 - Compute MicroEngine 1
3850 * MEC2 - Compute MicroEngine 2
3851 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3852 * The queues are exposed to userspace and are programmed directly
3853 * by the compute runtime.
3854 */
3855/**
3856 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857 *
3858 * @rdev: radeon_device pointer
3859 * @enable: enable or disable the MEs
3860 *
3861 * Halts or unhalts the gfx MEs.
3862 */
3863static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864{
3865	if (enable)
3866		WREG32(CP_ME_CNTL, 0);
3867	else {
3868		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872	}
3873	udelay(50);
3874}
3875
3876/**
3877 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878 *
3879 * @rdev: radeon_device pointer
3880 *
3881 * Loads the gfx PFP, ME, and CE ucode.
3882 * Returns 0 for success, -EINVAL if the ucode is not available.
3883 */
3884static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885{
3886	int i;
3887
3888	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889		return -EINVAL;
3890
3891	cik_cp_gfx_enable(rdev, false);
3892
3893	if (rdev->new_fw) {
3894		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896		const struct gfx_firmware_header_v1_0 *ce_hdr =
3897			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898		const struct gfx_firmware_header_v1_0 *me_hdr =
3899			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900		const __le32 *fw_data;
3901		u32 fw_size;
3902
3903		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906
3907		/* PFP */
3908		fw_data = (const __le32 *)
3909			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911		WREG32(CP_PFP_UCODE_ADDR, 0);
3912		for (i = 0; i < fw_size; i++)
3913			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915
3916		/* CE */
3917		fw_data = (const __le32 *)
3918			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920		WREG32(CP_CE_UCODE_ADDR, 0);
3921		for (i = 0; i < fw_size; i++)
3922			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924
3925		/* ME */
3926		fw_data = (const __be32 *)
3927			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929		WREG32(CP_ME_RAM_WADDR, 0);
3930		for (i = 0; i < fw_size; i++)
3931			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934	} else {
3935		const __be32 *fw_data;
3936
3937		/* PFP */
3938		fw_data = (const __be32 *)rdev->pfp_fw->data;
3939		WREG32(CP_PFP_UCODE_ADDR, 0);
3940		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942		WREG32(CP_PFP_UCODE_ADDR, 0);
3943
3944		/* CE */
3945		fw_data = (const __be32 *)rdev->ce_fw->data;
3946		WREG32(CP_CE_UCODE_ADDR, 0);
3947		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949		WREG32(CP_CE_UCODE_ADDR, 0);
3950
3951		/* ME */
3952		fw_data = (const __be32 *)rdev->me_fw->data;
3953		WREG32(CP_ME_RAM_WADDR, 0);
3954		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956		WREG32(CP_ME_RAM_WADDR, 0);
3957	}
3958
3959	return 0;
3960}
3961
3962/**
3963 * cik_cp_gfx_start - start the gfx ring
3964 *
3965 * @rdev: radeon_device pointer
3966 *
3967 * Enables the ring and loads the clear state context and other
3968 * packets required to init the ring.
3969 * Returns 0 for success, error for failure.
3970 */
3971static int cik_cp_gfx_start(struct radeon_device *rdev)
3972{
3973	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974	int r, i;
3975
3976	/* init the CP */
3977	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978	WREG32(CP_ENDIAN_SWAP, 0);
3979	WREG32(CP_DEVICE_ID, 1);
3980
3981	cik_cp_gfx_enable(rdev, true);
3982
3983	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984	if (r) {
3985		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986		return r;
3987	}
3988
3989	/* init the CE partitions.  CE only used for gfx on CIK */
3990	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992	radeon_ring_write(ring, 0x8000);
3993	radeon_ring_write(ring, 0x8000);
3994
3995	/* setup clear context state */
3996	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998
3999	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000	radeon_ring_write(ring, 0x80000000);
4001	radeon_ring_write(ring, 0x80000000);
4002
4003	for (i = 0; i < cik_default_size; i++)
4004		radeon_ring_write(ring, cik_default_state[i]);
4005
4006	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008
4009	/* set clear context state */
4010	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011	radeon_ring_write(ring, 0);
4012
4013	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014	radeon_ring_write(ring, 0x00000316);
4015	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017
4018	radeon_ring_unlock_commit(rdev, ring, false);
4019
4020	return 0;
4021}
4022
4023/**
4024 * cik_cp_gfx_fini - stop the gfx ring
4025 *
4026 * @rdev: radeon_device pointer
4027 *
4028 * Stop the gfx ring and tear down the driver ring
4029 * info.
4030 */
4031static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032{
4033	cik_cp_gfx_enable(rdev, false);
4034	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035}
4036
4037/**
4038 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Program the location and size of the gfx ring buffer
4043 * and test it to make sure it's working.
4044 * Returns 0 for success, error for failure.
4045 */
4046static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047{
4048	struct radeon_ring *ring;
4049	u32 tmp;
4050	u32 rb_bufsz;
4051	u64 rb_addr;
4052	int r;
4053
4054	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055	if (rdev->family != CHIP_HAWAII)
4056		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057
4058	/* Set the write pointer delay */
4059	WREG32(CP_RB_WPTR_DELAY, 0);
4060
4061	/* set the RB to use vmid 0 */
4062	WREG32(CP_RB_VMID, 0);
4063
4064	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065
4066	/* ring 0 - compute and gfx */
4067	/* Set ring buffer size */
4068	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069	rb_bufsz = order_base_2(ring->ring_size / 8);
4070	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071#ifdef __BIG_ENDIAN
4072	tmp |= BUF_SWAP_32BIT;
4073#endif
4074	WREG32(CP_RB0_CNTL, tmp);
4075
4076	/* Initialize the ring buffer's read and write pointers */
4077	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078	ring->wptr = 0;
4079	WREG32(CP_RB0_WPTR, ring->wptr);
4080
4081	/* set the wb address wether it's enabled or not */
4082	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084
4085	/* scratch register shadowing is no longer supported */
4086	WREG32(SCRATCH_UMSK, 0);
4087
4088	if (!rdev->wb.enabled)
4089		tmp |= RB_NO_UPDATE;
4090
4091	mdelay(1);
4092	WREG32(CP_RB0_CNTL, tmp);
4093
4094	rb_addr = ring->gpu_addr >> 8;
4095	WREG32(CP_RB0_BASE, rb_addr);
4096	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097
4098	/* start the ring */
4099	cik_cp_gfx_start(rdev);
4100	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102	if (r) {
4103		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104		return r;
4105	}
4106
4107	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109
4110	return 0;
4111}
4112
4113u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114		     struct radeon_ring *ring)
4115{
4116	u32 rptr;
4117
4118	if (rdev->wb.enabled)
4119		rptr = rdev->wb.wb[ring->rptr_offs/4];
4120	else
4121		rptr = RREG32(CP_RB0_RPTR);
4122
4123	return rptr;
4124}
4125
4126u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127		     struct radeon_ring *ring)
4128{
4129	return RREG32(CP_RB0_WPTR);
4130}
4131
/* Commit the gfx ring write pointer to the hardware. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to make sure the write has been posted */
	(void)RREG32(CP_RB0_WPTR);
}
4138
/* Fetch the compute ring read pointer, either from the writeback page
 * or from the per-queue HQD register (selected via SRBM).
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* select this ring's me/pipe/queue before touching HQD regs */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4156
/* Fetch the compute ring write pointer, either from the writeback page
 * or from the per-queue HQD register (selected via SRBM).
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* select this ring's me/pipe/queue before touching HQD regs */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4175
/* Commit the compute ring write pointer: update the writeback copy and
 * kick the queue through its doorbell.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4183
/* Quiesce one compute queue: disable wptr polling, request an HQD
 * dequeue, wait for the queue to go inactive, and reset its pointers.
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable()).
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait for the hardware to deactivate the queue */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4208
4209/**
4210 * cik_cp_compute_enable - enable/disable the compute CP MEs
4211 *
4212 * @rdev: radeon_device pointer
4213 * @enable: enable or disable the MEs
4214 *
4215 * Halts or unhalts the compute MEs.
4216 */
4217static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218{
4219	if (enable)
4220		WREG32(CP_MEC_CNTL, 0);
4221	else {
4222		/*
4223		 * To make hibernation reliable we need to clear compute ring
4224		 * configuration before halting the compute ring.
4225		 */
4226		mutex_lock(&rdev->srbm_mutex);
4227		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229		mutex_unlock(&rdev->srbm_mutex);
4230
4231		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234	}
4235	udelay(50);
4236}
4237
4238/**
4239 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4240 *
4241 * @rdev: radeon_device pointer
4242 *
4243 * Loads the compute MEC1&2 ucode.
4244 * Returns 0 for success, -EINVAL if the ucode is not available.
4245 */
4246static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4247{
4248	int i;
4249
4250	if (!rdev->mec_fw)
4251		return -EINVAL;
4252
4253	cik_cp_compute_enable(rdev, false);
4254
4255	if (rdev->new_fw) {
4256		const struct gfx_firmware_header_v1_0 *mec_hdr =
4257			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4258		const __le32 *fw_data;
4259		u32 fw_size;
4260
4261		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4262
4263		/* MEC1 */
4264		fw_data = (const __le32 *)
4265			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4266		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4267		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4268		for (i = 0; i < fw_size; i++)
4269			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4270		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4271
4272		/* MEC2 */
4273		if (rdev->family == CHIP_KAVERI) {
4274			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4275				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4276
4277			fw_data = (const __le32 *)
4278				(rdev->mec2_fw->data +
4279				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4280			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4281			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282			for (i = 0; i < fw_size; i++)
4283				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4284			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4285		}
4286	} else {
4287		const __be32 *fw_data;
4288
4289		/* MEC1 */
4290		fw_data = (const __be32 *)rdev->mec_fw->data;
4291		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4292		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4293			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4294		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295
4296		if (rdev->family == CHIP_KAVERI) {
4297			/* MEC2 */
4298			fw_data = (const __be32 *)rdev->mec_fw->data;
4299			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4300			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4301				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4302			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4303		}
4304	}
4305
4306	return 0;
4307}
4308
4309/**
4310 * cik_cp_compute_start - start the compute queues
4311 *
4312 * @rdev: radeon_device pointer
4313 *
4314 * Enable the compute queues.
4315 * Returns 0 for success, error for failure.
4316 */
4317static int cik_cp_compute_start(struct radeon_device *rdev)
4318{
4319	cik_cp_compute_enable(rdev, true);
4320
4321	return 0;
4322}
4323
4324/**
4325 * cik_cp_compute_fini - stop the compute queues
4326 *
4327 * @rdev: radeon_device pointer
4328 *
4329 * Stop the compute queues and tear down the driver queue
4330 * info.
4331 */
4332static void cik_cp_compute_fini(struct radeon_device *rdev)
4333{
4334	int i, idx, r;
4335
4336	cik_cp_compute_enable(rdev, false);
4337
4338	for (i = 0; i < 2; i++) {
4339		if (i == 0)
4340			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341		else
4342			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343
4344		if (rdev->ring[idx].mqd_obj) {
4345			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346			if (unlikely(r != 0))
4347				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348
4349			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351
4352			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353			rdev->ring[idx].mqd_obj = NULL;
4354		}
4355	}
4356}
4357
/* Tear down the MEC HPD EOP buffer object allocated by cik_mec_init(). */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4373
4374#define MEC_HPD_SIZE 2048
4375
4376static int cik_mec_init(struct radeon_device *rdev)
4377{
4378	int r;
4379	u32 *hpd;
4380
4381	/*
4382	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384	 */
4385	if (rdev->family == CHIP_KAVERI)
4386		rdev->mec.num_mec = 2;
4387	else
4388		rdev->mec.num_mec = 1;
4389	rdev->mec.num_pipe = 4;
4390	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391
4392	if (rdev->mec.hpd_eop_obj == NULL) {
4393		r = radeon_bo_create(rdev,
4394				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395				     PAGE_SIZE, true,
4396				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397				     &rdev->mec.hpd_eop_obj);
4398		if (r) {
4399			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4400			return r;
4401		}
4402	}
4403
4404	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405	if (unlikely(r != 0)) {
4406		cik_mec_fini(rdev);
4407		return r;
4408	}
4409	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410			  &rdev->mec.hpd_eop_gpu_addr);
4411	if (r) {
4412		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4413		cik_mec_fini(rdev);
4414		return r;
4415	}
4416	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417	if (r) {
4418		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4419		cik_mec_fini(rdev);
4420		return r;
4421	}
4422
4423	/* clear memory.  Not sure if this is required or not */
4424	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425
4426	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428
4429	return 0;
4430}
4431
4432struct hqd_registers
4433{
4434	u32 cp_mqd_base_addr;
4435	u32 cp_mqd_base_addr_hi;
4436	u32 cp_hqd_active;
4437	u32 cp_hqd_vmid;
4438	u32 cp_hqd_persistent_state;
4439	u32 cp_hqd_pipe_priority;
4440	u32 cp_hqd_queue_priority;
4441	u32 cp_hqd_quantum;
4442	u32 cp_hqd_pq_base;
4443	u32 cp_hqd_pq_base_hi;
4444	u32 cp_hqd_pq_rptr;
4445	u32 cp_hqd_pq_rptr_report_addr;
4446	u32 cp_hqd_pq_rptr_report_addr_hi;
4447	u32 cp_hqd_pq_wptr_poll_addr;
4448	u32 cp_hqd_pq_wptr_poll_addr_hi;
4449	u32 cp_hqd_pq_doorbell_control;
4450	u32 cp_hqd_pq_wptr;
4451	u32 cp_hqd_pq_control;
4452	u32 cp_hqd_ib_base_addr;
4453	u32 cp_hqd_ib_base_addr_hi;
4454	u32 cp_hqd_ib_rptr;
4455	u32 cp_hqd_ib_control;
4456	u32 cp_hqd_iq_timer;
4457	u32 cp_hqd_iq_rptr;
4458	u32 cp_hqd_dequeue_request;
4459	u32 cp_hqd_dma_offload;
4460	u32 cp_hqd_sema_cmd;
4461	u32 cp_hqd_msg_type;
4462	u32 cp_hqd_atomic0_preop_lo;
4463	u32 cp_hqd_atomic0_preop_hi;
4464	u32 cp_hqd_atomic1_preop_lo;
4465	u32 cp_hqd_atomic1_preop_hi;
4466	u32 cp_hqd_hq_scheduler0;
4467	u32 cp_hqd_hq_scheduler1;
4468	u32 cp_mqd_control;
4469};
4470
4471struct bonaire_mqd
4472{
4473	u32 header;
4474	u32 dispatch_initiator;
4475	u32 dimensions[3];
4476	u32 start_idx[3];
4477	u32 num_threads[3];
4478	u32 pipeline_stat_enable;
4479	u32 perf_counter_enable;
4480	u32 pgm[2];
4481	u32 tba[2];
4482	u32 tma[2];
4483	u32 pgm_rsrc[2];
4484	u32 vmid;
4485	u32 resource_limits;
4486	u32 static_thread_mgmt01[2];
4487	u32 tmp_ring_size;
4488	u32 static_thread_mgmt23[2];
4489	u32 restart[3];
4490	u32 thread_trace_enable;
4491	u32 reserved1;
4492	u32 user_data[16];
4493	u32 vgtcs_invoke_count[2];
4494	struct hqd_registers queue_state;
4495	u32 dequeue_cntr;
4496	u32 interrupt_queue[64];
4497};
4498
4499/**
4500 * cik_cp_compute_resume - setup the compute queue registers
4501 *
4502 * @rdev: radeon_device pointer
4503 *
4504 * Program the compute queues and test them to make sure they
4505 * are working.
4506 * Returns 0 for success, error for failure.
4507 */
4508static int cik_cp_compute_resume(struct radeon_device *rdev)
4509{
4510	int r, i, j, idx;
4511	u32 tmp;
4512	bool use_doorbell = true;
4513	u64 hqd_gpu_addr;
4514	u64 mqd_gpu_addr;
4515	u64 eop_gpu_addr;
4516	u64 wb_gpu_addr;
4517	u32 *buf;
4518	struct bonaire_mqd *mqd;
4519
4520	r = cik_cp_compute_start(rdev);
4521	if (r)
4522		return r;
4523
4524	/* fix up chicken bits */
4525	tmp = RREG32(CP_CPF_DEBUG);
4526	tmp |= (1 << 23);
4527	WREG32(CP_CPF_DEBUG, tmp);
4528
4529	/* init the pipes */
4530	mutex_lock(&rdev->srbm_mutex);
4531
4532	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4533		int me = (i < 4) ? 1 : 2;
4534		int pipe = (i < 4) ? i : (i - 4);
4535
4536		cik_srbm_select(rdev, me, pipe, 0, 0);
4537
4538		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4539		/* write the EOP addr */
4540		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4541		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4542
4543		/* set the VMID assigned */
4544		WREG32(CP_HPD_EOP_VMID, 0);
4545
4546		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4547		tmp = RREG32(CP_HPD_EOP_CONTROL);
4548		tmp &= ~EOP_SIZE_MASK;
4549		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4550		WREG32(CP_HPD_EOP_CONTROL, tmp);
4551
4552	}
4553	cik_srbm_select(rdev, 0, 0, 0, 0);
4554	mutex_unlock(&rdev->srbm_mutex);
4555
4556	/* init the queues.  Just two for now. */
4557	for (i = 0; i < 2; i++) {
4558		if (i == 0)
4559			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4560		else
4561			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4562
4563		if (rdev->ring[idx].mqd_obj == NULL) {
4564			r = radeon_bo_create(rdev,
4565					     sizeof(struct bonaire_mqd),
4566					     PAGE_SIZE, true,
4567					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4568					     NULL, &rdev->ring[idx].mqd_obj);
4569			if (r) {
4570				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4571				return r;
4572			}
4573		}
4574
4575		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4576		if (unlikely(r != 0)) {
4577			cik_cp_compute_fini(rdev);
4578			return r;
4579		}
4580		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4581				  &mqd_gpu_addr);
4582		if (r) {
4583			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4584			cik_cp_compute_fini(rdev);
4585			return r;
4586		}
4587		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4588		if (r) {
4589			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4590			cik_cp_compute_fini(rdev);
4591			return r;
4592		}
4593
4594		/* init the mqd struct */
4595		memset(buf, 0, sizeof(struct bonaire_mqd));
4596
4597		mqd = (struct bonaire_mqd *)buf;
4598		mqd->header = 0xC0310800;
4599		mqd->static_thread_mgmt01[0] = 0xffffffff;
4600		mqd->static_thread_mgmt01[1] = 0xffffffff;
4601		mqd->static_thread_mgmt23[0] = 0xffffffff;
4602		mqd->static_thread_mgmt23[1] = 0xffffffff;
4603
4604		mutex_lock(&rdev->srbm_mutex);
4605		cik_srbm_select(rdev, rdev->ring[idx].me,
4606				rdev->ring[idx].pipe,
4607				rdev->ring[idx].queue, 0);
4608
4609		/* disable wptr polling */
4610		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611		tmp &= ~WPTR_POLL_EN;
4612		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613
4614		/* enable doorbell? */
4615		mqd->queue_state.cp_hqd_pq_doorbell_control =
4616			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4617		if (use_doorbell)
4618			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4619		else
4620			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4621		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4622		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4623
4624		/* disable the queue if it's active */
4625		mqd->queue_state.cp_hqd_dequeue_request = 0;
4626		mqd->queue_state.cp_hqd_pq_rptr = 0;
4627		mqd->queue_state.cp_hqd_pq_wptr= 0;
4628		if (RREG32(CP_HQD_ACTIVE) & 1) {
4629			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4630			for (j = 0; j < rdev->usec_timeout; j++) {
4631				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4632					break;
4633				udelay(1);
4634			}
4635			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4636			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4637			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4638		}
4639
4640		/* set the pointer to the MQD */
4641		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4642		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4643		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4644		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4645		/* set MQD vmid to 0 */
4646		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4647		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4648		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4649
4650		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4651		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4652		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4653		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4654		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4655		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4656
4657		/* set up the HQD, this is similar to CP_RB0_CNTL */
4658		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4659		mqd->queue_state.cp_hqd_pq_control &=
4660			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4661
4662		mqd->queue_state.cp_hqd_pq_control |=
4663			order_base_2(rdev->ring[idx].ring_size / 8);
4664		mqd->queue_state.cp_hqd_pq_control |=
4665			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4666#ifdef __BIG_ENDIAN
4667		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4668#endif
4669		mqd->queue_state.cp_hqd_pq_control &=
4670			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4671		mqd->queue_state.cp_hqd_pq_control |=
4672			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4673		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4674
4675		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4676		if (i == 0)
4677			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4678		else
4679			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4680		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4681		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4682		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4683		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4684		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4685
4686		/* set the wb address wether it's enabled or not */
4687		if (i == 0)
4688			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4689		else
4690			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4691		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4692		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4693			upper_32_bits(wb_gpu_addr) & 0xffff;
4694		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4695		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4696		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4697		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4698
4699		/* enable the doorbell if requested */
4700		if (use_doorbell) {
4701			mqd->queue_state.cp_hqd_pq_doorbell_control =
4702				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4703			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4704			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4705				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4706			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4707			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4708				~(DOORBELL_SOURCE | DOORBELL_HIT);
4709
4710		} else {
4711			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4712		}
4713		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4714		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4715
4716		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4717		rdev->ring[idx].wptr = 0;
4718		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4719		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4720		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4721
4722		/* set the vmid for the queue */
4723		mqd->queue_state.cp_hqd_vmid = 0;
4724		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4725
4726		/* activate the queue */
4727		mqd->queue_state.cp_hqd_active = 1;
4728		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4729
4730		cik_srbm_select(rdev, 0, 0, 0, 0);
4731		mutex_unlock(&rdev->srbm_mutex);
4732
4733		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4734		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4735
4736		rdev->ring[idx].ready = true;
4737		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4738		if (r)
4739			rdev->ring[idx].ready = false;
4740	}
4741
4742	return 0;
4743}
4744
/**
 * cik_cp_enable - enable/disable both CP blocks
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable, false to halt
 *
 * Enable or halt the gfx and compute command processors (CIK).
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4750
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Load the microcode for the gfx CP first, then the compute CP.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
4764
/**
 * cik_cp_fini - tear down both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the gfx and compute command processor rings (CIK).
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4770
4771static int cik_cp_resume(struct radeon_device *rdev)
4772{
4773	int r;
4774
4775	cik_enable_gui_idle_interrupt(rdev, false);
4776
4777	r = cik_cp_load_microcode(rdev);
4778	if (r)
4779		return r;
4780
4781	r = cik_cp_gfx_resume(rdev);
4782	if (r)
4783		return r;
4784	r = cik_cp_compute_resume(rdev);
4785	if (r)
4786		return r;
4787
4788	cik_enable_gui_idle_interrupt(rdev, true);
4789
4790	return 0;
4791}
4792
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log to
 * help diagnose a hung GPU before and after a reset attempt (CIK).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share one register layout at different offsets */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4832
4833/**
4834 * cik_gpu_check_soft_reset - check which blocks are busy
4835 *
4836 * @rdev: radeon_device pointer
4837 *
4838 * Check which blocks are busy and return the relevant reset
4839 * mask to be used by cik_gpu_soft_reset().
4840 * Returns a mask of the blocks to be reset.
4841 */
4842u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843{
4844	u32 reset_mask = 0;
4845	u32 tmp;
4846
4847	/* GRBM_STATUS */
4848	tmp = RREG32(GRBM_STATUS);
4849	if (tmp & (PA_BUSY | SC_BUSY |
4850		   BCI_BUSY | SX_BUSY |
4851		   TA_BUSY | VGT_BUSY |
4852		   DB_BUSY | CB_BUSY |
4853		   GDS_BUSY | SPI_BUSY |
4854		   IA_BUSY | IA_BUSY_NO_DMA))
4855		reset_mask |= RADEON_RESET_GFX;
4856
4857	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858		reset_mask |= RADEON_RESET_CP;
4859
4860	/* GRBM_STATUS2 */
4861	tmp = RREG32(GRBM_STATUS2);
4862	if (tmp & RLC_BUSY)
4863		reset_mask |= RADEON_RESET_RLC;
4864
4865	/* SDMA0_STATUS_REG */
4866	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867	if (!(tmp & SDMA_IDLE))
4868		reset_mask |= RADEON_RESET_DMA;
4869
4870	/* SDMA1_STATUS_REG */
4871	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872	if (!(tmp & SDMA_IDLE))
4873		reset_mask |= RADEON_RESET_DMA1;
4874
4875	/* SRBM_STATUS2 */
4876	tmp = RREG32(SRBM_STATUS2);
4877	if (tmp & SDMA_BUSY)
4878		reset_mask |= RADEON_RESET_DMA;
4879
4880	if (tmp & SDMA1_BUSY)
4881		reset_mask |= RADEON_RESET_DMA1;
4882
4883	/* SRBM_STATUS */
4884	tmp = RREG32(SRBM_STATUS);
4885
4886	if (tmp & IH_BUSY)
4887		reset_mask |= RADEON_RESET_IH;
4888
4889	if (tmp & SEM_BUSY)
4890		reset_mask |= RADEON_RESET_SEM;
4891
4892	if (tmp & GRBM_RQ_PENDING)
4893		reset_mask |= RADEON_RESET_GRBM;
4894
4895	if (tmp & VMC_BUSY)
4896		reset_mask |= RADEON_RESET_VMC;
4897
4898	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899		   MCC_BUSY | MCD_BUSY))
4900		reset_mask |= RADEON_RESET_MC;
4901
4902	if (evergreen_is_display_hung(rdev))
4903		reset_mask |= RADEON_RESET_DISPLAY;
4904
4905	/* Skip MC reset as it's mostly likely not hung, just busy */
4906	if (reset_mask & RADEON_RESET_MC) {
4907		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908		reset_mask &= ~RADEON_RESET_MC;
4909	}
4910
4911	return reset_mask;
4912}
4913
4914/**
4915 * cik_gpu_soft_reset - soft reset GPU
4916 *
4917 * @rdev: radeon_device pointer
4918 * @reset_mask: mask of which blocks to reset
4919 *
4920 * Soft reset the blocks specified in @reset_mask.
4921 */
4922static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4923{
4924	struct evergreen_mc_save save;
4925	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4926	u32 tmp;
4927
4928	if (reset_mask == 0)
4929		return;
4930
4931	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4932
4933	cik_print_gpu_status_regs(rdev);
4934	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4935		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4936	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4937		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4938
4939	/* disable CG/PG */
4940	cik_fini_pg(rdev);
4941	cik_fini_cg(rdev);
4942
4943	/* stop the rlc */
4944	cik_rlc_stop(rdev);
4945
4946	/* Disable GFX parsing/prefetching */
4947	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4948
4949	/* Disable MEC parsing/prefetching */
4950	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4951
4952	if (reset_mask & RADEON_RESET_DMA) {
4953		/* sdma0 */
4954		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4955		tmp |= SDMA_HALT;
4956		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4957	}
4958	if (reset_mask & RADEON_RESET_DMA1) {
4959		/* sdma1 */
4960		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4961		tmp |= SDMA_HALT;
4962		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4963	}
4964
4965	evergreen_mc_stop(rdev, &save);
4966	if (evergreen_mc_wait_for_idle(rdev)) {
4967		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4968	}
4969
4970	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4971		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4972
4973	if (reset_mask & RADEON_RESET_CP) {
4974		grbm_soft_reset |= SOFT_RESET_CP;
4975
4976		srbm_soft_reset |= SOFT_RESET_GRBM;
4977	}
4978
4979	if (reset_mask & RADEON_RESET_DMA)
4980		srbm_soft_reset |= SOFT_RESET_SDMA;
4981
4982	if (reset_mask & RADEON_RESET_DMA1)
4983		srbm_soft_reset |= SOFT_RESET_SDMA1;
4984
4985	if (reset_mask & RADEON_RESET_DISPLAY)
4986		srbm_soft_reset |= SOFT_RESET_DC;
4987
4988	if (reset_mask & RADEON_RESET_RLC)
4989		grbm_soft_reset |= SOFT_RESET_RLC;
4990
4991	if (reset_mask & RADEON_RESET_SEM)
4992		srbm_soft_reset |= SOFT_RESET_SEM;
4993
4994	if (reset_mask & RADEON_RESET_IH)
4995		srbm_soft_reset |= SOFT_RESET_IH;
4996
4997	if (reset_mask & RADEON_RESET_GRBM)
4998		srbm_soft_reset |= SOFT_RESET_GRBM;
4999
5000	if (reset_mask & RADEON_RESET_VMC)
5001		srbm_soft_reset |= SOFT_RESET_VMC;
5002
5003	if (!(rdev->flags & RADEON_IS_IGP)) {
5004		if (reset_mask & RADEON_RESET_MC)
5005			srbm_soft_reset |= SOFT_RESET_MC;
5006	}
5007
5008	if (grbm_soft_reset) {
5009		tmp = RREG32(GRBM_SOFT_RESET);
5010		tmp |= grbm_soft_reset;
5011		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5012		WREG32(GRBM_SOFT_RESET, tmp);
5013		tmp = RREG32(GRBM_SOFT_RESET);
5014
5015		udelay(50);
5016
5017		tmp &= ~grbm_soft_reset;
5018		WREG32(GRBM_SOFT_RESET, tmp);
5019		tmp = RREG32(GRBM_SOFT_RESET);
5020	}
5021
5022	if (srbm_soft_reset) {
5023		tmp = RREG32(SRBM_SOFT_RESET);
5024		tmp |= srbm_soft_reset;
5025		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5026		WREG32(SRBM_SOFT_RESET, tmp);
5027		tmp = RREG32(SRBM_SOFT_RESET);
5028
5029		udelay(50);
5030
5031		tmp &= ~srbm_soft_reset;
5032		WREG32(SRBM_SOFT_RESET, tmp);
5033		tmp = RREG32(SRBM_SOFT_RESET);
5034	}
5035
5036	/* Wait a little for things to settle down */
5037	udelay(50);
5038
5039	evergreen_mc_resume(rdev, &save);
5040	udelay(50);
5041
5042	cik_print_gpu_status_regs(rdev);
5043}
5044
/* GMCON register state saved across a pci config reset (IGP/APU parts) */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5050
/**
 * kv_save_regs_for_reset - save GMCON state before a pci config reset
 *
 * @rdev: radeon_device pointer
 * @save: structure to receive the saved register values
 *
 * Save the GMCON registers that kv_restore_regs_for_reset() puts back
 * after the reset, and clear the register-engine execute-on-power-up,
 * execute-on-register-update, and stutter-enable bits in the meantime.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5062
5063static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064				      struct kv_reset_save_regs *save)
5065{
5066	int i;
5067
5068	WREG32(GMCON_PGFSM_WRITE, 0);
5069	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070
5071	for (i = 0; i < 5; i++)
5072		WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074	WREG32(GMCON_PGFSM_WRITE, 0);
5075	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076
5077	for (i = 0; i < 5; i++)
5078		WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082
5083	for (i = 0; i < 5; i++)
5084		WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088
5089	for (i = 0; i < 5; i++)
5090		WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094
5095	for (i = 0; i < 5; i++)
5096		WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098	WREG32(GMCON_PGFSM_WRITE, 0);
5099	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100
5101	for (i = 0; i < 5; i++)
5102		WREG32(GMCON_PGFSM_WRITE, 0);
5103
5104	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106
5107	for (i = 0; i < 5; i++)
5108		WREG32(GMCON_PGFSM_WRITE, 0);
5109
5110	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112
5113	for (i = 0; i < 5; i++)
5114		WREG32(GMCON_PGFSM_WRITE, 0);
5115
5116	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118
5119	for (i = 0; i < 5; i++)
5120		WREG32(GMCON_PGFSM_WRITE, 0);
5121
5122	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124
5125	for (i = 0; i < 5; i++)
5126		WREG32(GMCON_PGFSM_WRITE, 0);
5127
5128	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130
5131	WREG32(GMCON_MISC3, save->gmcon_misc3);
5132	WREG32(GMCON_MISC, save->gmcon_misc);
5133	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134}
5135
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce all engines, stop the MC, then reset the whole GPU through
 * the pci config reset mechanism and wait for it to come back (CIK).
 * On IGP parts the GMCON registers are saved and restored around
 * the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads back all-ones while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5198
5199/**
5200 * cik_asic_reset - soft reset GPU
5201 *
5202 * @rdev: radeon_device pointer
5203 * @hard: force hard reset
5204 *
5205 * Look up which blocks are hung and attempt
5206 * to reset them.
5207 * Returns 0 for success.
5208 */
5209int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210{
5211	u32 reset_mask;
5212
5213	if (hard) {
5214		cik_gpu_pci_config_reset(rdev);
5215		return 0;
5216	}
5217
5218	reset_mask = cik_gpu_check_soft_reset(rdev);
5219
5220	if (reset_mask)
5221		r600_set_bios_scratch_engine_hung(rdev, true);
5222
5223	/* try soft reset */
5224	cik_gpu_soft_reset(rdev, reset_mask);
5225
5226	reset_mask = cik_gpu_check_soft_reset(rdev);
5227
5228	/* try pci config reset */
5229	if (reset_mask && radeon_hard_reset)
5230		cik_gpu_pci_config_reset(rdev);
5231
5232	reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234	if (!reset_mask)
5235		r600_set_bios_scratch_engine_hung(rdev, false);
5236
5237	return 0;
5238}
5239
5240/**
5241 * cik_gfx_is_lockup - check if the 3D engine is locked up
5242 *
5243 * @rdev: radeon_device pointer
5244 * @ring: radeon_ring structure holding ring information
5245 *
5246 * Check if the 3D engine is locked up (CIK).
5247 * Returns true if the engine is locked, false if not.
5248 */
5249bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250{
5251	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252
5253	if (!(reset_mask & (RADEON_RESET_GFX |
5254			    RADEON_RESET_COMPUTE |
5255			    RADEON_RESET_CP))) {
5256		radeon_ring_lockup_update(rdev, ring);
5257		return false;
5258	}
5259	return radeon_ring_test_lockup(rdev, ring);
5260}
5261
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop MC clients while the aperture is reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs vram end (hi 16 bits) and start (lo 16 bits),
	 * both in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5318
5319/**
5320 * cik_mc_init - initialize the memory controller driver params
5321 *
5322 * @rdev: radeon_device pointer
5323 *
5324 * Look up the amount of vram, vram width, and decide how to place
5325 * vram and gart within the GPU's physical address space (CIK).
5326 * Returns 0 for success.
5327 */
5328static int cik_mc_init(struct radeon_device *rdev)
5329{
5330	u32 tmp;
5331	int chansize, numchan;
5332
5333	/* Get VRAM informations */
5334	rdev->mc.vram_is_ddr = true;
5335	tmp = RREG32(MC_ARB_RAMCFG);
5336	if (tmp & CHANSIZE_MASK) {
5337		chansize = 64;
5338	} else {
5339		chansize = 32;
5340	}
5341	tmp = RREG32(MC_SHARED_CHMAP);
5342	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343	case 0:
5344	default:
5345		numchan = 1;
5346		break;
5347	case 1:
5348		numchan = 2;
5349		break;
5350	case 2:
5351		numchan = 4;
5352		break;
5353	case 3:
5354		numchan = 8;
5355		break;
5356	case 4:
5357		numchan = 3;
5358		break;
5359	case 5:
5360		numchan = 6;
5361		break;
5362	case 6:
5363		numchan = 10;
5364		break;
5365	case 7:
5366		numchan = 12;
5367		break;
5368	case 8:
5369		numchan = 16;
5370		break;
5371	}
5372	rdev->mc.vram_width = numchan * chansize;
5373	/* Could aper size report 0 ? */
5374	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5375	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5376	/* size in MB on si */
5377	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380	si_vram_gtt_location(rdev, &rdev->mc);
5381	radeon_update_bandwidth_info(rdev);
5382
5383	return 0;
5384}
5385
5386/*
5387 * GART
5388 * VMID 0 is the physical GPU addresses as used by the kernel.
5389 * VMIDs 1-15 are used for userspace clients and are handled
5390 * by the radeon vm/hsa code.
5391 */
5392/**
5393 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394 *
5395 * @rdev: radeon_device pointer
5396 *
5397 * Flush the TLB for the VMID 0 page table (CIK).
5398 */
5399void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5400{
5401	/* flush hdp cache */
5402	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5403
5404	/* bits 0-15 are the VM contexts0-15 */
5405	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5406}
5407
5408/**
5409 * cik_pcie_gart_enable - gart enable
5410 *
5411 * @rdev: radeon_device pointer
5412 *
5413 * This sets up the TLBs, programs the page tables for VMID0,
5414 * sets up the hw for VMIDs 1-15 which are allocated on
5415 * demand, and sets up the global locations for the LDS, GDS,
5416 * and GPUVM for FSA64 clients (CIK).
5417 * Returns 0 for success, errors for failure.
5418 */
5419static int cik_pcie_gart_enable(struct radeon_device *rdev)
5420{
5421	int r, i;
5422
5423	if (rdev->gart.robj == NULL) {
5424		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5425		return -EINVAL;
5426	}
5427	r = radeon_gart_table_vram_pin(rdev);
5428	if (r)
5429		return r;
5430	/* Setup TLB control */
5431	WREG32(MC_VM_MX_L1_TLB_CNTL,
5432	       (0xA << 7) |
5433	       ENABLE_L1_TLB |
5434	       ENABLE_L1_FRAGMENT_PROCESSING |
5435	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5436	       ENABLE_ADVANCED_DRIVER_MODEL |
5437	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5438	/* Setup L2 cache */
5439	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5440	       ENABLE_L2_FRAGMENT_PROCESSING |
5441	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5442	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5443	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5444	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5445	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5446	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5447	       BANK_SELECT(4) |
5448	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5449	/* setup context0 */
5450	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5451	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5452	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5453	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5454			(u32)(rdev->dummy_page.addr >> 12));
5455	WREG32(VM_CONTEXT0_CNTL2, 0);
5456	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5457				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5458
5459	WREG32(0x15D4, 0);
5460	WREG32(0x15D8, 0);
5461	WREG32(0x15DC, 0);
5462
5463	/* restore context1-15 */
5464	/* set vm size, must be a multiple of 4 */
5465	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5466	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5467	for (i = 1; i < 16; i++) {
5468		if (i < 8)
5469			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5470			       rdev->vm_manager.saved_table_addr[i]);
5471		else
5472			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5473			       rdev->vm_manager.saved_table_addr[i]);
5474	}
5475
5476	/* enable context1-15 */
5477	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5478	       (u32)(rdev->dummy_page.addr >> 12));
5479	WREG32(VM_CONTEXT1_CNTL2, 4);
5480	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5481				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5482				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5483				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5484				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5485				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5486				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5487				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5488				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5489				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5490				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5491				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5492				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5494
5495	if (rdev->family == CHIP_KAVERI) {
5496		u32 tmp = RREG32(CHUB_CONTROL);
5497		tmp &= ~BYPASS_VM;
5498		WREG32(CHUB_CONTROL, tmp);
5499	}
5500
5501	/* XXX SH_MEM regs */
5502	/* where to put LDS, scratch, GPUVM in FSA64 space */
5503	mutex_lock(&rdev->srbm_mutex);
5504	for (i = 0; i < 16; i++) {
5505		cik_srbm_select(rdev, 0, 0, 0, i);
5506		/* CP and shaders */
5507		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5508		WREG32(SH_MEM_APE1_BASE, 1);
5509		WREG32(SH_MEM_APE1_LIMIT, 0);
5510		WREG32(SH_MEM_BASES, 0);
5511		/* SDMA GFX */
5512		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5513		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5514		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5515		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5516		/* XXX SDMA RLC - todo */
5517	}
5518	cik_srbm_select(rdev, 0, 0, 0, 0);
5519	mutex_unlock(&rdev->srbm_mutex);
5520
5521	cik_pcie_gart_tlb_flush(rdev);
5522	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5523		 (unsigned)(rdev->mc.gtt_size >> 20),
5524		 (unsigned long long)rdev->gart.table_addr);
5525	rdev->gart.ready = true;
5526	return 0;
5527}
5528
5529/**
5530 * cik_pcie_gart_disable - gart disable
5531 *
5532 * @rdev: radeon_device pointer
5533 *
5534 * This disables all VM page table (CIK).
5535 */
5536static void cik_pcie_gart_disable(struct radeon_device *rdev)
5537{
5538	unsigned i;
5539
5540	for (i = 1; i < 16; ++i) {
5541		uint32_t reg;
5542		if (i < 8)
5543			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5544		else
5545			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5546		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5547	}
5548
5549	/* Disable all tables */
5550	WREG32(VM_CONTEXT0_CNTL, 0);
5551	WREG32(VM_CONTEXT1_CNTL, 0);
5552	/* Setup TLB control */
5553	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5554	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5555	/* Setup L2 cache */
5556	WREG32(VM_L2_CNTL,
5557	       ENABLE_L2_FRAGMENT_PROCESSING |
5558	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5559	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5560	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5561	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5562	WREG32(VM_L2_CNTL2, 0);
5563	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5564	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5565	radeon_gart_table_vram_unpin(rdev);
5566}
5567
5568/**
5569 * cik_pcie_gart_fini - vm fini callback
5570 *
5571 * @rdev: radeon_device pointer
5572 *
5573 * Tears down the driver GART/VM setup (CIK).
5574 */
5575static void cik_pcie_gart_fini(struct radeon_device *rdev)
5576{
5577	cik_pcie_gart_disable(rdev);
5578	radeon_gart_table_vram_free(rdev);
5579	radeon_gart_fini(rdev);
5580}
5581
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software */
	return 0;
}
5595
5596/*
5597 * vm
5598 * VMID 0 is the physical GPU addresses as used by the kernel.
5599 * VMIDs 1-15 are used for userspace clients and are handled
5600 * by the radeon vm/hsa code.
5601 */
5602/**
5603 * cik_vm_init - cik vm init callback
5604 *
5605 * @rdev: radeon_device pointer
5606 *
5607 * Inits cik specific vm parameters (number of VMs, base of vram for
5608 * VMIDs 1-15) (CIK).
5609 * Returns 0 for success.
5610 */
5611int cik_vm_init(struct radeon_device *rdev)
5612{
5613	/*
5614	 * number of VMs
5615	 * VMID 0 is reserved for System
5616	 * radeon graphics/compute will use VMIDs 1-15
5617	 */
5618	rdev->vm_manager.nvm = 16;
5619	/* base offset of vram pages */
5620	if (rdev->flags & RADEON_IS_IGP) {
5621		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5622		tmp <<= 22;
5623		rdev->vm_manager.vram_base_offset = tmp;
5624	} else
5625		rdev->vm_manager.vram_base_offset = 0;
5626
5627	return 0;
5628}
5629
5630/**
5631 * cik_vm_fini - cik vm fini callback
5632 *
5633 * @rdev: radeon_device pointer
5634 *
5635 * Tear down any asic specific VM setup (CIK).
5636 */
5637void cik_vm_fini(struct radeon_device *rdev)
5638{
5639}
5640
5641/**
5642 * cik_vm_decode_fault - print human readable fault info
5643 *
5644 * @rdev: radeon_device pointer
5645 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648 *
5649 * Print human readable fault information (CIK).
5650 */
5651static void cik_vm_decode_fault(struct radeon_device *rdev,
5652				u32 status, u32 addr, u32 mc_client)
5653{
5654	u32 mc_id;
5655	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5659
5660	if (rdev->family == CHIP_HAWAII)
5661		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662	else
5663		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664
5665	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666	       protections, vmid, addr,
5667	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668	       block, mc_client, mc_id);
5669}
5670
5671/*
5672 * cik_vm_flush - cik vm flush using the CP
5673 *
 
 
5674 * Update the page table base and flush the VM TLB
5675 * using the CP (CIK).
5676 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP engine; compute rings write via ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* point this VMID's page table base at the new page directory */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM_GFX_CNTL back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5750
5751/*
5752 * RLC
5753 * The RLC is a multi-purpose microengine that handles a
5754 * variety of functions, the most important of which is
5755 * the interrupt controller.
5756 */
5757static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758					  bool enable)
5759{
5760	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761
5762	if (enable)
5763		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764	else
5765		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766	WREG32(CP_INT_CNTL_RING0, tmp);
5767}
5768
5769static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770{
5771	u32 tmp;
5772
5773	tmp = RREG32(RLC_LB_CNTL);
5774	if (enable)
5775		tmp |= LOAD_BALANCE_ENABLE;
5776	else
5777		tmp &= ~LOAD_BALANCE_ENABLE;
5778	WREG32(RLC_LB_CNTL, tmp);
5779}
5780
/* Poll until all RLC serdes masters (per-CU and non-CU) report idle. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per-SE/SH CU masters: select each SE/SH and poll its busy reg */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast SE/SH selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* non-CU masters (SE/GC/TC) share one busy register */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5805
5806static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807{
5808	u32 tmp;
5809
5810	tmp = RREG32(RLC_CNTL);
5811	if (tmp != rlc)
5812		WREG32(RLC_CNTL, rlc);
5813}
5814
/*
 * Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the RLC GPM microengine to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5838
/*
 * Request RLC safe mode via the RLC_GPR_REG2 message interface so
 * clock/power gating state can be reprogrammed safely.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for GFX power and clock status to assert */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear the REQ bit (message acknowledged) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5859
5860void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861{
5862	u32 tmp;
5863
5864	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865	WREG32(RLC_GPR_REG2, tmp);
5866}
5867
5868/**
5869 * cik_rlc_stop - stop the RLC ME
5870 *
5871 * @rdev: radeon_device pointer
5872 *
5873 * Halt the RLC ME (MicroEngine) (CIK).
5874 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* halt the RLC microengine */
	WREG32(RLC_CNTL, 0);

	/* mask GUI idle interrupts while the RLC is down */
	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure all serdes traffic has drained */
	cik_wait_for_rlc_serdes(rdev);
}
5883
5884/**
5885 * cik_rlc_start - start the RLC ME
5886 *
5887 * @rdev: radeon_device pointer
5888 *
5889 * Unhalt the RLC ME (MicroEngine) (CIK).
5890 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	/* unhalt the RLC microengine */
	WREG32(RLC_CNTL, RLC_ENABLE);

	/* re-enable GUI idle interrupts now that the RLC is running */
	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC some time to come up */
	udelay(50);
}
5899
5900/**
5901 * cik_rlc_resume - setup the RLC hw
5902 *
5903 * @rdev: radeon_device pointer
5904 *
5905 * Initialize the RLC registers, load the ucode,
5906 * and start the RLC (CIK).
5907 * Returns 0 for success, -EINVAL if the ucode is not available.
5908 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load balancing with all SEs/SHs selected (broadcast) */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian blob with a header */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: raw big-endian words, fixed per-family size */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
5989
/* Enable/disable coarse grain clock gating (CGCG) and clock gating
 * late start (CGLS) for the GFX block.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): results unused - these back-to-back reads
		 * look like a settle/flush delay before disabling CGCG;
		 * confirm against the CG programming docs.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6025
/* Enable/disable medium grain clock gating (MGCG) for the GFX block,
 * including CP/RLC memory light sleep and CGTS shader clock gating.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while touching the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6105
/* MC/hub/ATC clock gating control registers toggled as a group by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below.
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6118
6119static void cik_enable_mc_ls(struct radeon_device *rdev,
6120			     bool enable)
6121{
6122	int i;
6123	u32 orig, data;
6124
6125	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126		orig = data = RREG32(mc_cg_registers[i]);
6127		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128			data |= MC_LS_ENABLE;
6129		else
6130			data &= ~MC_LS_ENABLE;
6131		if (data != orig)
6132			WREG32(mc_cg_registers[i], data);
6133	}
6134}
6135
6136static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137			       bool enable)
6138{
6139	int i;
6140	u32 orig, data;
6141
6142	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143		orig = data = RREG32(mc_cg_registers[i]);
6144		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145			data |= MC_CG_ENABLE;
6146		else
6147			data &= ~MC_CG_ENABLE;
6148		if (data != orig)
6149			WREG32(mc_cg_registers[i], data);
6150	}
6151}
6152
/* Enable/disable medium grain clock gating for both SDMA engines. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		/* assumes 0x100 is the "gating enabled" value for
		 * SDMAx_CLK_CTRL - confirm against the register spec
		 */
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* set the override bits (top byte) to disable gating */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
6173
6174static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175				 bool enable)
6176{
6177	u32 orig, data;
6178
6179	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181		data |= 0x100;
6182		if (orig != data)
6183			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184
6185		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186		data |= 0x100;
6187		if (orig != data)
6188			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189	} else {
6190		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191		data &= ~0x100;
6192		if (orig != data)
6193			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194
6195		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196		data &= ~0x100;
6197		if (orig != data)
6198			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199	}
6200}
6201
/* Enable/disable medium grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value just read is discarded by the
		 * unconditional "data = 0xfff" below; it looks like it may
		 * have been intended as "data |= 0xfff" - confirm against
		 * the UVD CGC programming sequence before changing.
		 */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 memory gating bits */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6227
6228static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229			       bool enable)
6230{
6231	u32 orig, data;
6232
6233	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234
6235	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238	else
6239		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241
6242	if (orig != data)
6243		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244}
6245
6246static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247				bool enable)
6248{
6249	u32 orig, data;
6250
6251	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252
6253	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254		data &= ~CLOCK_GATING_DIS;
6255	else
6256		data |= CLOCK_GATING_DIS;
6257
6258	if (orig != data)
6259		WREG32(HDP_HOST_PATH_CNTL, data);
6260}
6261
6262static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263			      bool enable)
6264{
6265	u32 orig, data;
6266
6267	orig = data = RREG32(HDP_MEM_POWER_LS);
6268
6269	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270		data |= HDP_LS_ENABLE;
6271	else
6272		data &= ~HDP_LS_ENABLE;
6273
6274	if (orig != data)
6275		WREG32(HDP_MEM_POWER_LS, data);
6276}
6277
/* Enable/disable clock gating for the hardware blocks selected in @block. */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* APUs have no dedicated MC clock gating controls here */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6325
/* Enable clock gating on all supported blocks; GFX goes first. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6340
/* Disable clock gating; blocks are torn down in the reverse order of
 * cik_init_cg(), with GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6351
6352static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353					  bool enable)
6354{
6355	u32 data, orig;
6356
6357	orig = data = RREG32(RLC_PG_CNTL);
6358	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360	else
6361		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362	if (orig != data)
6363		WREG32(RLC_PG_CNTL, data);
6364}
6365
6366static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367					  bool enable)
6368{
6369	u32 data, orig;
6370
6371	orig = data = RREG32(RLC_PG_CNTL);
6372	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374	else
6375		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376	if (orig != data)
6377		WREG32(RLC_PG_CNTL, data);
6378}
6379
6380static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381{
6382	u32 data, orig;
6383
6384	orig = data = RREG32(RLC_PG_CNTL);
6385	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386		data &= ~DISABLE_CP_PG;
6387	else
6388		data |= DISABLE_CP_PG;
6389	if (orig != data)
6390		WREG32(RLC_PG_CNTL, data);
6391}
6392
6393static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394{
6395	u32 data, orig;
6396
6397	orig = data = RREG32(RLC_PG_CNTL);
6398	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399		data &= ~DISABLE_GDS_PG;
6400	else
6401		data |= DISABLE_GDS_PG;
6402	if (orig != data)
6403		WREG32(RLC_PG_CNTL, data);
6404}
6405
6406#define CP_ME_TABLE_SIZE    96
6407#define CP_ME_TABLE_OFFSET  2048
6408#define CP_MEC_TABLE_OFFSET 4096
6409
6410void cik_init_cp_pg_table(struct radeon_device *rdev)
6411{
6412	volatile u32 *dst_ptr;
6413	int me, i, max_me = 4;
6414	u32 bo_offset = 0;
6415	u32 table_offset, table_size;
6416
6417	if (rdev->family == CHIP_KAVERI)
6418		max_me = 5;
6419
6420	if (rdev->rlc.cp_table_ptr == NULL)
6421		return;
6422
6423	/* write the cp table buffer */
6424	dst_ptr = rdev->rlc.cp_table_ptr;
6425	for (me = 0; me < max_me; me++) {
6426		if (rdev->new_fw) {
6427			const __le32 *fw_data;
6428			const struct gfx_firmware_header_v1_0 *hdr;
6429
6430			if (me == 0) {
6431				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6432				fw_data = (const __le32 *)
6433					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6434				table_offset = le32_to_cpu(hdr->jt_offset);
6435				table_size = le32_to_cpu(hdr->jt_size);
6436			} else if (me == 1) {
6437				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6438				fw_data = (const __le32 *)
6439					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6440				table_offset = le32_to_cpu(hdr->jt_offset);
6441				table_size = le32_to_cpu(hdr->jt_size);
6442			} else if (me == 2) {
6443				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6444				fw_data = (const __le32 *)
6445					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6446				table_offset = le32_to_cpu(hdr->jt_offset);
6447				table_size = le32_to_cpu(hdr->jt_size);
6448			} else if (me == 3) {
6449				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6450				fw_data = (const __le32 *)
6451					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6452				table_offset = le32_to_cpu(hdr->jt_offset);
6453				table_size = le32_to_cpu(hdr->jt_size);
6454			} else {
6455				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6456				fw_data = (const __le32 *)
6457					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6458				table_offset = le32_to_cpu(hdr->jt_offset);
6459				table_size = le32_to_cpu(hdr->jt_size);
6460			}
6461
6462			for (i = 0; i < table_size; i ++) {
6463				dst_ptr[bo_offset + i] =
6464					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6465			}
6466			bo_offset += table_size;
6467		} else {
6468			const __be32 *fw_data;
6469			table_size = CP_ME_TABLE_SIZE;
6470
6471			if (me == 0) {
6472				fw_data = (const __be32 *)rdev->ce_fw->data;
6473				table_offset = CP_ME_TABLE_OFFSET;
6474			} else if (me == 1) {
6475				fw_data = (const __be32 *)rdev->pfp_fw->data;
6476				table_offset = CP_ME_TABLE_OFFSET;
6477			} else if (me == 2) {
6478				fw_data = (const __be32 *)rdev->me_fw->data;
6479				table_offset = CP_ME_TABLE_OFFSET;
6480			} else {
6481				fw_data = (const __be32 *)rdev->mec_fw->data;
6482				table_offset = CP_MEC_TABLE_OFFSET;
6483			}
6484
6485			for (i = 0; i < table_size; i ++) {
6486				dst_ptr[bo_offset + i] =
6487					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6488			}
6489			bo_offset += table_size;
6490		}
6491	}
6492}
6493
/* Enable/disable GFX coarse grain powergating via the RLC auto-PG path. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is intentionally unused; presumably a
		 * posting read to kick the GFX block out of powergating -
		 * confirm before removing.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6523
6524static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525{
6526	u32 mask = 0, tmp, tmp1;
6527	int i;
6528
6529	cik_select_se_sh(rdev, se, sh);
6530	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533
6534	tmp &= 0xffff0000;
6535
6536	tmp |= tmp1;
6537	tmp >>= 16;
6538
6539	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6540		mask <<= 1;
6541		mask |= 1;
6542	}
6543
6544	return (~tmp) & mask;
6545}
6546
6547static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548{
6549	u32 i, j, k, active_cu_number = 0;
6550	u32 mask, counter, cu_bitmap;
6551	u32 tmp = 0;
6552
6553	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555			mask = 1;
6556			cu_bitmap = 0;
6557			counter = 0;
6558			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6559				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560					if (counter < 2)
6561						cu_bitmap |= mask;
6562					counter ++;
6563				}
6564				mask <<= 1;
6565			}
6566
6567			active_cu_number += counter;
6568			tmp |= (cu_bitmap << (i * 16 + j * 8));
6569		}
6570	}
6571
6572	WREG32(RLC_PG_AO_CU_MASK, tmp);
6573
6574	tmp = RREG32(RLC_MAX_PG_CU);
6575	tmp &= ~MAX_PU_CU_MASK;
6576	tmp |= MAX_PU_CU(active_cu_number);
6577	WREG32(RLC_MAX_PG_CU, tmp);
6578}
6579
6580static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581				       bool enable)
6582{
6583	u32 data, orig;
6584
6585	orig = data = RREG32(RLC_PG_CNTL);
6586	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587		data |= STATIC_PER_CU_PG_ENABLE;
6588	else
6589		data &= ~STATIC_PER_CU_PG_ENABLE;
6590	if (orig != data)
6591		WREG32(RLC_PG_CNTL, data);
6592}
6593
6594static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595					bool enable)
6596{
6597	u32 data, orig;
6598
6599	orig = data = RREG32(RLC_PG_CNTL);
6600	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601		data |= DYN_PER_CU_PG_ENABLE;
6602	else
6603		data &= ~DYN_PER_CU_PG_ENABLE;
6604	if (orig != data)
6605		WREG32(RLC_PG_CNTL, data);
6606}
6607
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch area with the clear state descriptor and the
 * save/restore register list, then configure auto powergating timing.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: clear state buffer address (hi/lo) and size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state buffer: zero out all three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6659
/* Toggle all GFX powergating features (CGPG, static and dynamic MGPG). */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6666
6667u32 cik_get_csb_size(struct radeon_device *rdev)
6668{
6669	u32 count = 0;
6670	const struct cs_section_def *sect = NULL;
6671	const struct cs_extent_def *ext = NULL;
6672
6673	if (rdev->rlc.cs_data == NULL)
6674		return 0;
6675
6676	/* begin clear state */
6677	count += 2;
6678	/* context control state */
6679	count += 3;
6680
6681	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682		for (ext = sect->section; ext->extent != NULL; ++ext) {
6683			if (sect->id == SECT_CONTEXT)
6684				count += 2 + ext->reg_count;
6685			else
6686				return 0;
6687		}
6688	}
6689	/* pa_sc_raster_config/pa_sc_raster_config1 */
6690	count += 4;
6691	/* end clear state */
6692	count += 2;
6693	/* clear state */
6694	count += 2;
6695
6696	return count;
6697}
6698
/* Fill @buffer with the clear state indirect buffer for the RLC; the
 * layout must match the dword count returned by cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-family pa_sc_raster_config/pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6763
6764static void cik_init_pg(struct radeon_device *rdev)
6765{
6766	if (rdev->pg_flags) {
6767		cik_enable_sck_slowdown_on_pu(rdev, true);
6768		cik_enable_sck_slowdown_on_pd(rdev, true);
6769		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6770			cik_init_gfx_cgpg(rdev);
6771			cik_enable_cp_pg(rdev, true);
6772			cik_enable_gds_pg(rdev, true);
6773		}
6774		cik_init_ao_cu_mask(rdev);
6775		cik_update_gfx_pg(rdev, true);
6776	}
6777}
6778
6779static void cik_fini_pg(struct radeon_device *rdev)
6780{
6781	if (rdev->pg_flags) {
6782		cik_update_gfx_pg(rdev, false);
6783		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6784			cik_enable_cp_pg(rdev, false);
6785			cik_enable_gds_pg(rdev, false);
6786		}
6787	}
6788}
6789
6790/*
6791 * Interrupts
6792 * Starting with r6xx, interrupts are handled via a ring buffer.
6793 * Ring buffers are areas of GPU accessible memory that the GPU
6794 * writes interrupt vectors into and the host reads vectors out of.
6795 * There is a rptr (read pointer) that determines where the
6796 * host is currently reading, and a wptr (write pointer)
6797 * which determines where the GPU has written.  When the
6798 * pointers are equal, the ring is idle.  When the GPU
6799 * writes vectors to the ring buffer, it increments the
6800 * wptr.  When there is an interrupt, the host then starts
6801 * fetching commands and processing them until the pointers are
6802 * equal again at which point it updates the rptr.
6803 */
6804
6805/**
6806 * cik_enable_interrupts - Enable the interrupt ring buffer
6807 *
6808 * @rdev: radeon_device pointer
6809 *
6810 * Enable the interrupt ring buffer (CIK).
6811 */
6812static void cik_enable_interrupts(struct radeon_device *rdev)
6813{
6814	u32 ih_cntl = RREG32(IH_CNTL);
6815	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816
6817	ih_cntl |= ENABLE_INTR;
6818	ih_rb_cntl |= IH_RB_ENABLE;
6819	WREG32(IH_CNTL, ih_cntl);
6820	WREG32(IH_RB_CNTL, ih_rb_cntl);
6821	rdev->ih.enabled = true;
6822}
6823
6824/**
6825 * cik_disable_interrupts - Disable the interrupt ring buffer
6826 *
6827 * @rdev: radeon_device pointer
6828 *
6829 * Disable the interrupt ring buffer (CIK).
6830 */
6831static void cik_disable_interrupts(struct radeon_device *rdev)
6832{
6833	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834	u32 ih_cntl = RREG32(IH_CNTL);
6835
6836	ih_rb_cntl &= ~IH_RB_ENABLE;
6837	ih_cntl &= ~ENABLE_INTR;
6838	WREG32(IH_RB_CNTL, ih_rb_cntl);
6839	WREG32(IH_CNTL, ih_cntl);
6840	/* set rptr, wptr to 0 */
6841	WREG32(IH_RB_RPTR, 0);
6842	WREG32(IH_RB_WPTR, 0);
6843	rdev->ih.enabled = false;
6844	rdev->ih.rptr = 0;
6845}
6846
6847/**
6848 * cik_disable_interrupt_state - Disable all interrupt sources
6849 *
6850 * @rdev: radeon_device pointer
6851 *
6852 * Clear all interrupt enable bits used by the driver (CIK).
6853 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits, clear all
	 * other interrupt enables
	 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes on both compute MEs */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. — always at least 2 crtcs on CIK */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the enables but keep the configured
	 * hpd pin polarity bit
	 */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6923
6924/**
6925 * cik_irq_init - init and enable the interrupt ring
6926 *
6927 * @rdev: radeon_device pointer
6928 *
6929 * Allocate a ring buffer for the interrupt controller,
6930 * enable the RLC, disable interrupts, enable the IH
6931 * ring buffer and enable it (CIK).
 * Called at device load and resume.
6933 * Returns 0 for success, errors for failure.
6934 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs while we reprogram the controller */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is a 256-byte aligned GPU address */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* log2 of the ring size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* required for MSI and IH ring DMA to function */
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7005
7006/**
7007 * cik_irq_set - enable/disable interrupt sources
7008 *
7009 * @rdev: radeon_device pointer
7010 *
7011 * Enable interrupt sources on the GPU (vblanks, hpd,
7012 * etc.) (CIK).
7013 * Returns 0 for success, errors for failure.
7014 */
7015int cik_irq_set(struct radeon_device *rdev)
7016{
7017	u32 cp_int_cntl;
7018	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022	u32 grbm_int_cntl = 0;
7023	u32 dma_cntl, dma_cntl1;
7024
7025	if (!rdev->irq.installed) {
7026		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027		return -EINVAL;
7028	}
7029	/* don't enable anything if the ih is disabled */
7030	if (!rdev->ih.enabled) {
7031		cik_disable_interrupts(rdev);
7032		/* force the active interrupt state to all disabled */
7033		cik_disable_interrupt_state(rdev);
7034		return 0;
7035	}
7036
7037	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040
7041	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047
7048	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050
7051	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059
7060	/* enable CP interrupts on all rings */
7061	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064	}
7065	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067		DRM_DEBUG("si_irq_set: sw int cp1\n");
7068		if (ring->me == 1) {
7069			switch (ring->pipe) {
7070			case 0:
7071				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072				break;
7073			case 1:
7074				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075				break;
7076			case 2:
7077				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078				break;
7079			case 3:
7080				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7081				break;
7082			default:
7083				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084				break;
7085			}
7086		} else if (ring->me == 2) {
7087			switch (ring->pipe) {
7088			case 0:
7089				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090				break;
7091			case 1:
7092				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093				break;
7094			case 2:
7095				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096				break;
7097			case 3:
7098				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7099				break;
7100			default:
7101				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102				break;
7103			}
7104		} else {
7105			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106		}
7107	}
7108	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110		DRM_DEBUG("si_irq_set: sw int cp2\n");
7111		if (ring->me == 1) {
7112			switch (ring->pipe) {
7113			case 0:
7114				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115				break;
7116			case 1:
7117				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118				break;
7119			case 2:
7120				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121				break;
7122			case 3:
7123				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7124				break;
7125			default:
7126				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127				break;
7128			}
7129		} else if (ring->me == 2) {
7130			switch (ring->pipe) {
7131			case 0:
7132				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133				break;
7134			case 1:
7135				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136				break;
7137			case 2:
7138				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139				break;
7140			case 3:
7141				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7142				break;
7143			default:
7144				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145				break;
7146			}
7147		} else {
7148			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149		}
7150	}
7151
7152	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153		DRM_DEBUG("cik_irq_set: sw int dma\n");
7154		dma_cntl |= TRAP_ENABLE;
7155	}
7156
7157	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159		dma_cntl1 |= TRAP_ENABLE;
7160	}
7161
7162	if (rdev->irq.crtc_vblank_int[0] ||
7163	    atomic_read(&rdev->irq.pflip[0])) {
7164		DRM_DEBUG("cik_irq_set: vblank 0\n");
7165		crtc1 |= VBLANK_INTERRUPT_MASK;
7166	}
7167	if (rdev->irq.crtc_vblank_int[1] ||
7168	    atomic_read(&rdev->irq.pflip[1])) {
7169		DRM_DEBUG("cik_irq_set: vblank 1\n");
7170		crtc2 |= VBLANK_INTERRUPT_MASK;
7171	}
7172	if (rdev->irq.crtc_vblank_int[2] ||
7173	    atomic_read(&rdev->irq.pflip[2])) {
7174		DRM_DEBUG("cik_irq_set: vblank 2\n");
7175		crtc3 |= VBLANK_INTERRUPT_MASK;
7176	}
7177	if (rdev->irq.crtc_vblank_int[3] ||
7178	    atomic_read(&rdev->irq.pflip[3])) {
7179		DRM_DEBUG("cik_irq_set: vblank 3\n");
7180		crtc4 |= VBLANK_INTERRUPT_MASK;
7181	}
7182	if (rdev->irq.crtc_vblank_int[4] ||
7183	    atomic_read(&rdev->irq.pflip[4])) {
7184		DRM_DEBUG("cik_irq_set: vblank 4\n");
7185		crtc5 |= VBLANK_INTERRUPT_MASK;
7186	}
7187	if (rdev->irq.crtc_vblank_int[5] ||
7188	    atomic_read(&rdev->irq.pflip[5])) {
7189		DRM_DEBUG("cik_irq_set: vblank 5\n");
7190		crtc6 |= VBLANK_INTERRUPT_MASK;
7191	}
7192	if (rdev->irq.hpd[0]) {
7193		DRM_DEBUG("cik_irq_set: hpd 1\n");
7194		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195	}
7196	if (rdev->irq.hpd[1]) {
7197		DRM_DEBUG("cik_irq_set: hpd 2\n");
7198		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199	}
7200	if (rdev->irq.hpd[2]) {
7201		DRM_DEBUG("cik_irq_set: hpd 3\n");
7202		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203	}
7204	if (rdev->irq.hpd[3]) {
7205		DRM_DEBUG("cik_irq_set: hpd 4\n");
7206		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207	}
7208	if (rdev->irq.hpd[4]) {
7209		DRM_DEBUG("cik_irq_set: hpd 5\n");
7210		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211	}
7212	if (rdev->irq.hpd[5]) {
7213		DRM_DEBUG("cik_irq_set: hpd 6\n");
7214		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215	}
7216
7217	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218
7219	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221
7222	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230
7231	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232
7233	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235	if (rdev->num_crtc >= 4) {
7236		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238	}
7239	if (rdev->num_crtc >= 6) {
7240		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242	}
7243
7244	if (rdev->num_crtc >= 2) {
7245		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246		       GRPH_PFLIP_INT_MASK);
7247		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248		       GRPH_PFLIP_INT_MASK);
7249	}
7250	if (rdev->num_crtc >= 4) {
7251		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252		       GRPH_PFLIP_INT_MASK);
7253		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254		       GRPH_PFLIP_INT_MASK);
7255	}
7256	if (rdev->num_crtc >= 6) {
7257		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258		       GRPH_PFLIP_INT_MASK);
7259		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260		       GRPH_PFLIP_INT_MASK);
7261	}
7262
7263	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269
7270	/* posting read */
7271	RREG32(SRBM_STATUS);
7272
7273	return 0;
7274}
7275
7276/**
7277 * cik_irq_ack - ack interrupt sources
7278 *
7279 * @rdev: radeon_device pointer
7280 *
7281 * Ack interrupt sources on the GPU (vblanks, hpd,
7282 * etc.) (CIK).  Certain interrupts sources are sw
7283 * generated and do not require an explicit ack.
7284 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	/* latch all display interrupt status registers so the IRQ
	 * handler can inspect them after the hw bits are acked below
	 */
	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	/* page flip status, per crtc */
	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	/* ack pflip/vblank/vline for crtc 0/1 */
	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	/* ack pflip/vblank/vline for crtc 2/3 */
	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack pflip/vblank/vline for crtc 4/5 */
	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	/* ack hpd sense interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	/* ack hpd rx (e.g. DP short pulse) interrupts */
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
7424
7425/**
7426 * cik_irq_disable - disable interrupts
7427 *
7428 * @rdev: radeon_device pointer
7429 *
7430 * Disable interrupts on the hw (CIK).
7431 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* clear every interrupt source enable bit */
	cik_disable_interrupt_state(rdev);
}
7440
7441/**
7442 * cik_irq_suspend - disable interrupts for suspend
7443 *
7444 * @rdev: radeon_device pointer
7445 *
7446 * Disable interrupts and stop the RLC (CIK).
7447 * Used for suspend.
7448 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* quiesce interrupts first, then stop the RLC */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7454
7455/**
7456 * cik_irq_fini - tear down interrupt support
7457 *
7458 * @rdev: radeon_device pointer
7459 *
7460 * Disable interrupts on the hw and free the IH ring
7461 * buffer (CIK).
7462 * Used for driver unload.
7463 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* hw must be quiet before the IH ring memory is freed */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7469
7470/**
7471 * cik_get_ih_wptr - get the IH ring buffer wptr
7472 *
7473 * @rdev: radeon_device pointer
7474 *
7475 * Get the IH ring buffer wptr from either the register
7476 * or the writeback memory buffer (CIK).  Also check for
7477 * ring buffer overflow and deal with it.
7478 * Used by cik_irq_process().
7479 * Returns the value of the wptr.
7480 */
7481static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482{
7483	u32 wptr, tmp;
7484
7485	if (rdev->wb.enabled)
7486		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487	else
7488		wptr = RREG32(IH_RB_WPTR);
7489
7490	if (wptr & RB_OVERFLOW) {
7491		wptr &= ~RB_OVERFLOW;
7492		/* When a ring buffer overflow happen start parsing interrupt
7493		 * from the last not overwritten vector (wptr + 16). Hopefully
7494		 * this should allow us to catchup.
7495		 */
7496		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499		tmp = RREG32(IH_RB_CNTL);
7500		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501		WREG32(IH_RB_CNTL, tmp);
7502	}
7503	return (wptr & rdev->ih.ptr_mask);
7504}
7505
7506/*        CIK IV Ring
7507 * Each IV ring entry is 128 bits:
7508 * [7:0]    - interrupt source id
7509 * [31:8]   - reserved
7510 * [59:32]  - interrupt source data
7511 * [63:60]  - reserved
7512 * [71:64]  - RINGID
7513 *            CP:
7514 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518 *            PIPE_ID - ME0 0=3D
7519 *                    - ME1&2 compute dispatcher (4 pipes each)
7520 *            SDMA:
7521 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524 * [79:72]  - VMID
7525 * [95:80]  - PASID
7526 * [127:96] - reserved
7527 */
7528/**
7529 * cik_irq_process - interrupt handler
7530 *
7531 * @rdev: radeon_device pointer
7532 *
 * Interrupt handler (CIK).  Walk the IH ring,
7534 * ack interrupts and schedule work to handle
7535 * interrupt events.
7536 * Returns irq process return code.
7537 */
7538int cik_irq_process(struct radeon_device *rdev)
7539{
7540	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542	u32 wptr;
7543	u32 rptr;
7544	u32 src_id, src_data, ring_id;
7545	u8 me_id, pipe_id, queue_id;
7546	u32 ring_index;
7547	bool queue_hotplug = false;
7548	bool queue_dp = false;
7549	bool queue_reset = false;
7550	u32 addr, status, mc_client;
7551	bool queue_thermal = false;
7552
7553	if (!rdev->ih.enabled || rdev->shutdown)
7554		return IRQ_NONE;
7555
7556	wptr = cik_get_ih_wptr(rdev);
7557
7558restart_ih:
7559	/* is somebody else already processing irqs? */
7560	if (atomic_xchg(&rdev->ih.lock, 1))
7561		return IRQ_NONE;
7562
7563	rptr = rdev->ih.rptr;
7564	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7565
7566	/* Order reading of wptr vs. reading of IH ring data */
7567	rmb();
7568
7569	/* display interrupts */
7570	cik_irq_ack(rdev);
7571
7572	while (rptr != wptr) {
7573		/* wptr/rptr are in bytes! */
7574		ring_index = rptr / 4;
7575
7576		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7577		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7578		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7579
7580		switch (src_id) {
7581		case 1: /* D1 vblank/vline */
7582			switch (src_data) {
7583			case 0: /* D1 vblank */
7584				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7585					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586
7587				if (rdev->irq.crtc_vblank_int[0]) {
7588					drm_handle_vblank(rdev_to_drm(rdev), 0);
7589					rdev->pm.vblank_sync = true;
7590					wake_up(&rdev->irq.vblank_queue);
7591				}
7592				if (atomic_read(&rdev->irq.pflip[0]))
7593					radeon_crtc_handle_vblank(rdev, 0);
7594				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7595				DRM_DEBUG("IH: D1 vblank\n");
7596
7597				break;
7598			case 1: /* D1 vline */
7599				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7600					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7603				DRM_DEBUG("IH: D1 vline\n");
7604
7605				break;
7606			default:
7607				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7608				break;
7609			}
7610			break;
7611		case 2: /* D2 vblank/vline */
7612			switch (src_data) {
7613			case 0: /* D2 vblank */
7614				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7615					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617				if (rdev->irq.crtc_vblank_int[1]) {
7618					drm_handle_vblank(rdev_to_drm(rdev), 1);
7619					rdev->pm.vblank_sync = true;
7620					wake_up(&rdev->irq.vblank_queue);
7621				}
7622				if (atomic_read(&rdev->irq.pflip[1]))
7623					radeon_crtc_handle_vblank(rdev, 1);
7624				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7625				DRM_DEBUG("IH: D2 vblank\n");
7626
7627				break;
7628			case 1: /* D2 vline */
7629				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7630					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7633				DRM_DEBUG("IH: D2 vline\n");
7634
7635				break;
7636			default:
7637				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638				break;
7639			}
7640			break;
7641		case 3: /* D3 vblank/vline */
7642			switch (src_data) {
7643			case 0: /* D3 vblank */
7644				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7645					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647				if (rdev->irq.crtc_vblank_int[2]) {
7648					drm_handle_vblank(rdev_to_drm(rdev), 2);
7649					rdev->pm.vblank_sync = true;
7650					wake_up(&rdev->irq.vblank_queue);
7651				}
7652				if (atomic_read(&rdev->irq.pflip[2]))
7653					radeon_crtc_handle_vblank(rdev, 2);
7654				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7655				DRM_DEBUG("IH: D3 vblank\n");
7656
7657				break;
7658			case 1: /* D3 vline */
7659				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7660					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7663				DRM_DEBUG("IH: D3 vline\n");
7664
7665				break;
7666			default:
7667				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7668				break;
7669			}
7670			break;
7671		case 4: /* D4 vblank/vline */
7672			switch (src_data) {
7673			case 0: /* D4 vblank */
7674				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7675					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677				if (rdev->irq.crtc_vblank_int[3]) {
7678					drm_handle_vblank(rdev_to_drm(rdev), 3);
7679					rdev->pm.vblank_sync = true;
7680					wake_up(&rdev->irq.vblank_queue);
7681				}
7682				if (atomic_read(&rdev->irq.pflip[3]))
7683					radeon_crtc_handle_vblank(rdev, 3);
7684				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7685				DRM_DEBUG("IH: D4 vblank\n");
7686
7687				break;
7688			case 1: /* D4 vline */
7689				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7690					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7693				DRM_DEBUG("IH: D4 vline\n");
7694
7695				break;
7696			default:
7697				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698				break;
7699			}
7700			break;
7701		case 5: /* D5 vblank/vline */
7702			switch (src_data) {
7703			case 0: /* D5 vblank */
7704				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7705					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707				if (rdev->irq.crtc_vblank_int[4]) {
7708					drm_handle_vblank(rdev_to_drm(rdev), 4);
7709					rdev->pm.vblank_sync = true;
7710					wake_up(&rdev->irq.vblank_queue);
7711				}
7712				if (atomic_read(&rdev->irq.pflip[4]))
7713					radeon_crtc_handle_vblank(rdev, 4);
7714				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7715				DRM_DEBUG("IH: D5 vblank\n");
7716
7717				break;
7718			case 1: /* D5 vline */
7719				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7720					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7723				DRM_DEBUG("IH: D5 vline\n");
7724
7725				break;
7726			default:
7727				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7728				break;
7729			}
7730			break;
7731		case 6: /* D6 vblank/vline */
7732			switch (src_data) {
7733			case 0: /* D6 vblank */
7734				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7735					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737				if (rdev->irq.crtc_vblank_int[5]) {
7738					drm_handle_vblank(rdev_to_drm(rdev), 5);
7739					rdev->pm.vblank_sync = true;
7740					wake_up(&rdev->irq.vblank_queue);
7741				}
7742				if (atomic_read(&rdev->irq.pflip[5]))
7743					radeon_crtc_handle_vblank(rdev, 5);
7744				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7745				DRM_DEBUG("IH: D6 vblank\n");
7746
7747				break;
7748			case 1: /* D6 vline */
7749				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7750					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7753				DRM_DEBUG("IH: D6 vline\n");
7754
7755				break;
7756			default:
7757				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7758				break;
7759			}
7760			break;
7761		case 8: /* D1 page flip */
7762		case 10: /* D2 page flip */
7763		case 12: /* D3 page flip */
7764		case 14: /* D4 page flip */
7765		case 16: /* D5 page flip */
7766		case 18: /* D6 page flip */
7767			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7768			if (radeon_use_pflipirq > 0)
7769				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7770			break;
7771		case 42: /* HPD hotplug */
7772			switch (src_data) {
7773			case 0:
7774				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7775					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776
7777				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7778				queue_hotplug = true;
7779				DRM_DEBUG("IH: HPD1\n");
7780
7781				break;
7782			case 1:
7783				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7784					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785
7786				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7787				queue_hotplug = true;
7788				DRM_DEBUG("IH: HPD2\n");
7789
7790				break;
7791			case 2:
7792				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7793					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794
7795				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7796				queue_hotplug = true;
7797				DRM_DEBUG("IH: HPD3\n");
7798
7799				break;
7800			case 3:
7801				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7802					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803
7804				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7805				queue_hotplug = true;
7806				DRM_DEBUG("IH: HPD4\n");
7807
7808				break;
7809			case 4:
7810				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7811					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812
7813				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7814				queue_hotplug = true;
7815				DRM_DEBUG("IH: HPD5\n");
7816
7817				break;
7818			case 5:
7819				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7820					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7823				queue_hotplug = true;
7824				DRM_DEBUG("IH: HPD6\n");
7825
7826				break;
7827			case 6:
7828				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7829					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7832				queue_dp = true;
7833				DRM_DEBUG("IH: HPD_RX 1\n");
7834
7835				break;
7836			case 7:
7837				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7838					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7841				queue_dp = true;
7842				DRM_DEBUG("IH: HPD_RX 2\n");
7843
7844				break;
7845			case 8:
7846				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7847					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7850				queue_dp = true;
7851				DRM_DEBUG("IH: HPD_RX 3\n");
7852
7853				break;
7854			case 9:
7855				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7856					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7859				queue_dp = true;
7860				DRM_DEBUG("IH: HPD_RX 4\n");
7861
7862				break;
7863			case 10:
7864				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7865					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7868				queue_dp = true;
7869				DRM_DEBUG("IH: HPD_RX 5\n");
7870
7871				break;
7872			case 11:
7873				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7874					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7877				queue_dp = true;
7878				DRM_DEBUG("IH: HPD_RX 6\n");
7879
7880				break;
7881			default:
7882				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883				break;
7884			}
7885			break;
7886		case 96:
7887			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7888			WREG32(SRBM_INT_ACK, 0x1);
7889			break;
7890		case 124: /* UVD */
7891			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7892			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7893			break;
7894		case 146:
7895		case 147:
7896			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7897			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7898			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7899			/* reset addr and status */
7900			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7901			if (addr == 0x0 && status == 0x0)
7902				break;
7903			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7904			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7905				addr);
7906			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7907				status);
7908			cik_vm_decode_fault(rdev, status, addr, mc_client);
7909			break;
7910		case 167: /* VCE */
7911			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7912			switch (src_data) {
7913			case 0:
7914				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7915				break;
7916			case 1:
7917				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7918				break;
7919			default:
7920				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7921				break;
7922			}
7923			break;
7924		case 176: /* GFX RB CP_INT */
7925		case 177: /* GFX IB CP_INT */
7926			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927			break;
7928		case 181: /* CP EOP event */
7929			DRM_DEBUG("IH: CP EOP\n");
7930			/* XXX check the bitfield order! */
7931			me_id = (ring_id & 0x60) >> 5;
7932			pipe_id = (ring_id & 0x18) >> 3;
7933			queue_id = (ring_id & 0x7) >> 0;
7934			switch (me_id) {
7935			case 0:
7936				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7937				break;
7938			case 1:
7939			case 2:
7940				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7941					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7942				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7943					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7944				break;
7945			}
7946			break;
7947		case 184: /* CP Privileged reg access */
7948			DRM_ERROR("Illegal register access in command stream\n");
7949			/* XXX check the bitfield order! */
7950			me_id = (ring_id & 0x60) >> 5;
 
 
7951			switch (me_id) {
7952			case 0:
7953				/* This results in a full GPU reset, but all we need to do is soft
7954				 * reset the CP for gfx
7955				 */
7956				queue_reset = true;
7957				break;
7958			case 1:
7959				/* XXX compute */
7960				queue_reset = true;
7961				break;
7962			case 2:
7963				/* XXX compute */
7964				queue_reset = true;
7965				break;
7966			}
7967			break;
7968		case 185: /* CP Privileged inst */
7969			DRM_ERROR("Illegal instruction in command stream\n");
7970			/* XXX check the bitfield order! */
7971			me_id = (ring_id & 0x60) >> 5;
 
 
7972			switch (me_id) {
7973			case 0:
7974				/* This results in a full GPU reset, but all we need to do is soft
7975				 * reset the CP for gfx
7976				 */
7977				queue_reset = true;
7978				break;
7979			case 1:
7980				/* XXX compute */
7981				queue_reset = true;
7982				break;
7983			case 2:
7984				/* XXX compute */
7985				queue_reset = true;
7986				break;
7987			}
7988			break;
7989		case 224: /* SDMA trap event */
7990			/* XXX check the bitfield order! */
7991			me_id = (ring_id & 0x3) >> 0;
7992			queue_id = (ring_id & 0xc) >> 2;
7993			DRM_DEBUG("IH: SDMA trap\n");
7994			switch (me_id) {
7995			case 0:
7996				switch (queue_id) {
7997				case 0:
7998					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7999					break;
8000				case 1:
8001					/* XXX compute */
8002					break;
8003				case 2:
8004					/* XXX compute */
8005					break;
8006				}
8007				break;
8008			case 1:
8009				switch (queue_id) {
8010				case 0:
8011					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8012					break;
8013				case 1:
8014					/* XXX compute */
8015					break;
8016				case 2:
8017					/* XXX compute */
8018					break;
8019				}
8020				break;
8021			}
8022			break;
8023		case 230: /* thermal low to high */
8024			DRM_DEBUG("IH: thermal low to high\n");
8025			rdev->pm.dpm.thermal.high_to_low = false;
8026			queue_thermal = true;
8027			break;
8028		case 231: /* thermal high to low */
8029			DRM_DEBUG("IH: thermal high to low\n");
8030			rdev->pm.dpm.thermal.high_to_low = true;
8031			queue_thermal = true;
8032			break;
8033		case 233: /* GUI IDLE */
8034			DRM_DEBUG("IH: GUI idle\n");
8035			break;
8036		case 241: /* SDMA Privileged inst */
8037		case 247: /* SDMA Privileged inst */
8038			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8039			/* XXX check the bitfield order! */
8040			me_id = (ring_id & 0x3) >> 0;
8041			queue_id = (ring_id & 0xc) >> 2;
8042			switch (me_id) {
8043			case 0:
8044				switch (queue_id) {
8045				case 0:
8046					queue_reset = true;
8047					break;
8048				case 1:
8049					/* XXX compute */
8050					queue_reset = true;
8051					break;
8052				case 2:
8053					/* XXX compute */
8054					queue_reset = true;
8055					break;
8056				}
8057				break;
8058			case 1:
8059				switch (queue_id) {
8060				case 0:
8061					queue_reset = true;
8062					break;
8063				case 1:
8064					/* XXX compute */
8065					queue_reset = true;
8066					break;
8067				case 2:
8068					/* XXX compute */
8069					queue_reset = true;
8070					break;
8071				}
8072				break;
8073			}
8074			break;
8075		default:
8076			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077			break;
8078		}
8079
8080		/* wptr/rptr are in bytes! */
8081		rptr += 16;
8082		rptr &= rdev->ih.ptr_mask;
8083		WREG32(IH_RB_RPTR, rptr);
8084	}
8085	if (queue_dp)
8086		schedule_work(&rdev->dp_work);
8087	if (queue_hotplug)
8088		schedule_delayed_work(&rdev->hotplug_work, 0);
8089	if (queue_reset) {
8090		rdev->needs_reset = true;
8091		wake_up_all(&rdev->fence_queue);
8092	}
8093	if (queue_thermal)
8094		schedule_work(&rdev->pm.dpm.thermal.work);
8095	rdev->ih.rptr = rptr;
8096	atomic_set(&rdev->ih.lock, 0);
8097
8098	/* make sure wptr hasn't changed while processing */
8099	wptr = cik_get_ih_wptr(rdev);
8100	if (wptr != rptr)
8101		goto restart_ih;
8102
8103	return IRQ_HANDLED;
8104}
8105
8106/*
8107 * startup/shutdown callbacks
8108 */
8109static void cik_uvd_init(struct radeon_device *rdev)
8110{
8111	int r;
8112
8113	if (!rdev->has_uvd)
8114		return;
8115
8116	r = radeon_uvd_init(rdev);
8117	if (r) {
8118		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8119		/*
8120		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8121		 * to early fails cik_uvd_start() and thus nothing happens
8122		 * there. So it is pointless to try to go through that code
8123		 * hence why we disable uvd here.
8124		 */
8125		rdev->has_uvd = false;
8126		return;
8127	}
8128	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8129	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8130}
8131
8132static void cik_uvd_start(struct radeon_device *rdev)
8133{
8134	int r;
8135
8136	if (!rdev->has_uvd)
8137		return;
8138
8139	r = radeon_uvd_resume(rdev);
8140	if (r) {
8141		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8142		goto error;
8143	}
8144	r = uvd_v4_2_resume(rdev);
8145	if (r) {
8146		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8147		goto error;
8148	}
8149	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8150	if (r) {
8151		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8152		goto error;
8153	}
8154	return;
8155
8156error:
8157	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8158}
8159
8160static void cik_uvd_resume(struct radeon_device *rdev)
8161{
8162	struct radeon_ring *ring;
8163	int r;
8164
8165	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8166		return;
8167
8168	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8169	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8170	if (r) {
8171		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8172		return;
8173	}
8174	r = uvd_v1_0_init(rdev);
8175	if (r) {
8176		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8177		return;
8178	}
8179}
8180
8181static void cik_vce_init(struct radeon_device *rdev)
8182{
8183	int r;
8184
8185	if (!rdev->has_vce)
8186		return;
8187
8188	r = radeon_vce_init(rdev);
8189	if (r) {
8190		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8191		/*
8192		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8193		 * to early fails cik_vce_start() and thus nothing happens
8194		 * there. So it is pointless to try to go through that code
8195		 * hence why we disable vce here.
8196		 */
8197		rdev->has_vce = false;
8198		return;
8199	}
8200	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8201	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8202	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8203	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8204}
8205
8206static void cik_vce_start(struct radeon_device *rdev)
8207{
8208	int r;
8209
8210	if (!rdev->has_vce)
8211		return;
8212
8213	r = radeon_vce_resume(rdev);
8214	if (r) {
8215		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8216		goto error;
8217	}
8218	r = vce_v2_0_resume(rdev);
8219	if (r) {
8220		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8221		goto error;
8222	}
8223	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8224	if (r) {
8225		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8226		goto error;
8227	}
8228	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8229	if (r) {
8230		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8231		goto error;
8232	}
8233	return;
8234
8235error:
8236	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8237	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8238}
8239
8240static void cik_vce_resume(struct radeon_device *rdev)
8241{
8242	struct radeon_ring *ring;
8243	int r;
8244
8245	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8246		return;
8247
8248	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8249	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250	if (r) {
8251		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252		return;
8253	}
8254	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8255	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8256	if (r) {
8257		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8258		return;
8259	}
8260	r = vce_v1_0_init(rdev);
8261	if (r) {
8262		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8263		return;
8264	}
8265}
8266
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * The bring-up order below matters: VRAM scratch and MC programming
 * come first, then GART, RLC/WB/MEC buffers, fence rings, IRQs, the
 * rings themselves, and finally the IB pool, VM manager and audio.
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;	/* padding packet used when initializing CP/compute rings */
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	/* program the memory controller */
	cik_mc_program(rdev);

	/* dGPUs need the MC microcode loaded here unless dpm already did it */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* Kaveri (spectre) and other IGPs (kalindi) use different
		 * save/restore register lists
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on all five rings:
	 * gfx, two compute (CP1/CP2) and two SDMA engines
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are not fatal: the helpers disable the block
	 * and clear its ring size on error
	 */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with legacy (non-new_fw) firmware pads rings with type-2
	 * packets; everything else uses type-3 NOPs
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* non-fatal: each helper bails out silently if its block is disabled */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8468
8469/**
8470 * cik_resume - resume the asic to a functional state
8471 *
8472 * @rdev: radeon_device pointer
8473 *
8474 * Programs the asic to a functional state (CIK).
8475 * Called at resume.
8476 * Returns 0 for success, error for failure.
8477 */
8478int cik_resume(struct radeon_device *rdev)
8479{
8480	int r;
8481
8482	/* post card */
8483	atom_asic_init(rdev->mode_info.atom_context);
8484
8485	/* init golden registers */
8486	cik_init_golden_registers(rdev);
8487
8488	if (rdev->pm.pm_method == PM_METHOD_DPM)
8489		radeon_pm_resume(rdev);
8490
8491	rdev->accel_working = true;
8492	r = cik_startup(rdev);
8493	if (r) {
8494		DRM_ERROR("cik startup failed on resume\n");
8495		rdev->accel_working = false;
8496		return r;
8497	}
8498
8499	return r;
8500
8501}
8502
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Tears down in roughly the reverse order of cik_startup(): software
 * users of the hw first, then the engines, then IRQ/WB/GART.
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop the command processors and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		radeon_uvd_suspend(rdev);
		uvd_v1_0_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* disable powergating/clockgating before cutting off interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8532
8533/* Plan is to move initialization in that function and use
8534 * helper function so that radeon_device_init pretty much
8535 * do nothing more than calling asic specific function. This
8536 * should also allow to remove a bunch of callback function
8537 * like vram_info.
8538 */
8539/**
8540 * cik_init - asic specific driver and hw init
8541 *
8542 * @rdev: radeon_device pointer
8543 *
8544 * Setup asic specific driver variables and program the hw
8545 * to a functional state (CIK).
8546 * Called at driver startup.
8547 * Returns 0 for success, errors for failure.
8548 */
8549int cik_init(struct radeon_device *rdev)
8550{
8551	struct radeon_ring *ring;
8552	int r;
8553
8554	/* Read BIOS */
8555	if (!radeon_get_bios(rdev)) {
8556		if (ASIC_IS_AVIVO(rdev))
8557			return -EINVAL;
8558	}
8559	/* Must be an ATOMBIOS */
8560	if (!rdev->is_atom_bios) {
8561		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8562		return -EINVAL;
8563	}
8564	r = radeon_atombios_init(rdev);
8565	if (r)
8566		return r;
8567
8568	/* Post card if necessary */
8569	if (!radeon_card_posted(rdev)) {
8570		if (!rdev->bios) {
8571			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8572			return -EINVAL;
8573		}
8574		DRM_INFO("GPU not posted. posting now...\n");
8575		atom_asic_init(rdev->mode_info.atom_context);
8576	}
8577	/* init golden registers */
8578	cik_init_golden_registers(rdev);
8579	/* Initialize scratch registers */
8580	cik_scratch_init(rdev);
8581	/* Initialize surface registers */
8582	radeon_surface_init(rdev);
8583	/* Initialize clocks */
8584	radeon_get_clock_info(rdev_to_drm(rdev));
8585
8586	/* Fence driver */
8587	radeon_fence_driver_init(rdev);
 
 
8588
8589	/* initialize memory controller */
8590	r = cik_mc_init(rdev);
8591	if (r)
8592		return r;
8593	/* Memory manager */
8594	r = radeon_bo_init(rdev);
8595	if (r)
8596		return r;
8597
8598	if (rdev->flags & RADEON_IS_IGP) {
8599		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8600		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8601			r = cik_init_microcode(rdev);
8602			if (r) {
8603				DRM_ERROR("Failed to load firmware!\n");
8604				return r;
8605			}
8606		}
8607	} else {
8608		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8609		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8610		    !rdev->mc_fw) {
8611			r = cik_init_microcode(rdev);
8612			if (r) {
8613				DRM_ERROR("Failed to load firmware!\n");
8614				return r;
8615			}
8616		}
8617	}
8618
8619	/* Initialize power management */
8620	radeon_pm_init(rdev);
8621
8622	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8623	ring->ring_obj = NULL;
8624	r600_ring_init(rdev, ring, 1024 * 1024);
8625
8626	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8627	ring->ring_obj = NULL;
8628	r600_ring_init(rdev, ring, 1024 * 1024);
8629	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8630	if (r)
8631		return r;
8632
8633	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8634	ring->ring_obj = NULL;
8635	r600_ring_init(rdev, ring, 1024 * 1024);
8636	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8637	if (r)
8638		return r;
8639
8640	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8641	ring->ring_obj = NULL;
8642	r600_ring_init(rdev, ring, 256 * 1024);
8643
8644	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8645	ring->ring_obj = NULL;
8646	r600_ring_init(rdev, ring, 256 * 1024);
8647
8648	cik_uvd_init(rdev);
8649	cik_vce_init(rdev);
8650
8651	rdev->ih.ring_obj = NULL;
8652	r600_ih_ring_init(rdev, 64 * 1024);
8653
8654	r = r600_pcie_gart_init(rdev);
8655	if (r)
8656		return r;
8657
8658	rdev->accel_working = true;
8659	r = cik_startup(rdev);
8660	if (r) {
8661		dev_err(rdev->dev, "disabling GPU acceleration\n");
8662		cik_cp_fini(rdev);
8663		cik_sdma_fini(rdev);
8664		cik_irq_fini(rdev);
8665		sumo_rlc_fini(rdev);
8666		cik_mec_fini(rdev);
8667		radeon_wb_fini(rdev);
8668		radeon_ib_pool_fini(rdev);
8669		radeon_vm_manager_fini(rdev);
8670		radeon_irq_kms_fini(rdev);
8671		cik_pcie_gart_fini(rdev);
8672		rdev->accel_working = false;
8673	}
8674
8675	/* Don't start up if the MC ucode is missing.
8676	 * The default clocks and voltages before the MC ucode
8677	 * is loaded are not suffient for advanced operations.
8678	 */
8679	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8680		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8681		return -EINVAL;
8682	}
8683
8684	return 0;
8685}
8686
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* engines first: CP, SDMA, then power/clock gating and IRQs */
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* uvd_v1_0_fini() is safe even if UVD init failed earlier */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* the BIOS copy was kmalloc'ed by radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8722
8723void dce8_program_fmt(struct drm_encoder *encoder)
8724{
8725	struct drm_device *dev = encoder->dev;
8726	struct radeon_device *rdev = dev->dev_private;
8727	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8728	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8729	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8730	int bpc = 0;
8731	u32 tmp = 0;
8732	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8733
8734	if (connector) {
8735		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8736		bpc = radeon_get_monitor_bpc(connector);
8737		dither = radeon_connector->dither;
8738	}
8739
8740	/* LVDS/eDP FMT is set up by atom */
8741	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8742		return;
8743
8744	/* not needed for analog */
8745	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8746	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8747		return;
8748
8749	if (bpc == 0)
8750		return;
8751
8752	switch (bpc) {
8753	case 6:
8754		if (dither == RADEON_FMT_DITHER_ENABLE)
8755			/* XXX sort out optimal dither settings */
8756			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8757				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8758		else
8759			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8760		break;
8761	case 8:
8762		if (dither == RADEON_FMT_DITHER_ENABLE)
8763			/* XXX sort out optimal dither settings */
8764			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8765				FMT_RGB_RANDOM_ENABLE |
8766				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8767		else
8768			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8769		break;
8770	case 10:
8771		if (dither == RADEON_FMT_DITHER_ENABLE)
8772			/* XXX sort out optimal dither settings */
8773			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774				FMT_RGB_RANDOM_ENABLE |
8775				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8776		else
8777			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8778		break;
8779	default:
8780		/* not needed */
8781		break;
8782	}
8783
8784	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8785}
8786
8787/* display watermark setup */
8788/**
8789 * dce8_line_buffer_adjust - Set up the line buffer
8790 *
8791 * @rdev: radeon_device pointer
8792 * @radeon_crtc: the selected display controller
8793 * @mode: the current display mode on the selected display
8794 * controller
8795 *
8796 * Setup up the line buffer allocation for
8797 * the selected display controller (CIK).
8798 * Returns the line buffer size in pixels.
8799 */
8800static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8801				   struct radeon_crtc *radeon_crtc,
8802				   struct drm_display_mode *mode)
8803{
8804	u32 tmp, buffer_alloc, i;
8805	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8806	/*
8807	 * Line Buffer Setup
8808	 * There are 6 line buffers, one for each display controllers.
8809	 * There are 3 partitions per LB. Select the number of partitions
8810	 * to enable based on the display width.  For display widths larger
8811	 * than 4096, you need use to use 2 display controllers and combine
8812	 * them using the stereo blender.
8813	 */
8814	if (radeon_crtc->base.enabled && mode) {
8815		if (mode->crtc_hdisplay < 1920) {
8816			tmp = 1;
8817			buffer_alloc = 2;
8818		} else if (mode->crtc_hdisplay < 2560) {
8819			tmp = 2;
8820			buffer_alloc = 2;
8821		} else if (mode->crtc_hdisplay < 4096) {
8822			tmp = 0;
8823			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824		} else {
8825			DRM_DEBUG_KMS("Mode too big for LB!\n");
8826			tmp = 0;
8827			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8828		}
8829	} else {
8830		tmp = 1;
8831		buffer_alloc = 0;
8832	}
8833
8834	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8835	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8836
8837	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8838	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8839	for (i = 0; i < rdev->usec_timeout; i++) {
8840		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8841		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8842			break;
8843		udelay(1);
8844	}
8845
8846	if (radeon_crtc->base.enabled && mode) {
8847		switch (tmp) {
8848		case 0:
8849		default:
8850			return 4096 * 2;
8851		case 1:
8852			return 1920 * 2;
8853		case 2:
8854			return 2560 * 2;
8855		}
8856	}
8857
8858	/* controller not enabled, so no lb used */
8859	return 0;
8860}
8861
8862/**
8863 * cik_get_number_of_dram_channels - get the number of dram channels
8864 *
8865 * @rdev: radeon_device pointer
8866 *
8867 * Look up the number of video ram channels (CIK).
8868 * Used for display watermark bandwidth calculations
8869 * Returns the number of dram channels
8870 */
8871static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8872{
8873	u32 tmp = RREG32(MC_SHARED_CHMAP);
8874
8875	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8876	case 0:
8877	default:
8878		return 1;
8879	case 1:
8880		return 2;
8881	case 2:
8882		return 4;
8883	case 3:
8884		return 8;
8885	case 4:
8886		return 3;
8887	case 5:
8888		return 6;
8889	case 6:
8890		return 10;
8891	case 7:
8892		return 12;
8893	case 8:
8894		return 16;
8895	}
8896}
8897
/* Input parameters for the DCE8 display watermark calculations. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8913
8914/**
8915 * dce8_dram_bandwidth - get the dram bandwidth
8916 *
8917 * @wm: watermark calculation data
8918 *
8919 * Calculate the raw dram bandwidth (CIK).
8920 * Used for display watermark bandwidth calculations
8921 * Returns the dram bandwidth in MBytes/s
8922 */
8923static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8924{
8925	/* Calculate raw DRAM Bandwidth */
8926	fixed20_12 dram_efficiency; /* 0.7 */
8927	fixed20_12 yclk, dram_channels, bandwidth;
8928	fixed20_12 a;
8929
8930	a.full = dfixed_const(1000);
8931	yclk.full = dfixed_const(wm->yclk);
8932	yclk.full = dfixed_div(yclk, a);
8933	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8934	a.full = dfixed_const(10);
8935	dram_efficiency.full = dfixed_const(7);
8936	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8937	bandwidth.full = dfixed_mul(dram_channels, yclk);
8938	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8939
8940	return dfixed_trunc(bandwidth);
8941}
8942
8943/**
8944 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8945 *
8946 * @wm: watermark calculation data
8947 *
8948 * Calculate the dram bandwidth used for display (CIK).
8949 * Used for display watermark bandwidth calculations
8950 * Returns the dram bandwidth for display in MBytes/s
8951 */
8952static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8953{
8954	/* Calculate DRAM Bandwidth and the part allocated to display. */
8955	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8956	fixed20_12 yclk, dram_channels, bandwidth;
8957	fixed20_12 a;
8958
8959	a.full = dfixed_const(1000);
8960	yclk.full = dfixed_const(wm->yclk);
8961	yclk.full = dfixed_div(yclk, a);
8962	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963	a.full = dfixed_const(10);
8964	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8965	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8966	bandwidth.full = dfixed_mul(dram_channels, yclk);
8967	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8968
8969	return dfixed_trunc(bandwidth);
8970}
8971
8972/**
8973 * dce8_data_return_bandwidth - get the data return bandwidth
8974 *
8975 * @wm: watermark calculation data
8976 *
8977 * Calculate the data return bandwidth used for display (CIK).
8978 * Used for display watermark bandwidth calculations
8979 * Returns the data return bandwidth in MBytes/s
8980 */
8981static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8982{
8983	/* Calculate the display Data return Bandwidth */
8984	fixed20_12 return_efficiency; /* 0.8 */
8985	fixed20_12 sclk, bandwidth;
8986	fixed20_12 a;
8987
8988	a.full = dfixed_const(1000);
8989	sclk.full = dfixed_const(wm->sclk);
8990	sclk.full = dfixed_div(sclk, a);
8991	a.full = dfixed_const(10);
8992	return_efficiency.full = dfixed_const(8);
8993	return_efficiency.full = dfixed_div(return_efficiency, a);
8994	a.full = dfixed_const(32);
8995	bandwidth.full = dfixed_mul(a, sclk);
8996	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8997
8998	return dfixed_trunc(bandwidth);
8999}
9000
9001/**
9002 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9003 *
9004 * @wm: watermark calculation data
9005 *
9006 * Calculate the dmif bandwidth used for display (CIK).
9007 * Used for display watermark bandwidth calculations
9008 * Returns the dmif bandwidth in MBytes/s
9009 */
9010static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9011{
9012	/* Calculate the DMIF Request Bandwidth */
9013	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9014	fixed20_12 disp_clk, bandwidth;
9015	fixed20_12 a, b;
9016
9017	a.full = dfixed_const(1000);
9018	disp_clk.full = dfixed_const(wm->disp_clk);
9019	disp_clk.full = dfixed_div(disp_clk, a);
9020	a.full = dfixed_const(32);
9021	b.full = dfixed_mul(a, disp_clk);
9022
9023	a.full = dfixed_const(10);
9024	disp_clk_request_efficiency.full = dfixed_const(8);
9025	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9026
9027	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9028
9029	return dfixed_trunc(bandwidth);
9030}
9031
9032/**
9033 * dce8_available_bandwidth - get the min available bandwidth
9034 *
9035 * @wm: watermark calculation data
9036 *
9037 * Calculate the min available bandwidth used for display (CIK).
9038 * Used for display watermark bandwidth calculations
9039 * Returns the min available bandwidth in MBytes/s
9040 */
9041static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9042{
9043	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9044	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9045	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9046	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9047
9048	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9049}
9050
9051/**
9052 * dce8_average_bandwidth - get the average available bandwidth
9053 *
9054 * @wm: watermark calculation data
9055 *
9056 * Calculate the average available bandwidth used for display (CIK).
9057 * Used for display watermark bandwidth calculations
9058 * Returns the average available bandwidth in MBytes/s
9059 */
9060static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9061{
9062	/* Calculate the display mode Average Bandwidth
9063	 * DisplayMode should contain the source and destination dimensions,
9064	 * timing, etc.
9065	 */
9066	fixed20_12 bpp;
9067	fixed20_12 line_time;
9068	fixed20_12 src_width;
9069	fixed20_12 bandwidth;
9070	fixed20_12 a;
9071
9072	a.full = dfixed_const(1000);
9073	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9074	line_time.full = dfixed_div(line_time, a);
9075	bpp.full = dfixed_const(wm->bytes_per_pixel);
9076	src_width.full = dfixed_const(wm->src_width);
9077	bandwidth.full = dfixed_mul(src_width, bpp);
9078	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9079	bandwidth.full = dfixed_div(bandwidth, line_time);
9080
9081	return dfixed_trunc(bandwidth);
9082}
9083
9084/**
9085 * dce8_latency_watermark - get the latency watermark
9086 *
9087 * @wm: watermark calculation data
9088 *
9089 * Calculate the latency watermark (CIK).
9090 * Used for display watermark bandwidth calculations
9091 * Returns the latency watermark in ns
9092 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* NOTE(review): these initializers divide by available_bandwidth and
	 * disp_clk before the num_heads == 0 guard below; they assume both
	 * are non-zero whenever this is called -- verify against callers. */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling/interlacing determines how many source lines feed one
	 * destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill rate is capped by the per-head share of the
	 * available bandwidth, the DMIF size, and the display clock */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time (ns) to fill one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* pad the latency if a line cannot be filled within the active time */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9142
9143/**
9144 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9145 * average and available dram bandwidth
9146 *
9147 * @wm: watermark calculation data
9148 *
9149 * Check if the display average bandwidth fits in the display
9150 * dram bandwidth (CIK).
9151 * Used for display watermark bandwidth calculations
9152 * Returns true if the display fits, false if not.
9153 */
9154static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9155{
9156	if (dce8_average_bandwidth(wm) <=
9157	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9158		return true;
9159	else
9160		return false;
9161}
9162
9163/**
9164 * dce8_average_bandwidth_vs_available_bandwidth - check
9165 * average and available bandwidth
9166 *
9167 * @wm: watermark calculation data
9168 *
9169 * Check if the display average bandwidth fits in the display
9170 * available bandwidth (CIK).
9171 * Used for display watermark bandwidth calculations
9172 * Returns true if the display fits, false if not.
9173 */
9174static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9175{
9176	if (dce8_average_bandwidth(wm) <=
9177	    (dce8_available_bandwidth(wm) / wm->num_heads))
9178		return true;
9179	else
9180		return false;
9181}
9182
9183/**
9184 * dce8_check_latency_hiding - check latency hiding
9185 *
9186 * @wm: watermark calculation data
9187 *
9188 * Check latency hiding (CIK).
9189 * Used for display watermark bandwidth calculations
9190 * Returns true if the display fits, false if not.
9191 */
9192static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9193{
9194	u32 lb_partitions = wm->lb_size / wm->src_width;
9195	u32 line_time = wm->active_time + wm->blank_time;
9196	u32 latency_tolerant_lines;
9197	u32 latency_hiding;
9198	fixed20_12 a;
9199
9200	a.full = dfixed_const(1);
9201	if (wm->vsc.full > a.full)
9202		latency_tolerant_lines = 1;
9203	else {
9204		if (lb_partitions <= (wm->vtaps + 1))
9205			latency_tolerant_lines = 1;
9206		else
9207			latency_tolerant_lines = 2;
9208	}
9209
9210	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9211
9212	if (dce8_latency_watermark(wm) <= latency_hiding)
9213		return true;
9214	else
9215		return false;
9216}
9217
9218/**
9219 * dce8_program_watermarks - program display watermarks
9220 *
9221 * @rdev: radeon_device pointer
9222 * @radeon_crtc: the selected display controller
9223 * @lb_size: line buffer size
9224 * @num_heads: number of display controllers in use
9225 *
9226 * Calculate and program the display watermarks for the
9227 * selected display controller (CIK).
9228 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	/* only compute watermarks for an enabled crtc with at least one
	 * active head; otherwise the zero defaults above are programmed */
	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* active/total line time in ns */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		/* LATENCY_HIGH_WATERMARK field limit */
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* DPM clocks are in 10 kHz units, convert to kHz */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9356
9357/**
9358 * dce8_bandwidth_update - program display watermarks
9359 *
9360 * @rdev: radeon_device pointer
9361 *
9362 * Calculate and program the display watermarks and line
9363 * buffer allocation (CIK).
9364 */
9365void dce8_bandwidth_update(struct radeon_device *rdev)
9366{
9367	struct drm_display_mode *mode = NULL;
9368	u32 num_heads = 0, lb_size;
9369	int i;
9370
9371	if (!rdev->mode_info.mode_config_initialized)
9372		return;
9373
9374	radeon_update_display_priority(rdev);
9375
9376	for (i = 0; i < rdev->num_crtc; i++) {
9377		if (rdev->mode_info.crtcs[i]->base.enabled)
9378			num_heads++;
9379	}
9380	for (i = 0; i < rdev->num_crtc; i++) {
9381		mode = &rdev->mode_info.crtcs[i]->base.mode;
9382		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9383		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9384	}
9385}
9386
9387/**
9388 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9389 *
9390 * @rdev: radeon_device pointer
9391 *
9392 * Fetches a GPU clock counter snapshot (SI).
9393 * Returns the 64 bit clock counter snapshot.
9394 */
9395uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9396{
9397	uint64_t clock;
9398
9399	mutex_lock(&rdev->gpu_clock_mutex);
9400	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9401	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9402		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9403	mutex_unlock(&rdev->gpu_clock_mutex);
9404	return clock;
9405}
9406
9407static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9408			     u32 cntl_reg, u32 status_reg)
9409{
9410	int r, i;
9411	struct atom_clock_dividers dividers;
9412	uint32_t tmp;
9413
9414	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9415					   clock, false, &dividers);
9416	if (r)
9417		return r;
9418
9419	tmp = RREG32_SMC(cntl_reg);
9420	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9421	tmp |= dividers.post_divider;
9422	WREG32_SMC(cntl_reg, tmp);
9423
9424	for (i = 0; i < 100; i++) {
9425		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9426			break;
9427		mdelay(10);
9428	}
9429	if (i == 100)
9430		return -ETIMEDOUT;
9431
9432	return 0;
9433}
9434
9435int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9436{
9437	int r = 0;
9438
9439	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9440	if (r)
9441		return r;
9442
9443	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9444	return r;
9445}
9446
9447int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9448{
9449	int r, i;
9450	struct atom_clock_dividers dividers;
9451	u32 tmp;
9452
9453	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9454					   ecclk, false, &dividers);
9455	if (r)
9456		return r;
9457
9458	for (i = 0; i < 100; i++) {
9459		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9460			break;
9461		mdelay(10);
9462	}
9463	if (i == 100)
9464		return -ETIMEDOUT;
9465
9466	tmp = RREG32_SMC(CG_ECLK_CNTL);
9467	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9468	tmp |= dividers.post_divider;
9469	WREG32_SMC(CG_ECLK_CNTL, tmp);
9470
9471	for (i = 0; i < 100; i++) {
9472		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9473			break;
9474		mdelay(10);
9475	}
9476	if (i == 100)
9477		return -ETIMEDOUT;
9478
9479	return 0;
9480}
9481
/* Attempt to bring the PCIe link up to gen2/gen3 speed.
 * Reads the upstream bridge's speed capability, optionally re-runs link
 * equalization for gen3, then requests a speed change through the
 * PCIE_LC_SPEED_CNTL port register and the LNKCTL2 target link speed field.
 * No-op for IGPs, non-PCIE parts, root-bus devices, or when disabled via
 * the radeon.pcie_gen2 module parameter.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	/* nothing to do unless the bridge supports at least gen2 */
	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends need PCIe capability structures for the config writes below */
	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
			pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the full detected link width if we
			 * are currently running narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* retry equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot link control state on both ends so
				 * it can be restored after the redo */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl */
				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
								   PCI_EXP_LNKCTL_HAWD,
								   bridge_cfg &
								   PCI_EXP_LNKCTL_HAWD);
				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
								   PCI_EXP_LNKCTL_HAWD,
								   gpu_cfg &
								   PCI_EXP_LNKCTL_HAWD);

				/* linkctl2 */
				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
								   PCI_EXP_LNKCTL2_ENTER_COMP |
								   PCI_EXP_LNKCTL2_TX_MARGIN,
								   bridge_cfg2 |
								   (PCI_EXP_LNKCTL2_ENTER_COMP |
								    PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
								   PCI_EXP_LNKCTL2_ENTER_COMP |
								   PCI_EXP_LNKCTL2_TX_MARGIN,
								   gpu_cfg2 |
								   (PCI_EXP_LNKCTL2_ENTER_COMP |
								    PCI_EXP_LNKCTL2_TX_MARGIN));

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* set the GPU's target link speed in LNKCTL2 */
	tmp16 = 0;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
					   PCI_EXP_LNKCTL2_TLS, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait (bounded) for the hardware to acknowledge the speed change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9641
/*
 * cik_program_aspm - configure PCIE ASPM (Active State Power Management)
 *
 * @rdev: radeon_device pointer
 *
 * Programs the link power-saving states (L0s/L1 inactivity timers, PLL
 * powerdown in L1, CLKREQ#-based clock gating) on the PCIE port.  Each
 * register is only written back when the computed value differs from the
 * current contents.  Does nothing for IGPs, non-PCIE parts, or when ASPM
 * is disabled via the radeon_aspm module parameter.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all features enabled by default */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value (per the LC_XMIT_N_FTS fields) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Build the L0s/L1 inactivity settings.  PMI-to-L1 is disabled by
	 * default and re-enabled below only when L1 stays enabled.
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF/TXS2
			 * states on both PIF instances */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# is usable only when the upstream bridge
			 * advertises clock power management (CLKPM) in its
			 * link capabilities */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* re-route SMC-side clock selects (see the
				 * *_CLK_SEL fields below) while the BIF
				 * refclk may be gated */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit only the L0s/PMI settings built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep (LS) in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the link N_FTS field reads back saturated and both
		 * REVERSE flags are set, clear the L0s inactivity timer
		 * again */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
v5.4
   1/*
   2 * Copyright 2012 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24
  25#include <linux/firmware.h>
 
 
  26#include <linux/slab.h>
  27#include <linux/module.h>
  28
  29#include <drm/drm_pci.h>
  30#include <drm/drm_vblank.h>
  31
  32#include "atom.h"
 
  33#include "cik_blit_shaders.h"
 
  34#include "cikd.h"
  35#include "clearstate_ci.h"
 
  36#include "radeon.h"
  37#include "radeon_asic.h"
  38#include "radeon_audio.h"
  39#include "radeon_ucode.h"
 
 
  40
  41#define SH_MEM_CONFIG_GFX_DEFAULT \
  42	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
  43
  44MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
  45MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
  46MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
  47MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
  48MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
  49MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
  50MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
  51MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
  52MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
  53
  54MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
  55MODULE_FIRMWARE("radeon/bonaire_me.bin");
  56MODULE_FIRMWARE("radeon/bonaire_ce.bin");
  57MODULE_FIRMWARE("radeon/bonaire_mec.bin");
  58MODULE_FIRMWARE("radeon/bonaire_mc.bin");
  59MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
  60MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
  61MODULE_FIRMWARE("radeon/bonaire_smc.bin");
  62MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
  63
  64MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
  65MODULE_FIRMWARE("radeon/HAWAII_me.bin");
  66MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
  67MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
  68MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
  69MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
  70MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
  71MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
  72MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
  73
  74MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
  75MODULE_FIRMWARE("radeon/hawaii_me.bin");
  76MODULE_FIRMWARE("radeon/hawaii_ce.bin");
  77MODULE_FIRMWARE("radeon/hawaii_mec.bin");
  78MODULE_FIRMWARE("radeon/hawaii_mc.bin");
  79MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
  80MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
  81MODULE_FIRMWARE("radeon/hawaii_smc.bin");
  82MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
  83
  84MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
  85MODULE_FIRMWARE("radeon/KAVERI_me.bin");
  86MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
  87MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
  88MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
  89MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
  90
  91MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
  92MODULE_FIRMWARE("radeon/kaveri_me.bin");
  93MODULE_FIRMWARE("radeon/kaveri_ce.bin");
  94MODULE_FIRMWARE("radeon/kaveri_mec.bin");
  95MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
  96MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
  97MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
  98
  99MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
 100MODULE_FIRMWARE("radeon/KABINI_me.bin");
 101MODULE_FIRMWARE("radeon/KABINI_ce.bin");
 102MODULE_FIRMWARE("radeon/KABINI_mec.bin");
 103MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
 104MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
 105
 106MODULE_FIRMWARE("radeon/kabini_pfp.bin");
 107MODULE_FIRMWARE("radeon/kabini_me.bin");
 108MODULE_FIRMWARE("radeon/kabini_ce.bin");
 109MODULE_FIRMWARE("radeon/kabini_mec.bin");
 110MODULE_FIRMWARE("radeon/kabini_rlc.bin");
 111MODULE_FIRMWARE("radeon/kabini_sdma.bin");
 112
 113MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
 114MODULE_FIRMWARE("radeon/MULLINS_me.bin");
 115MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
 116MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
 117MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
 118MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
 119
 120MODULE_FIRMWARE("radeon/mullins_pfp.bin");
 121MODULE_FIRMWARE("radeon/mullins_me.bin");
 122MODULE_FIRMWARE("radeon/mullins_ce.bin");
 123MODULE_FIRMWARE("radeon/mullins_mec.bin");
 124MODULE_FIRMWARE("radeon/mullins_rlc.bin");
 125MODULE_FIRMWARE("radeon/mullins_sdma.bin");
 126
 127extern int r600_ih_ring_alloc(struct radeon_device *rdev);
 128extern void r600_ih_ring_fini(struct radeon_device *rdev);
 129extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 130extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
 131extern bool evergreen_is_display_hung(struct radeon_device *rdev);
 132extern void sumo_rlc_fini(struct radeon_device *rdev);
 133extern int sumo_rlc_init(struct radeon_device *rdev);
 134extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 135extern void si_rlc_reset(struct radeon_device *rdev);
 136extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
 137static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 138extern int cik_sdma_resume(struct radeon_device *rdev);
 139extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 140extern void cik_sdma_fini(struct radeon_device *rdev);
 141extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
 142static void cik_rlc_stop(struct radeon_device *rdev);
 143static void cik_pcie_gen3_enable(struct radeon_device *rdev);
 144static void cik_program_aspm(struct radeon_device *rdev);
 145static void cik_init_pg(struct radeon_device *rdev);
 146static void cik_init_cg(struct radeon_device *rdev);
 147static void cik_fini_pg(struct radeon_device *rdev);
 148static void cik_fini_cg(struct radeon_device *rdev);
 149static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
 150					  bool enable);
 151
 152/**
 153 * cik_get_allowed_info_register - fetch the register for the info ioctl
 154 *
 155 * @rdev: radeon_device pointer
 156 * @reg: register offset in bytes
 157 * @val: register value
 158 *
 159 * Returns 0 for success or -EINVAL for an invalid register
 160 *
 161 */
 162int cik_get_allowed_info_register(struct radeon_device *rdev,
 163				  u32 reg, u32 *val)
 164{
 165	switch (reg) {
 166	case GRBM_STATUS:
 167	case GRBM_STATUS2:
 168	case GRBM_STATUS_SE0:
 169	case GRBM_STATUS_SE1:
 170	case GRBM_STATUS_SE2:
 171	case GRBM_STATUS_SE3:
 172	case SRBM_STATUS:
 173	case SRBM_STATUS2:
 174	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
 175	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
 176	case UVD_STATUS:
 177	/* TODO VCE */
 178		*val = RREG32(reg);
 179		return 0;
 180	default:
 181		return -EINVAL;
 182	}
 183}
 184
 185/*
 186 * Indirect registers accessor
 187 */
/*
 * cik_didt_rreg - read a DIDT register through the index/data pair
 *
 * @rdev: radeon_device pointer
 * @reg: DIDT register offset
 *
 * Holds didt_idx_lock (irqsave) so no other context can change the
 * index register between the index write and the data read.
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}
 199
/*
 * cik_didt_wreg - write a DIDT register through the index/data pair
 *
 * @rdev: radeon_device pointer
 * @reg: DIDT register offset
 * @v: value to write
 *
 * Holds didt_idx_lock (irqsave) so the index/data sequence is atomic
 * with respect to other DIDT accessors.
 */
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
 209
 210/* get temperature in millidegrees */
 211int ci_get_temp(struct radeon_device *rdev)
 212{
 213	u32 temp;
 214	int actual_temp = 0;
 215
 216	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
 217		CTF_TEMP_SHIFT;
 218
 219	if (temp & 0x200)
 220		actual_temp = 255;
 221	else
 222		actual_temp = temp & 0x1ff;
 223
 224	actual_temp = actual_temp * 1000;
 225
 226	return actual_temp;
 227}
 228
 229/* get temperature in millidegrees */
 230int kv_get_temp(struct radeon_device *rdev)
 231{
 232	u32 temp;
 233	int actual_temp = 0;
 234
 235	temp = RREG32_SMC(0xC0300E0C);
 236
 237	if (temp)
 238		actual_temp = (temp / 8) - 49;
 239	else
 240		actual_temp = 0;
 241
 242	actual_temp = actual_temp * 1000;
 243
 244	return actual_temp;
 245}
 246
 247/*
 248 * Indirect registers accessor
 249 */
/*
 * cik_pciep_rreg - read a PCIE port register through the index/data pair
 *
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 *
 * pciep_idx_lock serializes users of the shared index register; the
 * dummy readback of PCIE_INDEX posts the index write before the data
 * register is read.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
 262
/*
 * cik_pciep_wreg - write a PCIE port register through the index/data pair
 *
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 * @v: value to write
 *
 * pciep_idx_lock serializes users of the shared index register; the
 * dummy readbacks post each write before the next access.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* post the data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
 274
/* RLC save/restore register list for "spectre" parts.
 * Most entries come in pairs: (instance/broadcast select << 16) |
 * (register dword offset), followed by a 0x00000000 placeholder slot.
 * The bare count words (0x3, 0x5) separate list segments.
 * NOTE(review): this raw layout is consumed by the RLC microcode --
 * confirm against the RLC programming documentation before editing.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* segment marker */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* final segment: five registers, no placeholder slots */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
 721
 722static const u32 kalindi_rlc_save_restore_register_list[] =
 723{
 724	(0x0e00 << 16) | (0xc12c >> 2),
 725	0x00000000,
 726	(0x0e00 << 16) | (0xc140 >> 2),
 727	0x00000000,
 728	(0x0e00 << 16) | (0xc150 >> 2),
 729	0x00000000,
 730	(0x0e00 << 16) | (0xc15c >> 2),
 731	0x00000000,
 732	(0x0e00 << 16) | (0xc168 >> 2),
 733	0x00000000,
 734	(0x0e00 << 16) | (0xc170 >> 2),
 735	0x00000000,
 736	(0x0e00 << 16) | (0xc204 >> 2),
 737	0x00000000,
 738	(0x0e00 << 16) | (0xc2b4 >> 2),
 739	0x00000000,
 740	(0x0e00 << 16) | (0xc2b8 >> 2),
 741	0x00000000,
 742	(0x0e00 << 16) | (0xc2bc >> 2),
 743	0x00000000,
 744	(0x0e00 << 16) | (0xc2c0 >> 2),
 745	0x00000000,
 746	(0x0e00 << 16) | (0x8228 >> 2),
 747	0x00000000,
 748	(0x0e00 << 16) | (0x829c >> 2),
 749	0x00000000,
 750	(0x0e00 << 16) | (0x869c >> 2),
 751	0x00000000,
 752	(0x0600 << 16) | (0x98f4 >> 2),
 753	0x00000000,
 754	(0x0e00 << 16) | (0x98f8 >> 2),
 755	0x00000000,
 756	(0x0e00 << 16) | (0x9900 >> 2),
 757	0x00000000,
 758	(0x0e00 << 16) | (0xc260 >> 2),
 759	0x00000000,
 760	(0x0e00 << 16) | (0x90e8 >> 2),
 761	0x00000000,
 762	(0x0e00 << 16) | (0x3c000 >> 2),
 763	0x00000000,
 764	(0x0e00 << 16) | (0x3c00c >> 2),
 765	0x00000000,
 766	(0x0e00 << 16) | (0x8c1c >> 2),
 767	0x00000000,
 768	(0x0e00 << 16) | (0x9700 >> 2),
 769	0x00000000,
 770	(0x0e00 << 16) | (0xcd20 >> 2),
 771	0x00000000,
 772	(0x4e00 << 16) | (0xcd20 >> 2),
 773	0x00000000,
 774	(0x5e00 << 16) | (0xcd20 >> 2),
 775	0x00000000,
 776	(0x6e00 << 16) | (0xcd20 >> 2),
 777	0x00000000,
 778	(0x7e00 << 16) | (0xcd20 >> 2),
 779	0x00000000,
 780	(0x0e00 << 16) | (0x89bc >> 2),
 781	0x00000000,
 782	(0x0e00 << 16) | (0x8900 >> 2),
 783	0x00000000,
 784	0x3,
 785	(0x0e00 << 16) | (0xc130 >> 2),
 786	0x00000000,
 787	(0x0e00 << 16) | (0xc134 >> 2),
 788	0x00000000,
 789	(0x0e00 << 16) | (0xc1fc >> 2),
 790	0x00000000,
 791	(0x0e00 << 16) | (0xc208 >> 2),
 792	0x00000000,
 793	(0x0e00 << 16) | (0xc264 >> 2),
 794	0x00000000,
 795	(0x0e00 << 16) | (0xc268 >> 2),
 796	0x00000000,
 797	(0x0e00 << 16) | (0xc26c >> 2),
 798	0x00000000,
 799	(0x0e00 << 16) | (0xc270 >> 2),
 800	0x00000000,
 801	(0x0e00 << 16) | (0xc274 >> 2),
 802	0x00000000,
 803	(0x0e00 << 16) | (0xc28c >> 2),
 804	0x00000000,
 805	(0x0e00 << 16) | (0xc290 >> 2),
 806	0x00000000,
 807	(0x0e00 << 16) | (0xc294 >> 2),
 808	0x00000000,
 809	(0x0e00 << 16) | (0xc298 >> 2),
 810	0x00000000,
 811	(0x0e00 << 16) | (0xc2a0 >> 2),
 812	0x00000000,
 813	(0x0e00 << 16) | (0xc2a4 >> 2),
 814	0x00000000,
 815	(0x0e00 << 16) | (0xc2a8 >> 2),
 816	0x00000000,
 817	(0x0e00 << 16) | (0xc2ac >> 2),
 818	0x00000000,
 819	(0x0e00 << 16) | (0x301d0 >> 2),
 820	0x00000000,
 821	(0x0e00 << 16) | (0x30238 >> 2),
 822	0x00000000,
 823	(0x0e00 << 16) | (0x30250 >> 2),
 824	0x00000000,
 825	(0x0e00 << 16) | (0x30254 >> 2),
 826	0x00000000,
 827	(0x0e00 << 16) | (0x30258 >> 2),
 828	0x00000000,
 829	(0x0e00 << 16) | (0x3025c >> 2),
 830	0x00000000,
 831	(0x4e00 << 16) | (0xc900 >> 2),
 832	0x00000000,
 833	(0x5e00 << 16) | (0xc900 >> 2),
 834	0x00000000,
 835	(0x6e00 << 16) | (0xc900 >> 2),
 836	0x00000000,
 837	(0x7e00 << 16) | (0xc900 >> 2),
 838	0x00000000,
 839	(0x4e00 << 16) | (0xc904 >> 2),
 840	0x00000000,
 841	(0x5e00 << 16) | (0xc904 >> 2),
 842	0x00000000,
 843	(0x6e00 << 16) | (0xc904 >> 2),
 844	0x00000000,
 845	(0x7e00 << 16) | (0xc904 >> 2),
 846	0x00000000,
 847	(0x4e00 << 16) | (0xc908 >> 2),
 848	0x00000000,
 849	(0x5e00 << 16) | (0xc908 >> 2),
 850	0x00000000,
 851	(0x6e00 << 16) | (0xc908 >> 2),
 852	0x00000000,
 853	(0x7e00 << 16) | (0xc908 >> 2),
 854	0x00000000,
 855	(0x4e00 << 16) | (0xc90c >> 2),
 856	0x00000000,
 857	(0x5e00 << 16) | (0xc90c >> 2),
 858	0x00000000,
 859	(0x6e00 << 16) | (0xc90c >> 2),
 860	0x00000000,
 861	(0x7e00 << 16) | (0xc90c >> 2),
 862	0x00000000,
 863	(0x4e00 << 16) | (0xc910 >> 2),
 864	0x00000000,
 865	(0x5e00 << 16) | (0xc910 >> 2),
 866	0x00000000,
 867	(0x6e00 << 16) | (0xc910 >> 2),
 868	0x00000000,
 869	(0x7e00 << 16) | (0xc910 >> 2),
 870	0x00000000,
 871	(0x0e00 << 16) | (0xc99c >> 2),
 872	0x00000000,
 873	(0x0e00 << 16) | (0x9834 >> 2),
 874	0x00000000,
 875	(0x0000 << 16) | (0x30f00 >> 2),
 876	0x00000000,
 877	(0x0000 << 16) | (0x30f04 >> 2),
 878	0x00000000,
 879	(0x0000 << 16) | (0x30f08 >> 2),
 880	0x00000000,
 881	(0x0000 << 16) | (0x30f0c >> 2),
 882	0x00000000,
 883	(0x0600 << 16) | (0x9b7c >> 2),
 884	0x00000000,
 885	(0x0e00 << 16) | (0x8a14 >> 2),
 886	0x00000000,
 887	(0x0e00 << 16) | (0x8a18 >> 2),
 888	0x00000000,
 889	(0x0600 << 16) | (0x30a00 >> 2),
 890	0x00000000,
 891	(0x0e00 << 16) | (0x8bf0 >> 2),
 892	0x00000000,
 893	(0x0e00 << 16) | (0x8bcc >> 2),
 894	0x00000000,
 895	(0x0e00 << 16) | (0x8b24 >> 2),
 896	0x00000000,
 897	(0x0e00 << 16) | (0x30a04 >> 2),
 898	0x00000000,
 899	(0x0600 << 16) | (0x30a10 >> 2),
 900	0x00000000,
 901	(0x0600 << 16) | (0x30a14 >> 2),
 902	0x00000000,
 903	(0x0600 << 16) | (0x30a18 >> 2),
 904	0x00000000,
 905	(0x0600 << 16) | (0x30a2c >> 2),
 906	0x00000000,
 907	(0x0e00 << 16) | (0xc700 >> 2),
 908	0x00000000,
 909	(0x0e00 << 16) | (0xc704 >> 2),
 910	0x00000000,
 911	(0x0e00 << 16) | (0xc708 >> 2),
 912	0x00000000,
 913	(0x0e00 << 16) | (0xc768 >> 2),
 914	0x00000000,
 915	(0x0400 << 16) | (0xc770 >> 2),
 916	0x00000000,
 917	(0x0400 << 16) | (0xc774 >> 2),
 918	0x00000000,
 919	(0x0400 << 16) | (0xc798 >> 2),
 920	0x00000000,
 921	(0x0400 << 16) | (0xc79c >> 2),
 922	0x00000000,
 923	(0x0e00 << 16) | (0x9100 >> 2),
 924	0x00000000,
 925	(0x0e00 << 16) | (0x3c010 >> 2),
 926	0x00000000,
 927	(0x0e00 << 16) | (0x8c00 >> 2),
 928	0x00000000,
 929	(0x0e00 << 16) | (0x8c04 >> 2),
 930	0x00000000,
 931	(0x0e00 << 16) | (0x8c20 >> 2),
 932	0x00000000,
 933	(0x0e00 << 16) | (0x8c38 >> 2),
 934	0x00000000,
 935	(0x0e00 << 16) | (0x8c3c >> 2),
 936	0x00000000,
 937	(0x0e00 << 16) | (0xae00 >> 2),
 938	0x00000000,
 939	(0x0e00 << 16) | (0x9604 >> 2),
 940	0x00000000,
 941	(0x0e00 << 16) | (0xac08 >> 2),
 942	0x00000000,
 943	(0x0e00 << 16) | (0xac0c >> 2),
 944	0x00000000,
 945	(0x0e00 << 16) | (0xac10 >> 2),
 946	0x00000000,
 947	(0x0e00 << 16) | (0xac14 >> 2),
 948	0x00000000,
 949	(0x0e00 << 16) | (0xac58 >> 2),
 950	0x00000000,
 951	(0x0e00 << 16) | (0xac68 >> 2),
 952	0x00000000,
 953	(0x0e00 << 16) | (0xac6c >> 2),
 954	0x00000000,
 955	(0x0e00 << 16) | (0xac70 >> 2),
 956	0x00000000,
 957	(0x0e00 << 16) | (0xac74 >> 2),
 958	0x00000000,
 959	(0x0e00 << 16) | (0xac78 >> 2),
 960	0x00000000,
 961	(0x0e00 << 16) | (0xac7c >> 2),
 962	0x00000000,
 963	(0x0e00 << 16) | (0xac80 >> 2),
 964	0x00000000,
 965	(0x0e00 << 16) | (0xac84 >> 2),
 966	0x00000000,
 967	(0x0e00 << 16) | (0xac88 >> 2),
 968	0x00000000,
 969	(0x0e00 << 16) | (0xac8c >> 2),
 970	0x00000000,
 971	(0x0e00 << 16) | (0x970c >> 2),
 972	0x00000000,
 973	(0x0e00 << 16) | (0x9714 >> 2),
 974	0x00000000,
 975	(0x0e00 << 16) | (0x9718 >> 2),
 976	0x00000000,
 977	(0x0e00 << 16) | (0x971c >> 2),
 978	0x00000000,
 979	(0x0e00 << 16) | (0x31068 >> 2),
 980	0x00000000,
 981	(0x4e00 << 16) | (0x31068 >> 2),
 982	0x00000000,
 983	(0x5e00 << 16) | (0x31068 >> 2),
 984	0x00000000,
 985	(0x6e00 << 16) | (0x31068 >> 2),
 986	0x00000000,
 987	(0x7e00 << 16) | (0x31068 >> 2),
 988	0x00000000,
 989	(0x0e00 << 16) | (0xcd10 >> 2),
 990	0x00000000,
 991	(0x0e00 << 16) | (0xcd14 >> 2),
 992	0x00000000,
 993	(0x0e00 << 16) | (0x88b0 >> 2),
 994	0x00000000,
 995	(0x0e00 << 16) | (0x88b4 >> 2),
 996	0x00000000,
 997	(0x0e00 << 16) | (0x88b8 >> 2),
 998	0x00000000,
 999	(0x0e00 << 16) | (0x88bc >> 2),
1000	0x00000000,
1001	(0x0400 << 16) | (0x89c0 >> 2),
1002	0x00000000,
1003	(0x0e00 << 16) | (0x88c4 >> 2),
1004	0x00000000,
1005	(0x0e00 << 16) | (0x88c8 >> 2),
1006	0x00000000,
1007	(0x0e00 << 16) | (0x88d0 >> 2),
1008	0x00000000,
1009	(0x0e00 << 16) | (0x88d4 >> 2),
1010	0x00000000,
1011	(0x0e00 << 16) | (0x88d8 >> 2),
1012	0x00000000,
1013	(0x0e00 << 16) | (0x8980 >> 2),
1014	0x00000000,
1015	(0x0e00 << 16) | (0x30938 >> 2),
1016	0x00000000,
1017	(0x0e00 << 16) | (0x3093c >> 2),
1018	0x00000000,
1019	(0x0e00 << 16) | (0x30940 >> 2),
1020	0x00000000,
1021	(0x0e00 << 16) | (0x89a0 >> 2),
1022	0x00000000,
1023	(0x0e00 << 16) | (0x30900 >> 2),
1024	0x00000000,
1025	(0x0e00 << 16) | (0x30904 >> 2),
1026	0x00000000,
1027	(0x0e00 << 16) | (0x89b4 >> 2),
1028	0x00000000,
1029	(0x0e00 << 16) | (0x3e1fc >> 2),
1030	0x00000000,
1031	(0x0e00 << 16) | (0x3c210 >> 2),
1032	0x00000000,
1033	(0x0e00 << 16) | (0x3c214 >> 2),
1034	0x00000000,
1035	(0x0e00 << 16) | (0x3c218 >> 2),
1036	0x00000000,
1037	(0x0e00 << 16) | (0x8904 >> 2),
1038	0x00000000,
1039	0x5,
1040	(0x0e00 << 16) | (0x8c28 >> 2),
1041	(0x0e00 << 16) | (0x8c2c >> 2),
1042	(0x0e00 << 16) | (0x8c30 >> 2),
1043	(0x0e00 << 16) | (0x8c34 >> 2),
1044	(0x0e00 << 16) | (0x9600 >> 2),
1045};
1046
/* Bonaire SPM golden settings; apparently {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence() — confirm layout there.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1051
/* Bonaire common golden settings; apparently {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1059
/* Bonaire per-register golden settings; apparently {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1104
/* Bonaire MGCG/CGCG (clock gating) init sequence; apparently
 * {offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1190
/* Spectre (Kaveri) SPM golden settings; apparently {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1195
/* Spectre (Kaveri) common golden settings; apparently {offset, and-mask,
 * or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1203
/* Spectre (Kaveri) per-register golden settings; apparently {offset, and-mask,
 * or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1232
/* Spectre (Kaveri) MGCG/CGCG (clock gating) init sequence; apparently
 * {offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1323
/* Kalindi (Kabini/Mullins) SPM golden settings; apparently {offset, and-mask,
 * or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1328
/* Kalindi (Kabini/Mullins) common golden settings; apparently {offset,
 * and-mask, or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1336
/* Kalindi (Kabini) per-register golden settings; apparently {offset, and-mask,
 * or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1370
/* Kalindi (Kabini/Mullins) MGCG/CGCG (clock gating) init sequence; apparently
 * {offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1429
/* Hawaii SPM golden settings; apparently {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1434
/* Hawaii common golden settings; apparently {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1443
/* Hawaii per-register golden settings; apparently {offset, and-mask, or-value}
 * triples consumed by radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1483
/* Hawaii MGCG/CGCG (clock gating) init sequence; apparently
 * {offset, and-mask, or-value} triples consumed by
 * radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1594
/* Godavari (Mullins) per-register golden settings; apparently {offset,
 * and-mask, or-value} triples consumed by radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 breaks the pattern of the sibling tables, which
	 * use 0x9834 with this mask/value — looks like a possible typo, but do
	 * not change hardware tables without confirming against vendor data.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1630
1631
1632static void cik_init_golden_registers(struct radeon_device *rdev)
1633{
1634	switch (rdev->family) {
1635	case CHIP_BONAIRE:
1636		radeon_program_register_sequence(rdev,
1637						 bonaire_mgcg_cgcg_init,
1638						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1639		radeon_program_register_sequence(rdev,
1640						 bonaire_golden_registers,
1641						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1642		radeon_program_register_sequence(rdev,
1643						 bonaire_golden_common_registers,
1644						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1645		radeon_program_register_sequence(rdev,
1646						 bonaire_golden_spm_registers,
1647						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1648		break;
1649	case CHIP_KABINI:
1650		radeon_program_register_sequence(rdev,
1651						 kalindi_mgcg_cgcg_init,
1652						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1653		radeon_program_register_sequence(rdev,
1654						 kalindi_golden_registers,
1655						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1656		radeon_program_register_sequence(rdev,
1657						 kalindi_golden_common_registers,
1658						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1659		radeon_program_register_sequence(rdev,
1660						 kalindi_golden_spm_registers,
1661						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1662		break;
1663	case CHIP_MULLINS:
1664		radeon_program_register_sequence(rdev,
1665						 kalindi_mgcg_cgcg_init,
1666						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1667		radeon_program_register_sequence(rdev,
1668						 godavari_golden_registers,
1669						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1670		radeon_program_register_sequence(rdev,
1671						 kalindi_golden_common_registers,
1672						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1673		radeon_program_register_sequence(rdev,
1674						 kalindi_golden_spm_registers,
1675						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1676		break;
1677	case CHIP_KAVERI:
1678		radeon_program_register_sequence(rdev,
1679						 spectre_mgcg_cgcg_init,
1680						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1681		radeon_program_register_sequence(rdev,
1682						 spectre_golden_registers,
1683						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1684		radeon_program_register_sequence(rdev,
1685						 spectre_golden_common_registers,
1686						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1687		radeon_program_register_sequence(rdev,
1688						 spectre_golden_spm_registers,
1689						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1690		break;
1691	case CHIP_HAWAII:
1692		radeon_program_register_sequence(rdev,
1693						 hawaii_mgcg_cgcg_init,
1694						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1695		radeon_program_register_sequence(rdev,
1696						 hawaii_golden_registers,
1697						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1698		radeon_program_register_sequence(rdev,
1699						 hawaii_golden_common_registers,
1700						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1701		radeon_program_register_sequence(rdev,
1702						 hawaii_golden_spm_registers,
1703						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1704		break;
1705	default:
1706		break;
1707	}
1708}
1709
1710/**
1711 * cik_get_xclk - get the xclk
1712 *
1713 * @rdev: radeon_device pointer
1714 *
1715 * Returns the reference clock used by the gfx engine
1716 * (CIK).
1717 */
1718u32 cik_get_xclk(struct radeon_device *rdev)
1719{
1720	u32 reference_clock = rdev->clock.spll.reference_freq;
1721
1722	if (rdev->flags & RADEON_IS_IGP) {
1723		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724			return reference_clock / 2;
1725	} else {
1726		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727			return reference_clock / 4;
1728	}
1729	return reference_clock;
1730}
1731
1732/**
1733 * cik_mm_rdoorbell - read a doorbell dword
1734 *
1735 * @rdev: radeon_device pointer
1736 * @index: doorbell index
1737 *
1738 * Returns the value in the doorbell aperture at the
1739 * requested doorbell index (CIK).
1740 */
1741u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742{
1743	if (index < rdev->doorbell.num_doorbells) {
1744		return readl(rdev->doorbell.ptr + index);
1745	} else {
1746		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747		return 0;
1748	}
1749}
1750
1751/**
1752 * cik_mm_wdoorbell - write a doorbell dword
1753 *
1754 * @rdev: radeon_device pointer
1755 * @index: doorbell index
1756 * @v: value to write
1757 *
1758 * Writes @v to the doorbell aperture at the
1759 * requested doorbell index (CIK).
1760 */
1761void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762{
1763	if (index < rdev->doorbell.num_doorbells) {
1764		writel(v, rdev->doorbell.ptr + index);
1765	} else {
1766		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767	}
1768}
1769
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * when loading legacy (non-new_fw) MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1811
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug settings: {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * when loading legacy (non-new_fw) MC ucode.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1839
1840
1841/**
1842 * cik_srbm_select - select specific register instances
1843 *
1844 * @rdev: radeon_device pointer
1845 * @me: selected ME (micro engine)
1846 * @pipe: pipe
1847 * @queue: queue
1848 * @vmid: VMID
1849 *
1850 * Switches the currently active registers instances.  Some
1851 * registers are instanced per VMID, others are instanced per
1852 * me/pipe/queue combination.
1853 */
1854static void cik_srbm_select(struct radeon_device *rdev,
1855			    u32 me, u32 pipe, u32 queue, u32 vmid)
1856{
1857	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858			     MEID(me & 0x3) |
1859			     VMID(vmid & 0xf) |
1860			     QUEUEID(queue & 0x7));
1861	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862}
1863
/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Handles both the new unified firmware images (sizes/offsets taken
 * from the embedded header) and the legacy split images (sizes and IO
 * register tables hard-coded per asic).
 * Returns 0 on success, error on failure.
 */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	/* nothing to do if no MC firmware was fetched at init time */
	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-format image: parse the header for the IO-debug
		 * register list and the ucode payload location/size
		 */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug entries are (index, data) dword pairs, hence / (4 * 2) */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy image: the whole file is big-endian ucode words and
		 * the IO register table comes from per-asic static tables
		 */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode if the MC sequencer is not already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* extra IO-debug overrides for device 0x6649 when MC_SEQ_MISC0
		 * matches 0x56xx — presumably a specific memory configuration;
		 * exact meaning of the check is not visible here (NOTE: confirm
		 * against the hw docs)
		 */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
		/* NOTE(review): a training timeout is not reported as an error;
		 * the function still returns 0 in that case
		 */
	}

	return 0;
}
1969
1970/**
1971 * cik_init_microcode - load ucode images from disk
1972 *
1973 * @rdev: radeon_device pointer
1974 *
1975 * Use the firmware interface to load the ucode images into
1976 * the driver (not loaded into hw).
1977 * Returns 0 on success, error on failure.
1978 */
1979static int cik_init_microcode(struct radeon_device *rdev)
1980{
1981	const char *chip_name;
1982	const char *new_chip_name;
1983	size_t pfp_req_size, me_req_size, ce_req_size,
1984		mec_req_size, rlc_req_size, mc_req_size = 0,
1985		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986	char fw_name[30];
1987	int new_fw = 0;
1988	int err;
1989	int num_fw;
1990	bool new_smc = false;
1991
1992	DRM_DEBUG("\n");
1993
1994	switch (rdev->family) {
1995	case CHIP_BONAIRE:
1996		chip_name = "BONAIRE";
1997		if ((rdev->pdev->revision == 0x80) ||
1998		    (rdev->pdev->revision == 0x81) ||
1999		    (rdev->pdev->device == 0x665f))
2000			new_smc = true;
2001		new_chip_name = "bonaire";
2002		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003		me_req_size = CIK_ME_UCODE_SIZE * 4;
2004		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011		num_fw = 8;
2012		break;
2013	case CHIP_HAWAII:
2014		chip_name = "HAWAII";
2015		if (rdev->pdev->revision == 0x80)
2016			new_smc = true;
2017		new_chip_name = "hawaii";
2018		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019		me_req_size = CIK_ME_UCODE_SIZE * 4;
2020		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027		num_fw = 8;
2028		break;
2029	case CHIP_KAVERI:
2030		chip_name = "KAVERI";
2031		new_chip_name = "kaveri";
2032		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033		me_req_size = CIK_ME_UCODE_SIZE * 4;
2034		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038		num_fw = 7;
2039		break;
2040	case CHIP_KABINI:
2041		chip_name = "KABINI";
2042		new_chip_name = "kabini";
2043		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044		me_req_size = CIK_ME_UCODE_SIZE * 4;
2045		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049		num_fw = 6;
2050		break;
2051	case CHIP_MULLINS:
2052		chip_name = "MULLINS";
2053		new_chip_name = "mullins";
2054		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055		me_req_size = CIK_ME_UCODE_SIZE * 4;
2056		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060		num_fw = 6;
2061		break;
2062	default: BUG();
2063	}
2064
2065	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066
2067	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069	if (err) {
2070		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072		if (err)
2073			goto out;
2074		if (rdev->pfp_fw->size != pfp_req_size) {
2075			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076			       rdev->pfp_fw->size, fw_name);
2077			err = -EINVAL;
2078			goto out;
2079		}
2080	} else {
2081		err = radeon_ucode_validate(rdev->pfp_fw);
2082		if (err) {
2083			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084			       fw_name);
2085			goto out;
2086		} else {
2087			new_fw++;
2088		}
2089	}
2090
2091	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093	if (err) {
2094		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096		if (err)
2097			goto out;
2098		if (rdev->me_fw->size != me_req_size) {
2099			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100			       rdev->me_fw->size, fw_name);
2101			err = -EINVAL;
2102		}
2103	} else {
2104		err = radeon_ucode_validate(rdev->me_fw);
2105		if (err) {
2106			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107			       fw_name);
2108			goto out;
2109		} else {
2110			new_fw++;
2111		}
2112	}
2113
2114	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116	if (err) {
2117		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119		if (err)
2120			goto out;
2121		if (rdev->ce_fw->size != ce_req_size) {
2122			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123			       rdev->ce_fw->size, fw_name);
2124			err = -EINVAL;
2125		}
2126	} else {
2127		err = radeon_ucode_validate(rdev->ce_fw);
2128		if (err) {
2129			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130			       fw_name);
2131			goto out;
2132		} else {
2133			new_fw++;
2134		}
2135	}
2136
2137	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139	if (err) {
2140		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142		if (err)
2143			goto out;
2144		if (rdev->mec_fw->size != mec_req_size) {
2145			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146			       rdev->mec_fw->size, fw_name);
2147			err = -EINVAL;
2148		}
2149	} else {
2150		err = radeon_ucode_validate(rdev->mec_fw);
2151		if (err) {
2152			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153			       fw_name);
2154			goto out;
2155		} else {
2156			new_fw++;
2157		}
2158	}
2159
2160	if (rdev->family == CHIP_KAVERI) {
2161		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163		if (err) {
2164			goto out;
2165		} else {
2166			err = radeon_ucode_validate(rdev->mec2_fw);
2167			if (err) {
2168				goto out;
2169			} else {
2170				new_fw++;
2171			}
2172		}
2173	}
2174
2175	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177	if (err) {
2178		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180		if (err)
2181			goto out;
2182		if (rdev->rlc_fw->size != rlc_req_size) {
2183			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184			       rdev->rlc_fw->size, fw_name);
2185			err = -EINVAL;
2186		}
2187	} else {
2188		err = radeon_ucode_validate(rdev->rlc_fw);
2189		if (err) {
2190			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191			       fw_name);
2192			goto out;
2193		} else {
2194			new_fw++;
2195		}
2196	}
2197
2198	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200	if (err) {
2201		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203		if (err)
2204			goto out;
2205		if (rdev->sdma_fw->size != sdma_req_size) {
2206			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207			       rdev->sdma_fw->size, fw_name);
2208			err = -EINVAL;
2209		}
2210	} else {
2211		err = radeon_ucode_validate(rdev->sdma_fw);
2212		if (err) {
2213			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214			       fw_name);
2215			goto out;
2216		} else {
2217			new_fw++;
2218		}
2219	}
2220
2221	/* No SMC, MC ucode on APUs */
2222	if (!(rdev->flags & RADEON_IS_IGP)) {
2223		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225		if (err) {
2226			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228			if (err) {
2229				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231				if (err)
2232					goto out;
2233			}
2234			if ((rdev->mc_fw->size != mc_req_size) &&
2235			    (rdev->mc_fw->size != mc2_req_size)){
2236				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237				       rdev->mc_fw->size, fw_name);
2238				err = -EINVAL;
2239			}
2240			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241		} else {
2242			err = radeon_ucode_validate(rdev->mc_fw);
2243			if (err) {
2244				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245				       fw_name);
2246				goto out;
2247			} else {
2248				new_fw++;
2249			}
2250		}
2251
2252		if (new_smc)
2253			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254		else
2255			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257		if (err) {
2258			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260			if (err) {
2261				pr_err("smc: error loading firmware \"%s\"\n",
2262				       fw_name);
2263				release_firmware(rdev->smc_fw);
2264				rdev->smc_fw = NULL;
2265				err = 0;
2266			} else if (rdev->smc_fw->size != smc_req_size) {
2267				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268				       rdev->smc_fw->size, fw_name);
2269				err = -EINVAL;
2270			}
2271		} else {
2272			err = radeon_ucode_validate(rdev->smc_fw);
2273			if (err) {
2274				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275				       fw_name);
2276				goto out;
2277			} else {
2278				new_fw++;
2279			}
2280		}
2281	}
2282
2283	if (new_fw == 0) {
2284		rdev->new_fw = false;
2285	} else if (new_fw < num_fw) {
2286		pr_err("ci_fw: mixing new and old firmware!\n");
2287		err = -EINVAL;
2288	} else {
2289		rdev->new_fw = true;
2290	}
2291
2292out:
2293	if (err) {
2294		if (err != -EINVAL)
2295			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296			       fw_name);
2297		release_firmware(rdev->pfp_fw);
2298		rdev->pfp_fw = NULL;
2299		release_firmware(rdev->me_fw);
2300		rdev->me_fw = NULL;
2301		release_firmware(rdev->ce_fw);
2302		rdev->ce_fw = NULL;
2303		release_firmware(rdev->mec_fw);
2304		rdev->mec_fw = NULL;
2305		release_firmware(rdev->mec2_fw);
2306		rdev->mec2_fw = NULL;
2307		release_firmware(rdev->rlc_fw);
2308		rdev->rlc_fw = NULL;
2309		release_firmware(rdev->sdma_fw);
2310		rdev->sdma_fw = NULL;
2311		release_firmware(rdev->mc_fw);
2312		rdev->mc_fw = NULL;
2313		release_firmware(rdev->smc_fw);
2314		rdev->smc_fw = NULL;
2315	}
2316	return err;
2317}
2318
2319/*
2320 * Core functions
2321 */
2322/**
2323 * cik_tiling_mode_table_init - init the hw tiling table
2324 *
2325 * @rdev: radeon_device pointer
2326 *
2327 * Starting with SI, the tiling setup is done globally in a
2328 * set of 32 tiling modes.  Rather than selecting each set of
2329 * parameters per surface as on older asics, we just select
2330 * which index in the tiling table we want to use, and the
2331 * surface uses those parameters (CIK).
2332 */
2333static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334{
2335	u32 *tile = rdev->config.cik.tile_mode_array;
2336	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337	const u32 num_tile_mode_states =
2338			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339	const u32 num_secondary_tile_mode_states =
2340			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341	u32 reg_offset, split_equal_to_row_size;
2342	u32 num_pipe_configs;
2343	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344		rdev->config.cik.max_shader_engines;
2345
2346	switch (rdev->config.cik.mem_row_size_in_kb) {
2347	case 1:
2348		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349		break;
2350	case 2:
2351	default:
2352		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353		break;
2354	case 4:
2355		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356		break;
2357	}
2358
2359	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360	if (num_pipe_configs > 8)
2361		num_pipe_configs = 16;
2362
2363	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364		tile[reg_offset] = 0;
2365	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366		macrotile[reg_offset] = 0;
2367
2368	switch(num_pipe_configs) {
2369	case 16:
2370		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389			   TILE_SPLIT(split_equal_to_row_size));
2390		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400			   TILE_SPLIT(split_equal_to_row_size));
2401		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448
2449		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452			   NUM_BANKS(ADDR_SURF_16_BANK));
2453		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456			   NUM_BANKS(ADDR_SURF_16_BANK));
2457		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460			   NUM_BANKS(ADDR_SURF_16_BANK));
2461		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464			   NUM_BANKS(ADDR_SURF_16_BANK));
2465		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468			   NUM_BANKS(ADDR_SURF_8_BANK));
2469		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472			   NUM_BANKS(ADDR_SURF_4_BANK));
2473		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476			   NUM_BANKS(ADDR_SURF_2_BANK));
2477		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480			   NUM_BANKS(ADDR_SURF_16_BANK));
2481		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484			   NUM_BANKS(ADDR_SURF_16_BANK));
2485		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488			    NUM_BANKS(ADDR_SURF_16_BANK));
2489		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492			    NUM_BANKS(ADDR_SURF_8_BANK));
2493		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496			    NUM_BANKS(ADDR_SURF_4_BANK));
2497		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500			    NUM_BANKS(ADDR_SURF_2_BANK));
2501		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504			    NUM_BANKS(ADDR_SURF_2_BANK));
2505
2506		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510		break;
2511
2512	case 8:
2513		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532			   TILE_SPLIT(split_equal_to_row_size));
2533		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543			   TILE_SPLIT(split_equal_to_row_size));
2544		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591
2592		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595				NUM_BANKS(ADDR_SURF_16_BANK));
2596		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599				NUM_BANKS(ADDR_SURF_16_BANK));
2600		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603				NUM_BANKS(ADDR_SURF_16_BANK));
2604		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607				NUM_BANKS(ADDR_SURF_16_BANK));
2608		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611				NUM_BANKS(ADDR_SURF_8_BANK));
2612		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615				NUM_BANKS(ADDR_SURF_4_BANK));
2616		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619				NUM_BANKS(ADDR_SURF_2_BANK));
2620		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623				NUM_BANKS(ADDR_SURF_16_BANK));
2624		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627				NUM_BANKS(ADDR_SURF_16_BANK));
2628		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631				NUM_BANKS(ADDR_SURF_16_BANK));
2632		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635				NUM_BANKS(ADDR_SURF_16_BANK));
2636		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639				NUM_BANKS(ADDR_SURF_8_BANK));
2640		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643				NUM_BANKS(ADDR_SURF_4_BANK));
2644		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647				NUM_BANKS(ADDR_SURF_2_BANK));
2648
2649		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653		break;
2654
2655	case 4:
2656		if (num_rbs == 4) {
2657		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676			   TILE_SPLIT(split_equal_to_row_size));
2677		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687			   TILE_SPLIT(split_equal_to_row_size));
2688		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735
2736		} else if (num_rbs < 4) {
2737		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756			   TILE_SPLIT(split_equal_to_row_size));
2757		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767			   TILE_SPLIT(split_equal_to_row_size));
2768		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815		}
2816
2817		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820				NUM_BANKS(ADDR_SURF_16_BANK));
2821		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824				NUM_BANKS(ADDR_SURF_16_BANK));
2825		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828				NUM_BANKS(ADDR_SURF_16_BANK));
2829		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832				NUM_BANKS(ADDR_SURF_16_BANK));
2833		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836				NUM_BANKS(ADDR_SURF_16_BANK));
2837		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840				NUM_BANKS(ADDR_SURF_8_BANK));
2841		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844				NUM_BANKS(ADDR_SURF_4_BANK));
2845		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848				NUM_BANKS(ADDR_SURF_16_BANK));
2849		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852				NUM_BANKS(ADDR_SURF_16_BANK));
2853		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856				NUM_BANKS(ADDR_SURF_16_BANK));
2857		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860				NUM_BANKS(ADDR_SURF_16_BANK));
2861		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864				NUM_BANKS(ADDR_SURF_16_BANK));
2865		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868				NUM_BANKS(ADDR_SURF_8_BANK));
2869		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872				NUM_BANKS(ADDR_SURF_4_BANK));
2873
2874		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878		break;
2879
2880	case 2:
2881		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883			   PIPE_CONFIG(ADDR_SURF_P2) |
2884			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887			   PIPE_CONFIG(ADDR_SURF_P2) |
2888			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891			   PIPE_CONFIG(ADDR_SURF_P2) |
2892			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895			   PIPE_CONFIG(ADDR_SURF_P2) |
2896			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899			   PIPE_CONFIG(ADDR_SURF_P2) |
2900			   TILE_SPLIT(split_equal_to_row_size));
2901		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902			   PIPE_CONFIG(ADDR_SURF_P2) |
2903			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906			   PIPE_CONFIG(ADDR_SURF_P2) |
2907			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910			   PIPE_CONFIG(ADDR_SURF_P2) |
2911			   TILE_SPLIT(split_equal_to_row_size));
2912		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913			   PIPE_CONFIG(ADDR_SURF_P2);
2914		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916			   PIPE_CONFIG(ADDR_SURF_P2));
2917		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919			    PIPE_CONFIG(ADDR_SURF_P2) |
2920			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923			    PIPE_CONFIG(ADDR_SURF_P2) |
2924			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927			    PIPE_CONFIG(ADDR_SURF_P2) |
2928			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930			    PIPE_CONFIG(ADDR_SURF_P2) |
2931			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934			    PIPE_CONFIG(ADDR_SURF_P2) |
2935			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938			    PIPE_CONFIG(ADDR_SURF_P2) |
2939			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942			    PIPE_CONFIG(ADDR_SURF_P2) |
2943			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946			    PIPE_CONFIG(ADDR_SURF_P2));
2947		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949			    PIPE_CONFIG(ADDR_SURF_P2) |
2950			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953			    PIPE_CONFIG(ADDR_SURF_P2) |
2954			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957			    PIPE_CONFIG(ADDR_SURF_P2) |
2958			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959
2960		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963				NUM_BANKS(ADDR_SURF_16_BANK));
2964		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967				NUM_BANKS(ADDR_SURF_16_BANK));
2968		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971				NUM_BANKS(ADDR_SURF_16_BANK));
2972		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975				NUM_BANKS(ADDR_SURF_16_BANK));
2976		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979				NUM_BANKS(ADDR_SURF_16_BANK));
2980		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983				NUM_BANKS(ADDR_SURF_16_BANK));
2984		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987				NUM_BANKS(ADDR_SURF_8_BANK));
2988		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991				NUM_BANKS(ADDR_SURF_16_BANK));
2992		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995				NUM_BANKS(ADDR_SURF_16_BANK));
2996		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999				NUM_BANKS(ADDR_SURF_16_BANK));
3000		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003				NUM_BANKS(ADDR_SURF_16_BANK));
3004		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007				NUM_BANKS(ADDR_SURF_16_BANK));
3008		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011				NUM_BANKS(ADDR_SURF_16_BANK));
3012		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015				NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021		break;
3022
3023	default:
3024		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025	}
3026}
3027
3028/**
3029 * cik_select_se_sh - select which SE, SH to address
3030 *
3031 * @rdev: radeon_device pointer
3032 * @se_num: shader engine to address
3033 * @sh_num: sh block to address
3034 *
3035 * Select which SE, SH combinations to address. Certain
3036 * registers are instanced per SE or SH.  0xffffffff means
3037 * broadcast to all SEs or SHs (CIK).
3038 */
3039static void cik_select_se_sh(struct radeon_device *rdev,
3040			     u32 se_num, u32 sh_num)
3041{
3042	u32 data = INSTANCE_BROADCAST_WRITES;
3043
3044	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046	else if (se_num == 0xffffffff)
3047		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048	else if (sh_num == 0xffffffff)
3049		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050	else
3051		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052	WREG32(GRBM_GFX_INDEX, data);
3053}
3054
3055/**
3056 * cik_create_bitmask - create a bitmask
3057 *
3058 * @bit_width: length of the mask
3059 *
3060 * create a variable length bit mask (CIK).
3061 * Returns the bitmask.
3062 */
3063static u32 cik_create_bitmask(u32 bit_width)
3064{
3065	u32 i, mask = 0;
3066
3067	for (i = 0; i < bit_width; i++) {
3068		mask <<= 1;
3069		mask |= 1;
3070	}
3071	return mask;
3072}
3073
3074/**
3075 * cik_get_rb_disabled - computes the mask of disabled RBs
3076 *
3077 * @rdev: radeon_device pointer
3078 * @max_rb_num: max RBs (render backends) for the asic
3079 * @se_num: number of SEs (shader engines) for the asic
3080 * @sh_per_se: number of SH blocks per SE for the asic
3081 *
3082 * Calculates the bitmask of disabled RBs (CIK).
3083 * Returns the disabled RB bitmask.
3084 */
3085static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086			      u32 max_rb_num_per_se,
3087			      u32 sh_per_se)
3088{
3089	u32 data, mask;
3090
3091	data = RREG32(CC_RB_BACKEND_DISABLE);
3092	if (data & 1)
3093		data &= BACKEND_DISABLE_MASK;
3094	else
3095		data = 0;
3096	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097
3098	data >>= BACKEND_DISABLE_SHIFT;
3099
3100	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101
3102	return data & mask;
3103}
3104
3105/**
3106 * cik_setup_rb - setup the RBs on the asic
3107 *
3108 * @rdev: radeon_device pointer
3109 * @se_num: number of SEs (shader engines) for the asic
3110 * @sh_per_se: number of SH blocks per SE for the asic
3111 * @max_rb_num: max RBs (render backends) for the asic
3112 *
3113 * Configures per-SE/SH RB registers (CIK).
3114 */
3115static void cik_setup_rb(struct radeon_device *rdev,
3116			 u32 se_num, u32 sh_per_se,
3117			 u32 max_rb_num_per_se)
3118{
3119	int i, j;
3120	u32 data, mask;
3121	u32 disabled_rbs = 0;
3122	u32 enabled_rbs = 0;
3123
3124	for (i = 0; i < se_num; i++) {
3125		for (j = 0; j < sh_per_se; j++) {
3126			cik_select_se_sh(rdev, i, j);
3127			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128			if (rdev->family == CHIP_HAWAII)
3129				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130			else
3131				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132		}
3133	}
3134	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135
3136	mask = 1;
3137	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3138		if (!(disabled_rbs & mask))
3139			enabled_rbs |= mask;
3140		mask <<= 1;
3141	}
3142
3143	rdev->config.cik.backend_enable_mask = enabled_rbs;
3144
3145	for (i = 0; i < se_num; i++) {
3146		cik_select_se_sh(rdev, i, 0xffffffff);
3147		data = 0;
3148		for (j = 0; j < sh_per_se; j++) {
3149			switch (enabled_rbs & 3) {
3150			case 0:
3151				if (j == 0)
3152					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3153				else
3154					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3155				break;
3156			case 1:
3157				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3158				break;
3159			case 2:
3160				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3161				break;
3162			case 3:
3163			default:
3164				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3165				break;
3166			}
3167			enabled_rbs >>= 2;
3168		}
3169		WREG32(PA_SC_RASTER_CONFIG, data);
3170	}
3171	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3172}
3173
3174/**
3175 * cik_gpu_init - setup the 3D engine
3176 *
3177 * @rdev: radeon_device pointer
3178 *
3179 * Configures the 3D engine and tiling configuration
3180 * registers so that the 3D engine is usable.
3181 */
3182static void cik_gpu_init(struct radeon_device *rdev)
3183{
3184	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3185	u32 mc_shared_chmap, mc_arb_ramcfg;
3186	u32 hdp_host_path_cntl;
3187	u32 tmp;
3188	int i, j;
3189
3190	switch (rdev->family) {
3191	case CHIP_BONAIRE:
3192		rdev->config.cik.max_shader_engines = 2;
3193		rdev->config.cik.max_tile_pipes = 4;
3194		rdev->config.cik.max_cu_per_sh = 7;
3195		rdev->config.cik.max_sh_per_se = 1;
3196		rdev->config.cik.max_backends_per_se = 2;
3197		rdev->config.cik.max_texture_channel_caches = 4;
3198		rdev->config.cik.max_gprs = 256;
3199		rdev->config.cik.max_gs_threads = 32;
3200		rdev->config.cik.max_hw_contexts = 8;
3201
3202		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3203		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3204		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3205		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3206		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3207		break;
3208	case CHIP_HAWAII:
3209		rdev->config.cik.max_shader_engines = 4;
3210		rdev->config.cik.max_tile_pipes = 16;
3211		rdev->config.cik.max_cu_per_sh = 11;
3212		rdev->config.cik.max_sh_per_se = 1;
3213		rdev->config.cik.max_backends_per_se = 4;
3214		rdev->config.cik.max_texture_channel_caches = 16;
3215		rdev->config.cik.max_gprs = 256;
3216		rdev->config.cik.max_gs_threads = 32;
3217		rdev->config.cik.max_hw_contexts = 8;
3218
3219		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3220		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3221		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3222		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3223		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3224		break;
3225	case CHIP_KAVERI:
3226		rdev->config.cik.max_shader_engines = 1;
3227		rdev->config.cik.max_tile_pipes = 4;
3228		rdev->config.cik.max_cu_per_sh = 8;
3229		rdev->config.cik.max_backends_per_se = 2;
3230		rdev->config.cik.max_sh_per_se = 1;
3231		rdev->config.cik.max_texture_channel_caches = 4;
3232		rdev->config.cik.max_gprs = 256;
3233		rdev->config.cik.max_gs_threads = 16;
3234		rdev->config.cik.max_hw_contexts = 8;
3235
3236		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241		break;
3242	case CHIP_KABINI:
3243	case CHIP_MULLINS:
3244	default:
3245		rdev->config.cik.max_shader_engines = 1;
3246		rdev->config.cik.max_tile_pipes = 2;
3247		rdev->config.cik.max_cu_per_sh = 2;
3248		rdev->config.cik.max_sh_per_se = 1;
3249		rdev->config.cik.max_backends_per_se = 1;
3250		rdev->config.cik.max_texture_channel_caches = 2;
3251		rdev->config.cik.max_gprs = 256;
3252		rdev->config.cik.max_gs_threads = 16;
3253		rdev->config.cik.max_hw_contexts = 8;
3254
3255		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260		break;
3261	}
3262
3263	/* Initialize HDP */
3264	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3265		WREG32((0x2c14 + j), 0x00000000);
3266		WREG32((0x2c18 + j), 0x00000000);
3267		WREG32((0x2c1c + j), 0x00000000);
3268		WREG32((0x2c20 + j), 0x00000000);
3269		WREG32((0x2c24 + j), 0x00000000);
3270	}
3271
3272	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273	WREG32(SRBM_INT_CNTL, 0x1);
3274	WREG32(SRBM_INT_ACK, 0x1);
3275
3276	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3277
3278	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3279	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3280
3281	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3282	rdev->config.cik.mem_max_burst_length_bytes = 256;
3283	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3284	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3285	if (rdev->config.cik.mem_row_size_in_kb > 4)
3286		rdev->config.cik.mem_row_size_in_kb = 4;
3287	/* XXX use MC settings? */
3288	rdev->config.cik.shader_engine_tile_size = 32;
3289	rdev->config.cik.num_gpus = 1;
3290	rdev->config.cik.multi_gpu_tile_size = 64;
3291
3292	/* fix up row size */
3293	gb_addr_config &= ~ROW_SIZE_MASK;
3294	switch (rdev->config.cik.mem_row_size_in_kb) {
3295	case 1:
3296	default:
3297		gb_addr_config |= ROW_SIZE(0);
3298		break;
3299	case 2:
3300		gb_addr_config |= ROW_SIZE(1);
3301		break;
3302	case 4:
3303		gb_addr_config |= ROW_SIZE(2);
3304		break;
3305	}
3306
3307	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3308	 * not have bank info, so create a custom tiling dword.
3309	 * bits 3:0   num_pipes
3310	 * bits 7:4   num_banks
3311	 * bits 11:8  group_size
3312	 * bits 15:12 row_size
3313	 */
3314	rdev->config.cik.tile_config = 0;
3315	switch (rdev->config.cik.num_tile_pipes) {
3316	case 1:
3317		rdev->config.cik.tile_config |= (0 << 0);
3318		break;
3319	case 2:
3320		rdev->config.cik.tile_config |= (1 << 0);
3321		break;
3322	case 4:
3323		rdev->config.cik.tile_config |= (2 << 0);
3324		break;
3325	case 8:
3326	default:
3327		/* XXX what about 12? */
3328		rdev->config.cik.tile_config |= (3 << 0);
3329		break;
3330	}
3331	rdev->config.cik.tile_config |=
3332		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3333	rdev->config.cik.tile_config |=
3334		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3335	rdev->config.cik.tile_config |=
3336		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3337
3338	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3339	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3340	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3341	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3342	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3343	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3344	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3345	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3346
3347	cik_tiling_mode_table_init(rdev);
3348
3349	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3350		     rdev->config.cik.max_sh_per_se,
3351		     rdev->config.cik.max_backends_per_se);
3352
3353	rdev->config.cik.active_cus = 0;
3354	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3355		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3356			rdev->config.cik.active_cus +=
3357				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3358		}
3359	}
3360
3361	/* set HW defaults for 3D engine */
3362	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3363
3364	WREG32(SX_DEBUG_1, 0x20);
3365
3366	WREG32(TA_CNTL_AUX, 0x00010000);
3367
3368	tmp = RREG32(SPI_CONFIG_CNTL);
3369	tmp |= 0x03000000;
3370	WREG32(SPI_CONFIG_CNTL, tmp);
3371
3372	WREG32(SQ_CONFIG, 1);
3373
3374	WREG32(DB_DEBUG, 0);
3375
3376	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3377	tmp |= 0x00000400;
3378	WREG32(DB_DEBUG2, tmp);
3379
3380	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3381	tmp |= 0x00020200;
3382	WREG32(DB_DEBUG3, tmp);
3383
3384	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3385	tmp |= 0x00018208;
3386	WREG32(CB_HW_CONTROL, tmp);
3387
3388	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3389
3390	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3391				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3392				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3393				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3394
3395	WREG32(VGT_NUM_INSTANCES, 1);
3396
3397	WREG32(CP_PERFMON_CNTL, 0);
3398
3399	WREG32(SQ_CONFIG, 0);
3400
3401	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3402					  FORCE_EOV_MAX_REZ_CNT(255)));
3403
3404	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3405	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3406
3407	WREG32(VGT_GS_VERTEX_REUSE, 16);
3408	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3409
3410	tmp = RREG32(HDP_MISC_CNTL);
3411	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3412	WREG32(HDP_MISC_CNTL, tmp);
3413
3414	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3415	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3416
3417	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3418	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3419
3420	udelay(50);
3421}
3422
3423/*
3424 * GPU scratch registers helpers function.
3425 */
3426/**
3427 * cik_scratch_init - setup driver info for CP scratch regs
3428 *
3429 * @rdev: radeon_device pointer
3430 *
3431 * Set up the number and offset of the CP scratch registers.
3432 * NOTE: use of CP scratch registers is a legacy inferface and
3433 * is not used by default on newer asics (r6xx+).  On newer asics,
3434 * memory buffers are used for fences rather than scratch regs.
3435 */
3436static void cik_scratch_init(struct radeon_device *rdev)
3437{
3438	int i;
3439
3440	rdev->scratch.num_reg = 7;
3441	rdev->scratch.reg_base = SCRATCH_REG0;
3442	for (i = 0; i < rdev->scratch.num_reg; i++) {
3443		rdev->scratch.free[i] = true;
3444		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3445	}
3446}
3447
3448/**
3449 * cik_ring_test - basic gfx ring test
3450 *
3451 * @rdev: radeon_device pointer
3452 * @ring: radeon_ring structure holding ring information
3453 *
3454 * Allocate a scratch register and write to it using the gfx ring (CIK).
3455 * Provides a basic gfx ring test to verify that the ring is working.
3456 * Used by cik_cp_gfx_resume();
3457 * Returns 0 on success, error on failure.
3458 */
3459int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3460{
3461	uint32_t scratch;
3462	uint32_t tmp = 0;
3463	unsigned i;
3464	int r;
3465
3466	r = radeon_scratch_get(rdev, &scratch);
3467	if (r) {
3468		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3469		return r;
3470	}
3471	WREG32(scratch, 0xCAFEDEAD);
3472	r = radeon_ring_lock(rdev, ring, 3);
3473	if (r) {
3474		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3475		radeon_scratch_free(rdev, scratch);
3476		return r;
3477	}
3478	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3479	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3480	radeon_ring_write(ring, 0xDEADBEEF);
3481	radeon_ring_unlock_commit(rdev, ring, false);
3482
3483	for (i = 0; i < rdev->usec_timeout; i++) {
3484		tmp = RREG32(scratch);
3485		if (tmp == 0xDEADBEEF)
3486			break;
3487		udelay(1);
3488	}
3489	if (i < rdev->usec_timeout) {
3490		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3491	} else {
3492		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3493			  ring->idx, scratch, tmp);
3494		r = -EINVAL;
3495	}
3496	radeon_scratch_free(rdev, scratch);
3497	return r;
3498}
3499
3500/**
3501 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3502 *
3503 * @rdev: radeon_device pointer
3504 * @ridx: radeon ring index
3505 *
3506 * Emits an hdp flush on the cp.
3507 */
3508static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3509				       int ridx)
3510{
3511	struct radeon_ring *ring = &rdev->ring[ridx];
3512	u32 ref_and_mask;
3513
3514	switch (ring->idx) {
3515	case CAYMAN_RING_TYPE_CP1_INDEX:
3516	case CAYMAN_RING_TYPE_CP2_INDEX:
3517	default:
3518		switch (ring->me) {
3519		case 0:
3520			ref_and_mask = CP2 << ring->pipe;
3521			break;
3522		case 1:
3523			ref_and_mask = CP6 << ring->pipe;
3524			break;
3525		default:
3526			return;
3527		}
3528		break;
3529	case RADEON_RING_TYPE_GFX_INDEX:
3530		ref_and_mask = CP0;
3531		break;
3532	}
3533
3534	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3535	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3536				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3537				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3538	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3539	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3540	radeon_ring_write(ring, ref_and_mask);
3541	radeon_ring_write(ring, ref_and_mask);
3542	radeon_ring_write(ring, 0x20); /* poll interval */
3543}
3544
3545/**
3546 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3547 *
3548 * @rdev: radeon_device pointer
3549 * @fence: radeon fence object
3550 *
3551 * Emits a fence sequnce number on the gfx ring and flushes
3552 * GPU caches.
3553 */
3554void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3555			     struct radeon_fence *fence)
3556{
3557	struct radeon_ring *ring = &rdev->ring[fence->ring];
3558	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3559
3560	/* Workaround for cache flush problems. First send a dummy EOP
3561	 * event down the pipe with seq one below.
3562	 */
3563	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3564	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3565				 EOP_TC_ACTION_EN |
3566				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3567				 EVENT_INDEX(5)));
3568	radeon_ring_write(ring, addr & 0xfffffffc);
3569	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3570				DATA_SEL(1) | INT_SEL(0));
3571	radeon_ring_write(ring, fence->seq - 1);
3572	radeon_ring_write(ring, 0);
3573
3574	/* Then send the real EOP event down the pipe. */
3575	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3576	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3577				 EOP_TC_ACTION_EN |
3578				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3579				 EVENT_INDEX(5)));
3580	radeon_ring_write(ring, addr & 0xfffffffc);
3581	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3582	radeon_ring_write(ring, fence->seq);
3583	radeon_ring_write(ring, 0);
3584}
3585
3586/**
3587 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3588 *
3589 * @rdev: radeon_device pointer
3590 * @fence: radeon fence object
3591 *
3592 * Emits a fence sequnce number on the compute ring and flushes
3593 * GPU caches.
3594 */
3595void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3596				 struct radeon_fence *fence)
3597{
3598	struct radeon_ring *ring = &rdev->ring[fence->ring];
3599	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3600
3601	/* RELEASE_MEM - flush caches, send int */
3602	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3603	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3604				 EOP_TC_ACTION_EN |
3605				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3606				 EVENT_INDEX(5)));
3607	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3608	radeon_ring_write(ring, addr & 0xfffffffc);
3609	radeon_ring_write(ring, upper_32_bits(addr));
3610	radeon_ring_write(ring, fence->seq);
3611	radeon_ring_write(ring, 0);
3612}
3613
3614/**
3615 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3616 *
3617 * @rdev: radeon_device pointer
3618 * @ring: radeon ring buffer object
3619 * @semaphore: radeon semaphore object
3620 * @emit_wait: Is this a sempahore wait?
3621 *
3622 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3623 * from running ahead of semaphore waits.
3624 */
3625bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3626			     struct radeon_ring *ring,
3627			     struct radeon_semaphore *semaphore,
3628			     bool emit_wait)
3629{
3630	uint64_t addr = semaphore->gpu_addr;
3631	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3632
3633	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3634	radeon_ring_write(ring, lower_32_bits(addr));
3635	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3636
3637	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3638		/* Prevent the PFP from running ahead of the semaphore wait */
3639		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3640		radeon_ring_write(ring, 0x0);
3641	}
3642
3643	return true;
3644}
3645
3646/**
3647 * cik_copy_cpdma - copy pages using the CP DMA engine
3648 *
3649 * @rdev: radeon_device pointer
3650 * @src_offset: src GPU address
3651 * @dst_offset: dst GPU address
3652 * @num_gpu_pages: number of GPU pages to xfer
3653 * @resv: reservation object to sync to
3654 *
3655 * Copy GPU paging using the CP DMA engine (CIK+).
3656 * Used by the radeon ttm implementation to move pages if
3657 * registered as the asic copy callback.
3658 */
3659struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3660				    uint64_t src_offset, uint64_t dst_offset,
3661				    unsigned num_gpu_pages,
3662				    struct dma_resv *resv)
3663{
3664	struct radeon_fence *fence;
3665	struct radeon_sync sync;
3666	int ring_index = rdev->asic->copy.blit_ring_index;
3667	struct radeon_ring *ring = &rdev->ring[ring_index];
3668	u32 size_in_bytes, cur_size_in_bytes, control;
3669	int i, num_loops;
3670	int r = 0;
3671
3672	radeon_sync_create(&sync);
3673
3674	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3675	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3676	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3677	if (r) {
3678		DRM_ERROR("radeon: moving bo (%d).\n", r);
3679		radeon_sync_free(rdev, &sync, NULL);
3680		return ERR_PTR(r);
3681	}
3682
3683	radeon_sync_resv(rdev, &sync, resv, false);
3684	radeon_sync_rings(rdev, &sync, ring->idx);
3685
3686	for (i = 0; i < num_loops; i++) {
3687		cur_size_in_bytes = size_in_bytes;
3688		if (cur_size_in_bytes > 0x1fffff)
3689			cur_size_in_bytes = 0x1fffff;
3690		size_in_bytes -= cur_size_in_bytes;
3691		control = 0;
3692		if (size_in_bytes == 0)
3693			control |= PACKET3_DMA_DATA_CP_SYNC;
3694		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3695		radeon_ring_write(ring, control);
3696		radeon_ring_write(ring, lower_32_bits(src_offset));
3697		radeon_ring_write(ring, upper_32_bits(src_offset));
3698		radeon_ring_write(ring, lower_32_bits(dst_offset));
3699		radeon_ring_write(ring, upper_32_bits(dst_offset));
3700		radeon_ring_write(ring, cur_size_in_bytes);
3701		src_offset += cur_size_in_bytes;
3702		dst_offset += cur_size_in_bytes;
3703	}
3704
3705	r = radeon_fence_emit(rdev, &fence, ring->idx);
3706	if (r) {
3707		radeon_ring_unlock_undo(rdev, ring);
3708		radeon_sync_free(rdev, &sync, NULL);
3709		return ERR_PTR(r);
3710	}
3711
3712	radeon_ring_unlock_commit(rdev, ring, false);
3713	radeon_sync_free(rdev, &sync, fence);
3714
3715	return fence;
3716}
3717
3718/*
3719 * IB stuff
3720 */
3721/**
3722 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3723 *
3724 * @rdev: radeon_device pointer
3725 * @ib: radeon indirect buffer object
3726 *
3727 * Emits a DE (drawing engine) or CE (constant engine) IB
3728 * on the gfx ring.  IBs are usually generated by userspace
3729 * acceleration drivers and submitted to the kernel for
3730 * scheduling on the ring.  This function schedules the IB
3731 * on the gfx ring for execution by the GPU.
3732 */
3733void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3734{
3735	struct radeon_ring *ring = &rdev->ring[ib->ring];
3736	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3737	u32 header, control = INDIRECT_BUFFER_VALID;
3738
3739	if (ib->is_const_ib) {
3740		/* set switch buffer packet before const IB */
3741		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3742		radeon_ring_write(ring, 0);
3743
3744		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3745	} else {
3746		u32 next_rptr;
3747		if (ring->rptr_save_reg) {
3748			next_rptr = ring->wptr + 3 + 4;
3749			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3750			radeon_ring_write(ring, ((ring->rptr_save_reg -
3751						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3752			radeon_ring_write(ring, next_rptr);
3753		} else if (rdev->wb.enabled) {
3754			next_rptr = ring->wptr + 5 + 4;
3755			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3756			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3757			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3758			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3759			radeon_ring_write(ring, next_rptr);
3760		}
3761
3762		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3763	}
3764
3765	control |= ib->length_dw | (vm_id << 24);
3766
3767	radeon_ring_write(ring, header);
3768	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3769	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770	radeon_ring_write(ring, control);
3771}
3772
3773/**
3774 * cik_ib_test - basic gfx ring IB test
3775 *
3776 * @rdev: radeon_device pointer
3777 * @ring: radeon_ring structure holding ring information
3778 *
3779 * Allocate an IB and execute it on the gfx ring (CIK).
3780 * Provides a basic gfx ring test to verify that IBs are working.
3781 * Returns 0 on success, error on failure.
3782 */
3783int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784{
3785	struct radeon_ib ib;
3786	uint32_t scratch;
3787	uint32_t tmp = 0;
3788	unsigned i;
3789	int r;
3790
3791	r = radeon_scratch_get(rdev, &scratch);
3792	if (r) {
3793		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794		return r;
3795	}
3796	WREG32(scratch, 0xCAFEDEAD);
3797	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798	if (r) {
3799		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800		radeon_scratch_free(rdev, scratch);
3801		return r;
3802	}
3803	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805	ib.ptr[2] = 0xDEADBEEF;
3806	ib.length_dw = 3;
3807	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3808	if (r) {
3809		radeon_scratch_free(rdev, scratch);
3810		radeon_ib_free(rdev, &ib);
3811		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812		return r;
3813	}
3814	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3815		RADEON_USEC_IB_TEST_TIMEOUT));
3816	if (r < 0) {
3817		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3818		radeon_scratch_free(rdev, scratch);
3819		radeon_ib_free(rdev, &ib);
3820		return r;
3821	} else if (r == 0) {
3822		DRM_ERROR("radeon: fence wait timed out.\n");
3823		radeon_scratch_free(rdev, scratch);
3824		radeon_ib_free(rdev, &ib);
3825		return -ETIMEDOUT;
3826	}
3827	r = 0;
3828	for (i = 0; i < rdev->usec_timeout; i++) {
3829		tmp = RREG32(scratch);
3830		if (tmp == 0xDEADBEEF)
3831			break;
3832		udelay(1);
3833	}
3834	if (i < rdev->usec_timeout) {
3835		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3836	} else {
3837		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3838			  scratch, tmp);
3839		r = -EINVAL;
3840	}
3841	radeon_scratch_free(rdev, scratch);
3842	radeon_ib_free(rdev, &ib);
3843	return r;
3844}
3845
3846/*
3847 * CP.
 * On CIK, gfx and compute now have independent command processors.
3849 *
3850 * GFX
3851 * Gfx consists of a single ring and can process both gfx jobs and
3852 * compute jobs.  The gfx CP consists of three microengines (ME):
3853 * PFP - Pre-Fetch Parser
3854 * ME - Micro Engine
3855 * CE - Constant Engine
3856 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3858 * used by the DE so that they can be loaded into cache in parallel
3859 * while the DE is processing state update packets.
3860 *
3861 * Compute
3862 * The compute CP consists of two microengines (ME):
3863 * MEC1 - Compute MicroEngine 1
3864 * MEC2 - Compute MicroEngine 2
3865 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3866 * The queues are exposed to userspace and are programmed directly
3867 * by the compute runtime.
3868 */
3869/**
3870 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3871 *
3872 * @rdev: radeon_device pointer
3873 * @enable: enable or disable the MEs
3874 *
3875 * Halts or unhalts the gfx MEs.
3876 */
3877static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3878{
3879	if (enable)
3880		WREG32(CP_ME_CNTL, 0);
3881	else {
3882		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3883			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3884		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3885		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3886	}
3887	udelay(50);
3888}
3889
3890/**
3891 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3892 *
3893 * @rdev: radeon_device pointer
3894 *
3895 * Loads the gfx PFP, ME, and CE ucode.
3896 * Returns 0 for success, -EINVAL if the ucode is not available.
3897 */
3898static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3899{
3900	int i;
3901
3902	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3903		return -EINVAL;
3904
3905	cik_cp_gfx_enable(rdev, false);
3906
3907	if (rdev->new_fw) {
3908		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3909			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3910		const struct gfx_firmware_header_v1_0 *ce_hdr =
3911			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3912		const struct gfx_firmware_header_v1_0 *me_hdr =
3913			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3914		const __le32 *fw_data;
3915		u32 fw_size;
3916
3917		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3918		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3919		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3920
3921		/* PFP */
3922		fw_data = (const __le32 *)
3923			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3924		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3925		WREG32(CP_PFP_UCODE_ADDR, 0);
3926		for (i = 0; i < fw_size; i++)
3927			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3928		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3929
3930		/* CE */
3931		fw_data = (const __le32 *)
3932			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3933		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3934		WREG32(CP_CE_UCODE_ADDR, 0);
3935		for (i = 0; i < fw_size; i++)
3936			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3937		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3938
3939		/* ME */
3940		fw_data = (const __be32 *)
3941			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3942		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3943		WREG32(CP_ME_RAM_WADDR, 0);
3944		for (i = 0; i < fw_size; i++)
3945			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3946		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3947		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3948	} else {
3949		const __be32 *fw_data;
3950
3951		/* PFP */
3952		fw_data = (const __be32 *)rdev->pfp_fw->data;
3953		WREG32(CP_PFP_UCODE_ADDR, 0);
3954		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3955			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3956		WREG32(CP_PFP_UCODE_ADDR, 0);
3957
3958		/* CE */
3959		fw_data = (const __be32 *)rdev->ce_fw->data;
3960		WREG32(CP_CE_UCODE_ADDR, 0);
3961		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3962			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3963		WREG32(CP_CE_UCODE_ADDR, 0);
3964
3965		/* ME */
3966		fw_data = (const __be32 *)rdev->me_fw->data;
3967		WREG32(CP_ME_RAM_WADDR, 0);
3968		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3969			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3970		WREG32(CP_ME_RAM_WADDR, 0);
3971	}
3972
3973	return 0;
3974}
3975
3976/**
3977 * cik_cp_gfx_start - start the gfx ring
3978 *
3979 * @rdev: radeon_device pointer
3980 *
3981 * Enables the ring and loads the clear state context and other
3982 * packets required to init the ring.
3983 * Returns 0 for success, error for failure.
3984 */
3985static int cik_cp_gfx_start(struct radeon_device *rdev)
3986{
3987	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3988	int r, i;
3989
3990	/* init the CP */
3991	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3992	WREG32(CP_ENDIAN_SWAP, 0);
3993	WREG32(CP_DEVICE_ID, 1);
3994
3995	cik_cp_gfx_enable(rdev, true);
3996
3997	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3998	if (r) {
3999		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4000		return r;
4001	}
4002
4003	/* init the CE partitions.  CE only used for gfx on CIK */
4004	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4005	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4006	radeon_ring_write(ring, 0x8000);
4007	radeon_ring_write(ring, 0x8000);
4008
4009	/* setup clear context state */
4010	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4011	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4012
4013	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4014	radeon_ring_write(ring, 0x80000000);
4015	radeon_ring_write(ring, 0x80000000);
4016
4017	for (i = 0; i < cik_default_size; i++)
4018		radeon_ring_write(ring, cik_default_state[i]);
4019
4020	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4021	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4022
4023	/* set clear context state */
4024	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4025	radeon_ring_write(ring, 0);
4026
4027	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4028	radeon_ring_write(ring, 0x00000316);
4029	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4030	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4031
4032	radeon_ring_unlock_commit(rdev, ring, false);
4033
4034	return 0;
4035}
4036
4037/**
4038 * cik_cp_gfx_fini - stop the gfx ring
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Stop the gfx ring and tear down the driver ring
4043 * info.
4044 */
4045static void cik_cp_gfx_fini(struct radeon_device *rdev)
4046{
4047	cik_cp_gfx_enable(rdev, false);
4048	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4049}
4050
4051/**
4052 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4053 *
4054 * @rdev: radeon_device pointer
4055 *
4056 * Program the location and size of the gfx ring buffer
4057 * and test it to make sure it's working.
4058 * Returns 0 for success, error for failure.
4059 */
4060static int cik_cp_gfx_resume(struct radeon_device *rdev)
4061{
4062	struct radeon_ring *ring;
4063	u32 tmp;
4064	u32 rb_bufsz;
4065	u64 rb_addr;
4066	int r;
4067
4068	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4069	if (rdev->family != CHIP_HAWAII)
4070		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4071
4072	/* Set the write pointer delay */
4073	WREG32(CP_RB_WPTR_DELAY, 0);
4074
4075	/* set the RB to use vmid 0 */
4076	WREG32(CP_RB_VMID, 0);
4077
4078	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4079
4080	/* ring 0 - compute and gfx */
4081	/* Set ring buffer size */
4082	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4083	rb_bufsz = order_base_2(ring->ring_size / 8);
4084	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4085#ifdef __BIG_ENDIAN
4086	tmp |= BUF_SWAP_32BIT;
4087#endif
4088	WREG32(CP_RB0_CNTL, tmp);
4089
4090	/* Initialize the ring buffer's read and write pointers */
4091	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4092	ring->wptr = 0;
4093	WREG32(CP_RB0_WPTR, ring->wptr);
4094
4095	/* set the wb address wether it's enabled or not */
4096	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4097	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4098
4099	/* scratch register shadowing is no longer supported */
4100	WREG32(SCRATCH_UMSK, 0);
4101
4102	if (!rdev->wb.enabled)
4103		tmp |= RB_NO_UPDATE;
4104
4105	mdelay(1);
4106	WREG32(CP_RB0_CNTL, tmp);
4107
4108	rb_addr = ring->gpu_addr >> 8;
4109	WREG32(CP_RB0_BASE, rb_addr);
4110	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4111
4112	/* start the ring */
4113	cik_cp_gfx_start(rdev);
4114	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4115	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4116	if (r) {
4117		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4118		return r;
4119	}
4120
4121	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4122		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4123
4124	return 0;
4125}
4126
4127u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4128		     struct radeon_ring *ring)
4129{
4130	u32 rptr;
4131
4132	if (rdev->wb.enabled)
4133		rptr = rdev->wb.wb[ring->rptr_offs/4];
4134	else
4135		rptr = RREG32(CP_RB0_RPTR);
4136
4137	return rptr;
4138}
4139
/* Read the gfx ring write pointer directly from the RB0 register. */
u32 cik_gfx_get_wptr(struct radeon_device *rdev,
		     struct radeon_ring *ring)
{
	return RREG32(CP_RB0_WPTR);
}
4145
/* Publish new gfx ring packets by updating the RB0 write pointer. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted register write */
	(void)RREG32(CP_RB0_WPTR);
}
4152
/* Fetch a compute ring's read pointer: from the writeback page when
 * enabled, otherwise from CP_HQD_PQ_RPTR of this ring's queue.  The
 * SRBM selection is global state, hence the srbm_mutex. */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		/* restore the default SRBM selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4170
/* Fetch a compute ring's write pointer: from the writeback page when
 * enabled, otherwise from CP_HQD_PQ_WPTR of this ring's queue via an
 * SRBM-selected read (serialized by srbm_mutex). */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		/* restore the default SRBM selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4189
/* Publish new compute ring packets: update the writeback shadow and
 * ring the queue's doorbell so the CP picks up the new wptr. */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4197
/* Deactivate a compute ring's hardware queue (HQD).
 * Caller must hold srbm_mutex (see cik_cp_compute_enable()): the SRBM
 * queue selection below is global state.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue and wait for the queue to go inactive */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4222
4223/**
4224 * cik_cp_compute_enable - enable/disable the compute CP MEs
4225 *
4226 * @rdev: radeon_device pointer
4227 * @enable: enable or disable the MEs
4228 *
4229 * Halts or unhalts the compute MEs.
4230 */
4231static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232{
4233	if (enable)
4234		WREG32(CP_MEC_CNTL, 0);
4235	else {
4236		/*
4237		 * To make hibernation reliable we need to clear compute ring
4238		 * configuration before halting the compute ring.
4239		 */
4240		mutex_lock(&rdev->srbm_mutex);
4241		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4242		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4243		mutex_unlock(&rdev->srbm_mutex);
4244
4245		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4246		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4247		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4248	}
4249	udelay(50);
4250}
4251
4252/**
4253 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4254 *
4255 * @rdev: radeon_device pointer
4256 *
4257 * Loads the compute MEC1&2 ucode.
4258 * Returns 0 for success, -EINVAL if the ucode is not available.
4259 */
4260static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4261{
4262	int i;
4263
4264	if (!rdev->mec_fw)
4265		return -EINVAL;
4266
4267	cik_cp_compute_enable(rdev, false);
4268
4269	if (rdev->new_fw) {
4270		const struct gfx_firmware_header_v1_0 *mec_hdr =
4271			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4272		const __le32 *fw_data;
4273		u32 fw_size;
4274
4275		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4276
4277		/* MEC1 */
4278		fw_data = (const __le32 *)
4279			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4280		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4281		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4282		for (i = 0; i < fw_size; i++)
4283			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4284		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4285
4286		/* MEC2 */
4287		if (rdev->family == CHIP_KAVERI) {
4288			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4289				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4290
4291			fw_data = (const __le32 *)
4292				(rdev->mec2_fw->data +
4293				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4294			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4295			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4296			for (i = 0; i < fw_size; i++)
4297				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4298			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4299		}
4300	} else {
4301		const __be32 *fw_data;
4302
4303		/* MEC1 */
4304		fw_data = (const __be32 *)rdev->mec_fw->data;
4305		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4306		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4307			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4308		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4309
4310		if (rdev->family == CHIP_KAVERI) {
4311			/* MEC2 */
4312			fw_data = (const __be32 *)rdev->mec_fw->data;
4313			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4314			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4315				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4316			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4317		}
4318	}
4319
4320	return 0;
4321}
4322
4323/**
4324 * cik_cp_compute_start - start the compute queues
4325 *
4326 * @rdev: radeon_device pointer
4327 *
4328 * Enable the compute queues.
4329 * Returns 0 for success, error for failure.
4330 */
4331static int cik_cp_compute_start(struct radeon_device *rdev)
4332{
4333	cik_cp_compute_enable(rdev, true);
4334
4335	return 0;
4336}
4337
4338/**
4339 * cik_cp_compute_fini - stop the compute queues
4340 *
4341 * @rdev: radeon_device pointer
4342 *
4343 * Stop the compute queues and tear down the driver queue
4344 * info.
4345 */
4346static void cik_cp_compute_fini(struct radeon_device *rdev)
4347{
4348	int i, idx, r;
4349
4350	cik_cp_compute_enable(rdev, false);
4351
4352	for (i = 0; i < 2; i++) {
4353		if (i == 0)
4354			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4355		else
4356			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4357
4358		if (rdev->ring[idx].mqd_obj) {
4359			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4360			if (unlikely(r != 0))
4361				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4362
4363			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4364			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4365
4366			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4367			rdev->ring[idx].mqd_obj = NULL;
4368		}
4369	}
4370}
4371
/* Unpin and free the MEC HPD EOP buffer object allocated by
 * cik_mec_init(), if present. */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4387
4388#define MEC_HPD_SIZE 2048
4389
4390static int cik_mec_init(struct radeon_device *rdev)
4391{
4392	int r;
4393	u32 *hpd;
4394
4395	/*
4396	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4397	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4398	 */
4399	if (rdev->family == CHIP_KAVERI)
4400		rdev->mec.num_mec = 2;
4401	else
4402		rdev->mec.num_mec = 1;
4403	rdev->mec.num_pipe = 4;
4404	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4405
4406	if (rdev->mec.hpd_eop_obj == NULL) {
4407		r = radeon_bo_create(rdev,
4408				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4409				     PAGE_SIZE, true,
4410				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4411				     &rdev->mec.hpd_eop_obj);
4412		if (r) {
4413			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4414			return r;
4415		}
4416	}
4417
4418	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4419	if (unlikely(r != 0)) {
4420		cik_mec_fini(rdev);
4421		return r;
4422	}
4423	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4424			  &rdev->mec.hpd_eop_gpu_addr);
4425	if (r) {
4426		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4427		cik_mec_fini(rdev);
4428		return r;
4429	}
4430	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4431	if (r) {
4432		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4433		cik_mec_fini(rdev);
4434		return r;
4435	}
4436
4437	/* clear memory.  Not sure if this is required or not */
4438	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4439
4440	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4441	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4442
4443	return 0;
4444}
4445
/* Shadow of the per-queue CP_MQD_*/CP_HQD_* register block stored in
 * the MQD.  NOTE(review): field order appears to mirror the hardware
 * register layout the CP consumes — do not reorder without checking
 * the CIK register spec. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4484
/*
 * Memory queue descriptor (MQD) for a Bonaire-class (CIK) compute queue.
 * One MQD is allocated per compute ring (see cik_cp_compute_resume());
 * the CP reads queue state from this in-memory structure, including the
 * embedded HQD register snapshot in @queue_state.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4512
4513/**
4514 * cik_cp_compute_resume - setup the compute queue registers
4515 *
4516 * @rdev: radeon_device pointer
4517 *
4518 * Program the compute queues and test them to make sure they
4519 * are working.
4520 * Returns 0 for success, error for failure.
4521 */
4522static int cik_cp_compute_resume(struct radeon_device *rdev)
4523{
4524	int r, i, j, idx;
4525	u32 tmp;
4526	bool use_doorbell = true;
4527	u64 hqd_gpu_addr;
4528	u64 mqd_gpu_addr;
4529	u64 eop_gpu_addr;
4530	u64 wb_gpu_addr;
4531	u32 *buf;
4532	struct bonaire_mqd *mqd;
4533
4534	r = cik_cp_compute_start(rdev);
4535	if (r)
4536		return r;
4537
4538	/* fix up chicken bits */
4539	tmp = RREG32(CP_CPF_DEBUG);
4540	tmp |= (1 << 23);
4541	WREG32(CP_CPF_DEBUG, tmp);
4542
4543	/* init the pipes */
4544	mutex_lock(&rdev->srbm_mutex);
4545
4546	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4547		int me = (i < 4) ? 1 : 2;
4548		int pipe = (i < 4) ? i : (i - 4);
4549
4550		cik_srbm_select(rdev, me, pipe, 0, 0);
4551
4552		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4553		/* write the EOP addr */
4554		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4555		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4556
4557		/* set the VMID assigned */
4558		WREG32(CP_HPD_EOP_VMID, 0);
4559
4560		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4561		tmp = RREG32(CP_HPD_EOP_CONTROL);
4562		tmp &= ~EOP_SIZE_MASK;
4563		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4564		WREG32(CP_HPD_EOP_CONTROL, tmp);
4565
4566	}
4567	cik_srbm_select(rdev, 0, 0, 0, 0);
4568	mutex_unlock(&rdev->srbm_mutex);
4569
4570	/* init the queues.  Just two for now. */
4571	for (i = 0; i < 2; i++) {
4572		if (i == 0)
4573			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4574		else
4575			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4576
4577		if (rdev->ring[idx].mqd_obj == NULL) {
4578			r = radeon_bo_create(rdev,
4579					     sizeof(struct bonaire_mqd),
4580					     PAGE_SIZE, true,
4581					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4582					     NULL, &rdev->ring[idx].mqd_obj);
4583			if (r) {
4584				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4585				return r;
4586			}
4587		}
4588
4589		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4590		if (unlikely(r != 0)) {
4591			cik_cp_compute_fini(rdev);
4592			return r;
4593		}
4594		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4595				  &mqd_gpu_addr);
4596		if (r) {
4597			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4598			cik_cp_compute_fini(rdev);
4599			return r;
4600		}
4601		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4602		if (r) {
4603			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4604			cik_cp_compute_fini(rdev);
4605			return r;
4606		}
4607
4608		/* init the mqd struct */
4609		memset(buf, 0, sizeof(struct bonaire_mqd));
4610
4611		mqd = (struct bonaire_mqd *)buf;
4612		mqd->header = 0xC0310800;
4613		mqd->static_thread_mgmt01[0] = 0xffffffff;
4614		mqd->static_thread_mgmt01[1] = 0xffffffff;
4615		mqd->static_thread_mgmt23[0] = 0xffffffff;
4616		mqd->static_thread_mgmt23[1] = 0xffffffff;
4617
4618		mutex_lock(&rdev->srbm_mutex);
4619		cik_srbm_select(rdev, rdev->ring[idx].me,
4620				rdev->ring[idx].pipe,
4621				rdev->ring[idx].queue, 0);
4622
4623		/* disable wptr polling */
4624		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4625		tmp &= ~WPTR_POLL_EN;
4626		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4627
4628		/* enable doorbell? */
4629		mqd->queue_state.cp_hqd_pq_doorbell_control =
4630			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4631		if (use_doorbell)
4632			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4633		else
4634			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4635		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4636		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4637
4638		/* disable the queue if it's active */
4639		mqd->queue_state.cp_hqd_dequeue_request = 0;
4640		mqd->queue_state.cp_hqd_pq_rptr = 0;
4641		mqd->queue_state.cp_hqd_pq_wptr= 0;
4642		if (RREG32(CP_HQD_ACTIVE) & 1) {
4643			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4644			for (j = 0; j < rdev->usec_timeout; j++) {
4645				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4646					break;
4647				udelay(1);
4648			}
4649			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4650			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4651			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4652		}
4653
4654		/* set the pointer to the MQD */
4655		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4656		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4657		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4658		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4659		/* set MQD vmid to 0 */
4660		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4661		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4662		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4663
4664		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4665		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4666		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4667		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4669		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4670
4671		/* set up the HQD, this is similar to CP_RB0_CNTL */
4672		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4673		mqd->queue_state.cp_hqd_pq_control &=
4674			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4675
4676		mqd->queue_state.cp_hqd_pq_control |=
4677			order_base_2(rdev->ring[idx].ring_size / 8);
4678		mqd->queue_state.cp_hqd_pq_control |=
4679			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4680#ifdef __BIG_ENDIAN
4681		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4682#endif
4683		mqd->queue_state.cp_hqd_pq_control &=
4684			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4685		mqd->queue_state.cp_hqd_pq_control |=
4686			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4687		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4688
4689		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4690		if (i == 0)
4691			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4692		else
4693			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4694		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4695		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4696		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4697		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4698		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4699
4700		/* set the wb address wether it's enabled or not */
4701		if (i == 0)
4702			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4703		else
4704			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4705		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4706		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4707			upper_32_bits(wb_gpu_addr) & 0xffff;
4708		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4709		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4710		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4711		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4712
4713		/* enable the doorbell if requested */
4714		if (use_doorbell) {
4715			mqd->queue_state.cp_hqd_pq_doorbell_control =
4716				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4717			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4718			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4719				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4720			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4721			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4722				~(DOORBELL_SOURCE | DOORBELL_HIT);
4723
4724		} else {
4725			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4726		}
4727		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4728		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4729
4730		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4731		rdev->ring[idx].wptr = 0;
4732		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4733		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4734		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4735
4736		/* set the vmid for the queue */
4737		mqd->queue_state.cp_hqd_vmid = 0;
4738		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4739
4740		/* activate the queue */
4741		mqd->queue_state.cp_hqd_active = 1;
4742		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4743
4744		cik_srbm_select(rdev, 0, 0, 0, 0);
4745		mutex_unlock(&rdev->srbm_mutex);
4746
4747		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4748		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749
4750		rdev->ring[idx].ready = true;
4751		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4752		if (r)
4753			rdev->ring[idx].ready = false;
4754	}
4755
4756	return 0;
4757}
4758
/**
 * cik_cp_enable - enable/disable both CP blocks
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the engines
 *
 * Enables/disables the gfx CP and then the compute (MEC) CP.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4764
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx CP ucode first, then the compute (MEC) CP ucode.
 * Returns 0 on success, a negative error code on failure.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4778
/**
 * cik_cp_fini - tear down both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the gfx CP, then the compute (MEC) CP.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4784
/**
 * cik_cp_resume - load ucode and bring up both CP blocks
 *
 * @rdev: radeon_device pointer
 *
 * Loads the CP microcode and resumes the gfx and compute rings,
 * keeping the GUI idle interrupt disabled during bring-up.
 * Returns 0 on success, a negative error code on failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* keep gui idle interrupts quiet while the CP is being restarted */
	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4806
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dumps the GRBM, SRBM, SDMA and CP status registers to the kernel
 * log; used before and after a soft reset to aid hang debugging.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4846
4847/**
4848 * cik_gpu_check_soft_reset - check which blocks are busy
4849 *
4850 * @rdev: radeon_device pointer
4851 *
4852 * Check which blocks are busy and return the relevant reset
4853 * mask to be used by cik_gpu_soft_reset().
4854 * Returns a mask of the blocks to be reset.
4855 */
4856u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4857{
4858	u32 reset_mask = 0;
4859	u32 tmp;
4860
4861	/* GRBM_STATUS */
4862	tmp = RREG32(GRBM_STATUS);
4863	if (tmp & (PA_BUSY | SC_BUSY |
4864		   BCI_BUSY | SX_BUSY |
4865		   TA_BUSY | VGT_BUSY |
4866		   DB_BUSY | CB_BUSY |
4867		   GDS_BUSY | SPI_BUSY |
4868		   IA_BUSY | IA_BUSY_NO_DMA))
4869		reset_mask |= RADEON_RESET_GFX;
4870
4871	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4872		reset_mask |= RADEON_RESET_CP;
4873
4874	/* GRBM_STATUS2 */
4875	tmp = RREG32(GRBM_STATUS2);
4876	if (tmp & RLC_BUSY)
4877		reset_mask |= RADEON_RESET_RLC;
4878
4879	/* SDMA0_STATUS_REG */
4880	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4881	if (!(tmp & SDMA_IDLE))
4882		reset_mask |= RADEON_RESET_DMA;
4883
4884	/* SDMA1_STATUS_REG */
4885	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4886	if (!(tmp & SDMA_IDLE))
4887		reset_mask |= RADEON_RESET_DMA1;
4888
4889	/* SRBM_STATUS2 */
4890	tmp = RREG32(SRBM_STATUS2);
4891	if (tmp & SDMA_BUSY)
4892		reset_mask |= RADEON_RESET_DMA;
4893
4894	if (tmp & SDMA1_BUSY)
4895		reset_mask |= RADEON_RESET_DMA1;
4896
4897	/* SRBM_STATUS */
4898	tmp = RREG32(SRBM_STATUS);
4899
4900	if (tmp & IH_BUSY)
4901		reset_mask |= RADEON_RESET_IH;
4902
4903	if (tmp & SEM_BUSY)
4904		reset_mask |= RADEON_RESET_SEM;
4905
4906	if (tmp & GRBM_RQ_PENDING)
4907		reset_mask |= RADEON_RESET_GRBM;
4908
4909	if (tmp & VMC_BUSY)
4910		reset_mask |= RADEON_RESET_VMC;
4911
4912	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4913		   MCC_BUSY | MCD_BUSY))
4914		reset_mask |= RADEON_RESET_MC;
4915
4916	if (evergreen_is_display_hung(rdev))
4917		reset_mask |= RADEON_RESET_DISPLAY;
4918
4919	/* Skip MC reset as it's mostly likely not hung, just busy */
4920	if (reset_mask & RADEON_RESET_MC) {
4921		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4922		reset_mask &= ~RADEON_RESET_MC;
4923	}
4924
4925	return reset_mask;
4926}
4927
4928/**
4929 * cik_gpu_soft_reset - soft reset GPU
4930 *
4931 * @rdev: radeon_device pointer
4932 * @reset_mask: mask of which blocks to reset
4933 *
4934 * Soft reset the blocks specified in @reset_mask.
4935 */
4936static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4937{
4938	struct evergreen_mc_save save;
4939	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4940	u32 tmp;
4941
4942	if (reset_mask == 0)
4943		return;
4944
4945	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4946
4947	cik_print_gpu_status_regs(rdev);
4948	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4949		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4950	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4951		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4952
4953	/* disable CG/PG */
4954	cik_fini_pg(rdev);
4955	cik_fini_cg(rdev);
4956
4957	/* stop the rlc */
4958	cik_rlc_stop(rdev);
4959
4960	/* Disable GFX parsing/prefetching */
4961	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4962
4963	/* Disable MEC parsing/prefetching */
4964	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4965
4966	if (reset_mask & RADEON_RESET_DMA) {
4967		/* sdma0 */
4968		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4969		tmp |= SDMA_HALT;
4970		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4971	}
4972	if (reset_mask & RADEON_RESET_DMA1) {
4973		/* sdma1 */
4974		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4975		tmp |= SDMA_HALT;
4976		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4977	}
4978
4979	evergreen_mc_stop(rdev, &save);
4980	if (evergreen_mc_wait_for_idle(rdev)) {
4981		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4982	}
4983
4984	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4985		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4986
4987	if (reset_mask & RADEON_RESET_CP) {
4988		grbm_soft_reset |= SOFT_RESET_CP;
4989
4990		srbm_soft_reset |= SOFT_RESET_GRBM;
4991	}
4992
4993	if (reset_mask & RADEON_RESET_DMA)
4994		srbm_soft_reset |= SOFT_RESET_SDMA;
4995
4996	if (reset_mask & RADEON_RESET_DMA1)
4997		srbm_soft_reset |= SOFT_RESET_SDMA1;
4998
4999	if (reset_mask & RADEON_RESET_DISPLAY)
5000		srbm_soft_reset |= SOFT_RESET_DC;
5001
5002	if (reset_mask & RADEON_RESET_RLC)
5003		grbm_soft_reset |= SOFT_RESET_RLC;
5004
5005	if (reset_mask & RADEON_RESET_SEM)
5006		srbm_soft_reset |= SOFT_RESET_SEM;
5007
5008	if (reset_mask & RADEON_RESET_IH)
5009		srbm_soft_reset |= SOFT_RESET_IH;
5010
5011	if (reset_mask & RADEON_RESET_GRBM)
5012		srbm_soft_reset |= SOFT_RESET_GRBM;
5013
5014	if (reset_mask & RADEON_RESET_VMC)
5015		srbm_soft_reset |= SOFT_RESET_VMC;
5016
5017	if (!(rdev->flags & RADEON_IS_IGP)) {
5018		if (reset_mask & RADEON_RESET_MC)
5019			srbm_soft_reset |= SOFT_RESET_MC;
5020	}
5021
5022	if (grbm_soft_reset) {
5023		tmp = RREG32(GRBM_SOFT_RESET);
5024		tmp |= grbm_soft_reset;
5025		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5026		WREG32(GRBM_SOFT_RESET, tmp);
5027		tmp = RREG32(GRBM_SOFT_RESET);
5028
5029		udelay(50);
5030
5031		tmp &= ~grbm_soft_reset;
5032		WREG32(GRBM_SOFT_RESET, tmp);
5033		tmp = RREG32(GRBM_SOFT_RESET);
5034	}
5035
5036	if (srbm_soft_reset) {
5037		tmp = RREG32(SRBM_SOFT_RESET);
5038		tmp |= srbm_soft_reset;
5039		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5040		WREG32(SRBM_SOFT_RESET, tmp);
5041		tmp = RREG32(SRBM_SOFT_RESET);
5042
5043		udelay(50);
5044
5045		tmp &= ~srbm_soft_reset;
5046		WREG32(SRBM_SOFT_RESET, tmp);
5047		tmp = RREG32(SRBM_SOFT_RESET);
5048	}
5049
5050	/* Wait a little for things to settle down */
5051	udelay(50);
5052
5053	evergreen_mc_resume(rdev, &save);
5054	udelay(50);
5055
5056	cik_print_gpu_status_regs(rdev);
5057}
5058
/* GMCON register state saved across a KV/KB (APU) pci config reset */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5064
/**
 * kv_save_regs_for_reset - save GMCON state before an APU reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves the GMCON registers and disables the restore engine's
 * power-up/register-update execution and stutter mode so the
 * memory controller state survives the pci config reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5076
5077static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5078				      struct kv_reset_save_regs *save)
5079{
5080	int i;
5081
5082	WREG32(GMCON_PGFSM_WRITE, 0);
5083	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5084
5085	for (i = 0; i < 5; i++)
5086		WREG32(GMCON_PGFSM_WRITE, 0);
5087
5088	WREG32(GMCON_PGFSM_WRITE, 0);
5089	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5090
5091	for (i = 0; i < 5; i++)
5092		WREG32(GMCON_PGFSM_WRITE, 0);
5093
5094	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5095	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5096
5097	for (i = 0; i < 5; i++)
5098		WREG32(GMCON_PGFSM_WRITE, 0);
5099
5100	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5101	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5102
5103	for (i = 0; i < 5; i++)
5104		WREG32(GMCON_PGFSM_WRITE, 0);
5105
5106	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5107	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5108
5109	for (i = 0; i < 5; i++)
5110		WREG32(GMCON_PGFSM_WRITE, 0);
5111
5112	WREG32(GMCON_PGFSM_WRITE, 0);
5113	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5114
5115	for (i = 0; i < 5; i++)
5116		WREG32(GMCON_PGFSM_WRITE, 0);
5117
5118	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5119	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5120
5121	for (i = 0; i < 5; i++)
5122		WREG32(GMCON_PGFSM_WRITE, 0);
5123
5124	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5125	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5126
5127	for (i = 0; i < 5; i++)
5128		WREG32(GMCON_PGFSM_WRITE, 0);
5129
5130	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5131	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5132
5133	for (i = 0; i < 5; i++)
5134		WREG32(GMCON_PGFSM_WRITE, 0);
5135
5136	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5137	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5138
5139	for (i = 0; i < 5; i++)
5140		WREG32(GMCON_PGFSM_WRITE, 0);
5141
5142	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5143	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5144
5145	WREG32(GMCON_MISC3, save->gmcon_misc3);
5146	WREG32(GMCON_MISC, save->gmcon_misc);
5147	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5148}
5149
/**
 * cik_gpu_pci_config_reset - full GPU reset via PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts all engines, stops the MC, saves GMCON state on APUs, then
 * resets the asic through PCI config space and waits for it to come
 * back (CONFIG_MEMSIZE readable again).  Used as the "hard" reset
 * path when a soft reset is insufficient.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs share the MC with the CPU; preserve GMCON state across reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* CONFIG_MEMSIZE reads 0xffffffff while the asic is in reset */
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5212
5213/**
5214 * cik_asic_reset - soft reset GPU
5215 *
5216 * @rdev: radeon_device pointer
5217 * @hard: force hard reset
5218 *
5219 * Look up which blocks are hung and attempt
5220 * to reset them.
5221 * Returns 0 for success.
5222 */
5223int cik_asic_reset(struct radeon_device *rdev, bool hard)
5224{
5225	u32 reset_mask;
5226
5227	if (hard) {
5228		cik_gpu_pci_config_reset(rdev);
5229		return 0;
5230	}
5231
5232	reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234	if (reset_mask)
5235		r600_set_bios_scratch_engine_hung(rdev, true);
5236
5237	/* try soft reset */
5238	cik_gpu_soft_reset(rdev, reset_mask);
5239
5240	reset_mask = cik_gpu_check_soft_reset(rdev);
5241
5242	/* try pci config reset */
5243	if (reset_mask && radeon_hard_reset)
5244		cik_gpu_pci_config_reset(rdev);
5245
5246	reset_mask = cik_gpu_check_soft_reset(rdev);
5247
5248	if (!reset_mask)
5249		r600_set_bios_scratch_engine_hung(rdev, false);
5250
5251	return 0;
5252}
5253
5254/**
5255 * cik_gfx_is_lockup - check if the 3D engine is locked up
5256 *
5257 * @rdev: radeon_device pointer
5258 * @ring: radeon_ring structure holding ring information
5259 *
5260 * Check if the 3D engine is locked up (CIK).
5261 * Returns true if the engine is locked, false if not.
5262 */
5263bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5264{
5265	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5266
5267	if (!(reset_mask & (RADEON_RESET_GFX |
5268			    RADEON_RESET_COMPUTE |
5269			    RADEON_RESET_CP))) {
5270		radeon_ring_lockup_update(rdev, ring);
5271		return false;
5272	}
5273	return radeon_ring_test_lockup(rdev, ring);
5274}
5275
5276/* MC */
5277/**
5278 * cik_mc_program - program the GPU memory controller
5279 *
5280 * @rdev: radeon_device pointer
5281 *
5282 * Set the location of vram, gart, and AGP in the GPU's
5283 * physical address space (CIK).
5284 */
5285static void cik_mc_program(struct radeon_device *rdev)
5286{
5287	struct evergreen_mc_save save;
5288	u32 tmp;
5289	int i, j;
5290
5291	/* Initialize HDP */
5292	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5293		WREG32((0x2c14 + j), 0x00000000);
5294		WREG32((0x2c18 + j), 0x00000000);
5295		WREG32((0x2c1c + j), 0x00000000);
5296		WREG32((0x2c20 + j), 0x00000000);
5297		WREG32((0x2c24 + j), 0x00000000);
5298	}
5299	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5300
5301	evergreen_mc_stop(rdev, &save);
5302	if (radeon_mc_wait_for_idle(rdev)) {
5303		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5304	}
5305	/* Lockout access through VGA aperture*/
5306	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5307	/* Update configuration */
5308	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5309	       rdev->mc.vram_start >> 12);
5310	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5311	       rdev->mc.vram_end >> 12);
5312	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5313	       rdev->vram_scratch.gpu_addr >> 12);
5314	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5315	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5316	WREG32(MC_VM_FB_LOCATION, tmp);
5317	/* XXX double check these! */
5318	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5319	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5320	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5321	WREG32(MC_VM_AGP_BASE, 0);
5322	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5323	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5324	if (radeon_mc_wait_for_idle(rdev)) {
5325		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5326	}
5327	evergreen_mc_resume(rdev, &save);
5328	/* we need to own VRAM, so turn off the VGA renderer here
5329	 * to stop it overwriting our objects */
5330	rv515_vga_render_disable(rdev);
5331}
5332
5333/**
5334 * cik_mc_init - initialize the memory controller driver params
5335 *
5336 * @rdev: radeon_device pointer
5337 *
5338 * Look up the amount of vram, vram width, and decide how to place
5339 * vram and gart within the GPU's physical address space (CIK).
5340 * Returns 0 for success.
5341 */
5342static int cik_mc_init(struct radeon_device *rdev)
5343{
5344	u32 tmp;
5345	int chansize, numchan;
5346
5347	/* Get VRAM informations */
5348	rdev->mc.vram_is_ddr = true;
5349	tmp = RREG32(MC_ARB_RAMCFG);
5350	if (tmp & CHANSIZE_MASK) {
5351		chansize = 64;
5352	} else {
5353		chansize = 32;
5354	}
5355	tmp = RREG32(MC_SHARED_CHMAP);
5356	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5357	case 0:
5358	default:
5359		numchan = 1;
5360		break;
5361	case 1:
5362		numchan = 2;
5363		break;
5364	case 2:
5365		numchan = 4;
5366		break;
5367	case 3:
5368		numchan = 8;
5369		break;
5370	case 4:
5371		numchan = 3;
5372		break;
5373	case 5:
5374		numchan = 6;
5375		break;
5376	case 6:
5377		numchan = 10;
5378		break;
5379	case 7:
5380		numchan = 12;
5381		break;
5382	case 8:
5383		numchan = 16;
5384		break;
5385	}
5386	rdev->mc.vram_width = numchan * chansize;
5387	/* Could aper size report 0 ? */
5388	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5389	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5390	/* size in MB on si */
5391	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5392	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5393	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5394	si_vram_gtt_location(rdev, &rdev->mc);
5395	radeon_update_bandwidth_info(rdev);
5396
5397	return 0;
5398}
5399
5400/*
5401 * GART
5402 * VMID 0 is the physical GPU addresses as used by the kernel.
5403 * VMIDs 1-15 are used for userspace clients and are handled
5404 * by the radeon vm/hsa code.
5405 */
5406/**
5407 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5408 *
5409 * @rdev: radeon_device pointer
5410 *
5411 * Flush the TLB for the VMID 0 page table (CIK).
5412 */
5413void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5414{
5415	/* flush hdp cache */
5416	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5417
5418	/* bits 0-15 are the VM contexts0-15 */
5419	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5420}
5421
5422/**
5423 * cik_pcie_gart_enable - gart enable
5424 *
5425 * @rdev: radeon_device pointer
5426 *
5427 * This sets up the TLBs, programs the page tables for VMID0,
5428 * sets up the hw for VMIDs 1-15 which are allocated on
5429 * demand, and sets up the global locations for the LDS, GDS,
5430 * and GPUVM for FSA64 clients (CIK).
5431 * Returns 0 for success, errors for failure.
5432 */
5433static int cik_pcie_gart_enable(struct radeon_device *rdev)
5434{
5435	int r, i;
5436
5437	if (rdev->gart.robj == NULL) {
5438		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5439		return -EINVAL;
5440	}
5441	r = radeon_gart_table_vram_pin(rdev);
5442	if (r)
5443		return r;
5444	/* Setup TLB control */
5445	WREG32(MC_VM_MX_L1_TLB_CNTL,
5446	       (0xA << 7) |
5447	       ENABLE_L1_TLB |
5448	       ENABLE_L1_FRAGMENT_PROCESSING |
5449	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5450	       ENABLE_ADVANCED_DRIVER_MODEL |
5451	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5452	/* Setup L2 cache */
5453	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5454	       ENABLE_L2_FRAGMENT_PROCESSING |
5455	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5456	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5457	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5458	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5459	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5460	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5461	       BANK_SELECT(4) |
5462	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5463	/* setup context0 */
5464	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5465	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5466	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5467	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5468			(u32)(rdev->dummy_page.addr >> 12));
5469	WREG32(VM_CONTEXT0_CNTL2, 0);
5470	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5471				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5472
5473	WREG32(0x15D4, 0);
5474	WREG32(0x15D8, 0);
5475	WREG32(0x15DC, 0);
5476
5477	/* restore context1-15 */
5478	/* set vm size, must be a multiple of 4 */
5479	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5480	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5481	for (i = 1; i < 16; i++) {
5482		if (i < 8)
5483			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5484			       rdev->vm_manager.saved_table_addr[i]);
5485		else
5486			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5487			       rdev->vm_manager.saved_table_addr[i]);
5488	}
5489
5490	/* enable context1-15 */
5491	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5492	       (u32)(rdev->dummy_page.addr >> 12));
5493	WREG32(VM_CONTEXT1_CNTL2, 4);
5494	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5495				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5496				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5498				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5500				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5502				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5504				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5505				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5506				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5507				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5508
5509	if (rdev->family == CHIP_KAVERI) {
5510		u32 tmp = RREG32(CHUB_CONTROL);
5511		tmp &= ~BYPASS_VM;
5512		WREG32(CHUB_CONTROL, tmp);
5513	}
5514
5515	/* XXX SH_MEM regs */
5516	/* where to put LDS, scratch, GPUVM in FSA64 space */
5517	mutex_lock(&rdev->srbm_mutex);
5518	for (i = 0; i < 16; i++) {
5519		cik_srbm_select(rdev, 0, 0, 0, i);
5520		/* CP and shaders */
5521		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5522		WREG32(SH_MEM_APE1_BASE, 1);
5523		WREG32(SH_MEM_APE1_LIMIT, 0);
5524		WREG32(SH_MEM_BASES, 0);
5525		/* SDMA GFX */
5526		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5527		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5528		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5529		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5530		/* XXX SDMA RLC - todo */
5531	}
5532	cik_srbm_select(rdev, 0, 0, 0, 0);
5533	mutex_unlock(&rdev->srbm_mutex);
5534
5535	cik_pcie_gart_tlb_flush(rdev);
5536	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5537		 (unsigned)(rdev->mc.gtt_size >> 20),
5538		 (unsigned long long)rdev->gart.table_addr);
5539	rdev->gart.ready = true;
5540	return 0;
5541}
5542
5543/**
5544 * cik_pcie_gart_disable - gart disable
5545 *
5546 * @rdev: radeon_device pointer
5547 *
5548 * This disables all VM page table (CIK).
5549 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table base addresses (contexts 1-15) so
	 * cik_pcie_gart_enable() can restore them on resume
	 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	/* note: ENABLE_L2_CACHE is deliberately not set here, unlike in
	 * the enable path
	 */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* unpin the table BO now that the hw no longer references it */
	radeon_gart_table_vram_unpin(rdev);
}
5581
5582/**
5583 * cik_pcie_gart_fini - vm fini callback
5584 *
5585 * @rdev: radeon_device pointer
5586 *
5587 * Tears down the driver GART/VM setup (CIK).
5588 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* order matters: quiesce the hw before freeing the table memory */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5595
5596/* vm parser */
5597/**
5598 * cik_ib_parse - vm ib_parse callback
5599 *
5600 * @rdev: radeon_device pointer
5601 * @ib: indirect buffer pointer
5602 *
5603 * CIK uses hw IB checking so this is a nop (CIK).
5604 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* the hardware validates IBs on CIK; nothing to do in software */
	return 0;
}
5609
5610/*
5611 * vm
5612 * VMID 0 is the physical GPU addresses as used by the kernel.
5613 * VMIDs 1-15 are used for userspace clients and are handled
5614 * by the radeon vm/hsa code.
5615 */
5616/**
5617 * cik_vm_init - cik vm init callback
5618 *
5619 * @rdev: radeon_device pointer
5620 *
5621 * Inits cik specific vm parameters (number of VMs, base of vram for
5622 * VMIDs 1-15) (CIK).
5623 * Returns 0 for success.
5624 */
5625int cik_vm_init(struct radeon_device *rdev)
5626{
5627	/*
5628	 * number of VMs
5629	 * VMID 0 is reserved for System
5630	 * radeon graphics/compute will use VMIDs 1-15
5631	 */
5632	rdev->vm_manager.nvm = 16;
5633	/* base offset of vram pages */
5634	if (rdev->flags & RADEON_IS_IGP) {
5635		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5636		tmp <<= 22;
5637		rdev->vm_manager.vram_base_offset = tmp;
5638	} else
5639		rdev->vm_manager.vram_base_offset = 0;
5640
5641	return 0;
5642}
5643
5644/**
5645 * cik_vm_fini - cik vm fini callback
5646 *
5647 * @rdev: radeon_device pointer
5648 *
5649 * Tear down any asic specific VM setup (CIK).
5650 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic specific VM state to tear down on CIK */
}
5654
5655/**
5656 * cik_vm_decode_fault - print human readable fault info
5657 *
5658 * @rdev: radeon_device pointer
5659 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5660 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 
5661 *
5662 * Print human readable fault information (CIK).
5663 */
5664static void cik_vm_decode_fault(struct radeon_device *rdev,
5665				u32 status, u32 addr, u32 mc_client)
5666{
5667	u32 mc_id;
5668	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5669	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5670	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5671		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5672
5673	if (rdev->family == CHIP_HAWAII)
5674		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5675	else
5676		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5677
5678	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5679	       protections, vmid, addr,
5680	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5681	       block, mc_client, mc_id);
5682}
5683
5684/**
5685 * cik_vm_flush - cik vm flush using the CP
5686 *
5687 * @rdev: radeon_device pointer
5688 *
5689 * Update the page table base and flush the VM TLB
5690 * using the CP (CIK).
5691 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP; compute rings write via the ME */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* update the page directory base address for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the SRBM bank of this VMID before touching SH_MEM_* */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	/* program the four consecutive SH_MEM registers in one packet */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch the SRBM bank back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5765
5766/*
5767 * RLC
5768 * The RLC is a multi-purpose microengine that handles a
5769 * variety of functions, the most important of which is
5770 * the interrupt controller.
5771 */
5772static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5773					  bool enable)
5774{
5775	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5776
5777	if (enable)
5778		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5779	else
5780		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5781	WREG32(CP_INT_CNTL_RING0, tmp);
5782}
5783
5784static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5785{
5786	u32 tmp;
5787
5788	tmp = RREG32(RLC_LB_CNTL);
5789	if (enable)
5790		tmp |= LOAD_BALANCE_ENABLE;
5791	else
5792		tmp &= ~LOAD_BALANCE_ENABLE;
5793	WREG32(RLC_LB_CNTL, tmp);
5794}
5795
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll every SE/SH until its CU master serdes goes idle
	 * (bounded by usec_timeout per SE/SH pair)
	 */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) to go idle too */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5820
5821static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5822{
5823	u32 tmp;
5824
5825	tmp = RREG32(RLC_CNTL);
5826	if (tmp != rlc)
5827		WREG32(RLC_CNTL, rlc);
5828}
5829
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the previous RLC_CNTL value so the caller can restore it
 * via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait for the RLC GPM to drain, bounded by usec_timeout */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5853
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* request safe mode from the RLC */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for both power and clock status to report ready */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* then wait for the RLC to clear the REQ bit (request acked) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5874
5875void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5876{
5877	u32 tmp;
5878
5879	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5880	WREG32(RLC_GPR_REG2, tmp);
5881}
5882
5883/**
5884 * cik_rlc_stop - stop the RLC ME
5885 *
5886 * @rdev: radeon_device pointer
5887 *
5888 * Halt the RLC ME (MicroEngine) (CIK).
5889 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* hard stop: clear RLC_ENABLE unconditionally */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait for the serdes masters to drain before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5898
5899/**
5900 * cik_rlc_start - start the RLC ME
5901 *
5902 * @rdev: radeon_device pointer
5903 *
5904 * Unhalt the RLC ME (MicroEngine) (CIK).
5905 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC a moment to come up before it is used */
	udelay(50);
}
5914
5915/**
5916 * cik_rlc_resume - setup the RLC hw
5917 *
5918 * @rdev: radeon_device pointer
5919 *
5920 * Initialize the RLC registers, load the ucode,
5921 * and start the RLC (CIK).
5922 * Returns 0 for success, -EINVAL if the ucode is not available.
5923 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	/* ucode must have been fetched by the earlier init path */
	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load-balancing counter setup */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast the LB parameters to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware carries a header describing the image */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		/* ucode size is given in bytes; the data port takes dwords */
		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: fixed, per-family sizes, big-endian data */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6004
/* Toggle coarse-grain clock gating (CGCG/CGLS) for GFX. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming its serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads; presumably posting
		 * reads to let the CB clocks settle - confirm before removing
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6040
/* Toggle medium-grain clock gating (MGCG) and the related mem/light-sleep
 * and CGTS options for GFX.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming its serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force the overrides on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming its serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6120
6121static const u32 mc_cg_registers[] =
6122{
6123	MC_HUB_MISC_HUB_CG,
6124	MC_HUB_MISC_SIP_CG,
6125	MC_HUB_MISC_VM_CG,
6126	MC_XPB_CLK_GAT,
6127	ATC_MISC_CG,
6128	MC_CITF_MISC_WR_CG,
6129	MC_CITF_MISC_RD_CG,
6130	MC_CITF_MISC_VM_CG,
6131	VM_L2_CG,
6132};
6133
6134static void cik_enable_mc_ls(struct radeon_device *rdev,
6135			     bool enable)
6136{
6137	int i;
6138	u32 orig, data;
6139
6140	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6141		orig = data = RREG32(mc_cg_registers[i]);
6142		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6143			data |= MC_LS_ENABLE;
6144		else
6145			data &= ~MC_LS_ENABLE;
6146		if (data != orig)
6147			WREG32(mc_cg_registers[i], data);
6148	}
6149}
6150
6151static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6152			       bool enable)
6153{
6154	int i;
6155	u32 orig, data;
6156
6157	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6158		orig = data = RREG32(mc_cg_registers[i]);
6159		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6160			data |= MC_CG_ENABLE;
6161		else
6162			data &= ~MC_CG_ENABLE;
6163		if (data != orig)
6164			WREG32(mc_cg_registers[i], data);
6165	}
6166}
6167
/* Toggle medium-grain clock gating on both SDMA engines. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		/* enabling: a plain write of the magic value suffices */
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* disabling: set the override bits, preserving the rest */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
6188
6189static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6190				 bool enable)
6191{
6192	u32 orig, data;
6193
6194	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6195		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6196		data |= 0x100;
6197		if (orig != data)
6198			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6199
6200		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6201		data |= 0x100;
6202		if (orig != data)
6203			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6204	} else {
6205		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6206		data &= ~0x100;
6207		if (orig != data)
6208			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6209
6210		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6211		data &= ~0x100;
6212		if (orig != data)
6213			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6214	}
6215}
6216
/* Toggle UVD medium-grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value just read is discarded and the
		 * register is written wholesale; presumably the read only
		 * serves as a posting access - confirm before changing this
		 * to a read-modify-write.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6242
6243static void cik_enable_bif_mgls(struct radeon_device *rdev,
6244			       bool enable)
6245{
6246	u32 orig, data;
6247
6248	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6249
6250	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6251		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6252			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6253	else
6254		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6255			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6256
6257	if (orig != data)
6258		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6259}
6260
6261static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6262				bool enable)
6263{
6264	u32 orig, data;
6265
6266	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6267
6268	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6269		data &= ~CLOCK_GATING_DIS;
6270	else
6271		data |= CLOCK_GATING_DIS;
6272
6273	if (orig != data)
6274		WREG32(HDP_HOST_PATH_CNTL, data);
6275}
6276
6277static void cik_enable_hdp_ls(struct radeon_device *rdev,
6278			      bool enable)
6279{
6280	u32 orig, data;
6281
6282	orig = data = RREG32(HDP_MEM_POWER_LS);
6283
6284	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6285		data |= HDP_LS_ENABLE;
6286	else
6287		data &= ~HDP_LS_ENABLE;
6288
6289	if (orig != data)
6290		WREG32(HDP_MEM_POWER_LS, data);
6291}
6292
/* Enable/disable clock gating for the blocks selected in @block. */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only touched on dGPUs, not APUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6340
static void cik_init_cg(struct radeon_device *rdev)
{

	/* GFX gating first, then the remaining blocks */
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6355
static void cik_fini_cg(struct radeon_device *rdev)
{
	/* reverse order of cik_init_cg(): non-GFX blocks first, GFX last */
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6366
6367static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6368					  bool enable)
6369{
6370	u32 data, orig;
6371
6372	orig = data = RREG32(RLC_PG_CNTL);
6373	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6374		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6375	else
6376		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6377	if (orig != data)
6378		WREG32(RLC_PG_CNTL, data);
6379}
6380
6381static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6382					  bool enable)
6383{
6384	u32 data, orig;
6385
6386	orig = data = RREG32(RLC_PG_CNTL);
6387	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6388		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6389	else
6390		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6391	if (orig != data)
6392		WREG32(RLC_PG_CNTL, data);
6393}
6394
6395static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6396{
6397	u32 data, orig;
6398
6399	orig = data = RREG32(RLC_PG_CNTL);
6400	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6401		data &= ~DISABLE_CP_PG;
6402	else
6403		data |= DISABLE_CP_PG;
6404	if (orig != data)
6405		WREG32(RLC_PG_CNTL, data);
6406}
6407
6408static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6409{
6410	u32 data, orig;
6411
6412	orig = data = RREG32(RLC_PG_CNTL);
6413	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6414		data &= ~DISABLE_GDS_PG;
6415	else
6416		data |= DISABLE_GDS_PG;
6417	if (orig != data)
6418		WREG32(RLC_PG_CNTL, data);
6419}
6420
6421#define CP_ME_TABLE_SIZE    96
6422#define CP_ME_TABLE_OFFSET  2048
6423#define CP_MEC_TABLE_OFFSET 4096
6424
6425void cik_init_cp_pg_table(struct radeon_device *rdev)
6426{
6427	volatile u32 *dst_ptr;
6428	int me, i, max_me = 4;
6429	u32 bo_offset = 0;
6430	u32 table_offset, table_size;
6431
6432	if (rdev->family == CHIP_KAVERI)
6433		max_me = 5;
6434
6435	if (rdev->rlc.cp_table_ptr == NULL)
6436		return;
6437
6438	/* write the cp table buffer */
6439	dst_ptr = rdev->rlc.cp_table_ptr;
6440	for (me = 0; me < max_me; me++) {
6441		if (rdev->new_fw) {
6442			const __le32 *fw_data;
6443			const struct gfx_firmware_header_v1_0 *hdr;
6444
6445			if (me == 0) {
6446				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6447				fw_data = (const __le32 *)
6448					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6449				table_offset = le32_to_cpu(hdr->jt_offset);
6450				table_size = le32_to_cpu(hdr->jt_size);
6451			} else if (me == 1) {
6452				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6453				fw_data = (const __le32 *)
6454					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6455				table_offset = le32_to_cpu(hdr->jt_offset);
6456				table_size = le32_to_cpu(hdr->jt_size);
6457			} else if (me == 2) {
6458				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6459				fw_data = (const __le32 *)
6460					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6461				table_offset = le32_to_cpu(hdr->jt_offset);
6462				table_size = le32_to_cpu(hdr->jt_size);
6463			} else if (me == 3) {
6464				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6465				fw_data = (const __le32 *)
6466					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6467				table_offset = le32_to_cpu(hdr->jt_offset);
6468				table_size = le32_to_cpu(hdr->jt_size);
6469			} else {
6470				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6471				fw_data = (const __le32 *)
6472					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6473				table_offset = le32_to_cpu(hdr->jt_offset);
6474				table_size = le32_to_cpu(hdr->jt_size);
6475			}
6476
6477			for (i = 0; i < table_size; i ++) {
6478				dst_ptr[bo_offset + i] =
6479					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6480			}
6481			bo_offset += table_size;
6482		} else {
6483			const __be32 *fw_data;
6484			table_size = CP_ME_TABLE_SIZE;
6485
6486			if (me == 0) {
6487				fw_data = (const __be32 *)rdev->ce_fw->data;
6488				table_offset = CP_ME_TABLE_OFFSET;
6489			} else if (me == 1) {
6490				fw_data = (const __be32 *)rdev->pfp_fw->data;
6491				table_offset = CP_ME_TABLE_OFFSET;
6492			} else if (me == 2) {
6493				fw_data = (const __be32 *)rdev->me_fw->data;
6494				table_offset = CP_ME_TABLE_OFFSET;
6495			} else {
6496				fw_data = (const __be32 *)rdev->mec_fw->data;
6497				table_offset = CP_MEC_TABLE_OFFSET;
6498			}
6499
6500			for (i = 0; i < table_size; i ++) {
6501				dst_ptr[bo_offset + i] =
6502					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6503			}
6504			bo_offset += table_size;
6505		}
6506	}
6507}
6508
/* Toggle coarse-grain GFX power gating and the RLC auto power-gate timer. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result of this read is unused; presumably a
		 * posting read to force the GFX block out of power gate -
		 * confirm before removing
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6538
/* Return a bitmap of the active (non-disabled) CUs on the given SE/SH.
 * Bit i set means CU i is usable.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	/* read the fused and user-disabled CU bits for this SE/SH,
	 * then restore broadcast mode
	 */
	cik_select_se_sh(rdev, se, sh);
	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* the inactive-CU bits live in the upper halves of both regs */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh low bits */
	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	/* registers hold "disabled" bits, so invert to get active CUs */
	return (~tmp) & mask;
}
6561
6562static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6563{
6564	u32 i, j, k, active_cu_number = 0;
6565	u32 mask, counter, cu_bitmap;
6566	u32 tmp = 0;
6567
6568	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6569		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6570			mask = 1;
6571			cu_bitmap = 0;
6572			counter = 0;
6573			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6574				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6575					if (counter < 2)
6576						cu_bitmap |= mask;
6577					counter ++;
6578				}
6579				mask <<= 1;
6580			}
6581
6582			active_cu_number += counter;
6583			tmp |= (cu_bitmap << (i * 16 + j * 8));
6584		}
6585	}
6586
6587	WREG32(RLC_PG_AO_CU_MASK, tmp);
6588
6589	tmp = RREG32(RLC_MAX_PG_CU);
6590	tmp &= ~MAX_PU_CU_MASK;
6591	tmp |= MAX_PU_CU(active_cu_number);
6592	WREG32(RLC_MAX_PG_CU, tmp);
6593}
6594
6595static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6596				       bool enable)
6597{
6598	u32 data, orig;
6599
6600	orig = data = RREG32(RLC_PG_CNTL);
6601	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6602		data |= STATIC_PER_CU_PG_ENABLE;
6603	else
6604		data &= ~STATIC_PER_CU_PG_ENABLE;
6605	if (orig != data)
6606		WREG32(RLC_PG_CNTL, data);
6607}
6608
6609static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6610					bool enable)
6611{
6612	u32 data, orig;
6613
6614	orig = data = RREG32(RLC_PG_CNTL);
6615	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6616		data |= DYN_PER_CU_PG_ENABLE;
6617	else
6618		data &= ~DYN_PER_CU_PG_ENABLE;
6619	if (orig != data)
6620		WREG32(RLC_PG_CNTL, data);
6621}
6622
6623#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6624#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6625
/**
 * cik_init_gfx_cgpg - set up RLC state for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Programs the RLC GPM scratch area with the clear-state descriptor
 * and the save/restore register list, points the RLC at the
 * save/restore and CP table buffers, and tunes the powergating
 * delay and idle-poll parameters (CIK).
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state available: zero all three descriptor dwords */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list into RLC scratch */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffer addresses are programmed in units of 256 bytes */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	/* only the low byte of RLC_PG_DELAY_2 is modified */
	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* GRBM register save/restore idle threshold */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6674
/**
 * cik_update_gfx_pg - enable/disable all gfx powergating modes
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable powergating
 *
 * Toggles coarse-grain, static per-CU and dynamic per-CU gfx
 * powergating together; each helper checks its own pg_flags
 * support bit, so unsupported modes stay disabled.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6681
6682u32 cik_get_csb_size(struct radeon_device *rdev)
6683{
6684	u32 count = 0;
6685	const struct cs_section_def *sect = NULL;
6686	const struct cs_extent_def *ext = NULL;
6687
6688	if (rdev->rlc.cs_data == NULL)
6689		return 0;
6690
6691	/* begin clear state */
6692	count += 2;
6693	/* context control state */
6694	count += 3;
6695
6696	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6697		for (ext = sect->section; ext->extent != NULL; ++ext) {
6698			if (sect->id == SECT_CONTEXT)
6699				count += 2 + ext->reg_count;
6700			else
6701				return 0;
6702		}
6703	}
6704	/* pa_sc_raster_config/pa_sc_raster_config1 */
6705	count += 4;
6706	/* end clear state */
6707	count += 2;
6708	/* clear state */
6709	count += 2;
6710
6711	return count;
6712}
6713
/**
 * cik_get_csb_buffer - populate the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (GPU-visible, hence volatile)
 *
 * Fills @buffer with the PM4 packet stream that loads the golden
 * clear state: preamble begin, context control, every SECT_CONTEXT
 * register extent, the per-family PA_SC_RASTER_CONFIG values,
 * preamble end and a final CLEAR_STATE packet.  @buffer must be at
 * least cik_get_csb_size() dwords.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is a dword address; 0xa000 is the
				 * context register space base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* non-context sections are not supported; bail
				 * (mirrors cik_get_csb_size() returning 0) */
				return;
			}
		}
	}

	/* PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 are per-asic */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6778
/**
 * cik_init_pg - initialize powergating
 *
 * @rdev: radeon_device pointer
 *
 * If any powergating flags are set, enables SCK slowdown on
 * power-up/power-down, sets up GFX powergating state (including CP
 * and GDS powergating when GFX PG is supported), programs the
 * always-on CU mask and finally enables the gfx PG modes (CIK).
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6793
/**
 * cik_fini_pg - tear down powergating
 *
 * @rdev: radeon_device pointer
 *
 * Disables the gfx PG modes and, when GFX PG is supported, the CP
 * and GDS powergating (CIK).  Note the SCK slowdown enabled by
 * cik_init_pg() is intentionally left in place here — presumably
 * harmless with PG off; TODO confirm the asymmetry is deliberate.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6804
6805/*
6806 * Interrupts
6807 * Starting with r6xx, interrupts are handled via a ring buffer.
6808 * Ring buffers are areas of GPU accessible memory that the GPU
6809 * writes interrupt vectors into and the host reads vectors out of.
6810 * There is a rptr (read pointer) that determines where the
6811 * host is currently reading, and a wptr (write pointer)
6812 * which determines where the GPU has written.  When the
6813 * pointers are equal, the ring is idle.  When the GPU
6814 * writes vectors to the ring buffer, it increments the
6815 * wptr.  When there is an interrupt, the host then starts
6816 * fetching commands and processing them until the pointers are
6817 * equal again at which point it updates the rptr.
6818 */
6819
6820/**
6821 * cik_enable_interrupts - Enable the interrupt ring buffer
6822 *
6823 * @rdev: radeon_device pointer
6824 *
6825 * Enable the interrupt ring buffer (CIK).
6826 */
6827static void cik_enable_interrupts(struct radeon_device *rdev)
6828{
6829	u32 ih_cntl = RREG32(IH_CNTL);
6830	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6831
6832	ih_cntl |= ENABLE_INTR;
6833	ih_rb_cntl |= IH_RB_ENABLE;
6834	WREG32(IH_CNTL, ih_cntl);
6835	WREG32(IH_RB_CNTL, ih_rb_cntl);
6836	rdev->ih.enabled = true;
6837}
6838
6839/**
6840 * cik_disable_interrupts - Disable the interrupt ring buffer
6841 *
6842 * @rdev: radeon_device pointer
6843 *
6844 * Disable the interrupt ring buffer (CIK).
6845 */
6846static void cik_disable_interrupts(struct radeon_device *rdev)
6847{
6848	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6849	u32 ih_cntl = RREG32(IH_CNTL);
6850
6851	ih_rb_cntl &= ~IH_RB_ENABLE;
6852	ih_cntl &= ~ENABLE_INTR;
6853	WREG32(IH_RB_CNTL, ih_rb_cntl);
6854	WREG32(IH_CNTL, ih_cntl);
6855	/* set rptr, wptr to 0 */
6856	WREG32(IH_RB_RPTR, 0);
6857	WREG32(IH_RB_WPTR, 0);
6858	rdev->ih.enabled = false;
6859	rdev->ih.rptr = 0;
6860}
6861
6862/**
6863 * cik_disable_interrupt_state - Disable all interrupt sources
6864 *
6865 * @rdev: radeon_device pointer
6866 *
6867 * Clear all interrupt enable bits used by the driver (CIK).
6868 */
6869static void cik_disable_interrupt_state(struct radeon_device *rdev)
6870{
6871	u32 tmp;
6872
6873	/* gfx ring */
6874	tmp = RREG32(CP_INT_CNTL_RING0) &
6875		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6876	WREG32(CP_INT_CNTL_RING0, tmp);
6877	/* sdma */
6878	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6879	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6880	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6881	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6882	/* compute queues */
6883	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6884	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6885	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6886	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6887	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6888	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6889	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6890	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6891	/* grbm */
6892	WREG32(GRBM_INT_CNTL, 0);
6893	/* SRBM */
6894	WREG32(SRBM_INT_CNTL, 0);
6895	/* vline/vblank, etc. */
6896	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6897	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6898	if (rdev->num_crtc >= 4) {
6899		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6900		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6901	}
6902	if (rdev->num_crtc >= 6) {
6903		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6904		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6905	}
6906	/* pflip */
6907	if (rdev->num_crtc >= 2) {
6908		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6909		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6910	}
6911	if (rdev->num_crtc >= 4) {
6912		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6913		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6914	}
6915	if (rdev->num_crtc >= 6) {
6916		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6917		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6918	}
6919
6920	/* dac hotplug */
6921	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6922
6923	/* digital hotplug */
6924	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6925	WREG32(DC_HPD1_INT_CONTROL, tmp);
6926	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6927	WREG32(DC_HPD2_INT_CONTROL, tmp);
6928	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6929	WREG32(DC_HPD3_INT_CONTROL, tmp);
6930	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6931	WREG32(DC_HPD4_INT_CONTROL, tmp);
6932	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6933	WREG32(DC_HPD5_INT_CONTROL, tmp);
6934	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6935	WREG32(DC_HPD6_INT_CONTROL, tmp);
6936
6937}
6938
6939/**
6940 * cik_irq_init - init and enable the interrupt ring
6941 *
6942 * @rdev: radeon_device pointer
6943 *
6944 * Allocate a ring buffer for the interrupt controller,
6945 * enable the RLC, disable interrupts, enable the IH
6946 * ring buffer and enable it (CIK).
6947 * Called at device load and reume.
6948 * Returns 0 for success, errors for failure.
6949 */
6950static int cik_irq_init(struct radeon_device *rdev)
6951{
6952	int ret = 0;
6953	int rb_bufsz;
6954	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6955
6956	/* allocate ring */
6957	ret = r600_ih_ring_alloc(rdev);
6958	if (ret)
6959		return ret;
6960
6961	/* disable irqs */
6962	cik_disable_interrupts(rdev);
6963
6964	/* init rlc */
6965	ret = cik_rlc_resume(rdev);
6966	if (ret) {
6967		r600_ih_ring_fini(rdev);
6968		return ret;
6969	}
6970
6971	/* setup interrupt control */
6972	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6973	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6974	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6975	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6976	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6977	 */
6978	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6979	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6980	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6981	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6982
6983	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6984	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6985
6986	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6987		      IH_WPTR_OVERFLOW_CLEAR |
6988		      (rb_bufsz << 1));
6989
6990	if (rdev->wb.enabled)
6991		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6992
6993	/* set the writeback address whether it's enabled or not */
6994	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6995	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6996
6997	WREG32(IH_RB_CNTL, ih_rb_cntl);
6998
6999	/* set rptr, wptr to 0 */
7000	WREG32(IH_RB_RPTR, 0);
7001	WREG32(IH_RB_WPTR, 0);
7002
7003	/* Default settings for IH_CNTL (disabled at first) */
7004	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7005	/* RPTR_REARM only works if msi's are enabled */
7006	if (rdev->msi_enabled)
7007		ih_cntl |= RPTR_REARM;
7008	WREG32(IH_CNTL, ih_cntl);
7009
7010	/* force the active interrupt state to all disabled */
7011	cik_disable_interrupt_state(rdev);
7012
7013	pci_set_master(rdev->pdev);
7014
7015	/* enable irqs */
7016	cik_enable_interrupts(rdev);
7017
7018	return ret;
7019}
7020
7021/**
7022 * cik_irq_set - enable/disable interrupt sources
7023 *
7024 * @rdev: radeon_device pointer
7025 *
7026 * Enable interrupt sources on the GPU (vblanks, hpd,
7027 * etc.) (CIK).
7028 * Returns 0 for success, errors for failure.
7029 */
7030int cik_irq_set(struct radeon_device *rdev)
7031{
7032	u32 cp_int_cntl;
7033	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7034	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7035	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7036	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7037	u32 grbm_int_cntl = 0;
7038	u32 dma_cntl, dma_cntl1;
7039
7040	if (!rdev->irq.installed) {
7041		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7042		return -EINVAL;
7043	}
7044	/* don't enable anything if the ih is disabled */
7045	if (!rdev->ih.enabled) {
7046		cik_disable_interrupts(rdev);
7047		/* force the active interrupt state to all disabled */
7048		cik_disable_interrupt_state(rdev);
7049		return 0;
7050	}
7051
7052	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7053		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7054	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7055
7056	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7059	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7060	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7061	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7062
7063	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7064	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7065
7066	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7071	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7072	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7073	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7074
7075	/* enable CP interrupts on all rings */
7076	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7077		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7078		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7079	}
7080	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7081		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7082		DRM_DEBUG("si_irq_set: sw int cp1\n");
7083		if (ring->me == 1) {
7084			switch (ring->pipe) {
7085			case 0:
7086				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7087				break;
7088			case 1:
7089				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7090				break;
7091			case 2:
7092				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7093				break;
7094			case 3:
7095				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7096				break;
7097			default:
7098				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7099				break;
7100			}
7101		} else if (ring->me == 2) {
7102			switch (ring->pipe) {
7103			case 0:
7104				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7105				break;
7106			case 1:
7107				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7108				break;
7109			case 2:
7110				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7111				break;
7112			case 3:
7113				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7114				break;
7115			default:
7116				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7117				break;
7118			}
7119		} else {
7120			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7121		}
7122	}
7123	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7124		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7125		DRM_DEBUG("si_irq_set: sw int cp2\n");
7126		if (ring->me == 1) {
7127			switch (ring->pipe) {
7128			case 0:
7129				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7130				break;
7131			case 1:
7132				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7133				break;
7134			case 2:
7135				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7136				break;
7137			case 3:
7138				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7139				break;
7140			default:
7141				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7142				break;
7143			}
7144		} else if (ring->me == 2) {
7145			switch (ring->pipe) {
7146			case 0:
7147				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7148				break;
7149			case 1:
7150				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7151				break;
7152			case 2:
7153				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7154				break;
7155			case 3:
7156				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7157				break;
7158			default:
7159				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7160				break;
7161			}
7162		} else {
7163			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7164		}
7165	}
7166
7167	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7168		DRM_DEBUG("cik_irq_set: sw int dma\n");
7169		dma_cntl |= TRAP_ENABLE;
7170	}
7171
7172	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7173		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7174		dma_cntl1 |= TRAP_ENABLE;
7175	}
7176
7177	if (rdev->irq.crtc_vblank_int[0] ||
7178	    atomic_read(&rdev->irq.pflip[0])) {
7179		DRM_DEBUG("cik_irq_set: vblank 0\n");
7180		crtc1 |= VBLANK_INTERRUPT_MASK;
7181	}
7182	if (rdev->irq.crtc_vblank_int[1] ||
7183	    atomic_read(&rdev->irq.pflip[1])) {
7184		DRM_DEBUG("cik_irq_set: vblank 1\n");
7185		crtc2 |= VBLANK_INTERRUPT_MASK;
7186	}
7187	if (rdev->irq.crtc_vblank_int[2] ||
7188	    atomic_read(&rdev->irq.pflip[2])) {
7189		DRM_DEBUG("cik_irq_set: vblank 2\n");
7190		crtc3 |= VBLANK_INTERRUPT_MASK;
7191	}
7192	if (rdev->irq.crtc_vblank_int[3] ||
7193	    atomic_read(&rdev->irq.pflip[3])) {
7194		DRM_DEBUG("cik_irq_set: vblank 3\n");
7195		crtc4 |= VBLANK_INTERRUPT_MASK;
7196	}
7197	if (rdev->irq.crtc_vblank_int[4] ||
7198	    atomic_read(&rdev->irq.pflip[4])) {
7199		DRM_DEBUG("cik_irq_set: vblank 4\n");
7200		crtc5 |= VBLANK_INTERRUPT_MASK;
7201	}
7202	if (rdev->irq.crtc_vblank_int[5] ||
7203	    atomic_read(&rdev->irq.pflip[5])) {
7204		DRM_DEBUG("cik_irq_set: vblank 5\n");
7205		crtc6 |= VBLANK_INTERRUPT_MASK;
7206	}
7207	if (rdev->irq.hpd[0]) {
7208		DRM_DEBUG("cik_irq_set: hpd 1\n");
7209		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210	}
7211	if (rdev->irq.hpd[1]) {
7212		DRM_DEBUG("cik_irq_set: hpd 2\n");
7213		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214	}
7215	if (rdev->irq.hpd[2]) {
7216		DRM_DEBUG("cik_irq_set: hpd 3\n");
7217		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218	}
7219	if (rdev->irq.hpd[3]) {
7220		DRM_DEBUG("cik_irq_set: hpd 4\n");
7221		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222	}
7223	if (rdev->irq.hpd[4]) {
7224		DRM_DEBUG("cik_irq_set: hpd 5\n");
7225		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226	}
7227	if (rdev->irq.hpd[5]) {
7228		DRM_DEBUG("cik_irq_set: hpd 6\n");
7229		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230	}
7231
7232	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7233
7234	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7235	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7236
7237	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7238	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7239	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7240	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7241	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7242	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7243	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7244	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7245
7246	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7247
7248	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7249	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7250	if (rdev->num_crtc >= 4) {
7251		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7252		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7253	}
7254	if (rdev->num_crtc >= 6) {
7255		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7256		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7257	}
7258
7259	if (rdev->num_crtc >= 2) {
7260		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7261		       GRPH_PFLIP_INT_MASK);
7262		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7263		       GRPH_PFLIP_INT_MASK);
7264	}
7265	if (rdev->num_crtc >= 4) {
7266		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7267		       GRPH_PFLIP_INT_MASK);
7268		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7269		       GRPH_PFLIP_INT_MASK);
7270	}
7271	if (rdev->num_crtc >= 6) {
7272		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7273		       GRPH_PFLIP_INT_MASK);
7274		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7275		       GRPH_PFLIP_INT_MASK);
7276	}
7277
7278	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7279	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7280	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7281	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7282	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7283	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7284
7285	/* posting read */
7286	RREG32(SRBM_STATUS);
7287
7288	return 0;
7289}
7290
7291/**
7292 * cik_irq_ack - ack interrupt sources
7293 *
7294 * @rdev: radeon_device pointer
7295 *
7296 * Ack interrupt sources on the GPU (vblanks, hpd,
7297 * etc.) (CIK).  Certain interrupts sources are sw
7298 * generated and do not require an explicit ack.
7299 */
7300static inline void cik_irq_ack(struct radeon_device *rdev)
7301{
7302	u32 tmp;
7303
7304	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7305	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7306	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7307	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7308	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7309	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7310	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7311
7312	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7313		EVERGREEN_CRTC0_REGISTER_OFFSET);
7314	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7315		EVERGREEN_CRTC1_REGISTER_OFFSET);
7316	if (rdev->num_crtc >= 4) {
7317		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7318			EVERGREEN_CRTC2_REGISTER_OFFSET);
7319		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7320			EVERGREEN_CRTC3_REGISTER_OFFSET);
7321	}
7322	if (rdev->num_crtc >= 6) {
7323		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7324			EVERGREEN_CRTC4_REGISTER_OFFSET);
7325		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7326			EVERGREEN_CRTC5_REGISTER_OFFSET);
7327	}
7328
7329	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7330		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7331		       GRPH_PFLIP_INT_CLEAR);
7332	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7333		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7334		       GRPH_PFLIP_INT_CLEAR);
7335	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7336		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7337	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7338		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7339	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7340		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7341	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7342		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7343
7344	if (rdev->num_crtc >= 4) {
7345		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7346			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7347			       GRPH_PFLIP_INT_CLEAR);
7348		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7349			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7350			       GRPH_PFLIP_INT_CLEAR);
7351		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7352			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7353		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7354			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7355		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7356			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7357		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7358			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7359	}
7360
7361	if (rdev->num_crtc >= 6) {
7362		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7363			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7364			       GRPH_PFLIP_INT_CLEAR);
7365		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7366			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7367			       GRPH_PFLIP_INT_CLEAR);
7368		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7369			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7370		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7371			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7372		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7373			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7374		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7375			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7376	}
7377
7378	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7379		tmp = RREG32(DC_HPD1_INT_CONTROL);
7380		tmp |= DC_HPDx_INT_ACK;
7381		WREG32(DC_HPD1_INT_CONTROL, tmp);
7382	}
7383	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7384		tmp = RREG32(DC_HPD2_INT_CONTROL);
7385		tmp |= DC_HPDx_INT_ACK;
7386		WREG32(DC_HPD2_INT_CONTROL, tmp);
7387	}
7388	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7389		tmp = RREG32(DC_HPD3_INT_CONTROL);
7390		tmp |= DC_HPDx_INT_ACK;
7391		WREG32(DC_HPD3_INT_CONTROL, tmp);
7392	}
7393	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7394		tmp = RREG32(DC_HPD4_INT_CONTROL);
7395		tmp |= DC_HPDx_INT_ACK;
7396		WREG32(DC_HPD4_INT_CONTROL, tmp);
7397	}
7398	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7399		tmp = RREG32(DC_HPD5_INT_CONTROL);
7400		tmp |= DC_HPDx_INT_ACK;
7401		WREG32(DC_HPD5_INT_CONTROL, tmp);
7402	}
7403	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7404		tmp = RREG32(DC_HPD6_INT_CONTROL);
7405		tmp |= DC_HPDx_INT_ACK;
7406		WREG32(DC_HPD6_INT_CONTROL, tmp);
7407	}
7408	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7409		tmp = RREG32(DC_HPD1_INT_CONTROL);
7410		tmp |= DC_HPDx_RX_INT_ACK;
7411		WREG32(DC_HPD1_INT_CONTROL, tmp);
7412	}
7413	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7414		tmp = RREG32(DC_HPD2_INT_CONTROL);
7415		tmp |= DC_HPDx_RX_INT_ACK;
7416		WREG32(DC_HPD2_INT_CONTROL, tmp);
7417	}
7418	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7419		tmp = RREG32(DC_HPD3_INT_CONTROL);
7420		tmp |= DC_HPDx_RX_INT_ACK;
7421		WREG32(DC_HPD3_INT_CONTROL, tmp);
7422	}
7423	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7424		tmp = RREG32(DC_HPD4_INT_CONTROL);
7425		tmp |= DC_HPDx_RX_INT_ACK;
7426		WREG32(DC_HPD4_INT_CONTROL, tmp);
7427	}
7428	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7429		tmp = RREG32(DC_HPD5_INT_CONTROL);
7430		tmp |= DC_HPDx_RX_INT_ACK;
7431		WREG32(DC_HPD5_INT_CONTROL, tmp);
7432	}
7433	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7434		tmp = RREG32(DC_HPD6_INT_CONTROL);
7435		tmp |= DC_HPDx_RX_INT_ACK;
7436		WREG32(DC_HPD6_INT_CONTROL, tmp);
7437	}
7438}
7439
7440/**
7441 * cik_irq_disable - disable interrupts
7442 *
7443 * @rdev: radeon_device pointer
7444 *
7445 * Disable interrupts on the hw (CIK).
7446 */
7447static void cik_irq_disable(struct radeon_device *rdev)
7448{
7449	cik_disable_interrupts(rdev);
7450	/* Wait and acknowledge irq */
7451	mdelay(1);
7452	cik_irq_ack(rdev);
7453	cik_disable_interrupt_state(rdev);
7454}
7455
7456/**
7457 * cik_irq_disable - disable interrupts for suspend
7458 *
7459 * @rdev: radeon_device pointer
7460 *
7461 * Disable interrupts and stop the RLC (CIK).
7462 * Used for suspend.
7463 */
7464static void cik_irq_suspend(struct radeon_device *rdev)
7465{
7466	cik_irq_disable(rdev);
7467	cik_rlc_stop(rdev);
7468}
7469
7470/**
7471 * cik_irq_fini - tear down interrupt support
7472 *
7473 * @rdev: radeon_device pointer
7474 *
7475 * Disable interrupts on the hw and free the IH ring
7476 * buffer (CIK).
7477 * Used for driver unload.
7478 */
7479static void cik_irq_fini(struct radeon_device *rdev)
7480{
7481	cik_irq_suspend(rdev);
7482	r600_ih_ring_fini(rdev);
7483}
7484
7485/**
7486 * cik_get_ih_wptr - get the IH ring buffer wptr
7487 *
7488 * @rdev: radeon_device pointer
7489 *
7490 * Get the IH ring buffer wptr from either the register
7491 * or the writeback memory buffer (CIK).  Also check for
7492 * ring buffer overflow and deal with it.
7493 * Used by cik_irq_process().
7494 * Returns the value of the wptr.
7495 */
7496static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7497{
7498	u32 wptr, tmp;
7499
7500	if (rdev->wb.enabled)
7501		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7502	else
7503		wptr = RREG32(IH_RB_WPTR);
7504
7505	if (wptr & RB_OVERFLOW) {
7506		wptr &= ~RB_OVERFLOW;
7507		/* When a ring buffer overflow happen start parsing interrupt
7508		 * from the last not overwritten vector (wptr + 16). Hopefully
7509		 * this should allow us to catchup.
7510		 */
7511		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7512			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7513		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7514		tmp = RREG32(IH_RB_CNTL);
7515		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7516		WREG32(IH_RB_CNTL, tmp);
7517	}
7518	return (wptr & rdev->ih.ptr_mask);
7519}
7520
7521/*        CIK IV Ring
7522 * Each IV ring entry is 128 bits:
7523 * [7:0]    - interrupt source id
7524 * [31:8]   - reserved
7525 * [59:32]  - interrupt source data
7526 * [63:60]  - reserved
7527 * [71:64]  - RINGID
7528 *            CP:
7529 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7530 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7531 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7532 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7533 *            PIPE_ID - ME0 0=3D
7534 *                    - ME1&2 compute dispatcher (4 pipes each)
7535 *            SDMA:
7536 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7537 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7538 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7539 * [79:72]  - VMID
7540 * [95:80]  - PASID
7541 * [127:96] - reserved
7542 */
7543/**
7544 * cik_irq_process - interrupt handler
7545 *
7546 * @rdev: radeon_device pointer
7547 *
7548 * Interrupt hander (CIK).  Walk the IH ring,
7549 * ack interrupts and schedule work to handle
7550 * interrupt events.
7551 * Returns irq process return code.
7552 */
7553int cik_irq_process(struct radeon_device *rdev)
7554{
7555	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7556	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7557	u32 wptr;
7558	u32 rptr;
7559	u32 src_id, src_data, ring_id;
7560	u8 me_id, pipe_id, queue_id;
7561	u32 ring_index;
7562	bool queue_hotplug = false;
7563	bool queue_dp = false;
7564	bool queue_reset = false;
7565	u32 addr, status, mc_client;
7566	bool queue_thermal = false;
7567
7568	if (!rdev->ih.enabled || rdev->shutdown)
7569		return IRQ_NONE;
7570
7571	wptr = cik_get_ih_wptr(rdev);
7572
7573restart_ih:
7574	/* is somebody else already processing irqs? */
7575	if (atomic_xchg(&rdev->ih.lock, 1))
7576		return IRQ_NONE;
7577
7578	rptr = rdev->ih.rptr;
7579	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7580
7581	/* Order reading of wptr vs. reading of IH ring data */
7582	rmb();
7583
7584	/* display interrupts */
7585	cik_irq_ack(rdev);
7586
7587	while (rptr != wptr) {
7588		/* wptr/rptr are in bytes! */
7589		ring_index = rptr / 4;
7590
7591		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7592		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7593		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7594
7595		switch (src_id) {
7596		case 1: /* D1 vblank/vline */
7597			switch (src_data) {
7598			case 0: /* D1 vblank */
7599				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7600					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602				if (rdev->irq.crtc_vblank_int[0]) {
7603					drm_handle_vblank(rdev->ddev, 0);
7604					rdev->pm.vblank_sync = true;
7605					wake_up(&rdev->irq.vblank_queue);
7606				}
7607				if (atomic_read(&rdev->irq.pflip[0]))
7608					radeon_crtc_handle_vblank(rdev, 0);
7609				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7610				DRM_DEBUG("IH: D1 vblank\n");
7611
7612				break;
7613			case 1: /* D1 vline */
7614				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7615					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7618				DRM_DEBUG("IH: D1 vline\n");
7619
7620				break;
7621			default:
7622				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7623				break;
7624			}
7625			break;
7626		case 2: /* D2 vblank/vline */
7627			switch (src_data) {
7628			case 0: /* D2 vblank */
7629				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7630					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632				if (rdev->irq.crtc_vblank_int[1]) {
7633					drm_handle_vblank(rdev->ddev, 1);
7634					rdev->pm.vblank_sync = true;
7635					wake_up(&rdev->irq.vblank_queue);
7636				}
7637				if (atomic_read(&rdev->irq.pflip[1]))
7638					radeon_crtc_handle_vblank(rdev, 1);
7639				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7640				DRM_DEBUG("IH: D2 vblank\n");
7641
7642				break;
7643			case 1: /* D2 vline */
7644				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7645					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7648				DRM_DEBUG("IH: D2 vline\n");
7649
7650				break;
7651			default:
7652				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653				break;
7654			}
7655			break;
7656		case 3: /* D3 vblank/vline */
7657			switch (src_data) {
7658			case 0: /* D3 vblank */
7659				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7660					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662				if (rdev->irq.crtc_vblank_int[2]) {
7663					drm_handle_vblank(rdev->ddev, 2);
7664					rdev->pm.vblank_sync = true;
7665					wake_up(&rdev->irq.vblank_queue);
7666				}
7667				if (atomic_read(&rdev->irq.pflip[2]))
7668					radeon_crtc_handle_vblank(rdev, 2);
7669				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7670				DRM_DEBUG("IH: D3 vblank\n");
7671
7672				break;
7673			case 1: /* D3 vline */
7674				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7675					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7678				DRM_DEBUG("IH: D3 vline\n");
7679
7680				break;
7681			default:
7682				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683				break;
7684			}
7685			break;
7686		case 4: /* D4 vblank/vline */
7687			switch (src_data) {
7688			case 0: /* D4 vblank */
7689				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7690					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692				if (rdev->irq.crtc_vblank_int[3]) {
7693					drm_handle_vblank(rdev->ddev, 3);
7694					rdev->pm.vblank_sync = true;
7695					wake_up(&rdev->irq.vblank_queue);
7696				}
7697				if (atomic_read(&rdev->irq.pflip[3]))
7698					radeon_crtc_handle_vblank(rdev, 3);
7699				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7700				DRM_DEBUG("IH: D4 vblank\n");
7701
7702				break;
7703			case 1: /* D4 vline */
7704				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7705					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7708				DRM_DEBUG("IH: D4 vline\n");
7709
7710				break;
7711			default:
7712				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713				break;
7714			}
7715			break;
7716		case 5: /* D5 vblank/vline */
7717			switch (src_data) {
7718			case 0: /* D5 vblank */
7719				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7720					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722				if (rdev->irq.crtc_vblank_int[4]) {
7723					drm_handle_vblank(rdev->ddev, 4);
7724					rdev->pm.vblank_sync = true;
7725					wake_up(&rdev->irq.vblank_queue);
7726				}
7727				if (atomic_read(&rdev->irq.pflip[4]))
7728					radeon_crtc_handle_vblank(rdev, 4);
7729				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7730				DRM_DEBUG("IH: D5 vblank\n");
7731
7732				break;
7733			case 1: /* D5 vline */
7734				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7735					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7738				DRM_DEBUG("IH: D5 vline\n");
7739
7740				break;
7741			default:
7742				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743				break;
7744			}
7745			break;
7746		case 6: /* D6 vblank/vline */
7747			switch (src_data) {
7748			case 0: /* D6 vblank */
7749				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7750					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752				if (rdev->irq.crtc_vblank_int[5]) {
7753					drm_handle_vblank(rdev->ddev, 5);
7754					rdev->pm.vblank_sync = true;
7755					wake_up(&rdev->irq.vblank_queue);
7756				}
7757				if (atomic_read(&rdev->irq.pflip[5]))
7758					radeon_crtc_handle_vblank(rdev, 5);
7759				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7760				DRM_DEBUG("IH: D6 vblank\n");
7761
7762				break;
7763			case 1: /* D6 vline */
7764				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7765					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766
7767				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7768				DRM_DEBUG("IH: D6 vline\n");
7769
7770				break;
7771			default:
7772				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773				break;
7774			}
7775			break;
7776		case 8: /* D1 page flip */
7777		case 10: /* D2 page flip */
7778		case 12: /* D3 page flip */
7779		case 14: /* D4 page flip */
7780		case 16: /* D5 page flip */
7781		case 18: /* D6 page flip */
7782			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7783			if (radeon_use_pflipirq > 0)
7784				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7785			break;
7786		case 42: /* HPD hotplug */
7787			switch (src_data) {
7788			case 0:
7789				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7790					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791
7792				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7793				queue_hotplug = true;
7794				DRM_DEBUG("IH: HPD1\n");
7795
7796				break;
7797			case 1:
7798				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7799					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800
7801				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7802				queue_hotplug = true;
7803				DRM_DEBUG("IH: HPD2\n");
7804
7805				break;
7806			case 2:
7807				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7808					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809
7810				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7811				queue_hotplug = true;
7812				DRM_DEBUG("IH: HPD3\n");
7813
7814				break;
7815			case 3:
7816				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7817					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818
7819				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7820				queue_hotplug = true;
7821				DRM_DEBUG("IH: HPD4\n");
7822
7823				break;
7824			case 4:
7825				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7826					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827
7828				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7829				queue_hotplug = true;
7830				DRM_DEBUG("IH: HPD5\n");
7831
7832				break;
7833			case 5:
7834				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7835					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836
7837				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7838				queue_hotplug = true;
7839				DRM_DEBUG("IH: HPD6\n");
7840
7841				break;
7842			case 6:
7843				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7844					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845
7846				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7847				queue_dp = true;
7848				DRM_DEBUG("IH: HPD_RX 1\n");
7849
7850				break;
7851			case 7:
7852				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7853					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854
7855				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7856				queue_dp = true;
7857				DRM_DEBUG("IH: HPD_RX 2\n");
7858
7859				break;
7860			case 8:
7861				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7862					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863
7864				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7865				queue_dp = true;
7866				DRM_DEBUG("IH: HPD_RX 3\n");
7867
7868				break;
7869			case 9:
7870				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7871					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872
7873				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7874				queue_dp = true;
7875				DRM_DEBUG("IH: HPD_RX 4\n");
7876
7877				break;
7878			case 10:
7879				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7880					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881
7882				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7883				queue_dp = true;
7884				DRM_DEBUG("IH: HPD_RX 5\n");
7885
7886				break;
7887			case 11:
7888				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7889					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890
7891				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7892				queue_dp = true;
7893				DRM_DEBUG("IH: HPD_RX 6\n");
7894
7895				break;
7896			default:
7897				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7898				break;
7899			}
7900			break;
7901		case 96:
7902			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7903			WREG32(SRBM_INT_ACK, 0x1);
7904			break;
7905		case 124: /* UVD */
7906			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7907			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7908			break;
7909		case 146:
7910		case 147:
7911			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7912			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7913			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7914			/* reset addr and status */
7915			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7916			if (addr == 0x0 && status == 0x0)
7917				break;
7918			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7919			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7920				addr);
7921			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7922				status);
7923			cik_vm_decode_fault(rdev, status, addr, mc_client);
7924			break;
7925		case 167: /* VCE */
7926			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7927			switch (src_data) {
7928			case 0:
7929				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7930				break;
7931			case 1:
7932				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7933				break;
7934			default:
7935				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7936				break;
7937			}
7938			break;
7939		case 176: /* GFX RB CP_INT */
7940		case 177: /* GFX IB CP_INT */
7941			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7942			break;
7943		case 181: /* CP EOP event */
7944			DRM_DEBUG("IH: CP EOP\n");
7945			/* XXX check the bitfield order! */
7946			me_id = (ring_id & 0x60) >> 5;
7947			pipe_id = (ring_id & 0x18) >> 3;
7948			queue_id = (ring_id & 0x7) >> 0;
7949			switch (me_id) {
7950			case 0:
7951				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7952				break;
7953			case 1:
7954			case 2:
7955				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7956					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7957				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7958					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7959				break;
7960			}
7961			break;
7962		case 184: /* CP Privileged reg access */
7963			DRM_ERROR("Illegal register access in command stream\n");
7964			/* XXX check the bitfield order! */
7965			me_id = (ring_id & 0x60) >> 5;
7966			pipe_id = (ring_id & 0x18) >> 3;
7967			queue_id = (ring_id & 0x7) >> 0;
7968			switch (me_id) {
7969			case 0:
7970				/* This results in a full GPU reset, but all we need to do is soft
7971				 * reset the CP for gfx
7972				 */
7973				queue_reset = true;
7974				break;
7975			case 1:
7976				/* XXX compute */
7977				queue_reset = true;
7978				break;
7979			case 2:
7980				/* XXX compute */
7981				queue_reset = true;
7982				break;
7983			}
7984			break;
7985		case 185: /* CP Privileged inst */
7986			DRM_ERROR("Illegal instruction in command stream\n");
7987			/* XXX check the bitfield order! */
7988			me_id = (ring_id & 0x60) >> 5;
7989			pipe_id = (ring_id & 0x18) >> 3;
7990			queue_id = (ring_id & 0x7) >> 0;
7991			switch (me_id) {
7992			case 0:
7993				/* This results in a full GPU reset, but all we need to do is soft
7994				 * reset the CP for gfx
7995				 */
7996				queue_reset = true;
7997				break;
7998			case 1:
7999				/* XXX compute */
8000				queue_reset = true;
8001				break;
8002			case 2:
8003				/* XXX compute */
8004				queue_reset = true;
8005				break;
8006			}
8007			break;
8008		case 224: /* SDMA trap event */
8009			/* XXX check the bitfield order! */
8010			me_id = (ring_id & 0x3) >> 0;
8011			queue_id = (ring_id & 0xc) >> 2;
8012			DRM_DEBUG("IH: SDMA trap\n");
8013			switch (me_id) {
8014			case 0:
8015				switch (queue_id) {
8016				case 0:
8017					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8018					break;
8019				case 1:
8020					/* XXX compute */
8021					break;
8022				case 2:
8023					/* XXX compute */
8024					break;
8025				}
8026				break;
8027			case 1:
8028				switch (queue_id) {
8029				case 0:
8030					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8031					break;
8032				case 1:
8033					/* XXX compute */
8034					break;
8035				case 2:
8036					/* XXX compute */
8037					break;
8038				}
8039				break;
8040			}
8041			break;
8042		case 230: /* thermal low to high */
8043			DRM_DEBUG("IH: thermal low to high\n");
8044			rdev->pm.dpm.thermal.high_to_low = false;
8045			queue_thermal = true;
8046			break;
8047		case 231: /* thermal high to low */
8048			DRM_DEBUG("IH: thermal high to low\n");
8049			rdev->pm.dpm.thermal.high_to_low = true;
8050			queue_thermal = true;
8051			break;
8052		case 233: /* GUI IDLE */
8053			DRM_DEBUG("IH: GUI idle\n");
8054			break;
8055		case 241: /* SDMA Privileged inst */
8056		case 247: /* SDMA Privileged inst */
8057			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8058			/* XXX check the bitfield order! */
8059			me_id = (ring_id & 0x3) >> 0;
8060			queue_id = (ring_id & 0xc) >> 2;
8061			switch (me_id) {
8062			case 0:
8063				switch (queue_id) {
8064				case 0:
8065					queue_reset = true;
8066					break;
8067				case 1:
8068					/* XXX compute */
8069					queue_reset = true;
8070					break;
8071				case 2:
8072					/* XXX compute */
8073					queue_reset = true;
8074					break;
8075				}
8076				break;
8077			case 1:
8078				switch (queue_id) {
8079				case 0:
8080					queue_reset = true;
8081					break;
8082				case 1:
8083					/* XXX compute */
8084					queue_reset = true;
8085					break;
8086				case 2:
8087					/* XXX compute */
8088					queue_reset = true;
8089					break;
8090				}
8091				break;
8092			}
8093			break;
8094		default:
8095			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8096			break;
8097		}
8098
8099		/* wptr/rptr are in bytes! */
8100		rptr += 16;
8101		rptr &= rdev->ih.ptr_mask;
8102		WREG32(IH_RB_RPTR, rptr);
8103	}
8104	if (queue_dp)
8105		schedule_work(&rdev->dp_work);
8106	if (queue_hotplug)
8107		schedule_delayed_work(&rdev->hotplug_work, 0);
8108	if (queue_reset) {
8109		rdev->needs_reset = true;
8110		wake_up_all(&rdev->fence_queue);
8111	}
8112	if (queue_thermal)
8113		schedule_work(&rdev->pm.dpm.thermal.work);
8114	rdev->ih.rptr = rptr;
8115	atomic_set(&rdev->ih.lock, 0);
8116
8117	/* make sure wptr hasn't changed while processing */
8118	wptr = cik_get_ih_wptr(rdev);
8119	if (wptr != rptr)
8120		goto restart_ih;
8121
8122	return IRQ_HANDLED;
8123}
8124
8125/*
8126 * startup/shutdown callbacks
8127 */
8128static void cik_uvd_init(struct radeon_device *rdev)
8129{
8130	int r;
8131
8132	if (!rdev->has_uvd)
8133		return;
8134
8135	r = radeon_uvd_init(rdev);
8136	if (r) {
8137		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8138		/*
8139		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8140		 * to early fails cik_uvd_start() and thus nothing happens
8141		 * there. So it is pointless to try to go through that code
8142		 * hence why we disable uvd here.
8143		 */
8144		rdev->has_uvd = 0;
8145		return;
8146	}
8147	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8148	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8149}
8150
8151static void cik_uvd_start(struct radeon_device *rdev)
8152{
8153	int r;
8154
8155	if (!rdev->has_uvd)
8156		return;
8157
8158	r = radeon_uvd_resume(rdev);
8159	if (r) {
8160		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8161		goto error;
8162	}
8163	r = uvd_v4_2_resume(rdev);
8164	if (r) {
8165		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8166		goto error;
8167	}
8168	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8169	if (r) {
8170		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8171		goto error;
8172	}
8173	return;
8174
8175error:
8176	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8177}
8178
8179static void cik_uvd_resume(struct radeon_device *rdev)
8180{
8181	struct radeon_ring *ring;
8182	int r;
8183
8184	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8185		return;
8186
8187	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8188	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8189	if (r) {
8190		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8191		return;
8192	}
8193	r = uvd_v1_0_init(rdev);
8194	if (r) {
8195		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8196		return;
8197	}
8198}
8199
8200static void cik_vce_init(struct radeon_device *rdev)
8201{
8202	int r;
8203
8204	if (!rdev->has_vce)
8205		return;
8206
8207	r = radeon_vce_init(rdev);
8208	if (r) {
8209		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8210		/*
8211		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8212		 * to early fails cik_vce_start() and thus nothing happens
8213		 * there. So it is pointless to try to go through that code
8214		 * hence why we disable vce here.
8215		 */
8216		rdev->has_vce = 0;
8217		return;
8218	}
8219	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8220	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8221	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8222	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8223}
8224
8225static void cik_vce_start(struct radeon_device *rdev)
8226{
8227	int r;
8228
8229	if (!rdev->has_vce)
8230		return;
8231
8232	r = radeon_vce_resume(rdev);
8233	if (r) {
8234		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8235		goto error;
8236	}
8237	r = vce_v2_0_resume(rdev);
8238	if (r) {
8239		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8240		goto error;
8241	}
8242	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8243	if (r) {
8244		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8245		goto error;
8246	}
8247	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8248	if (r) {
8249		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8250		goto error;
8251	}
8252	return;
8253
8254error:
8255	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8256	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8257}
8258
8259static void cik_vce_resume(struct radeon_device *rdev)
8260{
8261	struct radeon_ring *ring;
8262	int r;
8263
8264	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8265		return;
8266
8267	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8268	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8269	if (r) {
8270		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8271		return;
8272	}
8273	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8274	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8275	if (r) {
8276		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8277		return;
8278	}
8279	r = vce_v1_0_init(rdev);
8280	if (r) {
8281		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8282		return;
8283	}
8284}
8285
8286/**
8287 * cik_startup - program the asic to a functional state
8288 *
8289 * @rdev: radeon_device pointer
8290 *
8291 * Programs the asic to a functional state (CIK).
8292 * Called by cik_init() and cik_resume().
8293 * Returns 0 for success, error for failure.
8294 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU with DPM disabled: the driver loads the MC ucode itself */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* per-family RLC save/restore register lists */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start fence delivery on every ring we will use below */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal: they disable themselves */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* legacy (non-new_fw) Hawaii ucode uses the type-2 NOP packet;
	 * everything else uses the type-3 NOP */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* non-fatal if the UVD/VCE rings were disabled above */
	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8487
8488/**
8489 * cik_resume - resume the asic to a functional state
8490 *
8491 * @rdev: radeon_device pointer
8492 *
8493 * Programs the asic to a functional state (CIK).
8494 * Called at resume.
8495 * Returns 0 for success, error for failure.
8496 */
8497int cik_resume(struct radeon_device *rdev)
8498{
8499	int r;
8500
8501	/* post card */
8502	atom_asic_init(rdev->mode_info.atom_context);
8503
8504	/* init golden registers */
8505	cik_init_golden_registers(rdev);
8506
8507	if (rdev->pm.pm_method == PM_METHOD_DPM)
8508		radeon_pm_resume(rdev);
8509
8510	rdev->accel_working = true;
8511	r = cik_startup(rdev);
8512	if (r) {
8513		DRM_ERROR("cik startup failed on resume\n");
8514		rdev->accel_working = false;
8515		return r;
8516	}
8517
8518	return r;
8519
8520}
8521
8522/**
8523 * cik_suspend - suspend the asic
8524 *
8525 * @rdev: radeon_device pointer
8526 *
8527 * Bring the chip into a state suitable for suspend (CIK).
8528 * Called at suspend.
8529 * Returns 0 for success.
8530 */
int cik_suspend(struct radeon_device *rdev)
{
	/* power management and audio go down first */
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processors and SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {

		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating before disabling interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* writeback and GART last, once nothing else touches memory */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8551
8552/* Plan is to move initialization in that function and use
8553 * helper function so that radeon_device_init pretty much
8554 * do nothing more than calling asic specific function. This
8555 * should also allow to remove a bunch of callback function
8556 * like vram_info.
8557 */
8558/**
8559 * cik_init - asic specific driver and hw init
8560 *
8561 * @rdev: radeon_device pointer
8562 *
8563 * Setup asic specific driver variables and program the hw
8564 * to a functional state (CIK).
8565 * Called at driver startup.
8566 * Returns 0 for success, errors for failure.
8567 */
8568int cik_init(struct radeon_device *rdev)
8569{
8570	struct radeon_ring *ring;
8571	int r;
8572
8573	/* Read BIOS */
8574	if (!radeon_get_bios(rdev)) {
8575		if (ASIC_IS_AVIVO(rdev))
8576			return -EINVAL;
8577	}
8578	/* Must be an ATOMBIOS */
8579	if (!rdev->is_atom_bios) {
8580		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8581		return -EINVAL;
8582	}
8583	r = radeon_atombios_init(rdev);
8584	if (r)
8585		return r;
8586
8587	/* Post card if necessary */
8588	if (!radeon_card_posted(rdev)) {
8589		if (!rdev->bios) {
8590			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8591			return -EINVAL;
8592		}
8593		DRM_INFO("GPU not posted. posting now...\n");
8594		atom_asic_init(rdev->mode_info.atom_context);
8595	}
8596	/* init golden registers */
8597	cik_init_golden_registers(rdev);
8598	/* Initialize scratch registers */
8599	cik_scratch_init(rdev);
8600	/* Initialize surface registers */
8601	radeon_surface_init(rdev);
8602	/* Initialize clocks */
8603	radeon_get_clock_info(rdev->ddev);
8604
8605	/* Fence driver */
8606	r = radeon_fence_driver_init(rdev);
8607	if (r)
8608		return r;
8609
8610	/* initialize memory controller */
8611	r = cik_mc_init(rdev);
8612	if (r)
8613		return r;
8614	/* Memory manager */
8615	r = radeon_bo_init(rdev);
8616	if (r)
8617		return r;
8618
8619	if (rdev->flags & RADEON_IS_IGP) {
8620		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8621		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8622			r = cik_init_microcode(rdev);
8623			if (r) {
8624				DRM_ERROR("Failed to load firmware!\n");
8625				return r;
8626			}
8627		}
8628	} else {
8629		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8630		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8631		    !rdev->mc_fw) {
8632			r = cik_init_microcode(rdev);
8633			if (r) {
8634				DRM_ERROR("Failed to load firmware!\n");
8635				return r;
8636			}
8637		}
8638	}
8639
8640	/* Initialize power management */
8641	radeon_pm_init(rdev);
8642
8643	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8644	ring->ring_obj = NULL;
8645	r600_ring_init(rdev, ring, 1024 * 1024);
8646
8647	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8648	ring->ring_obj = NULL;
8649	r600_ring_init(rdev, ring, 1024 * 1024);
8650	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8651	if (r)
8652		return r;
8653
8654	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8655	ring->ring_obj = NULL;
8656	r600_ring_init(rdev, ring, 1024 * 1024);
8657	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8658	if (r)
8659		return r;
8660
8661	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8662	ring->ring_obj = NULL;
8663	r600_ring_init(rdev, ring, 256 * 1024);
8664
8665	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8666	ring->ring_obj = NULL;
8667	r600_ring_init(rdev, ring, 256 * 1024);
8668
8669	cik_uvd_init(rdev);
8670	cik_vce_init(rdev);
8671
8672	rdev->ih.ring_obj = NULL;
8673	r600_ih_ring_init(rdev, 64 * 1024);
8674
8675	r = r600_pcie_gart_init(rdev);
8676	if (r)
8677		return r;
8678
8679	rdev->accel_working = true;
8680	r = cik_startup(rdev);
8681	if (r) {
8682		dev_err(rdev->dev, "disabling GPU acceleration\n");
8683		cik_cp_fini(rdev);
8684		cik_sdma_fini(rdev);
8685		cik_irq_fini(rdev);
8686		sumo_rlc_fini(rdev);
8687		cik_mec_fini(rdev);
8688		radeon_wb_fini(rdev);
8689		radeon_ib_pool_fini(rdev);
8690		radeon_vm_manager_fini(rdev);
8691		radeon_irq_kms_fini(rdev);
8692		cik_pcie_gart_fini(rdev);
8693		rdev->accel_working = false;
8694	}
8695
8696	/* Don't start up if the MC ucode is missing.
8697	 * The default clocks and voltages before the MC ucode
8698	 * is loaded are not suffient for advanced operations.
8699	 */
8700	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8701		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8702		return -EINVAL;
8703	}
8704
8705	return 0;
8706}
8707
8708/**
8709 * cik_fini - asic specific driver and hw fini
8710 *
8711 * @rdev: radeon_device pointer
8712 *
8713 * Tear down the asic specific driver variables and program the hw
8714 * to an idle state (CIK).
8715 * Called at driver unload.
8716 */
void cik_fini(struct radeon_device *rdev)
{
	/* Reverse of cik_init()/cik_startup(): stop pm and the engines,
	 * release irq/rlc/mec state, then the memory-management pieces,
	 * and finally the atombios data and the BIOS copy itself.
	 */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* guard against stale pointer use after free */
}
8743
8744void dce8_program_fmt(struct drm_encoder *encoder)
8745{
8746	struct drm_device *dev = encoder->dev;
8747	struct radeon_device *rdev = dev->dev_private;
8748	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8749	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8750	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8751	int bpc = 0;
8752	u32 tmp = 0;
8753	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8754
8755	if (connector) {
8756		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8757		bpc = radeon_get_monitor_bpc(connector);
8758		dither = radeon_connector->dither;
8759	}
8760
8761	/* LVDS/eDP FMT is set up by atom */
8762	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8763		return;
8764
8765	/* not needed for analog */
8766	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8767	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8768		return;
8769
8770	if (bpc == 0)
8771		return;
8772
8773	switch (bpc) {
8774	case 6:
8775		if (dither == RADEON_FMT_DITHER_ENABLE)
8776			/* XXX sort out optimal dither settings */
8777			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8778				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8779		else
8780			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8781		break;
8782	case 8:
8783		if (dither == RADEON_FMT_DITHER_ENABLE)
8784			/* XXX sort out optimal dither settings */
8785			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786				FMT_RGB_RANDOM_ENABLE |
8787				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8788		else
8789			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8790		break;
8791	case 10:
8792		if (dither == RADEON_FMT_DITHER_ENABLE)
8793			/* XXX sort out optimal dither settings */
8794			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795				FMT_RGB_RANDOM_ENABLE |
8796				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8797		else
8798			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8799		break;
8800	default:
8801		/* not needed */
8802		break;
8803	}
8804
8805	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8806}
8807
8808/* display watermark setup */
8809/**
8810 * dce8_line_buffer_adjust - Set up the line buffer
8811 *
8812 * @rdev: radeon_device pointer
8813 * @radeon_crtc: the selected display controller
8814 * @mode: the current display mode on the selected display
8815 * controller
8816 *
8817 * Setup up the line buffer allocation for
8818 * the selected display controller (CIK).
8819 * Returns the line buffer size in pixels.
8820 */
8821static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8822				   struct radeon_crtc *radeon_crtc,
8823				   struct drm_display_mode *mode)
8824{
8825	u32 tmp, buffer_alloc, i;
8826	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8827	/*
8828	 * Line Buffer Setup
8829	 * There are 6 line buffers, one for each display controllers.
8830	 * There are 3 partitions per LB. Select the number of partitions
8831	 * to enable based on the display width.  For display widths larger
8832	 * than 4096, you need use to use 2 display controllers and combine
8833	 * them using the stereo blender.
8834	 */
8835	if (radeon_crtc->base.enabled && mode) {
8836		if (mode->crtc_hdisplay < 1920) {
8837			tmp = 1;
8838			buffer_alloc = 2;
8839		} else if (mode->crtc_hdisplay < 2560) {
8840			tmp = 2;
8841			buffer_alloc = 2;
8842		} else if (mode->crtc_hdisplay < 4096) {
8843			tmp = 0;
8844			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845		} else {
8846			DRM_DEBUG_KMS("Mode too big for LB!\n");
8847			tmp = 0;
8848			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8849		}
8850	} else {
8851		tmp = 1;
8852		buffer_alloc = 0;
8853	}
8854
8855	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8856	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8857
8858	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8859	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8860	for (i = 0; i < rdev->usec_timeout; i++) {
8861		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8862		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8863			break;
8864		udelay(1);
8865	}
8866
8867	if (radeon_crtc->base.enabled && mode) {
8868		switch (tmp) {
8869		case 0:
8870		default:
8871			return 4096 * 2;
8872		case 1:
8873			return 1920 * 2;
8874		case 2:
8875			return 2560 * 2;
8876		}
8877	}
8878
8879	/* controller not enabled, so no lb used */
8880	return 0;
8881}
8882
8883/**
8884 * cik_get_number_of_dram_channels - get the number of dram channels
8885 *
8886 * @rdev: radeon_device pointer
8887 *
8888 * Look up the number of video ram channels (CIK).
8889 * Used for display watermark bandwidth calculations
8890 * Returns the number of dram channels
8891 */
8892static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8893{
8894	u32 tmp = RREG32(MC_SHARED_CHMAP);
8895
8896	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8897	case 0:
8898	default:
8899		return 1;
8900	case 1:
8901		return 2;
8902	case 2:
8903		return 4;
8904	case 3:
8905		return 8;
8906	case 4:
8907		return 3;
8908	case 5:
8909		return 6;
8910	case 6:
8911		return 10;
8912	case 7:
8913		return 12;
8914	case 8:
8915		return 16;
8916	}
8917}
8918
/* Per-head input parameters for the DCE8 display watermark
 * calculations below.  Clocks are in kHz, times in ns.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8934
8935/**
8936 * dce8_dram_bandwidth - get the dram bandwidth
8937 *
8938 * @wm: watermark calculation data
8939 *
8940 * Calculate the raw dram bandwidth (CIK).
8941 * Used for display watermark bandwidth calculations
8942 * Returns the dram bandwidth in MBytes/s
8943 */
8944static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8945{
8946	/* Calculate raw DRAM Bandwidth */
8947	fixed20_12 dram_efficiency; /* 0.7 */
8948	fixed20_12 yclk, dram_channels, bandwidth;
8949	fixed20_12 a;
8950
8951	a.full = dfixed_const(1000);
8952	yclk.full = dfixed_const(wm->yclk);
8953	yclk.full = dfixed_div(yclk, a);
8954	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8955	a.full = dfixed_const(10);
8956	dram_efficiency.full = dfixed_const(7);
8957	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8958	bandwidth.full = dfixed_mul(dram_channels, yclk);
8959	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8960
8961	return dfixed_trunc(bandwidth);
8962}
8963
8964/**
8965 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8966 *
8967 * @wm: watermark calculation data
8968 *
8969 * Calculate the dram bandwidth used for display (CIK).
8970 * Used for display watermark bandwidth calculations
8971 * Returns the dram bandwidth for display in MBytes/s
8972 */
8973static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8974{
8975	/* Calculate DRAM Bandwidth and the part allocated to display. */
8976	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8977	fixed20_12 yclk, dram_channels, bandwidth;
8978	fixed20_12 a;
8979
8980	a.full = dfixed_const(1000);
8981	yclk.full = dfixed_const(wm->yclk);
8982	yclk.full = dfixed_div(yclk, a);
8983	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8984	a.full = dfixed_const(10);
8985	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8986	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8987	bandwidth.full = dfixed_mul(dram_channels, yclk);
8988	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8989
8990	return dfixed_trunc(bandwidth);
8991}
8992
8993/**
8994 * dce8_data_return_bandwidth - get the data return bandwidth
8995 *
8996 * @wm: watermark calculation data
8997 *
8998 * Calculate the data return bandwidth used for display (CIK).
8999 * Used for display watermark bandwidth calculations
9000 * Returns the data return bandwidth in MBytes/s
9001 */
9002static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9003{
9004	/* Calculate the display Data return Bandwidth */
9005	fixed20_12 return_efficiency; /* 0.8 */
9006	fixed20_12 sclk, bandwidth;
9007	fixed20_12 a;
9008
9009	a.full = dfixed_const(1000);
9010	sclk.full = dfixed_const(wm->sclk);
9011	sclk.full = dfixed_div(sclk, a);
9012	a.full = dfixed_const(10);
9013	return_efficiency.full = dfixed_const(8);
9014	return_efficiency.full = dfixed_div(return_efficiency, a);
9015	a.full = dfixed_const(32);
9016	bandwidth.full = dfixed_mul(a, sclk);
9017	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9018
9019	return dfixed_trunc(bandwidth);
9020}
9021
9022/**
9023 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9024 *
9025 * @wm: watermark calculation data
9026 *
9027 * Calculate the dmif bandwidth used for display (CIK).
9028 * Used for display watermark bandwidth calculations
9029 * Returns the dmif bandwidth in MBytes/s
9030 */
9031static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9032{
9033	/* Calculate the DMIF Request Bandwidth */
9034	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9035	fixed20_12 disp_clk, bandwidth;
9036	fixed20_12 a, b;
9037
9038	a.full = dfixed_const(1000);
9039	disp_clk.full = dfixed_const(wm->disp_clk);
9040	disp_clk.full = dfixed_div(disp_clk, a);
9041	a.full = dfixed_const(32);
9042	b.full = dfixed_mul(a, disp_clk);
9043
9044	a.full = dfixed_const(10);
9045	disp_clk_request_efficiency.full = dfixed_const(8);
9046	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9047
9048	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9049
9050	return dfixed_trunc(bandwidth);
9051}
9052
9053/**
9054 * dce8_available_bandwidth - get the min available bandwidth
9055 *
9056 * @wm: watermark calculation data
9057 *
9058 * Calculate the min available bandwidth used for display (CIK).
9059 * Used for display watermark bandwidth calculations
9060 * Returns the min available bandwidth in MBytes/s
9061 */
9062static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9063{
9064	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9065	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9066	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9067	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9068
9069	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9070}
9071
9072/**
9073 * dce8_average_bandwidth - get the average available bandwidth
9074 *
9075 * @wm: watermark calculation data
9076 *
9077 * Calculate the average available bandwidth used for display (CIK).
9078 * Used for display watermark bandwidth calculations
9079 * Returns the average available bandwidth in MBytes/s
9080 */
9081static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9082{
9083	/* Calculate the display mode Average Bandwidth
9084	 * DisplayMode should contain the source and destination dimensions,
9085	 * timing, etc.
9086	 */
9087	fixed20_12 bpp;
9088	fixed20_12 line_time;
9089	fixed20_12 src_width;
9090	fixed20_12 bandwidth;
9091	fixed20_12 a;
9092
9093	a.full = dfixed_const(1000);
9094	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9095	line_time.full = dfixed_div(line_time, a);
9096	bpp.full = dfixed_const(wm->bytes_per_pixel);
9097	src_width.full = dfixed_const(wm->src_width);
9098	bandwidth.full = dfixed_mul(src_width, bpp);
9099	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9100	bandwidth.full = dfixed_div(bandwidth, line_time);
9101
9102	return dfixed_trunc(bandwidth);
9103}
9104
9105/**
9106 * dce8_latency_watermark - get the latency watermark
9107 *
9108 * @wm: watermark calculation data
9109 *
9110 * Calculate the latency watermark (CIK).
9111 * Used for display watermark bandwidth calculations
9112 * Returns the latency watermark in ns
9113 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* no active heads: nothing to hide, watermark is zero */
	if (wm->num_heads == 0)
		return 0;

	/* downscaling/multi-tap scaling/interlacing pulls more source
	 * lines per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line buffer fill rate: per-head share of available bandwidth,
	 * capped by the dmif limit and by the display clock demand
	 */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time (ns) to fill one destination line at that rate */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if lines fill faster than they scan out, raw latency dominates;
	 * otherwise add the extra fill time on top
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9163
9164/**
9165 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9166 * average and available dram bandwidth
9167 *
9168 * @wm: watermark calculation data
9169 *
9170 * Check if the display average bandwidth fits in the display
9171 * dram bandwidth (CIK).
9172 * Used for display watermark bandwidth calculations
9173 * Returns true if the display fits, false if not.
9174 */
9175static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9176{
9177	if (dce8_average_bandwidth(wm) <=
9178	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9179		return true;
9180	else
9181		return false;
9182}
9183
9184/**
9185 * dce8_average_bandwidth_vs_available_bandwidth - check
9186 * average and available bandwidth
9187 *
9188 * @wm: watermark calculation data
9189 *
9190 * Check if the display average bandwidth fits in the display
9191 * available bandwidth (CIK).
9192 * Used for display watermark bandwidth calculations
9193 * Returns true if the display fits, false if not.
9194 */
9195static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9196{
9197	if (dce8_average_bandwidth(wm) <=
9198	    (dce8_available_bandwidth(wm) / wm->num_heads))
9199		return true;
9200	else
9201		return false;
9202}
9203
9204/**
9205 * dce8_check_latency_hiding - check latency hiding
9206 *
9207 * @wm: watermark calculation data
9208 *
9209 * Check latency hiding (CIK).
9210 * Used for display watermark bandwidth calculations
9211 * Returns true if the display fits, false if not.
9212 */
9213static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9214{
9215	u32 lb_partitions = wm->lb_size / wm->src_width;
9216	u32 line_time = wm->active_time + wm->blank_time;
9217	u32 latency_tolerant_lines;
9218	u32 latency_hiding;
9219	fixed20_12 a;
9220
9221	a.full = dfixed_const(1);
9222	if (wm->vsc.full > a.full)
9223		latency_tolerant_lines = 1;
9224	else {
9225		if (lb_partitions <= (wm->vtaps + 1))
9226			latency_tolerant_lines = 1;
9227		else
9228			latency_tolerant_lines = 2;
9229	}
9230
9231	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9232
9233	if (dce8_latency_watermark(wm) <= latency_hiding)
9234		return true;
9235	else
9236		return false;
9237}
9238
9239/**
9240 * dce8_program_watermarks - program display watermarks
9241 *
9242 * @rdev: radeon_device pointer
9243 * @radeon_crtc: the selected display controller
9244 * @lb_size: line buffer size
9245 * @num_heads: number of display controllers in use
9246 *
9247 * Calculate and program the display watermarks for the
9248 * selected display controller (CIK).
9249 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* active/total line time in ns, from pixel count and clock */
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		/* hw field is 16 bits wide */
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9377
9378/**
9379 * dce8_bandwidth_update - program display watermarks
9380 *
9381 * @rdev: radeon_device pointer
9382 *
9383 * Calculate and program the display watermarks and line
9384 * buffer allocation (CIK).
9385 */
9386void dce8_bandwidth_update(struct radeon_device *rdev)
9387{
9388	struct drm_display_mode *mode = NULL;
9389	u32 num_heads = 0, lb_size;
9390	int i;
9391
9392	if (!rdev->mode_info.mode_config_initialized)
9393		return;
9394
9395	radeon_update_display_priority(rdev);
9396
9397	for (i = 0; i < rdev->num_crtc; i++) {
9398		if (rdev->mode_info.crtcs[i]->base.enabled)
9399			num_heads++;
9400	}
9401	for (i = 0; i < rdev->num_crtc; i++) {
9402		mode = &rdev->mode_info.crtcs[i]->base.mode;
9403		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9404		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9405	}
9406}
9407
9408/**
9409 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9410 *
9411 * @rdev: radeon_device pointer
9412 *
9413 * Fetches a GPU clock counter snapshot (SI).
9414 * Returns the 64 bit clock counter snapshot.
9415 */
9416uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9417{
9418	uint64_t clock;
9419
9420	mutex_lock(&rdev->gpu_clock_mutex);
9421	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9422	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9423		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9424	mutex_unlock(&rdev->gpu_clock_mutex);
9425	return clock;
9426}
9427
9428static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9429			     u32 cntl_reg, u32 status_reg)
9430{
9431	int r, i;
9432	struct atom_clock_dividers dividers;
9433	uint32_t tmp;
9434
9435	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9436					   clock, false, &dividers);
9437	if (r)
9438		return r;
9439
9440	tmp = RREG32_SMC(cntl_reg);
9441	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9442	tmp |= dividers.post_divider;
9443	WREG32_SMC(cntl_reg, tmp);
9444
9445	for (i = 0; i < 100; i++) {
9446		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9447			break;
9448		mdelay(10);
9449	}
9450	if (i == 100)
9451		return -ETIMEDOUT;
9452
9453	return 0;
9454}
9455
9456int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9457{
9458	int r = 0;
9459
9460	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9461	if (r)
9462		return r;
9463
9464	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9465	return r;
9466}
9467
9468int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9469{
9470	int r, i;
9471	struct atom_clock_dividers dividers;
9472	u32 tmp;
9473
9474	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475					   ecclk, false, &dividers);
9476	if (r)
9477		return r;
9478
9479	for (i = 0; i < 100; i++) {
9480		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9481			break;
9482		mdelay(10);
9483	}
9484	if (i == 100)
9485		return -ETIMEDOUT;
9486
9487	tmp = RREG32_SMC(CG_ECLK_CNTL);
9488	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9489	tmp |= dividers.post_divider;
9490	WREG32_SMC(CG_ECLK_CNTL, tmp);
9491
9492	for (i = 0; i < 100; i++) {
9493		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9494			break;
9495		mdelay(10);
9496	}
9497	if (i == 100)
9498		return -ETIMEDOUT;
9499
9500	return 0;
9501}
9502
/*
 * cik_pcie_gen3_enable - retrain the PCIe link to gen2/gen3 speeds
 * @rdev: radeon_device pointer
 *
 * Bails out on root-bus devices, when radeon.pcie_gen2=0, on IGPs and on
 * non-PCIe parts.  Otherwise reads the upstream bridge's link speed cap
 * and, if the link is not already running at that rate, performs a
 * software-driven link speed change to gen2 (5.0GT/s) or gen3 (8.0GT/s),
 * including the gen3 re-equalization retry dance.
 *
 * NOTE(review): the LNKCTL/LNKCTL2 accesses below are raw config-space
 * read-modify-writes; mainline later moved such sequences to
 * pcie_capability_clear_and_set_word() to avoid racing the PCI core's own
 * LNKCTL updates (e.g. ASPM) -- worth confirming against current upstream.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	/* nothing upstream to negotiate with if we sit on the root bus */
	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* radeon.pcie_gen2=0 disables all link speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	/* only gen2/gen3 capable upstream ports are worth the effort */
	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	/* LC_CURRENT_DATA_RATE: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* PCIe capability offsets of the bridge and the GPU endpoint */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends, then force HW autonomous
			 * width disable for the duration of the retrain
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the max detected width before
			 * retraining, if the LC supports renegotiation
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries, or until the GPU
			 * reports a pending transaction
			 */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);


				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* re-snapshot LNKCTL/LNKCTL2 on both ends so
				 * they can be restored after the quiesce below
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);







				/* quiesce the link and redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* NOTE(review): (1 << 4) is the LNKCTL2 Enter
				 * Compliance bit; the Transmit Margin field is
				 * bits 9:7 (i.e. 7 << 7), so (7 << 9) looks off
				 * by two bits -- verify against the PCIe spec.
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);




				/* release the quiesce for the next iteration */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* LNKCTL2 Target Link Speed: 3 = 8.0GT/s, 2 = 5.0GT/s, 1 = 2.5GT/s */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= 3; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);


	/* kick off the speed change and wait for the HW to ack it */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9664
/*
 * cik_program_aspm - program PCIe ASPM (L0s/L1) and related clock controls
 * @rdev: radeon_device pointer
 *
 * No-op when radeon.aspm=0, on IGPs, or on non-PCIe parts.  The four
 * disable_* locals are compile-time policy knobs; all are currently
 * false, i.e. L0s, L1, PLL power-down in L1 and CLKREQ use are all
 * allowed.  All register writes are conditional on the value actually
 * changing (orig != data) to avoid redundant PCIE/SMC accesses.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* honor the radeon.aspm module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value (0x24) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; the write itself
	 * happens in the !disable_l1 branch or the else below
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF to power down the PLLs in L1/TXS2
			 * on both lane bundles (PB0/PB1)
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ is only usable if the upstream bridge
			 * advertises Clock Power Management in LNKCAP
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch thermal-monitor and misc clocks off
				 * the reference clock so it can be gated
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: commit just the L0s/PMI settings built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable BIF memory light sleep */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* NOTE(review): if the HW reports the max N_FTS value, L0s is
		 * kept only when the link is reverse-configured in both
		 * directions -- presumably a HW-specific workaround; confirm
		 * against AMD register docs.
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}