/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>

#include <drm/drm_pci.h>
#include <drm/drm_vblank.h>

#include "atom.h"
#include "cik_blit_shaders.h"
#include "cikd.h"
#include "clearstate_ci.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "radeon_ucode.h"

#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
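
/*
 * Note that each ASIC is listed twice above: the upper-case images are
 * the original firmware layout, the lower-case ones the repackaged
 * firmware with a common header (selected via rdev->new_fw at load
 * time, see e.g. ci_mc_load_microcode() below).
 */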

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
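
/*
 * Example (sketch): this is the CIK backend for the RADEON_INFO_READ_REG
 * query, which lets user space sample one of the whitelisted status
 * registers, e.g.:
 *
 *	u32 val;
 *
 *	if (!cik_get_allowed_info_register(rdev, GRBM_STATUS, &val))
 *		DRM_INFO("GRBM_STATUS: 0x%08x\n", val);
 *
 * Any offset outside the switch above is rejected with -EINVAL.
 */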

/*
 * Indirect register accessors
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
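
/*
 * The DIDT block sits behind an index/data register pair: the target
 * offset is written to CIK_DIDT_IND_INDEX and the payload is then
 * transferred through CIK_DIDT_IND_DATA.  The didt_idx_lock keeps that
 * two-step sequence atomic against concurrent accessors.
 */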

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
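
/*
 * Worked example: a CTF_TEMP field of 60 (0x03c) is returned as
 * 60 * 1000 = 60000 millidegrees C.  Readings with bit 9 (0x200) set
 * are clamped to the 255 C maximum of the 9-bit field.
 */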

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
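
/*
 * Worked example: the raw SMC value is in 1/8 degree steps with a
 * -49 C offset, so a reading of 800 decodes to (800 / 8) - 49 = 51 C,
 * returned as 51000 millidegrees.
 */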

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
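
/*
 * The (void) read-backs of PCIE_INDEX/PCIE_DATA above flush the
 * preceding posted writes, so the index is guaranteed to be latched
 * before the data register is accessed.
 */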

static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
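
/*
 * Layout of the two save/restore lists above (copied raw into the RLC
 * save/restore buffer, see sumo_rlc_init()): each entry appears to pair
 * an instance-select value in the upper 16 bits with a register dword
 * offset in the lower 16, followed by a 0x00000000 slot for the saved
 * contents; the bare counts (0x3, 0x5) introduce short runs of entries
 * that carry no data slots.
 */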

static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
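
/*
 * All of the "golden" tables above are (offset, and-mask, or-value)
 * triplets.  radeon_program_register_sequence() consumes them roughly
 * like this (sketch of the helper in radeon_device.c):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff) {
 *			tmp = or_mask;
 *		} else {
 *			tmp = RREG32(reg);
 *			tmp &= ~and_mask;
 *			tmp |= (or_mask & and_mask);
 *		}
 *		WREG32(reg, tmp);
 *	}
 */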


static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
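
/*
 * Example: on a dGPU with the typical 100 MHz reference crystal
 * (reference_freq == 10000, in 10 kHz units from the ATOM firmware
 * info table) and XTALIN_DIVIDE set, this reports a 25 MHz xclk.
 */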

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
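
/*
 * These two are normally reached through the RDOORBELL32()/WDOORBELL32()
 * wrappers in radeon.h, e.g. ring code kicking a compute queue:
 *
 *	WDOORBELL32(ring->doorbell_index, ring->wptr);
 *
 * The bounds check above guards against a stray index walking off the
 * mapped doorbell BAR.
 */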
1769
1770#define BONAIRE_IO_MC_REGS_SIZE 36
1771
1772static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1773{
1774	{0x00000070, 0x04400000},
1775	{0x00000071, 0x80c01803},
1776	{0x00000072, 0x00004004},
1777	{0x00000073, 0x00000100},
1778	{0x00000074, 0x00ff0000},
1779	{0x00000075, 0x34000000},
1780	{0x00000076, 0x08000014},
1781	{0x00000077, 0x00cc08ec},
1782	{0x00000078, 0x00000400},
1783	{0x00000079, 0x00000000},
1784	{0x0000007a, 0x04090000},
1785	{0x0000007c, 0x00000000},
1786	{0x0000007e, 0x4408a8e8},
1787	{0x0000007f, 0x00000304},
1788	{0x00000080, 0x00000000},
1789	{0x00000082, 0x00000001},
1790	{0x00000083, 0x00000002},
1791	{0x00000084, 0xf3e4f400},
1792	{0x00000085, 0x052024e3},
1793	{0x00000087, 0x00000000},
1794	{0x00000088, 0x01000000},
1795	{0x0000008a, 0x1c0a0000},
1796	{0x0000008b, 0xff010000},
1797	{0x0000008d, 0xffffefff},
1798	{0x0000008e, 0xfff3efff},
1799	{0x0000008f, 0xfff3efbf},
1800	{0x00000092, 0xf7ffffff},
1801	{0x00000093, 0xffffff7f},
1802	{0x00000095, 0x00101101},
1803	{0x00000096, 0x00000fff},
1804	{0x00000097, 0x00116fff},
1805	{0x00000098, 0x60010000},
1806	{0x00000099, 0x10010000},
1807	{0x0000009a, 0x00006000},
1808	{0x0000009b, 0x00001000},
1809	{0x0000009f, 0x00b48000}
1810};
1811
1812#define HAWAII_IO_MC_REGS_SIZE 22
1813
1814static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1815{
1816	{0x0000007d, 0x40000000},
1817	{0x0000007e, 0x40180304},
1818	{0x0000007f, 0x0000ff00},
1819	{0x00000081, 0x00000000},
1820	{0x00000083, 0x00000800},
1821	{0x00000086, 0x00000000},
1822	{0x00000087, 0x00000100},
1823	{0x00000088, 0x00020100},
1824	{0x00000089, 0x00000000},
1825	{0x0000008b, 0x00040000},
1826	{0x0000008c, 0x00000100},
1827	{0x0000008e, 0xff010000},
1828	{0x00000090, 0xffffefff},
1829	{0x00000091, 0xfff3efff},
1830	{0x00000092, 0xfff3efbf},
1831	{0x00000093, 0xf7ffffff},
1832	{0x00000094, 0xffffff7f},
1833	{0x00000095, 0x00000fff},
1834	{0x00000096, 0x00116fff},
1835	{0x00000097, 0x60010000},
1836	{0x00000098, 0x10010000},
1837	{0x0000009f, 0x00c79000}
1838};
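/*
 * Both tables above are lists of {index, data} pairs (informative):
 * ci_mc_load_microcode() below writes the first word of each pair to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 */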
1839
1840
1841/**
1842 * cik_srbm_select - select specific register instances
1843 *
1844 * @rdev: radeon_device pointer
1845 * @me: selected ME (micro engine)
1846 * @pipe: pipe
1847 * @queue: queue
1848 * @vmid: VMID
1849 *
1850 * Switches the currently active register instances.  Some
1851 * registers are instanced per VMID, others are instanced per
1852 * me/pipe/queue combination.
1853 */
1854static void cik_srbm_select(struct radeon_device *rdev,
1855			    u32 me, u32 pipe, u32 queue, u32 vmid)
1856{
1857	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858			     MEID(me & 0x3) |
1859			     VMID(vmid & 0xf) |
1860			     QUEUEID(queue & 0x7));
1861	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862}
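/*
 * Callers must serialize access to SRBM_GFX_CNTL; the usual pattern in
 * this driver (sketch, not part of the original source) is:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */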
1863
1864/* ucode loading */
1865/**
1866 * ci_mc_load_microcode - load MC ucode into the hw
1867 *
1868 * @rdev: radeon_device pointer
1869 *
1870 * Load the GDDR MC ucode into the hw (CIK).
1871 * Returns 0 on success, error on failure.
1872 */
1873int ci_mc_load_microcode(struct radeon_device *rdev)
1874{
1875	const __be32 *fw_data = NULL;
1876	const __le32 *new_fw_data = NULL;
1877	u32 running, tmp;
1878	u32 *io_mc_regs = NULL;
1879	const __le32 *new_io_mc_regs = NULL;
1880	int i, regs_size, ucode_size;
1881
1882	if (!rdev->mc_fw)
1883		return -EINVAL;
1884
1885	if (rdev->new_fw) {
1886		const struct mc_firmware_header_v1_0 *hdr =
1887			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1888
1889		radeon_ucode_print_mc_hdr(&hdr->header);
1890
1891		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1892		new_io_mc_regs = (const __le32 *)
1893			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1894		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1895		new_fw_data = (const __le32 *)
1896			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897	} else {
1898		ucode_size = rdev->mc_fw->size / 4;
1899
1900		switch (rdev->family) {
1901		case CHIP_BONAIRE:
1902			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1903			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1904			break;
1905		case CHIP_HAWAII:
1906			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1907			regs_size = HAWAII_IO_MC_REGS_SIZE;
1908			break;
1909		default:
1910			return -EINVAL;
1911		}
1912		fw_data = (const __be32 *)rdev->mc_fw->data;
1913	}
1914
1915	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1916
1917	if (running == 0) {
1918		/* reset the engine and set to writable */
1919		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922		/* load mc io regs */
1923		for (i = 0; i < regs_size; i++) {
1924			if (rdev->new_fw) {
1925				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927			} else {
1928				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930			}
1931		}
1932
1933		tmp = RREG32(MC_SEQ_MISC0);
1934		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939		}
1940
1941		/* load the MC ucode */
1942		for (i = 0; i < ucode_size; i++) {
1943			if (rdev->new_fw)
1944				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945			else
1946				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947		}
1948
1949		/* put the engine back into the active state */
1950		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954		/* wait for training to complete */
1955		for (i = 0; i < rdev->usec_timeout; i++) {
1956			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957				break;
1958			udelay(1);
1959		}
1960		for (i = 0; i < rdev->usec_timeout; i++) {
1961			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962				break;
1963			udelay(1);
1964		}
1965	}
1966
1967	return 0;
1968}
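/*
 * Note on the size math above (informative): io_debug_size_bytes is a
 * byte count and each I/O debug entry is an (index, data) pair of two
 * 32-bit words, hence the division by (4 * 2); ucode_size_bytes is
 * likewise converted from bytes to dwords by dividing by 4.
 */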
1969
1970/**
1971 * cik_init_microcode - load ucode images from disk
1972 *
1973 * @rdev: radeon_device pointer
1974 *
1975 * Use the firmware interface to load the ucode images into
1976 * the driver (not loaded into hw).
1977 * Returns 0 on success, error on failure.
1978 */
1979static int cik_init_microcode(struct radeon_device *rdev)
1980{
1981	const char *chip_name;
1982	const char *new_chip_name;
1983	size_t pfp_req_size, me_req_size, ce_req_size,
1984		mec_req_size, rlc_req_size, mc_req_size = 0,
1985		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986	char fw_name[30];
1987	int new_fw = 0;
1988	int err;
1989	int num_fw;
1990	bool new_smc = false;
1991
1992	DRM_DEBUG("\n");
1993
1994	switch (rdev->family) {
1995	case CHIP_BONAIRE:
1996		chip_name = "BONAIRE";
1997		if ((rdev->pdev->revision == 0x80) ||
1998		    (rdev->pdev->revision == 0x81) ||
1999		    (rdev->pdev->device == 0x665f))
2000			new_smc = true;
2001		new_chip_name = "bonaire";
2002		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003		me_req_size = CIK_ME_UCODE_SIZE * 4;
2004		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011		num_fw = 8;
2012		break;
2013	case CHIP_HAWAII:
2014		chip_name = "HAWAII";
2015		if (rdev->pdev->revision == 0x80)
2016			new_smc = true;
2017		new_chip_name = "hawaii";
2018		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019		me_req_size = CIK_ME_UCODE_SIZE * 4;
2020		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027		num_fw = 8;
2028		break;
2029	case CHIP_KAVERI:
2030		chip_name = "KAVERI";
2031		new_chip_name = "kaveri";
2032		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033		me_req_size = CIK_ME_UCODE_SIZE * 4;
2034		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038		num_fw = 7;
2039		break;
2040	case CHIP_KABINI:
2041		chip_name = "KABINI";
2042		new_chip_name = "kabini";
2043		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044		me_req_size = CIK_ME_UCODE_SIZE * 4;
2045		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049		num_fw = 6;
2050		break;
2051	case CHIP_MULLINS:
2052		chip_name = "MULLINS";
2053		new_chip_name = "mullins";
2054		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055		me_req_size = CIK_ME_UCODE_SIZE * 4;
2056		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060		num_fw = 6;
2061		break;
2062	default:
		BUG();
2063	}
2064
2065	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066
2067	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069	if (err) {
2070		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072		if (err)
2073			goto out;
2074		if (rdev->pfp_fw->size != pfp_req_size) {
2075			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076			       rdev->pfp_fw->size, fw_name);
2077			err = -EINVAL;
2078			goto out;
2079		}
2080	} else {
2081		err = radeon_ucode_validate(rdev->pfp_fw);
2082		if (err) {
2083			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084			       fw_name);
2085			goto out;
2086		} else {
2087			new_fw++;
2088		}
2089	}
2090
2091	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093	if (err) {
2094		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096		if (err)
2097			goto out;
2098		if (rdev->me_fw->size != me_req_size) {
2099			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100			       rdev->me_fw->size, fw_name);
2101			err = -EINVAL;
2102		}
2103	} else {
2104		err = radeon_ucode_validate(rdev->me_fw);
2105		if (err) {
2106			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107			       fw_name);
2108			goto out;
2109		} else {
2110			new_fw++;
2111		}
2112	}
2113
2114	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116	if (err) {
2117		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119		if (err)
2120			goto out;
2121		if (rdev->ce_fw->size != ce_req_size) {
2122			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123			       rdev->ce_fw->size, fw_name);
2124			err = -EINVAL;
2125		}
2126	} else {
2127		err = radeon_ucode_validate(rdev->ce_fw);
2128		if (err) {
2129			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130			       fw_name);
2131			goto out;
2132		} else {
2133			new_fw++;
2134		}
2135	}
2136
2137	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139	if (err) {
2140		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142		if (err)
2143			goto out;
2144		if (rdev->mec_fw->size != mec_req_size) {
2145			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146			       rdev->mec_fw->size, fw_name);
2147			err = -EINVAL;
2148		}
2149	} else {
2150		err = radeon_ucode_validate(rdev->mec_fw);
2151		if (err) {
2152			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153			       fw_name);
2154			goto out;
2155		} else {
2156			new_fw++;
2157		}
2158	}
2159
2160	if (rdev->family == CHIP_KAVERI) {
2161		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163		if (err) {
2164			goto out;
2165		} else {
2166			err = radeon_ucode_validate(rdev->mec2_fw);
2167			if (err) {
2168				goto out;
2169			} else {
2170				new_fw++;
2171			}
2172		}
2173	}
2174
2175	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177	if (err) {
2178		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180		if (err)
2181			goto out;
2182		if (rdev->rlc_fw->size != rlc_req_size) {
2183			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184			       rdev->rlc_fw->size, fw_name);
2185			err = -EINVAL;
2186		}
2187	} else {
2188		err = radeon_ucode_validate(rdev->rlc_fw);
2189		if (err) {
2190			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191			       fw_name);
2192			goto out;
2193		} else {
2194			new_fw++;
2195		}
2196	}
2197
2198	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200	if (err) {
2201		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203		if (err)
2204			goto out;
2205		if (rdev->sdma_fw->size != sdma_req_size) {
2206			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207			       rdev->sdma_fw->size, fw_name);
2208			err = -EINVAL;
2209		}
2210	} else {
2211		err = radeon_ucode_validate(rdev->sdma_fw);
2212		if (err) {
2213			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214			       fw_name);
2215			goto out;
2216		} else {
2217			new_fw++;
2218		}
2219	}
2220
2221	/* No SMC, MC ucode on APUs */
2222	if (!(rdev->flags & RADEON_IS_IGP)) {
2223		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225		if (err) {
2226			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228			if (err) {
2229				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231				if (err)
2232					goto out;
2233			}
2234			if ((rdev->mc_fw->size != mc_req_size) &&
2235			    (rdev->mc_fw->size != mc2_req_size)) {
2236				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237				       rdev->mc_fw->size, fw_name);
2238				err = -EINVAL;
2239			}
2240			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241		} else {
2242			err = radeon_ucode_validate(rdev->mc_fw);
2243			if (err) {
2244				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245				       fw_name);
2246				goto out;
2247			} else {
2248				new_fw++;
2249			}
2250		}
2251
2252		if (new_smc)
2253			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254		else
2255			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257		if (err) {
2258			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260			if (err) {
2261				pr_err("smc: error loading firmware \"%s\"\n",
2262				       fw_name);
2263				release_firmware(rdev->smc_fw);
2264				rdev->smc_fw = NULL;
2265				err = 0;
2266			} else if (rdev->smc_fw->size != smc_req_size) {
2267				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268				       rdev->smc_fw->size, fw_name);
2269				err = -EINVAL;
2270			}
2271		} else {
2272			err = radeon_ucode_validate(rdev->smc_fw);
2273			if (err) {
2274				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275				       fw_name);
2276				goto out;
2277			} else {
2278				new_fw++;
2279			}
2280		}
2281	}
2282
2283	if (new_fw == 0) {
2284		rdev->new_fw = false;
2285	} else if (new_fw < num_fw) {
2286		pr_err("ci_fw: mixing new and old firmware!\n");
2287		err = -EINVAL;
2288	} else {
2289		rdev->new_fw = true;
2290	}
2291
2292out:
2293	if (err) {
2294		if (err != -EINVAL)
2295			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296			       fw_name);
2297		release_firmware(rdev->pfp_fw);
2298		rdev->pfp_fw = NULL;
2299		release_firmware(rdev->me_fw);
2300		rdev->me_fw = NULL;
2301		release_firmware(rdev->ce_fw);
2302		rdev->ce_fw = NULL;
2303		release_firmware(rdev->mec_fw);
2304		rdev->mec_fw = NULL;
2305		release_firmware(rdev->mec2_fw);
2306		rdev->mec2_fw = NULL;
2307		release_firmware(rdev->rlc_fw);
2308		rdev->rlc_fw = NULL;
2309		release_firmware(rdev->sdma_fw);
2310		rdev->sdma_fw = NULL;
2311		release_firmware(rdev->mc_fw);
2312		rdev->mc_fw = NULL;
2313		release_firmware(rdev->smc_fw);
2314		rdev->smc_fw = NULL;
2315	}
2316	return err;
2317}
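/*
 * Naming convention (informative): the lowercase images tried first
 * (e.g. "radeon/bonaire_pfp.bin") carry the unified header and are
 * checked with radeon_ucode_validate(); the uppercase legacy blobs
 * (e.g. "radeon/BONAIRE_pfp.bin") are raw and are checked by exact
 * size instead.  Mixing the two styles is rejected with -EINVAL.
 */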
2318
2319/*
2320 * Core functions
2321 */
2322/**
2323 * cik_tiling_mode_table_init - init the hw tiling table
2324 *
2325 * @rdev: radeon_device pointer
2326 *
2327 * Starting with SI, the tiling setup is done globally in a
2328 * set of 32 tiling modes.  Rather than selecting each set of
2329 * parameters per surface as on older asics, we just select
2330 * which index in the tiling table we want to use, and the
2331 * surface uses those parameters (CIK).
2332 */
2333static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334{
2335	u32 *tile = rdev->config.cik.tile_mode_array;
2336	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337	const u32 num_tile_mode_states =
2338			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339	const u32 num_secondary_tile_mode_states =
2340			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341	u32 reg_offset, split_equal_to_row_size;
2342	u32 num_pipe_configs;
2343	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344		rdev->config.cik.max_shader_engines;
2345
2346	switch (rdev->config.cik.mem_row_size_in_kb) {
2347	case 1:
2348		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349		break;
2350	case 2:
2351	default:
2352		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353		break;
2354	case 4:
2355		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356		break;
2357	}
2358
2359	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360	if (num_pipe_configs > 8)
2361		num_pipe_configs = 16;
2362
2363	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364		tile[reg_offset] = 0;
2365	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366		macrotile[reg_offset] = 0;
2367
2368	switch (num_pipe_configs) {
2369	case 16:
2370		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389			   TILE_SPLIT(split_equal_to_row_size));
2390		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400			   TILE_SPLIT(split_equal_to_row_size));
2401		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448
2449		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452			   NUM_BANKS(ADDR_SURF_16_BANK));
2453		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456			   NUM_BANKS(ADDR_SURF_16_BANK));
2457		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460			   NUM_BANKS(ADDR_SURF_16_BANK));
2461		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464			   NUM_BANKS(ADDR_SURF_16_BANK));
2465		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468			   NUM_BANKS(ADDR_SURF_8_BANK));
2469		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472			   NUM_BANKS(ADDR_SURF_4_BANK));
2473		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476			   NUM_BANKS(ADDR_SURF_2_BANK));
2477		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480			   NUM_BANKS(ADDR_SURF_16_BANK));
2481		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484			   NUM_BANKS(ADDR_SURF_16_BANK));
2485		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488			    NUM_BANKS(ADDR_SURF_16_BANK));
2489		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492			    NUM_BANKS(ADDR_SURF_8_BANK));
2493		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496			    NUM_BANKS(ADDR_SURF_4_BANK));
2497		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500			    NUM_BANKS(ADDR_SURF_2_BANK));
2501		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504			    NUM_BANKS(ADDR_SURF_2_BANK));
2505
2506		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510		break;
2511
2512	case 8:
2513		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532			   TILE_SPLIT(split_equal_to_row_size));
2533		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543			   TILE_SPLIT(split_equal_to_row_size));
2544		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591
2592		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595				NUM_BANKS(ADDR_SURF_16_BANK));
2596		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599				NUM_BANKS(ADDR_SURF_16_BANK));
2600		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603				NUM_BANKS(ADDR_SURF_16_BANK));
2604		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607				NUM_BANKS(ADDR_SURF_16_BANK));
2608		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611				NUM_BANKS(ADDR_SURF_8_BANK));
2612		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615				NUM_BANKS(ADDR_SURF_4_BANK));
2616		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619				NUM_BANKS(ADDR_SURF_2_BANK));
2620		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623				NUM_BANKS(ADDR_SURF_16_BANK));
2624		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627				NUM_BANKS(ADDR_SURF_16_BANK));
2628		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631				NUM_BANKS(ADDR_SURF_16_BANK));
2632		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635				NUM_BANKS(ADDR_SURF_16_BANK));
2636		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639				NUM_BANKS(ADDR_SURF_8_BANK));
2640		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643				NUM_BANKS(ADDR_SURF_4_BANK));
2644		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647				NUM_BANKS(ADDR_SURF_2_BANK));
2648
2649		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653		break;
2654
2655	case 4:
2656		if (num_rbs == 4) {
2657		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676			   TILE_SPLIT(split_equal_to_row_size));
2677		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687			   TILE_SPLIT(split_equal_to_row_size));
2688		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735
2736		} else if (num_rbs < 4) {
2737		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756			   TILE_SPLIT(split_equal_to_row_size));
2757		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767			   TILE_SPLIT(split_equal_to_row_size));
2768		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815		}
2816
2817		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820				NUM_BANKS(ADDR_SURF_16_BANK));
2821		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824				NUM_BANKS(ADDR_SURF_16_BANK));
2825		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828				NUM_BANKS(ADDR_SURF_16_BANK));
2829		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832				NUM_BANKS(ADDR_SURF_16_BANK));
2833		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836				NUM_BANKS(ADDR_SURF_16_BANK));
2837		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840				NUM_BANKS(ADDR_SURF_8_BANK));
2841		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844				NUM_BANKS(ADDR_SURF_4_BANK));
2845		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848				NUM_BANKS(ADDR_SURF_16_BANK));
2849		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852				NUM_BANKS(ADDR_SURF_16_BANK));
2853		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856				NUM_BANKS(ADDR_SURF_16_BANK));
2857		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860				NUM_BANKS(ADDR_SURF_16_BANK));
2861		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864				NUM_BANKS(ADDR_SURF_16_BANK));
2865		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868				NUM_BANKS(ADDR_SURF_8_BANK));
2869		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872				NUM_BANKS(ADDR_SURF_4_BANK));
2873
2874		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878		break;
2879
2880	case 2:
2881		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883			   PIPE_CONFIG(ADDR_SURF_P2) |
2884			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887			   PIPE_CONFIG(ADDR_SURF_P2) |
2888			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891			   PIPE_CONFIG(ADDR_SURF_P2) |
2892			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895			   PIPE_CONFIG(ADDR_SURF_P2) |
2896			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899			   PIPE_CONFIG(ADDR_SURF_P2) |
2900			   TILE_SPLIT(split_equal_to_row_size));
2901		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902			   PIPE_CONFIG(ADDR_SURF_P2) |
2903			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906			   PIPE_CONFIG(ADDR_SURF_P2) |
2907			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910			   PIPE_CONFIG(ADDR_SURF_P2) |
2911			   TILE_SPLIT(split_equal_to_row_size));
2912	tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913		   PIPE_CONFIG(ADDR_SURF_P2));
2914		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916			   PIPE_CONFIG(ADDR_SURF_P2));
2917		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919			    PIPE_CONFIG(ADDR_SURF_P2) |
2920			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923			    PIPE_CONFIG(ADDR_SURF_P2) |
2924			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927			    PIPE_CONFIG(ADDR_SURF_P2) |
2928			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930			    PIPE_CONFIG(ADDR_SURF_P2) |
2931			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934			    PIPE_CONFIG(ADDR_SURF_P2) |
2935			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938			    PIPE_CONFIG(ADDR_SURF_P2) |
2939			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942			    PIPE_CONFIG(ADDR_SURF_P2) |
2943			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946			    PIPE_CONFIG(ADDR_SURF_P2));
2947		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949			    PIPE_CONFIG(ADDR_SURF_P2) |
2950			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953			    PIPE_CONFIG(ADDR_SURF_P2) |
2954			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957			    PIPE_CONFIG(ADDR_SURF_P2) |
2958			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959
2960		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963				NUM_BANKS(ADDR_SURF_16_BANK));
2964		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967				NUM_BANKS(ADDR_SURF_16_BANK));
2968		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971				NUM_BANKS(ADDR_SURF_16_BANK));
2972		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975				NUM_BANKS(ADDR_SURF_16_BANK));
2976		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979				NUM_BANKS(ADDR_SURF_16_BANK));
2980		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983				NUM_BANKS(ADDR_SURF_16_BANK));
2984		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987				NUM_BANKS(ADDR_SURF_8_BANK));
2988		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991				NUM_BANKS(ADDR_SURF_16_BANK));
2992		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995				NUM_BANKS(ADDR_SURF_16_BANK));
2996		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999				NUM_BANKS(ADDR_SURF_16_BANK));
3000		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003				NUM_BANKS(ADDR_SURF_16_BANK));
3004		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007				NUM_BANKS(ADDR_SURF_16_BANK));
3008		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011				NUM_BANKS(ADDR_SURF_16_BANK));
3012		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015				NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021		break;
3022
3023	default:
3024		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025	}
3026}
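/*
 * Usage sketch (informative): on CIK a surface does not carry its own
 * tiling parameters; it stores an index into the table programmed
 * above, and the packed parameters are fetched from
 * GB_TILE_MODE0 + index * 4 (GB_MACROTILE_MODE0 + index * 4 for the
 * macrotile half).
 */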
3027
3028/**
3029 * cik_select_se_sh - select which SE, SH to address
3030 *
3031 * @rdev: radeon_device pointer
3032 * @se_num: shader engine to address
3033 * @sh_num: sh block to address
3034 *
3035 * Select which SE, SH combinations to address. Certain
3036 * registers are instanced per SE or SH.  0xffffffff means
3037 * broadcast to all SEs or SHs (CIK).
3038 */
3039static void cik_select_se_sh(struct radeon_device *rdev,
3040			     u32 se_num, u32 sh_num)
3041{
3042	u32 data = INSTANCE_BROADCAST_WRITES;
3043
3044	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046	else if (se_num == 0xffffffff)
3047		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048	else if (sh_num == 0xffffffff)
3049		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050	else
3051		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052	WREG32(GRBM_GFX_INDEX, data);
3053}
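/*
 * Usage sketch (informative): per-instance accesses are normally
 * bracketed so the GRBM index is restored to broadcast afterwards,
 * e.g. as cik_setup_rb() below does:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	... access the instanced registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */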
3054
3055/**
3056 * cik_create_bitmask - create a bitmask
3057 *
3058 * @bit_width: length of the mask
3059 *
3060 * Create a variable-length bit mask (CIK).
3061 * Returns the bitmask.
3062 */
3063static u32 cik_create_bitmask(u32 bit_width)
3064{
3065	u32 i, mask = 0;
3066
3067	for (i = 0; i < bit_width; i++) {
3068		mask <<= 1;
3069		mask |= 1;
3070	}
3071	return mask;
3072}
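/*
 * Equivalent closed form (informative, valid for bit_width < 32):
 *
 *	mask = (1U << bit_width) - 1;
 *
 * e.g. cik_create_bitmask(4) == 0xf.
 */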
3073
3074/**
3075 * cik_get_rb_disabled - computes the mask of disabled RBs
3076 *
3077 * @rdev: radeon_device pointer
3078 * @max_rb_num_per_se: max RBs (render backends) per shader engine for the asic
3080 * @sh_per_se: number of SH blocks per SE for the asic
3081 *
3082 * Calculates the bitmask of disabled RBs (CIK).
3083 * Returns the disabled RB bitmask.
3084 */
3085static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086			      u32 max_rb_num_per_se,
3087			      u32 sh_per_se)
3088{
3089	u32 data, mask;
3090
3091	data = RREG32(CC_RB_BACKEND_DISABLE);
3092	if (data & 1)
3093		data &= BACKEND_DISABLE_MASK;
3094	else
3095		data = 0;
3096	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097
3098	data >>= BACKEND_DISABLE_SHIFT;
3099
3100	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101
3102	return data & mask;
3103}
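/*
 * Worked example (informative): with 2 RBs per SE and 1 SH per SE the
 * mask is cik_create_bitmask(2 / 1) = 0x3, so a return value of 0x2
 * means the second RB of the currently selected SE/SH is disabled.
 */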
3104
3105/**
3106 * cik_setup_rb - setup the RBs on the asic
3107 *
3108 * @rdev: radeon_device pointer
3109 * @se_num: number of SEs (shader engines) for the asic
3110 * @sh_per_se: number of SH blocks per SE for the asic
3111 * @max_rb_num_per_se: max RBs (render backends) per shader engine for the asic
3112 *
3113 * Configures per-SE/SH RB registers (CIK).
3114 */
3115static void cik_setup_rb(struct radeon_device *rdev,
3116			 u32 se_num, u32 sh_per_se,
3117			 u32 max_rb_num_per_se)
3118{
3119	int i, j;
3120	u32 data, mask;
3121	u32 disabled_rbs = 0;
3122	u32 enabled_rbs = 0;
3123
3124	for (i = 0; i < se_num; i++) {
3125		for (j = 0; j < sh_per_se; j++) {
3126			cik_select_se_sh(rdev, i, j);
3127			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128			if (rdev->family == CHIP_HAWAII)
3129				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130			else
3131				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132		}
3133	}
3134	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135
3136	mask = 1;
3137	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3138		if (!(disabled_rbs & mask))
3139			enabled_rbs |= mask;
3140		mask <<= 1;
3141	}
3142
3143	rdev->config.cik.backend_enable_mask = enabled_rbs;
3144
3145	for (i = 0; i < se_num; i++) {
3146		cik_select_se_sh(rdev, i, 0xffffffff);
3147		data = 0;
3148		for (j = 0; j < sh_per_se; j++) {
3149			switch (enabled_rbs & 3) {
3150			case 0:
3151				if (j == 0)
3152					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3153				else
3154					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3155				break;
3156			case 1:
3157				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3158				break;
3159			case 2:
3160				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3161				break;
3162			case 3:
3163			default:
3164				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3165				break;
3166			}
3167			enabled_rbs >>= 2;
3168		}
3169		WREG32(PA_SC_RASTER_CONFIG, data);
3170	}
3171	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3172}
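/*
 * Note (informative): enabled_rbs is consumed two bits at a time, one
 * pair per SH (hence the >>= 2 per iteration); the RASTER_CONFIG_RB_MAP_*
 * value picked for each pair tells PA_SC_RASTER_CONFIG which physical
 * RBs back that packer.
 */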
3173
3174/**
3175 * cik_gpu_init - setup the 3D engine
3176 *
3177 * @rdev: radeon_device pointer
3178 *
3179 * Configures the 3D engine and tiling configuration
3180 * registers so that the 3D engine is usable.
3181 */
3182static void cik_gpu_init(struct radeon_device *rdev)
3183{
3184	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3185	u32 mc_shared_chmap, mc_arb_ramcfg;
3186	u32 hdp_host_path_cntl;
3187	u32 tmp;
3188	int i, j;
3189
3190	switch (rdev->family) {
3191	case CHIP_BONAIRE:
3192		rdev->config.cik.max_shader_engines = 2;
3193		rdev->config.cik.max_tile_pipes = 4;
3194		rdev->config.cik.max_cu_per_sh = 7;
3195		rdev->config.cik.max_sh_per_se = 1;
3196		rdev->config.cik.max_backends_per_se = 2;
3197		rdev->config.cik.max_texture_channel_caches = 4;
3198		rdev->config.cik.max_gprs = 256;
3199		rdev->config.cik.max_gs_threads = 32;
3200		rdev->config.cik.max_hw_contexts = 8;
3201
3202		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3203		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3204		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3205		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3206		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3207		break;
3208	case CHIP_HAWAII:
3209		rdev->config.cik.max_shader_engines = 4;
3210		rdev->config.cik.max_tile_pipes = 16;
3211		rdev->config.cik.max_cu_per_sh = 11;
3212		rdev->config.cik.max_sh_per_se = 1;
3213		rdev->config.cik.max_backends_per_se = 4;
3214		rdev->config.cik.max_texture_channel_caches = 16;
3215		rdev->config.cik.max_gprs = 256;
3216		rdev->config.cik.max_gs_threads = 32;
3217		rdev->config.cik.max_hw_contexts = 8;
3218
3219		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3220		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3221		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3222		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3223		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3224		break;
3225	case CHIP_KAVERI:
3226		rdev->config.cik.max_shader_engines = 1;
3227		rdev->config.cik.max_tile_pipes = 4;
3228		rdev->config.cik.max_cu_per_sh = 8;
3229		rdev->config.cik.max_backends_per_se = 2;
3230		rdev->config.cik.max_sh_per_se = 1;
3231		rdev->config.cik.max_texture_channel_caches = 4;
3232		rdev->config.cik.max_gprs = 256;
3233		rdev->config.cik.max_gs_threads = 16;
3234		rdev->config.cik.max_hw_contexts = 8;
3235
3236		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241		break;
3242	case CHIP_KABINI:
3243	case CHIP_MULLINS:
3244	default:
3245		rdev->config.cik.max_shader_engines = 1;
3246		rdev->config.cik.max_tile_pipes = 2;
3247		rdev->config.cik.max_cu_per_sh = 2;
3248		rdev->config.cik.max_sh_per_se = 1;
3249		rdev->config.cik.max_backends_per_se = 1;
3250		rdev->config.cik.max_texture_channel_caches = 2;
3251		rdev->config.cik.max_gprs = 256;
3252		rdev->config.cik.max_gs_threads = 16;
3253		rdev->config.cik.max_hw_contexts = 8;
3254
3255		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260		break;
3261	}
3262
3263	/* Initialize HDP */
3264	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3265		WREG32((0x2c14 + j), 0x00000000);
3266		WREG32((0x2c18 + j), 0x00000000);
3267		WREG32((0x2c1c + j), 0x00000000);
3268		WREG32((0x2c20 + j), 0x00000000);
3269		WREG32((0x2c24 + j), 0x00000000);
3270	}
3271
3272	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273	WREG32(SRBM_INT_CNTL, 0x1);
3274	WREG32(SRBM_INT_ACK, 0x1);
3275
3276	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3277
3278	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3279	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3280
3281	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3282	rdev->config.cik.mem_max_burst_length_bytes = 256;
3283	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3284	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3285	if (rdev->config.cik.mem_row_size_in_kb > 4)
3286		rdev->config.cik.mem_row_size_in_kb = 4;
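	/* e.g. a NOOFCOLS field of 0 gives 4 * (1 << 8) / 1024 = 1 KB
	 * rows and a field of 2 gives 4 KB; larger values are capped
	 * at 4 KB by the clamp above.
	 */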
3287	/* XXX use MC settings? */
3288	rdev->config.cik.shader_engine_tile_size = 32;
3289	rdev->config.cik.num_gpus = 1;
3290	rdev->config.cik.multi_gpu_tile_size = 64;
3291
3292	/* fix up row size */
3293	gb_addr_config &= ~ROW_SIZE_MASK;
3294	switch (rdev->config.cik.mem_row_size_in_kb) {
3295	case 1:
3296	default:
3297		gb_addr_config |= ROW_SIZE(0);
3298		break;
3299	case 2:
3300		gb_addr_config |= ROW_SIZE(1);
3301		break;
3302	case 4:
3303		gb_addr_config |= ROW_SIZE(2);
3304		break;
3305	}
3306
3307	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3308	 * not have bank info, so create a custom tiling dword.
3309	 * bits 3:0   num_pipes
3310	 * bits 7:4   num_banks
3311	 * bits 11:8  group_size
3312	 * bits 15:12 row_size
3313	 */
3314	rdev->config.cik.tile_config = 0;
3315	switch (rdev->config.cik.num_tile_pipes) {
3316	case 1:
3317		rdev->config.cik.tile_config |= (0 << 0);
3318		break;
3319	case 2:
3320		rdev->config.cik.tile_config |= (1 << 0);
3321		break;
3322	case 4:
3323		rdev->config.cik.tile_config |= (2 << 0);
3324		break;
3325	case 8:
3326	default:
3327		/* XXX what about 12? */
3328		rdev->config.cik.tile_config |= (3 << 0);
3329		break;
3330	}
3331	rdev->config.cik.tile_config |=
3332		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3333	rdev->config.cik.tile_config |=
3334		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3335	rdev->config.cik.tile_config |=
3336		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
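	/*
	 * Worked example (illustrative): with 4 tile pipes (bits 3:0 = 2),
	 * a NOOFBANK field of 2, a PIPE_INTERLEAVE_SIZE field of 0 and a
	 * ROW_SIZE field of 0, the dword packs as
	 * (0 << 12) | (0 << 8) | (2 << 4) | 2 = 0x22.
	 */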
3337
3338	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3339	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3340	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3341	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3342	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3343	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3344	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3345	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3346
3347	cik_tiling_mode_table_init(rdev);
3348
3349	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3350		     rdev->config.cik.max_sh_per_se,
3351		     rdev->config.cik.max_backends_per_se);
3352
3353	rdev->config.cik.active_cus = 0;
3354	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3355		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3356			rdev->config.cik.active_cus +=
3357				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3358		}
3359	}
3360
3361	/* set HW defaults for 3D engine */
3362	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3363
3364	WREG32(SX_DEBUG_1, 0x20);
3365
3366	WREG32(TA_CNTL_AUX, 0x00010000);
3367
3368	tmp = RREG32(SPI_CONFIG_CNTL);
3369	tmp |= 0x03000000;
3370	WREG32(SPI_CONFIG_CNTL, tmp);
3371
3372	WREG32(SQ_CONFIG, 1);
3373
3374	WREG32(DB_DEBUG, 0);
3375
3376	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3377	tmp |= 0x00000400;
3378	WREG32(DB_DEBUG2, tmp);
3379
3380	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3381	tmp |= 0x00020200;
3382	WREG32(DB_DEBUG3, tmp);
3383
3384	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3385	tmp |= 0x00018208;
3386	WREG32(CB_HW_CONTROL, tmp);
3387
3388	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3389
3390	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3391				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3392				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3393				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3394
3395	WREG32(VGT_NUM_INSTANCES, 1);
3396
3397	WREG32(CP_PERFMON_CNTL, 0);
3398
3399	WREG32(SQ_CONFIG, 0);
3400
3401	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3402					  FORCE_EOV_MAX_REZ_CNT(255)));
3403
3404	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3405	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3406
3407	WREG32(VGT_GS_VERTEX_REUSE, 16);
3408	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3409
3410	tmp = RREG32(HDP_MISC_CNTL);
3411	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3412	WREG32(HDP_MISC_CNTL, tmp);
3413
3414	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3415	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3416
3417	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3418	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3419
3420	udelay(50);
3421}
3422
3423/*
3424 * GPU scratch registers helper functions.
3425 */
3426/**
3427 * cik_scratch_init - setup driver info for CP scratch regs
3428 *
3429 * @rdev: radeon_device pointer
3430 *
3431 * Set up the number and offset of the CP scratch registers.
3432 * NOTE: use of CP scratch registers is a legacy interface and
3433 * is not used by default on newer asics (r6xx+).  On newer asics,
3434 * memory buffers are used for fences rather than scratch regs.
3435 */
3436static void cik_scratch_init(struct radeon_device *rdev)
3437{
3438	int i;
3439
3440	rdev->scratch.num_reg = 7;
3441	rdev->scratch.reg_base = SCRATCH_REG0;
3442	for (i = 0; i < rdev->scratch.num_reg; i++) {
3443		rdev->scratch.free[i] = true;
3444		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3445	}
3446}
3447
3448/**
3449 * cik_ring_test - basic gfx ring test
3450 *
3451 * @rdev: radeon_device pointer
3452 * @ring: radeon_ring structure holding ring information
3453 *
3454 * Allocate a scratch register and write to it using the gfx ring (CIK).
3455 * Provides a basic gfx ring test to verify that the ring is working.
3456 * Used by cik_cp_gfx_resume().
3457 * Returns 0 on success, error on failure.
3458 */
3459int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3460{
3461	uint32_t scratch;
3462	uint32_t tmp = 0;
3463	unsigned i;
3464	int r;
3465
3466	r = radeon_scratch_get(rdev, &scratch);
3467	if (r) {
3468		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3469		return r;
3470	}
3471	WREG32(scratch, 0xCAFEDEAD);
3472	r = radeon_ring_lock(rdev, ring, 3);
3473	if (r) {
3474		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3475		radeon_scratch_free(rdev, scratch);
3476		return r;
3477	}
3478	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3479	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3480	radeon_ring_write(ring, 0xDEADBEEF);
3481	radeon_ring_unlock_commit(rdev, ring, false);
3482
3483	for (i = 0; i < rdev->usec_timeout; i++) {
3484		tmp = RREG32(scratch);
3485		if (tmp == 0xDEADBEEF)
3486			break;
3487		udelay(1);
3488	}
3489	if (i < rdev->usec_timeout) {
3490		DRM_INFO("ring test on %d succeeded in %u usecs\n", ring->idx, i);
3491	} else {
3492		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3493			  ring->idx, scratch, tmp);
3494		r = -EINVAL;
3495	}
3496	radeon_scratch_free(rdev, scratch);
3497	return r;
3498}
3499
3500/**
3501 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3502 *
3503 * @rdev: radeon_device pointer
3504 * @ridx: radeon ring index
3505 *
3506 * Emits an hdp flush on the cp.
3507 */
3508static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3509				       int ridx)
3510{
3511	struct radeon_ring *ring = &rdev->ring[ridx];
3512	u32 ref_and_mask;
3513
3514	switch (ring->idx) {
3515	case CAYMAN_RING_TYPE_CP1_INDEX:
3516	case CAYMAN_RING_TYPE_CP2_INDEX:
3517	default:
3518		switch (ring->me) {
3519		case 0:
3520			ref_and_mask = CP2 << ring->pipe;
3521			break;
3522		case 1:
3523			ref_and_mask = CP6 << ring->pipe;
3524			break;
3525		default:
3526			return;
3527		}
3528		break;
3529	case RADEON_RING_TYPE_GFX_INDEX:
3530		ref_and_mask = CP0;
3531		break;
3532	}
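	/*
	 * For example, a compute ring with ring->me == 0 and
	 * ring->pipe == 1 ends up with ref_and_mask = CP2 << 1,
	 * selecting that pipe's bit in GPU_HDP_FLUSH_REQ/DONE below.
	 */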
3533
3534	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3535	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3536				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3537				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3538	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3539	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3540	radeon_ring_write(ring, ref_and_mask);
3541	radeon_ring_write(ring, ref_and_mask);
3542	radeon_ring_write(ring, 0x20); /* poll interval */
3543}
3544
3545/**
3546 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3547 *
3548 * @rdev: radeon_device pointer
3549 * @fence: radeon fence object
3550 *
3551 * Emits a fence sequence number on the gfx ring and flushes
3552 * GPU caches.
3553 */
3554void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3555			     struct radeon_fence *fence)
3556{
3557	struct radeon_ring *ring = &rdev->ring[fence->ring];
3558	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3559
3560	/* Workaround for cache flush problems. First send a dummy EOP
3561	 * event down the pipe with a seq number one below the real one.
3562	 */
3563	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3564	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3565				 EOP_TC_ACTION_EN |
3566				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3567				 EVENT_INDEX(5)));
3568	radeon_ring_write(ring, addr & 0xfffffffc);
3569	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3570				DATA_SEL(1) | INT_SEL(0));
3571	radeon_ring_write(ring, fence->seq - 1);
3572	radeon_ring_write(ring, 0);
3573
3574	/* Then send the real EOP event down the pipe. */
3575	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3576	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3577				 EOP_TC_ACTION_EN |
3578				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3579				 EVENT_INDEX(5)));
3580	radeon_ring_write(ring, addr & 0xfffffffc);
3581	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3582	radeon_ring_write(ring, fence->seq);
3583	radeon_ring_write(ring, 0);
3584}
3585
3586/**
3587 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3588 *
3589 * @rdev: radeon_device pointer
3590 * @fence: radeon fence object
3591 *
3592 * Emits a fence sequence number on the compute ring and flushes
3593 * GPU caches.
3594 */
3595void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3596				 struct radeon_fence *fence)
3597{
3598	struct radeon_ring *ring = &rdev->ring[fence->ring];
3599	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3600
3601	/* RELEASE_MEM - flush caches, send int */
3602	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3603	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3604				 EOP_TC_ACTION_EN |
3605				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3606				 EVENT_INDEX(5)));
3607	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3608	radeon_ring_write(ring, addr & 0xfffffffc);
3609	radeon_ring_write(ring, upper_32_bits(addr));
3610	radeon_ring_write(ring, fence->seq);
3611	radeon_ring_write(ring, 0);
3612}
3613
3614/**
3615 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3616 *
3617 * @rdev: radeon_device pointer
3618 * @ring: radeon ring buffer object
3619 * @semaphore: radeon semaphore object
3620 * @emit_wait: Is this a semaphore wait?
3621 *
3622 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3623 * from running ahead of semaphore waits.
3624 */
3625bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3626			     struct radeon_ring *ring,
3627			     struct radeon_semaphore *semaphore,
3628			     bool emit_wait)
3629{
3630	uint64_t addr = semaphore->gpu_addr;
3631	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3632
3633	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3634	radeon_ring_write(ring, lower_32_bits(addr));
3635	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3636
3637	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3638		/* Prevent the PFP from running ahead of the semaphore wait */
3639		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3640		radeon_ring_write(ring, 0x0);
3641	}
3642
3643	return true;
3644}
3645
3646/**
3647 * cik_copy_cpdma - copy pages using the CP DMA engine
3648 *
3649 * @rdev: radeon_device pointer
3650 * @src_offset: src GPU address
3651 * @dst_offset: dst GPU address
3652 * @num_gpu_pages: number of GPU pages to xfer
3653 * @resv: reservation object to sync to
3654 *
3655 * Copy GPU pages using the CP DMA engine (CIK+).
3656 * Used by the radeon ttm implementation to move pages if
3657 * registered as the asic copy callback.
3658 */
3659struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3660				    uint64_t src_offset, uint64_t dst_offset,
3661				    unsigned num_gpu_pages,
3662				    struct dma_resv *resv)
3663{
3664	struct radeon_fence *fence;
3665	struct radeon_sync sync;
3666	int ring_index = rdev->asic->copy.blit_ring_index;
3667	struct radeon_ring *ring = &rdev->ring[ring_index];
3668	u32 size_in_bytes, cur_size_in_bytes, control;
3669	int i, num_loops;
3670	int r = 0;
3671
3672	radeon_sync_create(&sync);
3673
3674	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3675	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3676	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3677	if (r) {
3678		DRM_ERROR("radeon: moving bo (%d).\n", r);
3679		radeon_sync_free(rdev, &sync, NULL);
3680		return ERR_PTR(r);
3681	}
3682
3683	radeon_sync_resv(rdev, &sync, resv, false);
3684	radeon_sync_rings(rdev, &sync, ring->idx);
3685
3686	for (i = 0; i < num_loops; i++) {
3687		cur_size_in_bytes = size_in_bytes;
3688		if (cur_size_in_bytes > 0x1fffff)
3689			cur_size_in_bytes = 0x1fffff;
3690		size_in_bytes -= cur_size_in_bytes;
3691		control = 0;
3692		if (size_in_bytes == 0)
3693			control |= PACKET3_DMA_DATA_CP_SYNC;
3694		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3695		radeon_ring_write(ring, control);
3696		radeon_ring_write(ring, lower_32_bits(src_offset));
3697		radeon_ring_write(ring, upper_32_bits(src_offset));
3698		radeon_ring_write(ring, lower_32_bits(dst_offset));
3699		radeon_ring_write(ring, upper_32_bits(dst_offset));
3700		radeon_ring_write(ring, cur_size_in_bytes);
3701		src_offset += cur_size_in_bytes;
3702		dst_offset += cur_size_in_bytes;
3703	}
3704
3705	r = radeon_fence_emit(rdev, &fence, ring->idx);
3706	if (r) {
3707		radeon_ring_unlock_undo(rdev, ring);
3708		radeon_sync_free(rdev, &sync, NULL);
3709		return ERR_PTR(r);
3710	}
3711
3712	radeon_ring_unlock_commit(rdev, ring, false);
3713	radeon_sync_free(rdev, &sync, fence);
3714
3715	return fence;
3716}
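/*
 * Illustrative usage sketch (editor's example, not driver code):
 * copying four GPU pages and waiting for the returned fence.  The
 * src_gpu_addr/dst_gpu_addr names are hypothetical addresses.
 */
#if 0
	struct radeon_fence *f;

	f = cik_copy_cpdma(rdev, src_gpu_addr, dst_gpu_addr, 4, NULL);
	if (!IS_ERR(f)) {
		radeon_fence_wait(f, false);
		radeon_fence_unref(&f);
	}
#endif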
3717
3718/*
3719 * IB stuff
3720 */
3721/**
3722 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3723 *
3724 * @rdev: radeon_device pointer
3725 * @ib: radeon indirect buffer object
3726 *
3727 * Emits a DE (drawing engine) or CE (constant engine) IB
3728 * on the gfx ring.  IBs are usually generated by userspace
3729 * acceleration drivers and submitted to the kernel for
3730 * scheduling on the ring.  This function schedules the IB
3731 * on the gfx ring for execution by the GPU.
3732 */
3733void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3734{
3735	struct radeon_ring *ring = &rdev->ring[ib->ring];
3736	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3737	u32 header, control = INDIRECT_BUFFER_VALID;
3738
3739	if (ib->is_const_ib) {
3740		/* set switch buffer packet before const IB */
3741		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3742		radeon_ring_write(ring, 0);
3743
3744		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3745	} else {
3746		u32 next_rptr;
3747		if (ring->rptr_save_reg) {
3748			next_rptr = ring->wptr + 3 + 4;
3749			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3750			radeon_ring_write(ring, ((ring->rptr_save_reg -
3751						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3752			radeon_ring_write(ring, next_rptr);
3753		} else if (rdev->wb.enabled) {
3754			next_rptr = ring->wptr + 5 + 4;
3755			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3756			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3757			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3758			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3759			radeon_ring_write(ring, next_rptr);
3760		}
3761
3762		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3763	}
3764
3765	control |= ib->length_dw | (vm_id << 24);
3766
3767	radeon_ring_write(ring, header);
3768	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3769	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770	radeon_ring_write(ring, control);
3771}
3772
3773/**
3774 * cik_ib_test - basic gfx ring IB test
3775 *
3776 * @rdev: radeon_device pointer
3777 * @ring: radeon_ring structure holding ring information
3778 *
3779 * Allocate an IB and execute it on the gfx ring (CIK).
3780 * Provides a basic gfx ring test to verify that IBs are working.
3781 * Returns 0 on success, error on failure.
3782 */
3783int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784{
3785	struct radeon_ib ib;
3786	uint32_t scratch;
3787	uint32_t tmp = 0;
3788	unsigned i;
3789	int r;
3790
3791	r = radeon_scratch_get(rdev, &scratch);
3792	if (r) {
3793		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794		return r;
3795	}
3796	WREG32(scratch, 0xCAFEDEAD);
3797	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798	if (r) {
3799		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800		radeon_scratch_free(rdev, scratch);
3801		return r;
3802	}
3803	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805	ib.ptr[2] = 0xDEADBEEF;
3806	ib.length_dw = 3;
3807	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3808	if (r) {
3809		radeon_scratch_free(rdev, scratch);
3810		radeon_ib_free(rdev, &ib);
3811		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812		return r;
3813	}
3814	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3815		RADEON_USEC_IB_TEST_TIMEOUT));
3816	if (r < 0) {
3817		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3818		radeon_scratch_free(rdev, scratch);
3819		radeon_ib_free(rdev, &ib);
3820		return r;
3821	} else if (r == 0) {
3822		DRM_ERROR("radeon: fence wait timed out.\n");
3823		radeon_scratch_free(rdev, scratch);
3824		radeon_ib_free(rdev, &ib);
3825		return -ETIMEDOUT;
3826	}
3827	r = 0;
3828	for (i = 0; i < rdev->usec_timeout; i++) {
3829		tmp = RREG32(scratch);
3830		if (tmp == 0xDEADBEEF)
3831			break;
3832		udelay(1);
3833	}
3834	if (i < rdev->usec_timeout) {
3835		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3836	} else {
3837		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3838			  scratch, tmp);
3839		r = -EINVAL;
3840	}
3841	radeon_scratch_free(rdev, scratch);
3842	radeon_ib_free(rdev, &ib);
3843	return r;
3844}
3845
3846/*
3847 * CP.
3848 * On CIK, gfx and compute now have independent command processors.
3849 *
3850 * GFX
3851 * Gfx consists of a single ring and can process both gfx jobs and
3852 * compute jobs.  The gfx CP consists of three microengines (ME):
3853 * PFP - Pre-Fetch Parser
3854 * ME - Micro Engine
3855 * CE - Constant Engine
3856 * The PFP and ME make up what is considered the Drawing Engine (DE).
3857 * The CE is an asynchronous engine used for updating buffer descriptors
3858 * used by the DE so that they can be loaded into cache in parallel
3859 * while the DE is processing state update packets.
3860 *
3861 * Compute
3862 * The compute CP consists of two microengines (ME):
3863 * MEC1 - Compute MicroEngine 1
3864 * MEC2 - Compute MicroEngine 2
3865 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3866 * The queues are exposed to userspace and are programmed directly
3867 * by the compute runtime.
3868 */
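/*
 * Illustrative sketch (editor's example, not driver code): how a
 * linear pipe index maps to an (ME, pipe) pair, mirroring the loop
 * in cik_cp_compute_resume() below; MEC1 is ME 1 and owns pipes 0-3,
 * MEC2 (where present) is ME 2.
 */
#if 0
static void cik_pipe_index_to_me_pipe(int i, int *me, int *pipe)
{
	*me = (i < 4) ? 1 : 2;
	*pipe = (i < 4) ? i : (i - 4);
}
#endif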
3869/**
3870 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3871 *
3872 * @rdev: radeon_device pointer
3873 * @enable: enable or disable the MEs
3874 *
3875 * Halts or unhalts the gfx MEs.
3876 */
3877static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3878{
3879	if (enable)
3880		WREG32(CP_ME_CNTL, 0);
3881	else {
3882		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3883			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3884		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3885		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3886	}
3887	udelay(50);
3888}
3889
3890/**
3891 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3892 *
3893 * @rdev: radeon_device pointer
3894 *
3895 * Loads the gfx PFP, ME, and CE ucode.
3896 * Returns 0 for success, -EINVAL if the ucode is not available.
3897 */
3898static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3899{
3900	int i;
3901
3902	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3903		return -EINVAL;
3904
3905	cik_cp_gfx_enable(rdev, false);
3906
3907	if (rdev->new_fw) {
3908		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3909			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3910		const struct gfx_firmware_header_v1_0 *ce_hdr =
3911			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3912		const struct gfx_firmware_header_v1_0 *me_hdr =
3913			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3914		const __le32 *fw_data;
3915		u32 fw_size;
3916
3917		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3918		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3919		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3920
3921		/* PFP */
3922		fw_data = (const __le32 *)
3923			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3924		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3925		WREG32(CP_PFP_UCODE_ADDR, 0);
3926		for (i = 0; i < fw_size; i++)
3927			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3928		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3929
3930		/* CE */
3931		fw_data = (const __le32 *)
3932			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3933		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3934		WREG32(CP_CE_UCODE_ADDR, 0);
3935		for (i = 0; i < fw_size; i++)
3936			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3937		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3938
3939		/* ME */
3940		fw_data = (const __le32 *)
3941			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3942		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3943		WREG32(CP_ME_RAM_WADDR, 0);
3944		for (i = 0; i < fw_size; i++)
3945			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3946		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3947		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3948	} else {
3949		const __be32 *fw_data;
3950
3951		/* PFP */
3952		fw_data = (const __be32 *)rdev->pfp_fw->data;
3953		WREG32(CP_PFP_UCODE_ADDR, 0);
3954		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3955			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3956		WREG32(CP_PFP_UCODE_ADDR, 0);
3957
3958		/* CE */
3959		fw_data = (const __be32 *)rdev->ce_fw->data;
3960		WREG32(CP_CE_UCODE_ADDR, 0);
3961		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3962			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3963		WREG32(CP_CE_UCODE_ADDR, 0);
3964
3965		/* ME */
3966		fw_data = (const __be32 *)rdev->me_fw->data;
3967		WREG32(CP_ME_RAM_WADDR, 0);
3968		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3969			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3970		WREG32(CP_ME_RAM_WADDR, 0);
3971	}
3972
3973	return 0;
3974}
3975
3976/**
3977 * cik_cp_gfx_start - start the gfx ring
3978 *
3979 * @rdev: radeon_device pointer
3980 *
3981 * Enables the ring and loads the clear state context and other
3982 * packets required to init the ring.
3983 * Returns 0 for success, error for failure.
3984 */
3985static int cik_cp_gfx_start(struct radeon_device *rdev)
3986{
3987	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3988	int r, i;
3989
3990	/* init the CP */
3991	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3992	WREG32(CP_ENDIAN_SWAP, 0);
3993	WREG32(CP_DEVICE_ID, 1);
3994
3995	cik_cp_gfx_enable(rdev, true);
3996
3997	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3998	if (r) {
3999		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4000		return r;
4001	}
4002
4003	/* init the CE partitions.  CE only used for gfx on CIK */
4004	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4005	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4006	radeon_ring_write(ring, 0x8000);
4007	radeon_ring_write(ring, 0x8000);
4008
4009	/* setup clear context state */
4010	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4011	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4012
4013	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4014	radeon_ring_write(ring, 0x80000000);
4015	radeon_ring_write(ring, 0x80000000);
4016
4017	for (i = 0; i < cik_default_size; i++)
4018		radeon_ring_write(ring, cik_default_state[i]);
4019
4020	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4021	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4022
4023	/* set clear context state */
4024	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4025	radeon_ring_write(ring, 0);
4026
4027	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4028	radeon_ring_write(ring, 0x00000316);
4029	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4030	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4031
4032	radeon_ring_unlock_commit(rdev, ring, false);
4033
4034	return 0;
4035}
4036
4037/**
4038 * cik_cp_gfx_fini - stop the gfx ring
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Stop the gfx ring and tear down the driver ring
4043 * info.
4044 */
4045static void cik_cp_gfx_fini(struct radeon_device *rdev)
4046{
4047	cik_cp_gfx_enable(rdev, false);
4048	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4049}
4050
4051/**
4052 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4053 *
4054 * @rdev: radeon_device pointer
4055 *
4056 * Program the location and size of the gfx ring buffer
4057 * and test it to make sure it's working.
4058 * Returns 0 for success, error for failure.
4059 */
4060static int cik_cp_gfx_resume(struct radeon_device *rdev)
4061{
4062	struct radeon_ring *ring;
4063	u32 tmp;
4064	u32 rb_bufsz;
4065	u64 rb_addr;
4066	int r;
4067
4068	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4069	if (rdev->family != CHIP_HAWAII)
4070		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4071
4072	/* Set the write pointer delay */
4073	WREG32(CP_RB_WPTR_DELAY, 0);
4074
4075	/* set the RB to use vmid 0 */
4076	WREG32(CP_RB_VMID, 0);
4077
4078	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4079
4080	/* ring 0 - compute and gfx */
4081	/* Set ring buffer size */
4082	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4083	rb_bufsz = order_base_2(ring->ring_size / 8);
4084	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4085#ifdef __BIG_ENDIAN
4086	tmp |= BUF_SWAP_32BIT;
4087#endif
4088	WREG32(CP_RB0_CNTL, tmp);
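	/* For example, a 1 MB ring above gives
	 * rb_bufsz = order_base_2(1048576 / 8) = 17.
	 */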
4089
4090	/* Initialize the ring buffer's read and write pointers */
4091	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4092	ring->wptr = 0;
4093	WREG32(CP_RB0_WPTR, ring->wptr);
4094
4095	/* set the wb address whether it's enabled or not */
4096	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4097	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4098
4099	/* scratch register shadowing is no longer supported */
4100	WREG32(SCRATCH_UMSK, 0);
4101
4102	if (!rdev->wb.enabled)
4103		tmp |= RB_NO_UPDATE;
4104
4105	mdelay(1);
4106	WREG32(CP_RB0_CNTL, tmp);
4107
4108	rb_addr = ring->gpu_addr >> 8;
4109	WREG32(CP_RB0_BASE, rb_addr);
4110	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4111
4112	/* start the ring */
4113	cik_cp_gfx_start(rdev);
4114	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4115	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4116	if (r) {
4117		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4118		return r;
4119	}
4120
4121	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4122		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4123
4124	return 0;
4125}
4126
4127u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4128		     struct radeon_ring *ring)
4129{
4130	u32 rptr;
4131
4132	if (rdev->wb.enabled)
4133		rptr = rdev->wb.wb[ring->rptr_offs/4];
4134	else
4135		rptr = RREG32(CP_RB0_RPTR);
4136
4137	return rptr;
4138}
4139
4140u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4141		     struct radeon_ring *ring)
4142{
4143	return RREG32(CP_RB0_WPTR);
4144}
4145
4146void cik_gfx_set_wptr(struct radeon_device *rdev,
4147		      struct radeon_ring *ring)
4148{
4149	WREG32(CP_RB0_WPTR, ring->wptr);
4150	(void)RREG32(CP_RB0_WPTR);
4151}
4152
4153u32 cik_compute_get_rptr(struct radeon_device *rdev,
4154			 struct radeon_ring *ring)
4155{
4156	u32 rptr;
4157
4158	if (rdev->wb.enabled) {
4159		rptr = rdev->wb.wb[ring->rptr_offs/4];
4160	} else {
4161		mutex_lock(&rdev->srbm_mutex);
4162		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4163		rptr = RREG32(CP_HQD_PQ_RPTR);
4164		cik_srbm_select(rdev, 0, 0, 0, 0);
4165		mutex_unlock(&rdev->srbm_mutex);
4166	}
4167
4168	return rptr;
4169}
4170
4171u32 cik_compute_get_wptr(struct radeon_device *rdev,
4172			 struct radeon_ring *ring)
4173{
4174	u32 wptr;
4175
4176	if (rdev->wb.enabled) {
4177		/* XXX check if swapping is necessary on BE */
4178		wptr = rdev->wb.wb[ring->wptr_offs/4];
4179	} else {
4180		mutex_lock(&rdev->srbm_mutex);
4181		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4182		wptr = RREG32(CP_HQD_PQ_WPTR);
4183		cik_srbm_select(rdev, 0, 0, 0, 0);
4184		mutex_unlock(&rdev->srbm_mutex);
4185	}
4186
4187	return wptr;
4188}
4189
4190void cik_compute_set_wptr(struct radeon_device *rdev,
4191			  struct radeon_ring *ring)
4192{
4193	/* XXX check if swapping is necessary on BE */
4194	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4195	WDOORBELL32(ring->doorbell_index, ring->wptr);
4196}
4197
4198static void cik_compute_stop(struct radeon_device *rdev,
4199			     struct radeon_ring *ring)
4200{
4201	u32 j, tmp;
4202
4203	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4204	/* Disable wptr polling. */
4205	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4206	tmp &= ~WPTR_POLL_EN;
4207	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4208	/* Disable HQD. */
4209	if (RREG32(CP_HQD_ACTIVE) & 1) {
4210		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4211		for (j = 0; j < rdev->usec_timeout; j++) {
4212			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4213				break;
4214			udelay(1);
4215		}
4216		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4217		WREG32(CP_HQD_PQ_RPTR, 0);
4218		WREG32(CP_HQD_PQ_WPTR, 0);
4219	}
4220	cik_srbm_select(rdev, 0, 0, 0, 0);
4221}
4222
4223/**
4224 * cik_cp_compute_enable - enable/disable the compute CP MEs
4225 *
4226 * @rdev: radeon_device pointer
4227 * @enable: enable or disable the MEs
4228 *
4229 * Halts or unhalts the compute MEs.
4230 */
4231static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232{
4233	if (enable)
4234		WREG32(CP_MEC_CNTL, 0);
4235	else {
4236		/*
4237		 * To make hibernation reliable we need to clear compute ring
4238		 * configuration before halting the compute ring.
4239		 */
4240		mutex_lock(&rdev->srbm_mutex);
4241		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4242		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4243		mutex_unlock(&rdev->srbm_mutex);
4244
4245		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4246		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4247		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4248	}
4249	udelay(50);
4250}
4251
4252/**
4253 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4254 *
4255 * @rdev: radeon_device pointer
4256 *
4257 * Loads the compute MEC1&2 ucode.
4258 * Returns 0 for success, -EINVAL if the ucode is not available.
4259 */
4260static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4261{
4262	int i;
4263
4264	if (!rdev->mec_fw)
4265		return -EINVAL;
4266
4267	cik_cp_compute_enable(rdev, false);
4268
4269	if (rdev->new_fw) {
4270		const struct gfx_firmware_header_v1_0 *mec_hdr =
4271			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4272		const __le32 *fw_data;
4273		u32 fw_size;
4274
4275		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4276
4277		/* MEC1 */
4278		fw_data = (const __le32 *)
4279			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4280		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4281		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4282		for (i = 0; i < fw_size; i++)
4283			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4284		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4285
4286		/* MEC2 */
4287		if (rdev->family == CHIP_KAVERI) {
4288			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4289				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4290
4291			fw_data = (const __le32 *)
4292				(rdev->mec2_fw->data +
4293				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4294			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4295			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4296			for (i = 0; i < fw_size; i++)
4297				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4298			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4299		}
4300	} else {
4301		const __be32 *fw_data;
4302
4303		/* MEC1 */
4304		fw_data = (const __be32 *)rdev->mec_fw->data;
4305		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4306		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4307			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4308		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4309
4310		if (rdev->family == CHIP_KAVERI) {
4311			/* MEC2 */
4312			fw_data = (const __be32 *)rdev->mec_fw->data;
4313			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4314			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4315				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4316			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4317		}
4318	}
4319
4320	return 0;
4321}
4322
4323/**
4324 * cik_cp_compute_start - start the compute queues
4325 *
4326 * @rdev: radeon_device pointer
4327 *
4328 * Enable the compute queues.
4329 * Returns 0 for success, error for failure.
4330 */
4331static int cik_cp_compute_start(struct radeon_device *rdev)
4332{
4333	cik_cp_compute_enable(rdev, true);
4334
4335	return 0;
4336}
4337
4338/**
4339 * cik_cp_compute_fini - stop the compute queues
4340 *
4341 * @rdev: radeon_device pointer
4342 *
4343 * Stop the compute queues and tear down the driver queue
4344 * info.
4345 */
4346static void cik_cp_compute_fini(struct radeon_device *rdev)
4347{
4348	int i, idx, r;
4349
4350	cik_cp_compute_enable(rdev, false);
4351
4352	for (i = 0; i < 2; i++) {
4353		if (i == 0)
4354			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4355		else
4356			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4357
4358		if (rdev->ring[idx].mqd_obj) {
4359			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4360			if (unlikely(r != 0))
4361				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4362
4363			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4364			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4365
4366			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4367			rdev->ring[idx].mqd_obj = NULL;
4368		}
4369	}
4370}
4371
4372static void cik_mec_fini(struct radeon_device *rdev)
4373{
4374	int r;
4375
4376	if (rdev->mec.hpd_eop_obj) {
4377		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4378		if (unlikely(r != 0))
4379			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4380		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4381		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4382
4383		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4384		rdev->mec.hpd_eop_obj = NULL;
4385	}
4386}
4387
4388#define MEC_HPD_SIZE 2048
4389
4390static int cik_mec_init(struct radeon_device *rdev)
4391{
4392	int r;
4393	u32 *hpd;
4394
4395	/*
4396	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4397	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4398	 */
4399	if (rdev->family == CHIP_KAVERI)
4400		rdev->mec.num_mec = 2;
4401	else
4402		rdev->mec.num_mec = 1;
4403	rdev->mec.num_pipe = 4;
4404	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4405
4406	if (rdev->mec.hpd_eop_obj == NULL) {
4407		r = radeon_bo_create(rdev,
4408				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4409				     PAGE_SIZE, true,
4410				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4411				     &rdev->mec.hpd_eop_obj);
4412		if (r) {
4413			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4414			return r;
4415		}
4416	}
4417
4418	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4419	if (unlikely(r != 0)) {
4420		cik_mec_fini(rdev);
4421		return r;
4422	}
4423	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4424			  &rdev->mec.hpd_eop_gpu_addr);
4425	if (r) {
4426		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4427		cik_mec_fini(rdev);
4428		return r;
4429	}
4430	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4431	if (r) {
4432		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4433		cik_mec_fini(rdev);
4434		return r;
4435	}
4436
4437	/* clear memory.  Not sure if this is required or not */
4438	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4439
4440	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4441	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4442
4443	return 0;
4444}
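/*
 * Worked example (illustrative): on Kaveri the sizing above gives
 * 2 MECs * 4 pipes * 8 queues = 64 queues, and the HPD EOP buffer
 * spans 2 * 4 * 2048 * 2 = 32 KB (MEC_HPD_SIZE * 2 bytes per pipe
 * across both MECs).
 */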
4445
4446struct hqd_registers
4447{
4448	u32 cp_mqd_base_addr;
4449	u32 cp_mqd_base_addr_hi;
4450	u32 cp_hqd_active;
4451	u32 cp_hqd_vmid;
4452	u32 cp_hqd_persistent_state;
4453	u32 cp_hqd_pipe_priority;
4454	u32 cp_hqd_queue_priority;
4455	u32 cp_hqd_quantum;
4456	u32 cp_hqd_pq_base;
4457	u32 cp_hqd_pq_base_hi;
4458	u32 cp_hqd_pq_rptr;
4459	u32 cp_hqd_pq_rptr_report_addr;
4460	u32 cp_hqd_pq_rptr_report_addr_hi;
4461	u32 cp_hqd_pq_wptr_poll_addr;
4462	u32 cp_hqd_pq_wptr_poll_addr_hi;
4463	u32 cp_hqd_pq_doorbell_control;
4464	u32 cp_hqd_pq_wptr;
4465	u32 cp_hqd_pq_control;
4466	u32 cp_hqd_ib_base_addr;
4467	u32 cp_hqd_ib_base_addr_hi;
4468	u32 cp_hqd_ib_rptr;
4469	u32 cp_hqd_ib_control;
4470	u32 cp_hqd_iq_timer;
4471	u32 cp_hqd_iq_rptr;
4472	u32 cp_hqd_dequeue_request;
4473	u32 cp_hqd_dma_offload;
4474	u32 cp_hqd_sema_cmd;
4475	u32 cp_hqd_msg_type;
4476	u32 cp_hqd_atomic0_preop_lo;
4477	u32 cp_hqd_atomic0_preop_hi;
4478	u32 cp_hqd_atomic1_preop_lo;
4479	u32 cp_hqd_atomic1_preop_hi;
4480	u32 cp_hqd_hq_scheduler0;
4481	u32 cp_hqd_hq_scheduler1;
4482	u32 cp_mqd_control;
4483};
4484
4485struct bonaire_mqd
4486{
4487	u32 header;
4488	u32 dispatch_initiator;
4489	u32 dimensions[3];
4490	u32 start_idx[3];
4491	u32 num_threads[3];
4492	u32 pipeline_stat_enable;
4493	u32 perf_counter_enable;
4494	u32 pgm[2];
4495	u32 tba[2];
4496	u32 tma[2];
4497	u32 pgm_rsrc[2];
4498	u32 vmid;
4499	u32 resource_limits;
4500	u32 static_thread_mgmt01[2];
4501	u32 tmp_ring_size;
4502	u32 static_thread_mgmt23[2];
4503	u32 restart[3];
4504	u32 thread_trace_enable;
4505	u32 reserved1;
4506	u32 user_data[16];
4507	u32 vgtcs_invoke_count[2];
4508	struct hqd_registers queue_state;
4509	u32 dequeue_cntr;
4510	u32 interrupt_queue[64];
4511};
4512
4513/**
4514 * cik_cp_compute_resume - setup the compute queue registers
4515 *
4516 * @rdev: radeon_device pointer
4517 *
4518 * Program the compute queues and test them to make sure they
4519 * are working.
4520 * Returns 0 for success, error for failure.
4521 */
4522static int cik_cp_compute_resume(struct radeon_device *rdev)
4523{
4524	int r, i, j, idx;
4525	u32 tmp;
4526	bool use_doorbell = true;
4527	u64 hqd_gpu_addr;
4528	u64 mqd_gpu_addr;
4529	u64 eop_gpu_addr;
4530	u64 wb_gpu_addr;
4531	u32 *buf;
4532	struct bonaire_mqd *mqd;
4533
4534	r = cik_cp_compute_start(rdev);
4535	if (r)
4536		return r;
4537
4538	/* fix up chicken bits */
4539	tmp = RREG32(CP_CPF_DEBUG);
4540	tmp |= (1 << 23);
4541	WREG32(CP_CPF_DEBUG, tmp);
4542
4543	/* init the pipes */
4544	mutex_lock(&rdev->srbm_mutex);
4545
4546	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4547		int me = (i < 4) ? 1 : 2;
4548		int pipe = (i < 4) ? i : (i - 4);
4549
4550		cik_srbm_select(rdev, me, pipe, 0, 0);
4551
4552		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4553		/* write the EOP addr */
4554		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4555		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4556
4557		/* set the VMID assigned */
4558		WREG32(CP_HPD_EOP_VMID, 0);
4559
4560		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4561		tmp = RREG32(CP_HPD_EOP_CONTROL);
4562		tmp &= ~EOP_SIZE_MASK;
4563		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4564		WREG32(CP_HPD_EOP_CONTROL, tmp);
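		/* e.g. MEC_HPD_SIZE = 2048 bytes is 512 dwords:
		 * order_base_2(2048 / 8) = 8 and 2^(8+1) = 512,
		 * matching the encoding described above.
		 */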
4565
4566	}
4567	cik_srbm_select(rdev, 0, 0, 0, 0);
4568	mutex_unlock(&rdev->srbm_mutex);
4569
4570	/* init the queues.  Just two for now. */
4571	for (i = 0; i < 2; i++) {
4572		if (i == 0)
4573			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4574		else
4575			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4576
4577		if (rdev->ring[idx].mqd_obj == NULL) {
4578			r = radeon_bo_create(rdev,
4579					     sizeof(struct bonaire_mqd),
4580					     PAGE_SIZE, true,
4581					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4582					     NULL, &rdev->ring[idx].mqd_obj);
4583			if (r) {
4584				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4585				return r;
4586			}
4587		}
4588
4589		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4590		if (unlikely(r != 0)) {
4591			cik_cp_compute_fini(rdev);
4592			return r;
4593		}
4594		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4595				  &mqd_gpu_addr);
4596		if (r) {
4597			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4598			cik_cp_compute_fini(rdev);
4599			return r;
4600		}
4601		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4602		if (r) {
4603			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4604			cik_cp_compute_fini(rdev);
4605			return r;
4606		}
4607
4608		/* init the mqd struct */
4609		memset(buf, 0, sizeof(struct bonaire_mqd));
4610
4611		mqd = (struct bonaire_mqd *)buf;
4612		mqd->header = 0xC0310800;
4613		mqd->static_thread_mgmt01[0] = 0xffffffff;
4614		mqd->static_thread_mgmt01[1] = 0xffffffff;
4615		mqd->static_thread_mgmt23[0] = 0xffffffff;
4616		mqd->static_thread_mgmt23[1] = 0xffffffff;
4617
4618		mutex_lock(&rdev->srbm_mutex);
4619		cik_srbm_select(rdev, rdev->ring[idx].me,
4620				rdev->ring[idx].pipe,
4621				rdev->ring[idx].queue, 0);
4622
4623		/* disable wptr polling */
4624		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4625		tmp &= ~WPTR_POLL_EN;
4626		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4627
4628		/* enable doorbell? */
4629		mqd->queue_state.cp_hqd_pq_doorbell_control =
4630			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4631		if (use_doorbell)
4632			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4633		else
4634			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4635		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4636		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4637
4638		/* disable the queue if it's active */
4639		mqd->queue_state.cp_hqd_dequeue_request = 0;
4640		mqd->queue_state.cp_hqd_pq_rptr = 0;
4641		mqd->queue_state.cp_hqd_pq_wptr = 0;
4642		if (RREG32(CP_HQD_ACTIVE) & 1) {
4643			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4644			for (j = 0; j < rdev->usec_timeout; j++) {
4645				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4646					break;
4647				udelay(1);
4648			}
4649			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4650			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4651			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4652		}
4653
4654		/* set the pointer to the MQD */
4655		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4656		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4657		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4658		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4659		/* set MQD vmid to 0 */
4660		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4661		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4662		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4663
4664		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4665		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4666		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4667		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4669		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4670
4671		/* set up the HQD, this is similar to CP_RB0_CNTL */
4672		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4673		mqd->queue_state.cp_hqd_pq_control &=
4674			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4675
4676		mqd->queue_state.cp_hqd_pq_control |=
4677			order_base_2(rdev->ring[idx].ring_size / 8);
4678		mqd->queue_state.cp_hqd_pq_control |=
4679			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4680#ifdef __BIG_ENDIAN
4681		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4682#endif
4683		mqd->queue_state.cp_hqd_pq_control &=
4684			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4685		mqd->queue_state.cp_hqd_pq_control |=
4686			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4687		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4688
4689		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4690		if (i == 0)
4691			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4692		else
4693			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4694		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4695		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4696		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4697		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4698		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4699
4700		/* set the wb address whether it's enabled or not */
4701		if (i == 0)
4702			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4703		else
4704			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4705		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4706		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4707			upper_32_bits(wb_gpu_addr) & 0xffff;
4708		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4709		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4710		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4711		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4712
4713		/* enable the doorbell if requested */
4714		if (use_doorbell) {
4715			mqd->queue_state.cp_hqd_pq_doorbell_control =
4716				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4717			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4718			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4719				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4720			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4721			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4722				~(DOORBELL_SOURCE | DOORBELL_HIT);
4723
4724		} else {
4725			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4726		}
4727		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4728		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4729
4730		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4731		rdev->ring[idx].wptr = 0;
4732		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4733		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4734		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4735
4736		/* set the vmid for the queue */
4737		mqd->queue_state.cp_hqd_vmid = 0;
4738		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4739
4740		/* activate the queue */
4741		mqd->queue_state.cp_hqd_active = 1;
4742		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4743
4744		cik_srbm_select(rdev, 0, 0, 0, 0);
4745		mutex_unlock(&rdev->srbm_mutex);
4746
4747		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4748		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749
4750		rdev->ring[idx].ready = true;
4751		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4752		if (r)
4753			rdev->ring[idx].ready = false;
4754	}
4755
4756	return 0;
4757}
4758
4759static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4760{
4761	cik_cp_gfx_enable(rdev, enable);
4762	cik_cp_compute_enable(rdev, enable);
4763}
4764
4765static int cik_cp_load_microcode(struct radeon_device *rdev)
4766{
4767	int r;
4768
4769	r = cik_cp_gfx_load_microcode(rdev);
4770	if (r)
4771		return r;
4772	r = cik_cp_compute_load_microcode(rdev);
4773	if (r)
4774		return r;
4775
4776	return 0;
4777}
4778
4779static void cik_cp_fini(struct radeon_device *rdev)
4780{
4781	cik_cp_gfx_fini(rdev);
4782	cik_cp_compute_fini(rdev);
4783}
4784
4785static int cik_cp_resume(struct radeon_device *rdev)
4786{
4787	int r;
4788
4789	cik_enable_gui_idle_interrupt(rdev, false);
4790
4791	r = cik_cp_load_microcode(rdev);
4792	if (r)
4793		return r;
4794
4795	r = cik_cp_gfx_resume(rdev);
4796	if (r)
4797		return r;
4798	r = cik_cp_compute_resume(rdev);
4799	if (r)
4800		return r;
4801
4802	cik_enable_gui_idle_interrupt(rdev, true);
4803
4804	return 0;
4805}
4806
4807static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4808{
4809	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4810		RREG32(GRBM_STATUS));
4811	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4812		RREG32(GRBM_STATUS2));
4813	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4814		RREG32(GRBM_STATUS_SE0));
4815	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4816		RREG32(GRBM_STATUS_SE1));
4817	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4818		RREG32(GRBM_STATUS_SE2));
4819	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4820		RREG32(GRBM_STATUS_SE3));
4821	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4822		RREG32(SRBM_STATUS));
4823	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4824		RREG32(SRBM_STATUS2));
4825	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4826		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4827	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4828		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4829	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4830	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4831		 RREG32(CP_STALLED_STAT1));
4832	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4833		 RREG32(CP_STALLED_STAT2));
4834	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4835		 RREG32(CP_STALLED_STAT3));
4836	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4837		 RREG32(CP_CPF_BUSY_STAT));
4838	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4839		 RREG32(CP_CPF_STALLED_STAT1));
4840	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4841	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4842	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4843		 RREG32(CP_CPC_STALLED_STAT1));
4844	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4845}
4846
4847/**
4848 * cik_gpu_check_soft_reset - check which blocks are busy
4849 *
4850 * @rdev: radeon_device pointer
4851 *
4852 * Check which blocks are busy and return the relevant reset
4853 * mask to be used by cik_gpu_soft_reset().
4854 * Returns a mask of the blocks to be reset.
4855 */
4856u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4857{
4858	u32 reset_mask = 0;
4859	u32 tmp;
4860
4861	/* GRBM_STATUS */
4862	tmp = RREG32(GRBM_STATUS);
4863	if (tmp & (PA_BUSY | SC_BUSY |
4864		   BCI_BUSY | SX_BUSY |
4865		   TA_BUSY | VGT_BUSY |
4866		   DB_BUSY | CB_BUSY |
4867		   GDS_BUSY | SPI_BUSY |
4868		   IA_BUSY | IA_BUSY_NO_DMA))
4869		reset_mask |= RADEON_RESET_GFX;
4870
4871	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4872		reset_mask |= RADEON_RESET_CP;
4873
4874	/* GRBM_STATUS2 */
4875	tmp = RREG32(GRBM_STATUS2);
4876	if (tmp & RLC_BUSY)
4877		reset_mask |= RADEON_RESET_RLC;
4878
4879	/* SDMA0_STATUS_REG */
4880	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4881	if (!(tmp & SDMA_IDLE))
4882		reset_mask |= RADEON_RESET_DMA;
4883
4884	/* SDMA1_STATUS_REG */
4885	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4886	if (!(tmp & SDMA_IDLE))
4887		reset_mask |= RADEON_RESET_DMA1;
4888
4889	/* SRBM_STATUS2 */
4890	tmp = RREG32(SRBM_STATUS2);
4891	if (tmp & SDMA_BUSY)
4892		reset_mask |= RADEON_RESET_DMA;
4893
4894	if (tmp & SDMA1_BUSY)
4895		reset_mask |= RADEON_RESET_DMA1;
4896
4897	/* SRBM_STATUS */
4898	tmp = RREG32(SRBM_STATUS);
4899
4900	if (tmp & IH_BUSY)
4901		reset_mask |= RADEON_RESET_IH;
4902
4903	if (tmp & SEM_BUSY)
4904		reset_mask |= RADEON_RESET_SEM;
4905
4906	if (tmp & GRBM_RQ_PENDING)
4907		reset_mask |= RADEON_RESET_GRBM;
4908
4909	if (tmp & VMC_BUSY)
4910		reset_mask |= RADEON_RESET_VMC;
4911
4912	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4913		   MCC_BUSY | MCD_BUSY))
4914		reset_mask |= RADEON_RESET_MC;
4915
4916	if (evergreen_is_display_hung(rdev))
4917		reset_mask |= RADEON_RESET_DISPLAY;
4918
4919	/* Skip MC reset as it's most likely not hung, just busy */
4920	if (reset_mask & RADEON_RESET_MC) {
4921		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4922		reset_mask &= ~RADEON_RESET_MC;
4923	}
4924
4925	return reset_mask;
4926}
4927
4928/**
4929 * cik_gpu_soft_reset - soft reset GPU
4930 *
4931 * @rdev: radeon_device pointer
4932 * @reset_mask: mask of which blocks to reset
4933 *
4934 * Soft reset the blocks specified in @reset_mask.
4935 */
4936static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4937{
4938	struct evergreen_mc_save save;
4939	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4940	u32 tmp;
4941
4942	if (reset_mask == 0)
4943		return;
4944
4945	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4946
4947	cik_print_gpu_status_regs(rdev);
4948	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4949		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4950	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4951		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4952
4953	/* disable CG/PG */
4954	cik_fini_pg(rdev);
4955	cik_fini_cg(rdev);
4956
4957	/* stop the rlc */
4958	cik_rlc_stop(rdev);
4959
4960	/* Disable GFX parsing/prefetching */
4961	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4962
4963	/* Disable MEC parsing/prefetching */
4964	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4965
4966	if (reset_mask & RADEON_RESET_DMA) {
4967		/* sdma0 */
4968		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4969		tmp |= SDMA_HALT;
4970		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4971	}
4972	if (reset_mask & RADEON_RESET_DMA1) {
4973		/* sdma1 */
4974		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4975		tmp |= SDMA_HALT;
4976		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4977	}
4978
4979	evergreen_mc_stop(rdev, &save);
4980	if (evergreen_mc_wait_for_idle(rdev)) {
4981		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4982	}
4983
4984	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4985		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4986
4987	if (reset_mask & RADEON_RESET_CP) {
4988		grbm_soft_reset |= SOFT_RESET_CP;
4989
4990		srbm_soft_reset |= SOFT_RESET_GRBM;
4991	}
4992
4993	if (reset_mask & RADEON_RESET_DMA)
4994		srbm_soft_reset |= SOFT_RESET_SDMA;
4995
4996	if (reset_mask & RADEON_RESET_DMA1)
4997		srbm_soft_reset |= SOFT_RESET_SDMA1;
4998
4999	if (reset_mask & RADEON_RESET_DISPLAY)
5000		srbm_soft_reset |= SOFT_RESET_DC;
5001
5002	if (reset_mask & RADEON_RESET_RLC)
5003		grbm_soft_reset |= SOFT_RESET_RLC;
5004
5005	if (reset_mask & RADEON_RESET_SEM)
5006		srbm_soft_reset |= SOFT_RESET_SEM;
5007
5008	if (reset_mask & RADEON_RESET_IH)
5009		srbm_soft_reset |= SOFT_RESET_IH;
5010
5011	if (reset_mask & RADEON_RESET_GRBM)
5012		srbm_soft_reset |= SOFT_RESET_GRBM;
5013
5014	if (reset_mask & RADEON_RESET_VMC)
5015		srbm_soft_reset |= SOFT_RESET_VMC;
5016
5017	if (!(rdev->flags & RADEON_IS_IGP)) {
5018		if (reset_mask & RADEON_RESET_MC)
5019			srbm_soft_reset |= SOFT_RESET_MC;
5020	}
5021
5022	if (grbm_soft_reset) {
5023		tmp = RREG32(GRBM_SOFT_RESET);
5024		tmp |= grbm_soft_reset;
5025		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5026		WREG32(GRBM_SOFT_RESET, tmp);
5027		tmp = RREG32(GRBM_SOFT_RESET);
5028
5029		udelay(50);
5030
5031		tmp &= ~grbm_soft_reset;
5032		WREG32(GRBM_SOFT_RESET, tmp);
5033		tmp = RREG32(GRBM_SOFT_RESET);
5034	}
5035
5036	if (srbm_soft_reset) {
5037		tmp = RREG32(SRBM_SOFT_RESET);
5038		tmp |= srbm_soft_reset;
5039		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5040		WREG32(SRBM_SOFT_RESET, tmp);
5041		tmp = RREG32(SRBM_SOFT_RESET);
5042
5043		udelay(50);
5044
5045		tmp &= ~srbm_soft_reset;
5046		WREG32(SRBM_SOFT_RESET, tmp);
5047		tmp = RREG32(SRBM_SOFT_RESET);
5048	}
5049
5050	/* Wait a little for things to settle down */
5051	udelay(50);
5052
5053	evergreen_mc_resume(rdev, &save);
5054	udelay(50);
5055
5056	cik_print_gpu_status_regs(rdev);
5057}
5058
5059struct kv_reset_save_regs {
5060	u32 gmcon_reng_execute;
5061	u32 gmcon_misc;
5062	u32 gmcon_misc3;
5063};
5064
5065static void kv_save_regs_for_reset(struct radeon_device *rdev,
5066				   struct kv_reset_save_regs *save)
5067{
5068	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5069	save->gmcon_misc = RREG32(GMCON_MISC);
5070	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5071
5072	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5073	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5074						STCTRL_STUTTER_EN));
5075}
5076
5077static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5078				      struct kv_reset_save_regs *save)
5079{
5080	int i;
5081
5082	WREG32(GMCON_PGFSM_WRITE, 0);
5083	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5084
5085	for (i = 0; i < 5; i++)
5086		WREG32(GMCON_PGFSM_WRITE, 0);
5087
5088	WREG32(GMCON_PGFSM_WRITE, 0);
5089	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5090
5091	for (i = 0; i < 5; i++)
5092		WREG32(GMCON_PGFSM_WRITE, 0);
5093
5094	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5095	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5096
5097	for (i = 0; i < 5; i++)
5098		WREG32(GMCON_PGFSM_WRITE, 0);
5099
5100	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5101	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5102
5103	for (i = 0; i < 5; i++)
5104		WREG32(GMCON_PGFSM_WRITE, 0);
5105
5106	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5107	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5108
5109	for (i = 0; i < 5; i++)
5110		WREG32(GMCON_PGFSM_WRITE, 0);
5111
5112	WREG32(GMCON_PGFSM_WRITE, 0);
5113	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5114
5115	for (i = 0; i < 5; i++)
5116		WREG32(GMCON_PGFSM_WRITE, 0);
5117
5118	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5119	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5120
5121	for (i = 0; i < 5; i++)
5122		WREG32(GMCON_PGFSM_WRITE, 0);
5123
5124	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5125	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5126
5127	for (i = 0; i < 5; i++)
5128		WREG32(GMCON_PGFSM_WRITE, 0);
5129
5130	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5131	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5132
5133	for (i = 0; i < 5; i++)
5134		WREG32(GMCON_PGFSM_WRITE, 0);
5135
5136	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5137	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5138
5139	for (i = 0; i < 5; i++)
5140		WREG32(GMCON_PGFSM_WRITE, 0);
5141
5142	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5143	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5144
5145	WREG32(GMCON_MISC3, save->gmcon_misc3);
5146	WREG32(GMCON_MISC, save->gmcon_misc);
5147	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5148}
5149
5150static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5151{
5152	struct evergreen_mc_save save;
5153	struct kv_reset_save_regs kv_save = { 0 };
5154	u32 tmp, i;
5155
5156	dev_info(rdev->dev, "GPU pci config reset\n");
5157
5158	/* disable dpm? */
5159
5160	/* disable cg/pg */
5161	cik_fini_pg(rdev);
5162	cik_fini_cg(rdev);
5163
5164	/* Disable GFX parsing/prefetching */
5165	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5166
5167	/* Disable MEC parsing/prefetching */
5168	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5169
5170	/* sdma0 */
5171	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5172	tmp |= SDMA_HALT;
5173	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5174	/* sdma1 */
5175	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5176	tmp |= SDMA_HALT;
5177	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5178	/* XXX other engines? */
5179
5180	/* halt the rlc, disable cp internal ints */
5181	cik_rlc_stop(rdev);
5182
5183	udelay(50);
5184
5185	/* disable mem access */
5186	evergreen_mc_stop(rdev, &save);
5187	if (evergreen_mc_wait_for_idle(rdev)) {
5188		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5189	}
5190
5191	if (rdev->flags & RADEON_IS_IGP)
5192		kv_save_regs_for_reset(rdev, &kv_save);
5193
5194	/* disable BM */
5195	pci_clear_master(rdev->pdev);
5196	/* reset */
5197	radeon_pci_config_reset(rdev);
5198
5199	udelay(100);
5200
5201	/* wait for asic to come out of reset */
5202	for (i = 0; i < rdev->usec_timeout; i++) {
5203		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5204			break;
5205		udelay(1);
5206	}
5207
5208	/* does asic init need to be run first??? */
5209	if (rdev->flags & RADEON_IS_IGP)
5210		kv_restore_regs_for_reset(rdev, &kv_save);
5211}
5212
5213/**
5214 * cik_asic_reset - soft reset GPU
5215 *
5216 * @rdev: radeon_device pointer
5217 * @hard: force hard reset
5218 *
5219 * Look up which blocks are hung and attempt
5220 * to reset them.
5221 * Returns 0 for success.
5222 */
5223int cik_asic_reset(struct radeon_device *rdev, bool hard)
5224{
5225	u32 reset_mask;
5226
5227	if (hard) {
5228		cik_gpu_pci_config_reset(rdev);
5229		return 0;
5230	}
5231
5232	reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234	if (reset_mask)
5235		r600_set_bios_scratch_engine_hung(rdev, true);
5236
5237	/* try soft reset */
5238	cik_gpu_soft_reset(rdev, reset_mask);
5239
5240	reset_mask = cik_gpu_check_soft_reset(rdev);
5241
5242	/* try pci config reset */
5243	if (reset_mask && radeon_hard_reset)
5244		cik_gpu_pci_config_reset(rdev);
5245
5246	reset_mask = cik_gpu_check_soft_reset(rdev);
5247
5248	if (!reset_mask)
5249		r600_set_bios_scratch_engine_hung(rdev, false);
5250
5251	return 0;
5252}
5253
5254/**
5255 * cik_gfx_is_lockup - check if the 3D engine is locked up
5256 *
5257 * @rdev: radeon_device pointer
5258 * @ring: radeon_ring structure holding ring information
5259 *
5260 * Check if the 3D engine is locked up (CIK).
5261 * Returns true if the engine is locked, false if not.
5262 */
5263bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5264{
5265	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5266
5267	if (!(reset_mask & (RADEON_RESET_GFX |
5268			    RADEON_RESET_COMPUTE |
5269			    RADEON_RESET_CP))) {
5270		radeon_ring_lockup_update(rdev, ring);
5271		return false;
5272	}
5273	return radeon_ring_test_lockup(rdev, ring);
5274}
5275
5276/* MC */
5277/**
5278 * cik_mc_program - program the GPU memory controller
5279 *
5280 * @rdev: radeon_device pointer
5281 *
5282 * Set the location of vram, gart, and AGP in the GPU's
5283 * physical address space (CIK).
5284 */
5285static void cik_mc_program(struct radeon_device *rdev)
5286{
5287	struct evergreen_mc_save save;
5288	u32 tmp;
5289	int i, j;
5290
5291	/* Initialize HDP */
5292	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5293		WREG32((0x2c14 + j), 0x00000000);
5294		WREG32((0x2c18 + j), 0x00000000);
5295		WREG32((0x2c1c + j), 0x00000000);
5296		WREG32((0x2c20 + j), 0x00000000);
5297		WREG32((0x2c24 + j), 0x00000000);
5298	}
5299	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5300
5301	evergreen_mc_stop(rdev, &save);
5302	if (radeon_mc_wait_for_idle(rdev)) {
5303		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5304	}
5305	/* Lockout access through VGA aperture */
5306	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5307	/* Update configuration */
5308	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5309	       rdev->mc.vram_start >> 12);
5310	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5311	       rdev->mc.vram_end >> 12);
5312	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5313	       rdev->vram_scratch.gpu_addr >> 12);
5314	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5315	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5316	WREG32(MC_VM_FB_LOCATION, tmp);
5317	/* XXX double check these! */
5318	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5319	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5320	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5321	WREG32(MC_VM_AGP_BASE, 0);
5322	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5323	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5324	if (radeon_mc_wait_for_idle(rdev)) {
5325		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5326	}
5327	evergreen_mc_resume(rdev, &save);
5328	/* we need to own VRAM, so turn off the VGA renderer here
5329	 * to stop it overwriting our objects */
5330	rv515_vga_render_disable(rdev);
5331}
5332
5333/**
5334 * cik_mc_init - initialize the memory controller driver params
5335 *
5336 * @rdev: radeon_device pointer
5337 *
5338 * Look up the amount of vram, vram width, and decide how to place
5339 * vram and gart within the GPU's physical address space (CIK).
5340 * Returns 0 for success.
5341 */
5342static int cik_mc_init(struct radeon_device *rdev)
5343{
5344	u32 tmp;
5345	int chansize, numchan;
5346
5347	/* Get VRAM information */
5348	rdev->mc.vram_is_ddr = true;
5349	tmp = RREG32(MC_ARB_RAMCFG);
5350	if (tmp & CHANSIZE_MASK) {
5351		chansize = 64;
5352	} else {
5353		chansize = 32;
5354	}
5355	tmp = RREG32(MC_SHARED_CHMAP);
5356	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5357	case 0:
5358	default:
5359		numchan = 1;
5360		break;
5361	case 1:
5362		numchan = 2;
5363		break;
5364	case 2:
5365		numchan = 4;
5366		break;
5367	case 3:
5368		numchan = 8;
5369		break;
5370	case 4:
5371		numchan = 3;
5372		break;
5373	case 5:
5374		numchan = 6;
5375		break;
5376	case 6:
5377		numchan = 10;
5378		break;
5379	case 7:
5380		numchan = 12;
5381		break;
5382	case 8:
5383		numchan = 16;
5384		break;
5385	}
5386	rdev->mc.vram_width = numchan * chansize;
5387	/* Could aper size report 0? */
5388	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5389	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5390	/* size in MB on CIK */
5391	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5392	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5393	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5394	si_vram_gtt_location(rdev, &rdev->mc);
5395	radeon_update_bandwidth_info(rdev);
5396
5397	return 0;
5398}
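
/*
 * Worked example for the vram_width math above: with CHANSIZE set
 * (chansize = 64) and a NOOFCHAN field of 3 (numchan = 8), the
 * reported bus width is 8 * 64 = 512 bits.
 */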
5399
5400/*
5401 * GART
5402 * VMID 0 is the physical GPU addresses as used by the kernel.
5403 * VMIDs 1-15 are used for userspace clients and are handled
5404 * by the radeon vm/hsa code.
5405 */
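
/*
 * For illustration: the per-VMID page table base registers are split
 * across two banks (contexts 0-7 and 8-15).  cik_pcie_gart_enable(),
 * cik_pcie_gart_disable() and cik_vm_flush() below all open-code the
 * same lookup; a minimal sketch of it, as a hypothetical helper that
 * is not part of the driver:
 */
static inline u32 cik_vmid_pt_base_reg(unsigned int vmid)
{
	if (vmid < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2);
}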
5406/**
5407 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5408 *
5409 * @rdev: radeon_device pointer
5410 *
5411 * Flush the TLB for the VMID 0 page table (CIK).
5412 */
5413void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5414{
5415	/* flush hdp cache */
5416	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5417
5418	/* bits 0-15 are the VM contexts 0-15 */
5419	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5420}
5421
5422/**
5423 * cik_pcie_gart_enable - gart enable
5424 *
5425 * @rdev: radeon_device pointer
5426 *
5427 * This sets up the TLBs, programs the page tables for VMID0,
5428 * sets up the hw for VMIDs 1-15 which are allocated on
5429 * demand, and sets up the global locations for the LDS, GDS,
5430 * and GPUVM for FSA64 clients (CIK).
5431 * Returns 0 for success, errors for failure.
5432 */
5433static int cik_pcie_gart_enable(struct radeon_device *rdev)
5434{
5435	int r, i;
5436
5437	if (rdev->gart.robj == NULL) {
5438		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5439		return -EINVAL;
5440	}
5441	r = radeon_gart_table_vram_pin(rdev);
5442	if (r)
5443		return r;
5444	/* Setup TLB control */
5445	WREG32(MC_VM_MX_L1_TLB_CNTL,
5446	       (0xA << 7) |
5447	       ENABLE_L1_TLB |
5448	       ENABLE_L1_FRAGMENT_PROCESSING |
5449	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5450	       ENABLE_ADVANCED_DRIVER_MODEL |
5451	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5452	/* Setup L2 cache */
5453	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5454	       ENABLE_L2_FRAGMENT_PROCESSING |
5455	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5456	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5457	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5458	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5459	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5460	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5461	       BANK_SELECT(4) |
5462	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5463	/* setup context0 */
5464	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5465	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5466	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5467	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5468			(u32)(rdev->dummy_page.addr >> 12));
5469	WREG32(VM_CONTEXT0_CNTL2, 0);
5470	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5471				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5472
5473	WREG32(0x15D4, 0);
5474	WREG32(0x15D8, 0);
5475	WREG32(0x15DC, 0);
5476
5477	/* restore contexts 1-15 */
5478	/* set vm size, must be a multiple of 4 */
5479	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5480	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5481	for (i = 1; i < 16; i++) {
5482		if (i < 8)
5483			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5484			       rdev->vm_manager.saved_table_addr[i]);
5485		else
5486			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5487			       rdev->vm_manager.saved_table_addr[i]);
5488	}
5489
5490	/* enable contexts 1-15 */
5491	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5492	       (u32)(rdev->dummy_page.addr >> 12));
5493	WREG32(VM_CONTEXT1_CNTL2, 4);
5494	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5495				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5496				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5498				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5500				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5502				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5504				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5505				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5506				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5507				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5508
5509	if (rdev->family == CHIP_KAVERI) {
5510		u32 tmp = RREG32(CHUB_CONTROL);
5511		tmp &= ~BYPASS_VM;
5512		WREG32(CHUB_CONTROL, tmp);
5513	}
5514
5515	/* XXX SH_MEM regs */
5516	/* where to put LDS, scratch, GPUVM in FSA64 space */
5517	mutex_lock(&rdev->srbm_mutex);
5518	for (i = 0; i < 16; i++) {
5519		cik_srbm_select(rdev, 0, 0, 0, i);
5520		/* CP and shaders */
5521		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5522		WREG32(SH_MEM_APE1_BASE, 1);
5523		WREG32(SH_MEM_APE1_LIMIT, 0);
5524		WREG32(SH_MEM_BASES, 0);
5525		/* SDMA GFX */
5526		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5527		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5528		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5529		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5530		/* XXX SDMA RLC - todo */
5531	}
5532	cik_srbm_select(rdev, 0, 0, 0, 0);
5533	mutex_unlock(&rdev->srbm_mutex);
5534
5535	cik_pcie_gart_tlb_flush(rdev);
5536	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5537		 (unsigned)(rdev->mc.gtt_size >> 20),
5538		 (unsigned long long)rdev->gart.table_addr);
5539	rdev->gart.ready = true;
5540	return 0;
5541}
5542
5543/**
5544 * cik_pcie_gart_disable - gart disable
5545 *
5546 * @rdev: radeon_device pointer
5547 *
5548 * This disables all the VM page tables (CIK).
5549 */
5550static void cik_pcie_gart_disable(struct radeon_device *rdev)
5551{
5552	unsigned i;
5553
5554	for (i = 1; i < 16; ++i) {
5555		uint32_t reg;
5556		if (i < 8)
5557			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5558		else
5559			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5560		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5561	}
5562
5563	/* Disable all tables */
5564	WREG32(VM_CONTEXT0_CNTL, 0);
5565	WREG32(VM_CONTEXT1_CNTL, 0);
5566	/* Setup TLB control */
5567	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5568	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5569	/* Setup L2 cache */
5570	WREG32(VM_L2_CNTL,
5571	       ENABLE_L2_FRAGMENT_PROCESSING |
5572	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5573	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5574	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5575	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5576	WREG32(VM_L2_CNTL2, 0);
5577	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5578	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5579	radeon_gart_table_vram_unpin(rdev);
5580}
5581
5582/**
5583 * cik_pcie_gart_fini - vm fini callback
5584 *
5585 * @rdev: radeon_device pointer
5586 *
5587 * Tears down the driver GART/VM setup (CIK).
5588 */
5589static void cik_pcie_gart_fini(struct radeon_device *rdev)
5590{
5591	cik_pcie_gart_disable(rdev);
5592	radeon_gart_table_vram_free(rdev);
5593	radeon_gart_fini(rdev);
5594}
5595
5596/* vm parser */
5597/**
5598 * cik_ib_parse - vm ib_parse callback
5599 *
5600 * @rdev: radeon_device pointer
5601 * @ib: indirect buffer pointer
5602 *
5603 * CIK uses hw IB checking so this is a nop (CIK).
5604 */
5605int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5606{
5607	return 0;
5608}
5609
5610/*
5611 * vm
5612 * VMID 0 is the physical GPU addresses as used by the kernel.
5613 * VMIDs 1-15 are used for userspace clients and are handled
5614 * by the radeon vm/hsa code.
5615 */
5616/**
5617 * cik_vm_init - cik vm init callback
5618 *
5619 * @rdev: radeon_device pointer
5620 *
5621 * Inits cik specific vm parameters (number of VMs, base of vram for
5622 * VMIDs 1-15) (CIK).
5623 * Returns 0 for success.
5624 */
5625int cik_vm_init(struct radeon_device *rdev)
5626{
5627	/*
5628	 * number of VMs
5629	 * VMID 0 is reserved for System
5630	 * radeon graphics/compute will use VMIDs 1-15
5631	 */
5632	rdev->vm_manager.nvm = 16;
5633	/* base offset of vram pages */
5634	if (rdev->flags & RADEON_IS_IGP) {
5635		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5636		tmp <<= 22;
5637		rdev->vm_manager.vram_base_offset = tmp;
5638	} else
5639		rdev->vm_manager.vram_base_offset = 0;
5640
5641	return 0;
5642}
5643
5644/**
5645 * cik_vm_fini - cik vm fini callback
5646 *
5647 * @rdev: radeon_device pointer
5648 *
5649 * Tear down any asic specific VM setup (CIK).
5650 */
5651void cik_vm_fini(struct radeon_device *rdev)
5652{
5653}
5654
5655/**
5656 * cik_vm_decode_fault - print human readable fault info
5657 *
5658 * @rdev: radeon_device pointer
5659 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5660 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
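 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value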
5661 *
5662 * Print human readable fault information (CIK).
5663 */
5664static void cik_vm_decode_fault(struct radeon_device *rdev,
5665				u32 status, u32 addr, u32 mc_client)
5666{
5667	u32 mc_id;
5668	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5669	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5670	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5671		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5672
5673	if (rdev->family == CHIP_HAWAII)
5674		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5675	else
5676		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5677
5678	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5679	       protections, vmid, addr,
5680	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5681	       block, mc_client, mc_id);
5682}
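
/*
 * For illustration: the mc_client value decoded above is a packed
 * 4-byte ASCII tag; e.g. a hypothetical value of 0x43423000 unpacks
 * to 'C', 'B', '0', '\0' and prints as "CB0".
 */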
5683
5684/**
5685 * cik_vm_flush - cik vm flush using the CP
5686 *
5687 * @rdev: radeon_device pointer
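 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: page directory base address for this VMID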
5688 *
5689 * Update the page table base and flush the VM TLB
5690 * using the CP (CIK).
5691 */
5692void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5693		  unsigned vm_id, uint64_t pd_addr)
5694{
5695	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5696
5697	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5698	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5699				 WRITE_DATA_DST_SEL(0)));
5700	if (vm_id < 8) {
5701		radeon_ring_write(ring,
5702				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5703	} else {
5704		radeon_ring_write(ring,
5705				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5706	}
5707	radeon_ring_write(ring, 0);
5708	radeon_ring_write(ring, pd_addr >> 12);
5709
5710	/* update SH_MEM_* regs */
5711	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5712	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5713				 WRITE_DATA_DST_SEL(0)));
5714	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5715	radeon_ring_write(ring, 0);
5716	radeon_ring_write(ring, VMID(vm_id));
5717
5718	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5719	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5720				 WRITE_DATA_DST_SEL(0)));
5721	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5722	radeon_ring_write(ring, 0);
5723
5724	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5725	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5726	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5727	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5728
5729	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5730	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5731				 WRITE_DATA_DST_SEL(0)));
5732	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5733	radeon_ring_write(ring, 0);
5734	radeon_ring_write(ring, VMID(0));
5735
5736	/* HDP flush */
5737	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5738
5739	/* bits 0-15 are the VM contexts 0-15 */
5740	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5741	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5742				 WRITE_DATA_DST_SEL(0)));
5743	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5744	radeon_ring_write(ring, 0);
5745	radeon_ring_write(ring, 1 << vm_id);
5746
5747	/* wait for the invalidate to complete */
5748	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5749	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5750				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5751				 WAIT_REG_MEM_ENGINE(0))); /* me */
5752	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5753	radeon_ring_write(ring, 0);
5754	radeon_ring_write(ring, 0); /* ref */
5755	radeon_ring_write(ring, 0); /* mask */
5756	radeon_ring_write(ring, 0x20); /* poll interval */
5757
5758	/* compute doesn't have PFP */
5759	if (usepfp) {
5760		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5761		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5762		radeon_ring_write(ring, 0x0);
5763	}
5764}
5765
5766/*
5767 * RLC
5768 * The RLC is a multi-purpose microengine that handles a
5769 * variety of functions, the most important of which is
5770 * the interrupt controller.
5771 */
5772static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5773					  bool enable)
5774{
5775	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5776
5777	if (enable)
5778		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5779	else
5780		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5781	WREG32(CP_INT_CNTL_RING0, tmp);
5782}
5783
5784static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5785{
5786	u32 tmp;
5787
5788	tmp = RREG32(RLC_LB_CNTL);
5789	if (enable)
5790		tmp |= LOAD_BALANCE_ENABLE;
5791	else
5792		tmp &= ~LOAD_BALANCE_ENABLE;
5793	WREG32(RLC_LB_CNTL, tmp);
5794}
5795
5796static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5797{
5798	u32 i, j, k;
5799	u32 mask;
5800
5801	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5802		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5803			cik_select_se_sh(rdev, i, j);
5804			for (k = 0; k < rdev->usec_timeout; k++) {
5805				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5806					break;
5807				udelay(1);
5808			}
5809		}
5810	}
5811	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5812
5813	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5814	for (k = 0; k < rdev->usec_timeout; k++) {
5815		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5816			break;
5817		udelay(1);
5818	}
5819}
5820
5821static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5822{
5823	u32 tmp;
5824
5825	tmp = RREG32(RLC_CNTL);
5826	if (tmp != rlc)
5827		WREG32(RLC_CNTL, rlc);
5828}
5829
5830static u32 cik_halt_rlc(struct radeon_device *rdev)
5831{
5832	u32 data, orig;
5833
5834	orig = data = RREG32(RLC_CNTL);
5835
5836	if (data & RLC_ENABLE) {
5837		u32 i;
5838
5839		data &= ~RLC_ENABLE;
5840		WREG32(RLC_CNTL, data);
5841
5842		for (i = 0; i < rdev->usec_timeout; i++) {
5843			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5844				break;
5845			udelay(1);
5846		}
5847
5848		cik_wait_for_rlc_serdes(rdev);
5849	}
5850
5851	return orig;
5852}
5853
5854void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5855{
5856	u32 tmp, i, mask;
5857
5858	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5859	WREG32(RLC_GPR_REG2, tmp);
5860
5861	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5862	for (i = 0; i < rdev->usec_timeout; i++) {
5863		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5864			break;
5865		udelay(1);
5866	}
5867
5868	for (i = 0; i < rdev->usec_timeout; i++) {
5869		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5870			break;
5871		udelay(1);
5872	}
5873}
5874
5875void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5876{
5877	u32 tmp;
5878
5879	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5880	WREG32(RLC_GPR_REG2, tmp);
5881}
5882
5883/**
5884 * cik_rlc_stop - stop the RLC ME
5885 *
5886 * @rdev: radeon_device pointer
5887 *
5888 * Halt the RLC ME (MicroEngine) (CIK).
5889 */
5890static void cik_rlc_stop(struct radeon_device *rdev)
5891{
5892	WREG32(RLC_CNTL, 0);
5893
5894	cik_enable_gui_idle_interrupt(rdev, false);
5895
5896	cik_wait_for_rlc_serdes(rdev);
5897}
5898
5899/**
5900 * cik_rlc_start - start the RLC ME
5901 *
5902 * @rdev: radeon_device pointer
5903 *
5904 * Unhalt the RLC ME (MicroEngine) (CIK).
5905 */
5906static void cik_rlc_start(struct radeon_device *rdev)
5907{
5908	WREG32(RLC_CNTL, RLC_ENABLE);
5909
5910	cik_enable_gui_idle_interrupt(rdev, true);
5911
5912	udelay(50);
5913}
5914
5915/**
5916 * cik_rlc_resume - setup the RLC hw
5917 *
5918 * @rdev: radeon_device pointer
5919 *
5920 * Initialize the RLC registers, load the ucode,
5921 * and start the RLC (CIK).
5922 * Returns 0 for success, -EINVAL if the ucode is not available.
5923 */
5924static int cik_rlc_resume(struct radeon_device *rdev)
5925{
5926	u32 i, size, tmp;
5927
5928	if (!rdev->rlc_fw)
5929		return -EINVAL;
5930
5931	cik_rlc_stop(rdev);
5932
5933	/* disable CG */
5934	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5935	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5936
5937	si_rlc_reset(rdev);
5938
5939	cik_init_pg(rdev);
5940
5941	cik_init_cg(rdev);
5942
5943	WREG32(RLC_LB_CNTR_INIT, 0);
5944	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5945
5946	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5947	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5948	WREG32(RLC_LB_PARAMS, 0x00600408);
5949	WREG32(RLC_LB_CNTL, 0x80000004);
5950
5951	WREG32(RLC_MC_CNTL, 0);
5952	WREG32(RLC_UCODE_CNTL, 0);
5953
5954	if (rdev->new_fw) {
5955		const struct rlc_firmware_header_v1_0 *hdr =
5956			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5957		const __le32 *fw_data = (const __le32 *)
5958			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5959
5960		radeon_ucode_print_rlc_hdr(&hdr->header);
5961
5962		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5963		WREG32(RLC_GPM_UCODE_ADDR, 0);
5964		for (i = 0; i < size; i++)
5965			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5966		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5967	} else {
5968		const __be32 *fw_data;
5969
5970		switch (rdev->family) {
5971		case CHIP_BONAIRE:
5972		case CHIP_HAWAII:
5973		default:
5974			size = BONAIRE_RLC_UCODE_SIZE;
5975			break;
5976		case CHIP_KAVERI:
5977			size = KV_RLC_UCODE_SIZE;
5978			break;
5979		case CHIP_KABINI:
5980			size = KB_RLC_UCODE_SIZE;
5981			break;
5982		case CHIP_MULLINS:
5983			size = ML_RLC_UCODE_SIZE;
5984			break;
5985		}
5986
5987		fw_data = (const __be32 *)rdev->rlc_fw->data;
5988		WREG32(RLC_GPM_UCODE_ADDR, 0);
5989		for (i = 0; i < size; i++)
5990			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5991		WREG32(RLC_GPM_UCODE_ADDR, 0);
5992	}
5993
5994	/* XXX - find out what chips support lbpw */
5995	cik_enable_lbpw(rdev, false);
5996
5997	if (rdev->family == CHIP_BONAIRE)
5998		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5999
6000	cik_rlc_start(rdev);
6001
6002	return 0;
6003}
6004
6005static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6006{
6007	u32 data, orig, tmp, tmp2;
6008
6009	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6010
6011	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6012		cik_enable_gui_idle_interrupt(rdev, true);
6013
6014		tmp = cik_halt_rlc(rdev);
6015
6016		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6017		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6018		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6019		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6020		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6021
6022		cik_update_rlc(rdev, tmp);
6023
6024		data |= CGCG_EN | CGLS_EN;
6025	} else {
6026		cik_enable_gui_idle_interrupt(rdev, false);
6027
6028		RREG32(CB_CGTT_SCLK_CTRL);
6029		RREG32(CB_CGTT_SCLK_CTRL);
6030		RREG32(CB_CGTT_SCLK_CTRL);
6031		RREG32(CB_CGTT_SCLK_CTRL);
6032
6033		data &= ~(CGCG_EN | CGLS_EN);
6034	}
6035
6036	if (orig != data)
6037		WREG32(RLC_CGCG_CGLS_CTRL, data);
6038
6039}
6040
6041static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6042{
6043	u32 data, orig, tmp = 0;
6044
6045	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6046		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6047			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6048				orig = data = RREG32(CP_MEM_SLP_CNTL);
6049				data |= CP_MEM_LS_EN;
6050				if (orig != data)
6051					WREG32(CP_MEM_SLP_CNTL, data);
6052			}
6053		}
6054
6055		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6056		data |= 0x00000001;
6057		data &= 0xfffffffd;
6058		if (orig != data)
6059			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6060
6061		tmp = cik_halt_rlc(rdev);
6062
6063		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6064		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6065		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6066		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6067		WREG32(RLC_SERDES_WR_CTRL, data);
6068
6069		cik_update_rlc(rdev, tmp);
6070
6071		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6072			orig = data = RREG32(CGTS_SM_CTRL_REG);
6073			data &= ~SM_MODE_MASK;
6074			data |= SM_MODE(0x2);
6075			data |= SM_MODE_ENABLE;
6076			data &= ~CGTS_OVERRIDE;
6077			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6078			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6079				data &= ~CGTS_LS_OVERRIDE;
6080			data &= ~ON_MONITOR_ADD_MASK;
6081			data |= ON_MONITOR_ADD_EN;
6082			data |= ON_MONITOR_ADD(0x96);
6083			if (orig != data)
6084				WREG32(CGTS_SM_CTRL_REG, data);
6085		}
6086	} else {
6087		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6088		data |= 0x00000003;
6089		if (orig != data)
6090			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6091
6092		data = RREG32(RLC_MEM_SLP_CNTL);
6093		if (data & RLC_MEM_LS_EN) {
6094			data &= ~RLC_MEM_LS_EN;
6095			WREG32(RLC_MEM_SLP_CNTL, data);
6096		}
6097
6098		data = RREG32(CP_MEM_SLP_CNTL);
6099		if (data & CP_MEM_LS_EN) {
6100			data &= ~CP_MEM_LS_EN;
6101			WREG32(CP_MEM_SLP_CNTL, data);
6102		}
6103
6104		orig = data = RREG32(CGTS_SM_CTRL_REG);
6105		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6106		if (orig != data)
6107			WREG32(CGTS_SM_CTRL_REG, data);
6108
6109		tmp = cik_halt_rlc(rdev);
6110
6111		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6112		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6113		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6114		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6115		WREG32(RLC_SERDES_WR_CTRL, data);
6116
6117		cik_update_rlc(rdev, tmp);
6118	}
6119}
6120
6121static const u32 mc_cg_registers[] =
6122{
6123	MC_HUB_MISC_HUB_CG,
6124	MC_HUB_MISC_SIP_CG,
6125	MC_HUB_MISC_VM_CG,
6126	MC_XPB_CLK_GAT,
6127	ATC_MISC_CG,
6128	MC_CITF_MISC_WR_CG,
6129	MC_CITF_MISC_RD_CG,
6130	MC_CITF_MISC_VM_CG,
6131	VM_L2_CG,
6132};
6133
6134static void cik_enable_mc_ls(struct radeon_device *rdev,
6135			     bool enable)
6136{
6137	int i;
6138	u32 orig, data;
6139
6140	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6141		orig = data = RREG32(mc_cg_registers[i]);
6142		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6143			data |= MC_LS_ENABLE;
6144		else
6145			data &= ~MC_LS_ENABLE;
6146		if (data != orig)
6147			WREG32(mc_cg_registers[i], data);
6148	}
6149}
6150
6151static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6152			       bool enable)
6153{
6154	int i;
6155	u32 orig, data;
6156
6157	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6158		orig = data = RREG32(mc_cg_registers[i]);
6159		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6160			data |= MC_CG_ENABLE;
6161		else
6162			data &= ~MC_CG_ENABLE;
6163		if (data != orig)
6164			WREG32(mc_cg_registers[i], data);
6165	}
6166}
6167
6168static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6169				 bool enable)
6170{
6171	u32 orig, data;
6172
6173	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6174		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6175		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6176	} else {
6177		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6178		data |= 0xff000000;
6179		if (data != orig)
6180			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6181
6182		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6183		data |= 0xff000000;
6184		if (data != orig)
6185			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6186	}
6187}
6188
6189static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6190				 bool enable)
6191{
6192	u32 orig, data;
6193
6194	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6195		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6196		data |= 0x100;
6197		if (orig != data)
6198			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6199
6200		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6201		data |= 0x100;
6202		if (orig != data)
6203			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6204	} else {
6205		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6206		data &= ~0x100;
6207		if (orig != data)
6208			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6209
6210		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6211		data &= ~0x100;
6212		if (orig != data)
6213			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6214	}
6215}
6216
6217static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6218				bool enable)
6219{
6220	u32 orig, data;
6221
6222	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6223		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6224		data = 0xfff;
6225		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6226
6227		orig = data = RREG32(UVD_CGC_CTRL);
6228		data |= DCM;
6229		if (orig != data)
6230			WREG32(UVD_CGC_CTRL, data);
6231	} else {
6232		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6233		data &= ~0xfff;
6234		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6235
6236		orig = data = RREG32(UVD_CGC_CTRL);
6237		data &= ~DCM;
6238		if (orig != data)
6239			WREG32(UVD_CGC_CTRL, data);
6240	}
6241}
6242
6243static void cik_enable_bif_mgls(struct radeon_device *rdev,
6244			       bool enable)
6245{
6246	u32 orig, data;
6247
6248	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6249
6250	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6251		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6252			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6253	else
6254		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6255			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6256
6257	if (orig != data)
6258		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6259}
6260
6261static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6262				bool enable)
6263{
6264	u32 orig, data;
6265
6266	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6267
6268	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6269		data &= ~CLOCK_GATING_DIS;
6270	else
6271		data |= CLOCK_GATING_DIS;
6272
6273	if (orig != data)
6274		WREG32(HDP_HOST_PATH_CNTL, data);
6275}
6276
6277static void cik_enable_hdp_ls(struct radeon_device *rdev,
6278			      bool enable)
6279{
6280	u32 orig, data;
6281
6282	orig = data = RREG32(HDP_MEM_POWER_LS);
6283
6284	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6285		data |= HDP_LS_ENABLE;
6286	else
6287		data &= ~HDP_LS_ENABLE;
6288
6289	if (orig != data)
6290		WREG32(HDP_MEM_POWER_LS, data);
6291}
6292
6293void cik_update_cg(struct radeon_device *rdev,
6294		   u32 block, bool enable)
6295{
6296
6297	if (block & RADEON_CG_BLOCK_GFX) {
6298		cik_enable_gui_idle_interrupt(rdev, false);
6299		/* order matters! */
6300		if (enable) {
6301			cik_enable_mgcg(rdev, true);
6302			cik_enable_cgcg(rdev, true);
6303		} else {
6304			cik_enable_cgcg(rdev, false);
6305			cik_enable_mgcg(rdev, false);
6306		}
6307		cik_enable_gui_idle_interrupt(rdev, true);
6308	}
6309
6310	if (block & RADEON_CG_BLOCK_MC) {
6311		if (!(rdev->flags & RADEON_IS_IGP)) {
6312			cik_enable_mc_mgcg(rdev, enable);
6313			cik_enable_mc_ls(rdev, enable);
6314		}
6315	}
6316
6317	if (block & RADEON_CG_BLOCK_SDMA) {
6318		cik_enable_sdma_mgcg(rdev, enable);
6319		cik_enable_sdma_mgls(rdev, enable);
6320	}
6321
6322	if (block & RADEON_CG_BLOCK_BIF) {
6323		cik_enable_bif_mgls(rdev, enable);
6324	}
6325
6326	if (block & RADEON_CG_BLOCK_UVD) {
6327		if (rdev->has_uvd)
6328			cik_enable_uvd_mgcg(rdev, enable);
6329	}
6330
6331	if (block & RADEON_CG_BLOCK_HDP) {
6332		cik_enable_hdp_mgcg(rdev, enable);
6333		cik_enable_hdp_ls(rdev, enable);
6334	}
6335
6336	if (block & RADEON_CG_BLOCK_VCE) {
6337		vce_v2_0_enable_mgcg(rdev, enable);
6338	}
6339}
6340
6341static void cik_init_cg(struct radeon_device *rdev)
6342{
6343
6344	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6345
6346	if (rdev->has_uvd)
6347		si_init_uvd_internal_cg(rdev);
6348
6349	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6350			     RADEON_CG_BLOCK_SDMA |
6351			     RADEON_CG_BLOCK_BIF |
6352			     RADEON_CG_BLOCK_UVD |
6353			     RADEON_CG_BLOCK_HDP), true);
6354}
6355
6356static void cik_fini_cg(struct radeon_device *rdev)
6357{
6358	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6359			     RADEON_CG_BLOCK_SDMA |
6360			     RADEON_CG_BLOCK_BIF |
6361			     RADEON_CG_BLOCK_UVD |
6362			     RADEON_CG_BLOCK_HDP), false);
6363
6364	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6365}
6366
6367static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6368					  bool enable)
6369{
6370	u32 data, orig;
6371
6372	orig = data = RREG32(RLC_PG_CNTL);
6373	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6374		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6375	else
6376		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6377	if (orig != data)
6378		WREG32(RLC_PG_CNTL, data);
6379}
6380
6381static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6382					  bool enable)
6383{
6384	u32 data, orig;
6385
6386	orig = data = RREG32(RLC_PG_CNTL);
6387	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6388		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6389	else
6390		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6391	if (orig != data)
6392		WREG32(RLC_PG_CNTL, data);
6393}
6394
6395static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6396{
6397	u32 data, orig;
6398
6399	orig = data = RREG32(RLC_PG_CNTL);
6400	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6401		data &= ~DISABLE_CP_PG;
6402	else
6403		data |= DISABLE_CP_PG;
6404	if (orig != data)
6405		WREG32(RLC_PG_CNTL, data);
6406}
6407
6408static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6409{
6410	u32 data, orig;
6411
6412	orig = data = RREG32(RLC_PG_CNTL);
6413	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6414		data &= ~DISABLE_GDS_PG;
6415	else
6416		data |= DISABLE_GDS_PG;
6417	if (orig != data)
6418		WREG32(RLC_PG_CNTL, data);
6419}
6420
6421#define CP_ME_TABLE_SIZE    96
6422#define CP_ME_TABLE_OFFSET  2048
6423#define CP_MEC_TABLE_OFFSET 4096
6424
6425void cik_init_cp_pg_table(struct radeon_device *rdev)
6426{
6427	volatile u32 *dst_ptr;
6428	int me, i, max_me = 4;
6429	u32 bo_offset = 0;
6430	u32 table_offset, table_size;
6431
6432	if (rdev->family == CHIP_KAVERI)
6433		max_me = 5;
6434
6435	if (rdev->rlc.cp_table_ptr == NULL)
6436		return;
6437
6438	/* write the cp table buffer */
6439	dst_ptr = rdev->rlc.cp_table_ptr;
6440	for (me = 0; me < max_me; me++) {
6441		if (rdev->new_fw) {
6442			const __le32 *fw_data;
6443			const struct gfx_firmware_header_v1_0 *hdr;
6444
6445			if (me == 0) {
6446				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6447				fw_data = (const __le32 *)
6448					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6449				table_offset = le32_to_cpu(hdr->jt_offset);
6450				table_size = le32_to_cpu(hdr->jt_size);
6451			} else if (me == 1) {
6452				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6453				fw_data = (const __le32 *)
6454					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6455				table_offset = le32_to_cpu(hdr->jt_offset);
6456				table_size = le32_to_cpu(hdr->jt_size);
6457			} else if (me == 2) {
6458				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6459				fw_data = (const __le32 *)
6460					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6461				table_offset = le32_to_cpu(hdr->jt_offset);
6462				table_size = le32_to_cpu(hdr->jt_size);
6463			} else if (me == 3) {
6464				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6465				fw_data = (const __le32 *)
6466					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6467				table_offset = le32_to_cpu(hdr->jt_offset);
6468				table_size = le32_to_cpu(hdr->jt_size);
6469			} else {
6470				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6471				fw_data = (const __le32 *)
6472					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6473				table_offset = le32_to_cpu(hdr->jt_offset);
6474				table_size = le32_to_cpu(hdr->jt_size);
6475			}
6476
6477			for (i = 0; i < table_size; i ++) {
6478				dst_ptr[bo_offset + i] =
6479					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6480			}
6481			bo_offset += table_size;
6482		} else {
6483			const __be32 *fw_data;
6484			table_size = CP_ME_TABLE_SIZE;
6485
6486			if (me == 0) {
6487				fw_data = (const __be32 *)rdev->ce_fw->data;
6488				table_offset = CP_ME_TABLE_OFFSET;
6489			} else if (me == 1) {
6490				fw_data = (const __be32 *)rdev->pfp_fw->data;
6491				table_offset = CP_ME_TABLE_OFFSET;
6492			} else if (me == 2) {
6493				fw_data = (const __be32 *)rdev->me_fw->data;
6494				table_offset = CP_ME_TABLE_OFFSET;
6495			} else {
6496				fw_data = (const __be32 *)rdev->mec_fw->data;
6497				table_offset = CP_MEC_TABLE_OFFSET;
6498			}
6499
6500			for (i = 0; i < table_size; i ++) {
6501				dst_ptr[bo_offset + i] =
6502					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6503			}
6504			bo_offset += table_size;
6505		}
6506	}
6507}
6508
6509static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6510				bool enable)
6511{
6512	u32 data, orig;
6513
6514	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6515		orig = data = RREG32(RLC_PG_CNTL);
6516		data |= GFX_PG_ENABLE;
6517		if (orig != data)
6518			WREG32(RLC_PG_CNTL, data);
6519
6520		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6521		data |= AUTO_PG_EN;
6522		if (orig != data)
6523			WREG32(RLC_AUTO_PG_CTRL, data);
6524	} else {
6525		orig = data = RREG32(RLC_PG_CNTL);
6526		data &= ~GFX_PG_ENABLE;
6527		if (orig != data)
6528			WREG32(RLC_PG_CNTL, data);
6529
6530		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6531		data &= ~AUTO_PG_EN;
6532		if (orig != data)
6533			WREG32(RLC_AUTO_PG_CTRL, data);
6534
6535		data = RREG32(DB_RENDER_CONTROL);
6536	}
6537}
6538
6539static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6540{
6541	u32 mask = 0, tmp, tmp1;
6542	int i;
6543
6544	cik_select_se_sh(rdev, se, sh);
6545	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6546	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6547	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6548
6549	tmp &= 0xffff0000;
6550
6551	tmp |= tmp1;
6552	tmp >>= 16;
6553
6554	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6555		mask <<= 1;
6556		mask |= 1;
6557	}
6558
6559	return (~tmp) & mask;
6560}
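
/*
 * Worked example for the bitmap math above, assuming max_cu_per_sh = 8
 * (mask = 0xff): after the merge and the shift, a set bit in tmp means
 * "CU disabled", so tmp = 0x0003 (CUs 0 and 1 fused off) yields an
 * active bitmap of (~0x0003) & 0xff = 0xfc, i.e. CUs 2-7.
 */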
6561
6562static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6563{
6564	u32 i, j, k, active_cu_number = 0;
6565	u32 mask, counter, cu_bitmap;
6566	u32 tmp = 0;
6567
6568	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6569		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6570			mask = 1;
6571			cu_bitmap = 0;
6572			counter = 0;
6573			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6574				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6575					if (counter < 2)
6576						cu_bitmap |= mask;
6577					counter ++;
6578				}
6579				mask <<= 1;
6580			}
6581
6582			active_cu_number += counter;
6583			tmp |= (cu_bitmap << (i * 16 + j * 8));
6584		}
6585	}
6586
6587	WREG32(RLC_PG_AO_CU_MASK, tmp);
6588
6589	tmp = RREG32(RLC_MAX_PG_CU);
6590	tmp &= ~MAX_PU_CU_MASK;
6591	tmp |= MAX_PU_CU(active_cu_number);
6592	WREG32(RLC_MAX_PG_CU, tmp);
6593}
6594
6595static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6596				       bool enable)
6597{
6598	u32 data, orig;
6599
6600	orig = data = RREG32(RLC_PG_CNTL);
6601	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6602		data |= STATIC_PER_CU_PG_ENABLE;
6603	else
6604		data &= ~STATIC_PER_CU_PG_ENABLE;
6605	if (orig != data)
6606		WREG32(RLC_PG_CNTL, data);
6607}
6608
6609static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6610					bool enable)
6611{
6612	u32 data, orig;
6613
6614	orig = data = RREG32(RLC_PG_CNTL);
6615	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6616		data |= DYN_PER_CU_PG_ENABLE;
6617	else
6618		data &= ~DYN_PER_CU_PG_ENABLE;
6619	if (orig != data)
6620		WREG32(RLC_PG_CNTL, data);
6621}
6622
6623#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6624#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6625
6626static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6627{
6628	u32 data, orig;
6629	u32 i;
6630
6631	if (rdev->rlc.cs_data) {
6632		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6633		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6634		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6635		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6636	} else {
6637		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6638		for (i = 0; i < 3; i++)
6639			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6640	}
6641	if (rdev->rlc.reg_list) {
6642		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6643		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6644			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6645	}
6646
6647	orig = data = RREG32(RLC_PG_CNTL);
6648	data |= GFX_PG_SRC;
6649	if (orig != data)
6650		WREG32(RLC_PG_CNTL, data);
6651
6652	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6653	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6654
6655	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6656	data &= ~IDLE_POLL_COUNT_MASK;
6657	data |= IDLE_POLL_COUNT(0x60);
6658	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6659
6660	data = 0x10101010;
6661	WREG32(RLC_PG_DELAY, data);
6662
6663	data = RREG32(RLC_PG_DELAY_2);
6664	data &= ~0xff;
6665	data |= 0x3;
6666	WREG32(RLC_PG_DELAY_2, data);
6667
6668	data = RREG32(RLC_AUTO_PG_CTRL);
6669	data &= ~GRBM_REG_SGIT_MASK;
6670	data |= GRBM_REG_SGIT(0x700);
6671	WREG32(RLC_AUTO_PG_CTRL, data);
6672
6673}
6674
6675static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6676{
6677	cik_enable_gfx_cgpg(rdev, enable);
6678	cik_enable_gfx_static_mgpg(rdev, enable);
6679	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6680}
6681
6682u32 cik_get_csb_size(struct radeon_device *rdev)
6683{
6684	u32 count = 0;
6685	const struct cs_section_def *sect = NULL;
6686	const struct cs_extent_def *ext = NULL;
6687
6688	if (rdev->rlc.cs_data == NULL)
6689		return 0;
6690
6691	/* begin clear state */
6692	count += 2;
6693	/* context control state */
6694	count += 3;
6695
6696	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6697		for (ext = sect->section; ext->extent != NULL; ++ext) {
6698			if (sect->id == SECT_CONTEXT)
6699				count += 2 + ext->reg_count;
6700			else
6701				return 0;
6702		}
6703	}
6704	/* pa_sc_raster_config/pa_sc_raster_config1 */
6705	count += 4;
6706	/* end clear state */
6707	count += 2;
6708	/* clear state */
6709	count += 2;
6710
6711	return count;
6712}
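
/*
 * Worked example for the count above: for cs_data with a single
 * SECT_CONTEXT extent of N registers, the CSB needs
 * 2 + 3 + (2 + N) + 4 + 2 + 2 = 13 + N dwords, which is exactly what
 * cik_get_csb_buffer() below emits.
 */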
6713
6714void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6715{
6716	u32 count = 0, i;
6717	const struct cs_section_def *sect = NULL;
6718	const struct cs_extent_def *ext = NULL;
6719
6720	if (rdev->rlc.cs_data == NULL)
6721		return;
6722	if (buffer == NULL)
6723		return;
6724
6725	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6726	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6727
6728	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6729	buffer[count++] = cpu_to_le32(0x80000000);
6730	buffer[count++] = cpu_to_le32(0x80000000);
6731
6732	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6733		for (ext = sect->section; ext->extent != NULL; ++ext) {
6734			if (sect->id == SECT_CONTEXT) {
6735				buffer[count++] =
6736					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6737				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6738				for (i = 0; i < ext->reg_count; i++)
6739					buffer[count++] = cpu_to_le32(ext->extent[i]);
6740			} else {
6741				return;
6742			}
6743		}
6744	}
6745
6746	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6747	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6748	switch (rdev->family) {
6749	case CHIP_BONAIRE:
6750		buffer[count++] = cpu_to_le32(0x16000012);
6751		buffer[count++] = cpu_to_le32(0x00000000);
6752		break;
6753	case CHIP_KAVERI:
6754		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6755		buffer[count++] = cpu_to_le32(0x00000000);
6756		break;
6757	case CHIP_KABINI:
6758	case CHIP_MULLINS:
6759		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6760		buffer[count++] = cpu_to_le32(0x00000000);
6761		break;
6762	case CHIP_HAWAII:
6763		buffer[count++] = cpu_to_le32(0x3a00161a);
6764		buffer[count++] = cpu_to_le32(0x0000002e);
6765		break;
6766	default:
6767		buffer[count++] = cpu_to_le32(0x00000000);
6768		buffer[count++] = cpu_to_le32(0x00000000);
6769		break;
6770	}
6771
6772	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6773	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6774
6775	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6776	buffer[count++] = cpu_to_le32(0);
6777}
6778
6779static void cik_init_pg(struct radeon_device *rdev)
6780{
6781	if (rdev->pg_flags) {
6782		cik_enable_sck_slowdown_on_pu(rdev, true);
6783		cik_enable_sck_slowdown_on_pd(rdev, true);
6784		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6785			cik_init_gfx_cgpg(rdev);
6786			cik_enable_cp_pg(rdev, true);
6787			cik_enable_gds_pg(rdev, true);
6788		}
6789		cik_init_ao_cu_mask(rdev);
6790		cik_update_gfx_pg(rdev, true);
6791	}
6792}
6793
6794static void cik_fini_pg(struct radeon_device *rdev)
6795{
6796	if (rdev->pg_flags) {
6797		cik_update_gfx_pg(rdev, false);
6798		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6799			cik_enable_cp_pg(rdev, false);
6800			cik_enable_gds_pg(rdev, false);
6801		}
6802	}
6803}
6804
6805/*
6806 * Interrupts
6807 * Starting with r6xx, interrupts are handled via a ring buffer.
6808 * Ring buffers are areas of GPU accessible memory that the GPU
6809 * writes interrupt vectors into and the host reads vectors out of.
6810 * There is a rptr (read pointer) that determines where the
6811 * host is currently reading, and a wptr (write pointer)
6812 * which determines where the GPU has written.  When the
6813 * pointers are equal, the ring is idle.  When the GPU
6814 * writes vectors to the ring buffer, it increments the
6815 * wptr.  When there is an interrupt, the host then starts
6816 * fetching commands and processing them until the pointers are
6817 * equal again at which point it updates the rptr.
6818 */
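
/*
 * For illustration: a minimal sketch of the consumer loop described
 * above, assuming the usual r600_ih bookkeeping fields (ring, rptr,
 * ptr_mask).  The real implementation is cik_irq_process(); this
 * hypothetical helper is not called anywhere in the driver.
 */
static inline void cik_ih_drain_sketch(struct radeon_device *rdev)
{
	u32 wptr = RREG32(IH_RB_WPTR);	/* last position written by the GPU */
	u32 rptr = rdev->ih.rptr;	/* last position read by the host */

	while (rptr != wptr) {
		/* each vector is 16 bytes (4 dwords) in the ring */
		u32 ring_index = rptr / 4;
		u32 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;

		/* ... dispatch on src_id ... */
		(void)src_id;

		rptr = (rptr + 16) & rdev->ih.ptr_mask;
	}
	/* hand the consumed space back to the GPU */
	WREG32(IH_RB_RPTR, rptr);
	rdev->ih.rptr = rptr;
}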

/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}

/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}

/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
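	/* e.g. with the 64KB IH ring allocated in cik_init() this is
	 * order_base_2(65536 / 4) = 14; the ring size is programmed as
	 * the log2 of its size in dwords */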

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));
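	/* with IH_WPTR_OVERFLOW_ENABLE set, the hardware flags RB_OVERFLOW
	 * in the wptr when it wraps past an unconsumed rptr;
	 * cik_get_ih_wptr() below checks for and recovers from exactly
	 * that condition */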

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

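	/* the IH depends on DMA writes to system memory (the ring itself
	 * and the optional wptr writeback), so make sure bus mastering is
	 * enabled before turning interrupts on */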
	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}

/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}

/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
 * generated and do not require an explicit ack.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}

/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}

/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}

/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happens, start parsing
		 * interrupts from the last not-overwritten vector (wptr + 16).
		 * Hopefully this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}

/* CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
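/*
 * Decoding sketch (illustrative, mirrors cik_irq_process() below):
 * for an entry at byte offset rptr, the first three dwords give
 *
 *	src_id   = le32(ring[rptr / 4])     & 0xff;      - bits [7:0]
 *	src_data = le32(ring[rptr / 4 + 1]) & 0xfffffff; - bits [59:32]
 *	ring_id  = le32(ring[rptr / 4 + 2]) & 0xff;      - bits [71:64]
 */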
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
int cik_irq_process(struct radeon_device *rdev)
{
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_reset = false;
	u32 addr, status, mc_client;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;

		src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[0]) {
					drm_handle_vblank(rdev->ddev, 0);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[0]))
					radeon_crtc_handle_vblank(rdev, 0);
				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D1 vblank\n");

				break;
			case 1: /* D1 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D1 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[1]) {
					drm_handle_vblank(rdev->ddev, 1);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[1]))
					radeon_crtc_handle_vblank(rdev, 1);
				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D2 vblank\n");

				break;
			case 1: /* D2 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D2 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[2]) {
					drm_handle_vblank(rdev->ddev, 2);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[2]))
					radeon_crtc_handle_vblank(rdev, 2);
				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D3 vblank\n");

				break;
			case 1: /* D3 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D3 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[3]) {
					drm_handle_vblank(rdev->ddev, 3);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[3]))
					radeon_crtc_handle_vblank(rdev, 3);
				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D4 vblank\n");

				break;
			case 1: /* D4 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D4 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[4]) {
					drm_handle_vblank(rdev->ddev, 4);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[4]))
					radeon_crtc_handle_vblank(rdev, 4);
				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D5 vblank\n");

				break;
			case 1: /* D5 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D5 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[5]) {
					drm_handle_vblank(rdev->ddev, 5);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[5]))
					radeon_crtc_handle_vblank(rdev, 5);
				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D6 vblank\n");

				break;
			case 1: /* D6 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D6 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD1\n");

				break;
			case 1:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD2\n");

				break;
			case 2:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD3\n");

				break;
			case 3:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD4\n");

				break;
			case 4:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD5\n");

				break;
			case 5:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD6\n");

				break;
			case 6:
				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 1\n");

				break;
			case 7:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 2\n");

				break;
			case 8:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 3\n");

				break;
			case 9:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 4\n");

				break;
			case 10:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 5\n");

				break;
			case 11:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 6\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 96:
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			cik_vm_decode_fault(rdev, status, addr, mc_client);
			break;
		case 167: /* VCE */
			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
			switch (src_data) {
			case 0:
				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
				break;
			default:
				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
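			/* the masks above assume RINGID packs ME_ID in
			 * bits [6:5], PIPE_ID in [4:3] and QUEUE_ID in
			 * [2:0], per the IV ring layout comment - hence
			 * the XXX note until that is verified */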
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			DRM_DEBUG("IH: SDMA trap\n");
			switch (me_id) {
			case 0:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			case 1:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			}
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 241: /* SDMA Privileged inst */
		case 247: /* SDMA Privileged inst */
			DRM_ERROR("Illegal instruction in SDMA command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			switch (me_id) {
			case 0:
				switch (queue_id) {
				case 0:
					queue_reset = true;
					break;
				case 1:
					/* XXX compute */
					queue_reset = true;
					break;
				case 2:
					/* XXX compute */
					queue_reset = true;
					break;
				}
				break;
			case 1:
				switch (queue_id) {
				case 0:
					queue_reset = true;
					break;
				case 1:
					/* XXX compute */
					queue_reset = true;
					break;
				case 2:
					/* XXX compute */
					queue_reset = true;
					break;
				}
				break;
			}
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
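	/* hand events that need process context (DP short pulses, hotplug,
	 * GPU resets, thermal transitions) off to their workers */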
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (queue_reset) {
		rdev->needs_reset = true;
		wake_up_all(&rdev->fence_queue);
	}
	if (queue_thermal)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = cik_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}

/*
 * startup/shutdown callbacks
 */
static void cik_uvd_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
		 * cik_uvd_start() fail early, so nothing happens there.
		 * It is therefore pointless to go through that code,
		 * hence why we disable uvd here.
		 */
		rdev->has_uvd = 0;
		return;
	}
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}

static void cik_uvd_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
		goto error;
	}
	r = uvd_v4_2_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
		goto error;
	}
	return;

error:
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}

static void cik_uvd_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
		return;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
		return;
	}
	r = uvd_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
		return;
	}
}

static void cik_vce_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL, which makes
		 * cik_vce_start() fail early, so nothing happens there.
		 * It is therefore pointless to go through that code,
		 * hence why we disable vce here.
		 */
		rdev->has_vce = 0;
		return;
	}
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
}

static void cik_vce_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
		goto error;
	}
	r = vce_v2_0_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE 2.0 resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
		goto error;
	}
	return;

error:
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
}

static void cik_vce_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
		return;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
		return;
	}
	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
		return;
	}
	r = vce_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
		return;
	}
}

/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
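	/* i.e. room for a jump table per CP block (presumably the five
	 * blocks - PFP, ME, CE, MEC1, MEC2 - given the "* 5" above; an
	 * assumption, not spelled out here) plus 64KB of GDS backup */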
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}
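	/* the fallback to type-2 NOPs above is kept for older Hawaii
	 * firmware; presumably only the newer firmware (rdev->new_fw)
	 * handles type-3 NOP padding on the gfx ring - an assumption
	 * based on the check above, not documented here */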

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPUs only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}

/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more than
 * call asic-specific functions. This should also allow us to remove
 * a bunch of callback functions like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
8578	/* Must be an ATOMBIOS */
8579	if (!rdev->is_atom_bios) {
8580		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8581		return -EINVAL;
8582	}
8583	r = radeon_atombios_init(rdev);
8584	if (r)
8585		return r;
8586
8587	/* Post card if necessary */
8588	if (!radeon_card_posted(rdev)) {
8589		if (!rdev->bios) {
8590			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8591			return -EINVAL;
8592		}
8593		DRM_INFO("GPU not posted. posting now...\n");
8594		atom_asic_init(rdev->mode_info.atom_context);
8595	}
8596	/* init golden registers */
8597	cik_init_golden_registers(rdev);
8598	/* Initialize scratch registers */
8599	cik_scratch_init(rdev);
8600	/* Initialize surface registers */
8601	radeon_surface_init(rdev);
8602	/* Initialize clocks */
8603	radeon_get_clock_info(rdev->ddev);
8604
8605	/* Fence driver */
8606	r = radeon_fence_driver_init(rdev);
8607	if (r)
8608		return r;
8609
8610	/* initialize memory controller */
8611	r = cik_mc_init(rdev);
8612	if (r)
8613		return r;
8614	/* Memory manager */
8615	r = radeon_bo_init(rdev);
8616	if (r)
8617		return r;
8618
8619	if (rdev->flags & RADEON_IS_IGP) {
8620		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8621		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8622			r = cik_init_microcode(rdev);
8623			if (r) {
8624				DRM_ERROR("Failed to load firmware!\n");
8625				return r;
8626			}
8627		}
8628	} else {
8629		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8630		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8631		    !rdev->mc_fw) {
8632			r = cik_init_microcode(rdev);
8633			if (r) {
8634				DRM_ERROR("Failed to load firmware!\n");
8635				return r;
8636			}
8637		}
8638	}
8639
8640	/* Initialize power management */
8641	radeon_pm_init(rdev);
8642
8643	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8644	ring->ring_obj = NULL;
8645	r600_ring_init(rdev, ring, 1024 * 1024);
8646
8647	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8648	ring->ring_obj = NULL;
8649	r600_ring_init(rdev, ring, 1024 * 1024);
8650	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8651	if (r)
8652		return r;
8653
8654	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8655	ring->ring_obj = NULL;
8656	r600_ring_init(rdev, ring, 1024 * 1024);
8657	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8658	if (r)
8659		return r;
8660
8661	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8662	ring->ring_obj = NULL;
8663	r600_ring_init(rdev, ring, 256 * 1024);
8664
8665	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8666	ring->ring_obj = NULL;
8667	r600_ring_init(rdev, ring, 256 * 1024);
8668
8669	cik_uvd_init(rdev);
8670	cik_vce_init(rdev);
8671
8672	rdev->ih.ring_obj = NULL;
8673	r600_ih_ring_init(rdev, 64 * 1024);
8674
8675	r = r600_pcie_gart_init(rdev);
8676	if (r)
8677		return r;
8678
8679	rdev->accel_working = true;
8680	r = cik_startup(rdev);
8681	if (r) {
8682		dev_err(rdev->dev, "disabling GPU acceleration\n");
8683		cik_cp_fini(rdev);
8684		cik_sdma_fini(rdev);
8685		cik_irq_fini(rdev);
8686		sumo_rlc_fini(rdev);
8687		cik_mec_fini(rdev);
8688		radeon_wb_fini(rdev);
8689		radeon_ib_pool_fini(rdev);
8690		radeon_vm_manager_fini(rdev);
8691		radeon_irq_kms_fini(rdev);
8692		cik_pcie_gart_fini(rdev);
8693		rdev->accel_working = false;
8694	}
8695
8696	/* Don't start up if the MC ucode is missing.
8697	 * The default clocks and voltages before the MC ucode
8698	 * is loaded are not sufficient for advanced operations.
8699	 */
8700	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8701		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
8702		return -EINVAL;
8703	}
8704
8705	return 0;
8706}
8707
8708/**
8709 * cik_fini - asic specific driver and hw fini
8710 *
8711 * @rdev: radeon_device pointer
8712 *
8713 * Tear down the asic specific driver variables and program the hw
8714 * to an idle state (CIK).
8715 * Called at driver unload.
8716 */
8717void cik_fini(struct radeon_device *rdev)
8718{
8719	radeon_pm_fini(rdev);
8720	cik_cp_fini(rdev);
8721	cik_sdma_fini(rdev);
8722	cik_fini_pg(rdev);
8723	cik_fini_cg(rdev);
8724	cik_irq_fini(rdev);
8725	sumo_rlc_fini(rdev);
8726	cik_mec_fini(rdev);
8727	radeon_wb_fini(rdev);
8728	radeon_vm_manager_fini(rdev);
8729	radeon_ib_pool_fini(rdev);
8730	radeon_irq_kms_fini(rdev);
8731	uvd_v1_0_fini(rdev);
8732	radeon_uvd_fini(rdev);
8733	radeon_vce_fini(rdev);
8734	cik_pcie_gart_fini(rdev);
8735	r600_vram_scratch_fini(rdev);
8736	radeon_gem_fini(rdev);
8737	radeon_fence_driver_fini(rdev);
8738	radeon_bo_fini(rdev);
8739	radeon_atombios_fini(rdev);
8740	kfree(rdev->bios);
8741	rdev->bios = NULL;
8742}
8743
8744void dce8_program_fmt(struct drm_encoder *encoder)
8745{
8746	struct drm_device *dev = encoder->dev;
8747	struct radeon_device *rdev = dev->dev_private;
8748	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8749	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8750	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8751	int bpc = 0;
8752	u32 tmp = 0;
8753	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8754
8755	if (connector) {
8756		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8757		bpc = radeon_get_monitor_bpc(connector);
8758		dither = radeon_connector->dither;
8759	}
8760
8761	/* LVDS/eDP FMT is set up by atom */
8762	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8763		return;
8764
8765	/* not needed for analog */
8766	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8767	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8768		return;
8769
8770	if (bpc == 0)
8771		return;
8772
8773	switch (bpc) {
8774	case 6:
8775		if (dither == RADEON_FMT_DITHER_ENABLE)
8776			/* XXX sort out optimal dither settings */
8777			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8778				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8779		else
8780			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8781		break;
8782	case 8:
8783		if (dither == RADEON_FMT_DITHER_ENABLE)
8784			/* XXX sort out optimal dither settings */
8785			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786				FMT_RGB_RANDOM_ENABLE |
8787				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8788		else
8789			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8790		break;
8791	case 10:
8792		if (dither == RADEON_FMT_DITHER_ENABLE)
8793			/* XXX sort out optimal dither settings */
8794			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795				FMT_RGB_RANDOM_ENABLE |
8796				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8797		else
8798			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8799		break;
8800	default:
8801		/* not needed */
8802		break;
8803	}
8804
8805	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8806}
8807
8808/* display watermark setup */
8809/**
8810 * dce8_line_buffer_adjust - Set up the line buffer
8811 *
8812 * @rdev: radeon_device pointer
8813 * @radeon_crtc: the selected display controller
8814 * @mode: the current display mode on the selected display
8815 * controller
8816 *
8817 * Set up the line buffer allocation for
8818 * the selected display controller (CIK).
8819 * Returns the line buffer size in pixels.
8820 */
8821static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8822				   struct radeon_crtc *radeon_crtc,
8823				   struct drm_display_mode *mode)
8824{
8825	u32 tmp, buffer_alloc, i;
8826	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8827	/*
8828	 * Line Buffer Setup
8829	 * There are 6 line buffers, one for each display controller.
8830	 * There are 3 partitions per LB. Select the number of partitions
8831	 * to enable based on the display width.  For display widths larger
8832	 * than 4096, you need to use 2 display controllers and combine
8833	 * them using the stereo blender.
8834	 */
8835	if (radeon_crtc->base.enabled && mode) {
8836		if (mode->crtc_hdisplay < 1920) {
8837			tmp = 1;
8838			buffer_alloc = 2;
8839		} else if (mode->crtc_hdisplay < 2560) {
8840			tmp = 2;
8841			buffer_alloc = 2;
8842		} else if (mode->crtc_hdisplay < 4096) {
8843			tmp = 0;
8844			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845		} else {
8846			DRM_DEBUG_KMS("Mode too big for LB!\n");
8847			tmp = 0;
8848			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8849		}
8850	} else {
8851		tmp = 1;
8852		buffer_alloc = 0;
8853	}
8854
8855	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8856	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8857
8858	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8859	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8860	for (i = 0; i < rdev->usec_timeout; i++) {
8861		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8862		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8863			break;
8864		udelay(1);
8865	}
8866
8867	if (radeon_crtc->base.enabled && mode) {
8868		switch (tmp) {
8869		case 0:
8870		default:
8871			return 4096 * 2;
8872		case 1:
8873			return 1920 * 2;
8874		case 2:
8875			return 2560 * 2;
8876		}
8877	}
8878
8879	/* controller not enabled, so no lb used */
8880	return 0;
8881}
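
/*
 * Illustrative summary (editorial sketch, not driver code): the partition
 * selection above reduces to this mapping from mode width to the programmed
 * LB_MEMORY_CONFIG value and the returned line buffer size in pixels:
 *
 *	crtc_hdisplay < 1920 -> config 1, lb size 1920 * 2
 *	crtc_hdisplay < 2560 -> config 2, lb size 2560 * 2
 *	crtc_hdisplay < 4096 -> config 0, lb size 4096 * 2
 *	larger               -> config 0, needs 2 controllers + stereo blender
 */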
8882
8883/**
8884 * cik_get_number_of_dram_channels - get the number of dram channels
8885 *
8886 * @rdev: radeon_device pointer
8887 *
8888 * Look up the number of video ram channels (CIK).
8889 * Used for display watermark bandwidth calculations
8890 * Returns the number of dram channels
8891 */
8892static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8893{
8894	u32 tmp = RREG32(MC_SHARED_CHMAP);
8895
8896	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8897	case 0:
8898	default:
8899		return 1;
8900	case 1:
8901		return 2;
8902	case 2:
8903		return 4;
8904	case 3:
8905		return 8;
8906	case 4:
8907		return 3;
8908	case 5:
8909		return 6;
8910	case 6:
8911		return 10;
8912	case 7:
8913		return 12;
8914	case 8:
8915		return 16;
8916	}
8917}
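
/*
 * Illustrative sketch only (helper name hypothetical): the switch above is
 * equivalent to indexing a lookup table with the NOOFCHAN field, falling
 * back to 1 channel for values outside the table.
 */
static u32 __maybe_unused cik_dram_channels_table_example(u32 chmap)
{
	static const u32 channels[] = { 1, 2, 4, 8, 3, 6, 10, 12, 16 };
	u32 idx = (chmap & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT;

	return (idx < ARRAY_SIZE(channels)) ? channels[idx] : 1;
}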
8918
8919struct dce8_wm_params {
8920	u32 dram_channels; /* number of dram channels */
8921	u32 yclk;          /* bandwidth per dram data pin in kHz */
8922	u32 sclk;          /* engine clock in kHz */
8923	u32 disp_clk;      /* display clock in kHz */
8924	u32 src_width;     /* viewport width */
8925	u32 active_time;   /* active display time in ns */
8926	u32 blank_time;    /* blank time in ns */
8927	bool interlaced;    /* mode is interlaced */
8928	fixed20_12 vsc;    /* vertical scale ratio */
8929	u32 num_heads;     /* number of active crtcs */
8930	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8931	u32 lb_size;       /* line buffer allocated to pipe */
8932	u32 vtaps;         /* vertical scaler taps */
8933};
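
/*
 * Illustrative sketch (not driver code, helper name hypothetical): how
 * dce8_wm_params might be filled in for a single 1920x1080@60 head.  All
 * numbers are example values, not taken from real hardware.
 */
static void __maybe_unused dce8_wm_params_example(struct dce8_wm_params *wm)
{
	wm->dram_channels = 2;		/* e.g. a 64-bit memory interface */
	wm->yclk = 1000000;		/* 1 GHz memory clock, in kHz */
	wm->sclk = 800000;		/* 800 MHz engine clock, in kHz */
	wm->disp_clk = 148500;		/* 1080p60 pixel clock, in kHz */
	wm->src_width = 1920;
	wm->active_time = 12929;	/* 1920 * 1000000 / 148500 ns */
	wm->blank_time = 1885;		/* line time (14814 ns) - active time */
	wm->interlaced = false;
	wm->vsc.full = dfixed_const(1);	/* no vertical scaling */
	wm->num_heads = 1;
	wm->bytes_per_pixel = 4;	/* XRGB8888 scanout */
	wm->lb_size = 1920 * 2;		/* from dce8_line_buffer_adjust() */
	wm->vtaps = 1;
}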
8934
8935/**
8936 * dce8_dram_bandwidth - get the dram bandwidth
8937 *
8938 * @wm: watermark calculation data
8939 *
8940 * Calculate the raw dram bandwidth (CIK).
8941 * Used for display watermark bandwidth calculations
8942 * Returns the dram bandwidth in MBytes/s
8943 */
8944static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8945{
8946	/* Calculate raw DRAM Bandwidth */
8947	fixed20_12 dram_efficiency; /* 0.7 */
8948	fixed20_12 yclk, dram_channels, bandwidth;
8949	fixed20_12 a;
8950
8951	a.full = dfixed_const(1000);
8952	yclk.full = dfixed_const(wm->yclk);
8953	yclk.full = dfixed_div(yclk, a);
8954	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8955	a.full = dfixed_const(10);
8956	dram_efficiency.full = dfixed_const(7);
8957	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8958	bandwidth.full = dfixed_mul(dram_channels, yclk);
8959	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8960
8961	return dfixed_trunc(bandwidth);
8962}
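
/*
 * Sketch (not driver code, helper name hypothetical): the fixed-point
 * sequence above evaluates
 *
 *	MB/s = (yclk / 1000) * (dram_channels * 4) * 7 / 10
 *
 * i.e. pin clock in MHz, times bus width in bytes, times the assumed 0.7
 * DRAM efficiency.  A plain integer version, ignoring fixed-point
 * rounding differences:
 */
static u32 __maybe_unused dce8_dram_bandwidth_example(struct dce8_wm_params *wm)
{
	return (wm->yclk / 1000) * wm->dram_channels * 4 * 7 / 10;
}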
8963
8964/**
8965 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8966 *
8967 * @wm: watermark calculation data
8968 *
8969 * Calculate the dram bandwidth used for display (CIK).
8970 * Used for display watermark bandwidth calculations
8971 * Returns the dram bandwidth for display in MBytes/s
8972 */
8973static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8974{
8975	/* Calculate DRAM Bandwidth and the part allocated to display. */
8976	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8977	fixed20_12 yclk, dram_channels, bandwidth;
8978	fixed20_12 a;
8979
8980	a.full = dfixed_const(1000);
8981	yclk.full = dfixed_const(wm->yclk);
8982	yclk.full = dfixed_div(yclk, a);
8983	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8984	a.full = dfixed_const(10);
8985	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8986	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8987	bandwidth.full = dfixed_mul(dram_channels, yclk);
8988	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8989
8990	return dfixed_trunc(bandwidth);
8991}
8992
8993/**
8994 * dce8_data_return_bandwidth - get the data return bandwidth
8995 *
8996 * @wm: watermark calculation data
8997 *
8998 * Calculate the data return bandwidth used for display (CIK).
8999 * Used for display watermark bandwidth calculations
9000 * Returns the data return bandwidth in MBytes/s
9001 */
9002static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9003{
9004	/* Calculate the display Data return Bandwidth */
9005	fixed20_12 return_efficiency; /* 0.8 */
9006	fixed20_12 sclk, bandwidth;
9007	fixed20_12 a;
9008
9009	a.full = dfixed_const(1000);
9010	sclk.full = dfixed_const(wm->sclk);
9011	sclk.full = dfixed_div(sclk, a);
9012	a.full = dfixed_const(10);
9013	return_efficiency.full = dfixed_const(8);
9014	return_efficiency.full = dfixed_div(return_efficiency, a);
9015	a.full = dfixed_const(32);
9016	bandwidth.full = dfixed_mul(a, sclk);
9017	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9018
9019	return dfixed_trunc(bandwidth);
9020}
9021
9022/**
9023 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9024 *
9025 * @wm: watermark calculation data
9026 *
9027 * Calculate the dmif bandwidth used for display (CIK).
9028 * Used for display watermark bandwidth calculations
9029 * Returns the dmif bandwidth in MBytes/s
9030 */
9031static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9032{
9033	/* Calculate the DMIF Request Bandwidth */
9034	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9035	fixed20_12 disp_clk, bandwidth;
9036	fixed20_12 a, b;
9037
9038	a.full = dfixed_const(1000);
9039	disp_clk.full = dfixed_const(wm->disp_clk);
9040	disp_clk.full = dfixed_div(disp_clk, a);
9041	a.full = dfixed_const(32);
9042	b.full = dfixed_mul(a, disp_clk);
9043
9044	a.full = dfixed_const(10);
9045	disp_clk_request_efficiency.full = dfixed_const(8);
9046	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9047
9048	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9049
9050	return dfixed_trunc(bandwidth);
9051}
9052
9053/**
9054 * dce8_available_bandwidth - get the min available bandwidth
9055 *
9056 * @wm: watermark calculation data
9057 *
9058 * Calculate the min available bandwidth used for display (CIK).
9059 * Used for display watermark bandwidth calculations
9060 * Returns the min available bandwidth in MBytes/s
9061 */
9062static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9063{
9064	/* Calculate the available bandwidth. Display can use this temporarily but not on average. */
9065	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9066	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9067	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9068
9069	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9070}
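
/*
 * Worked example (illustrative numbers only), using the 1080p parameter
 * sketch above (yclk 1 GHz, 2 channels, sclk 800 MHz, disp_clk 148.5 MHz):
 *
 *	dram bandwidth = 1000 * 8 * 0.7    =  5600 MB/s
 *	data return    =  800 * 32 * 0.8   = 20480 MB/s
 *	dmif request   = 148.5 * 32 * 0.8 ~=  3801 MB/s
 *	available      = min of the three ~=  3801 MB/s
 */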
9071
9072/**
9073 * dce8_average_bandwidth - get the average available bandwidth
9074 *
9075 * @wm: watermark calculation data
9076 *
9077 * Calculate the average available bandwidth used for display (CIK).
9078 * Used for display watermark bandwidth calculations
9079 * Returns the average available bandwidth in MBytes/s
9080 */
9081static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9082{
9083	/* Calculate the display mode Average Bandwidth
9084	 * DisplayMode should contain the source and destination dimensions,
9085	 * timing, etc.
9086	 */
9087	fixed20_12 bpp;
9088	fixed20_12 line_time;
9089	fixed20_12 src_width;
9090	fixed20_12 bandwidth;
9091	fixed20_12 a;
9092
9093	a.full = dfixed_const(1000);
9094	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9095	line_time.full = dfixed_div(line_time, a);
9096	bpp.full = dfixed_const(wm->bytes_per_pixel);
9097	src_width.full = dfixed_const(wm->src_width);
9098	bandwidth.full = dfixed_mul(src_width, bpp);
9099	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9100	bandwidth.full = dfixed_div(bandwidth, line_time);
9101
9102	return dfixed_trunc(bandwidth);
9103}
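
/*
 * Sketch (illustrative): this is the source line in bytes, scaled by the
 * vertical scale ratio, over the line period.  For the 1080p example
 * above: 1920 * 4 * 1.0 / 14.814 us ~= 518 MB/s.
 */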
9104
9105/**
9106 * dce8_latency_watermark - get the latency watermark
9107 *
9108 * @wm: watermark calculation data
9109 *
9110 * Calculate the latency watermark (CIK).
9111 * Used for display watermark bandwidth calculations
9112 * Returns the latency watermark in ns
9113 */
9114static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9115{
9116	/* First calculate the latency in ns */
9117	u32 mc_latency = 2000; /* 2000 ns. */
9118	u32 available_bandwidth = dce8_available_bandwidth(wm);
9119	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9120	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9121	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9122	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9123		(wm->num_heads * cursor_line_pair_return_time);
9124	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9125	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9126	u32 tmp, dmif_size = 12288;
9127	fixed20_12 a, b, c;
9128
9129	if (wm->num_heads == 0)
9130		return 0;
9131
9132	a.full = dfixed_const(2);
9133	b.full = dfixed_const(1);
9134	if ((wm->vsc.full > a.full) ||
9135	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9136	    (wm->vtaps >= 5) ||
9137	    ((wm->vsc.full >= a.full) && wm->interlaced))
9138		max_src_lines_per_dst_line = 4;
9139	else
9140		max_src_lines_per_dst_line = 2;
9141
9142	a.full = dfixed_const(available_bandwidth);
9143	b.full = dfixed_const(wm->num_heads);
9144	a.full = dfixed_div(a, b);
9145	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9146	tmp = min(dfixed_trunc(a), tmp);
9147
9148	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9149
9150	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9151	b.full = dfixed_const(1000);
9152	c.full = dfixed_const(lb_fill_bw);
9153	b.full = dfixed_div(c, b);
9154	a.full = dfixed_div(a, b);
9155	line_fill_time = dfixed_trunc(a);
9156
9157	if (line_fill_time < wm->active_time)
9158		return latency;
9159	else
9160		return latency + (line_fill_time - wm->active_time);
9162}
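
/*
 * Worked example (illustrative): one 1080p head with the ~3801 MB/s
 * available bandwidth from above:
 *
 *	worst chunk return time = 512 * 8 * 1000 / 3801 ~= 1077 ns
 *	cursor line pair return = 128 * 4 * 1000 / 3801 ~=  134 ns
 *	dc pipe latency         = 40000000 / 148500     ~=  269 ns
 *	latency = 2000 + (2 * 1077 + 134) + 269         ~= 4557 ns
 *
 * plus, per the tail of the function, any line fill time in excess of
 * the active time.
 */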
9163
9164/**
9165 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9166 * average and available dram bandwidth
9167 *
9168 * @wm: watermark calculation data
9169 *
9170 * Check if the display average bandwidth fits in the display
9171 * dram bandwidth (CIK).
9172 * Used for display watermark bandwidth calculations
9173 * Returns true if the display fits, false if not.
9174 */
9175static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9176{
9177	if (dce8_average_bandwidth(wm) <=
9178	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9179		return true;
9180	else
9181		return false;
9182}
9183
9184/**
9185 * dce8_average_bandwidth_vs_available_bandwidth - check
9186 * average and available bandwidth
9187 *
9188 * @wm: watermark calculation data
9189 *
9190 * Check if the display average bandwidth fits in the display
9191 * available bandwidth (CIK).
9192 * Used for display watermark bandwidth calculations
9193 * Returns true if the display fits, false if not.
9194 */
9195static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9196{
9197	if (dce8_average_bandwidth(wm) <=
9198	    (dce8_available_bandwidth(wm) / wm->num_heads))
9199		return true;
9200	else
9201		return false;
9202}
9203
9204/**
9205 * dce8_check_latency_hiding - check latency hiding
9206 *
9207 * @wm: watermark calculation data
9208 *
9209 * Check latency hiding (CIK).
9210 * Used for display watermark bandwidth calculations
9211 * Returns true if the display fits, false if not.
9212 */
9213static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9214{
9215	u32 lb_partitions = wm->lb_size / wm->src_width;
9216	u32 line_time = wm->active_time + wm->blank_time;
9217	u32 latency_tolerant_lines;
9218	u32 latency_hiding;
9219	fixed20_12 a;
9220
9221	a.full = dfixed_const(1);
9222	if (wm->vsc.full > a.full)
9223		latency_tolerant_lines = 1;
9224	else {
9225		if (lb_partitions <= (wm->vtaps + 1))
9226			latency_tolerant_lines = 1;
9227		else
9228			latency_tolerant_lines = 2;
9229	}
9230
9231	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9232
9233	if (dce8_latency_watermark(wm) <= latency_hiding)
9234		return true;
9235	else
9236		return false;
9237}
9238
9239/**
9240 * dce8_program_watermarks - program display watermarks
9241 *
9242 * @rdev: radeon_device pointer
9243 * @radeon_crtc: the selected display controller
9244 * @lb_size: line buffer size
9245 * @num_heads: number of display controllers in use
9246 *
9247 * Calculate and program the display watermarks for the
9248 * selected display controller (CIK).
9249 */
9250static void dce8_program_watermarks(struct radeon_device *rdev,
9251				    struct radeon_crtc *radeon_crtc,
9252				    u32 lb_size, u32 num_heads)
9253{
9254	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9255	struct dce8_wm_params wm_low, wm_high;
9256	u32 active_time;
9257	u32 line_time = 0;
9258	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9259	u32 tmp, wm_mask;
9260
9261	if (radeon_crtc->base.enabled && num_heads && mode) {
9262		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9263					    (u32)mode->clock);
9264		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9265					  (u32)mode->clock);
9266		line_time = min(line_time, (u32)65535);
9267
9268		/* watermark for high clocks */
9269		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9270		    rdev->pm.dpm_enabled) {
9271			wm_high.yclk =
9272				radeon_dpm_get_mclk(rdev, false) * 10;
9273			wm_high.sclk =
9274				radeon_dpm_get_sclk(rdev, false) * 10;
9275		} else {
9276			wm_high.yclk = rdev->pm.current_mclk * 10;
9277			wm_high.sclk = rdev->pm.current_sclk * 10;
9278		}
9279
9280		wm_high.disp_clk = mode->clock;
9281		wm_high.src_width = mode->crtc_hdisplay;
9282		wm_high.active_time = active_time;
9283		wm_high.blank_time = line_time - wm_high.active_time;
9284		wm_high.interlaced = false;
9285		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9286			wm_high.interlaced = true;
9287		wm_high.vsc = radeon_crtc->vsc;
9288		wm_high.vtaps = 1;
9289		if (radeon_crtc->rmx_type != RMX_OFF)
9290			wm_high.vtaps = 2;
9291		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9292		wm_high.lb_size = lb_size;
9293		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9294		wm_high.num_heads = num_heads;
9295
9296		/* set for high clocks */
9297		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9298
9299		/* possibly force display priority to high */
9300		/* should really do this at mode validation time... */
9301		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9302		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9303		    !dce8_check_latency_hiding(&wm_high) ||
9304		    (rdev->disp_priority == 2)) {
9305			DRM_DEBUG_KMS("force priority to high\n");
9306		}
9307
9308		/* watermark for low clocks */
9309		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9310		    rdev->pm.dpm_enabled) {
9311			wm_low.yclk =
9312				radeon_dpm_get_mclk(rdev, true) * 10;
9313			wm_low.sclk =
9314				radeon_dpm_get_sclk(rdev, true) * 10;
9315		} else {
9316			wm_low.yclk = rdev->pm.current_mclk * 10;
9317			wm_low.sclk = rdev->pm.current_sclk * 10;
9318		}
9319
9320		wm_low.disp_clk = mode->clock;
9321		wm_low.src_width = mode->crtc_hdisplay;
9322		wm_low.active_time = active_time;
9323		wm_low.blank_time = line_time - wm_low.active_time;
9324		wm_low.interlaced = false;
9325		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9326			wm_low.interlaced = true;
9327		wm_low.vsc = radeon_crtc->vsc;
9328		wm_low.vtaps = 1;
9329		if (radeon_crtc->rmx_type != RMX_OFF)
9330			wm_low.vtaps = 2;
9331		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9332		wm_low.lb_size = lb_size;
9333		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9334		wm_low.num_heads = num_heads;
9335
9336		/* set for low clocks */
9337		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9338
9339		/* possibly force display priority to high */
9340		/* should really do this at mode validation time... */
9341		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9342		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9343		    !dce8_check_latency_hiding(&wm_low) ||
9344		    (rdev->disp_priority == 2)) {
9345			DRM_DEBUG_KMS("force priority to high\n");
9346		}
9347
9348		/* Save number of lines the linebuffer leads before the scanout */
9349		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9350	}
9351
9352	/* select wm A */
9353	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9354	tmp = wm_mask;
9355	tmp &= ~LATENCY_WATERMARK_MASK(3);
9356	tmp |= LATENCY_WATERMARK_MASK(1);
9357	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9358	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9359	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9360		LATENCY_HIGH_WATERMARK(line_time)));
9361	/* select wm B */
9362	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9363	tmp &= ~LATENCY_WATERMARK_MASK(3);
9364	tmp |= LATENCY_WATERMARK_MASK(2);
9365	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9366	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9367	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9368		LATENCY_HIGH_WATERMARK(line_time)));
9369	/* restore original selection */
9370	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9371
9372	/* save values for DPM */
9373	radeon_crtc->line_time = line_time;
9374	radeon_crtc->wm_high = latency_watermark_a;
9375	radeon_crtc->wm_low = latency_watermark_b;
9376}
9377
9378/**
9379 * dce8_bandwidth_update - program display watermarks
9380 *
9381 * @rdev: radeon_device pointer
9382 *
9383 * Calculate and program the display watermarks and line
9384 * buffer allocation (CIK).
9385 */
9386void dce8_bandwidth_update(struct radeon_device *rdev)
9387{
9388	struct drm_display_mode *mode = NULL;
9389	u32 num_heads = 0, lb_size;
9390	int i;
9391
9392	if (!rdev->mode_info.mode_config_initialized)
9393		return;
9394
9395	radeon_update_display_priority(rdev);
9396
9397	for (i = 0; i < rdev->num_crtc; i++) {
9398		if (rdev->mode_info.crtcs[i]->base.enabled)
9399			num_heads++;
9400	}
9401	for (i = 0; i < rdev->num_crtc; i++) {
9402		mode = &rdev->mode_info.crtcs[i]->base.mode;
9403		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9404		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9405	}
9406}
9407
9408/**
9409 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9410 *
9411 * @rdev: radeon_device pointer
9412 *
9413 * Fetches a GPU clock counter snapshot (CIK).
9414 * Returns the 64 bit clock counter snapshot.
9415 */
9416uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9417{
9418	uint64_t clock;
9419
9420	mutex_lock(&rdev->gpu_clock_mutex);
9421	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9422	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9423		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9424	mutex_unlock(&rdev->gpu_clock_mutex);
9425	return clock;
9426}
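
/*
 * Usage sketch (illustrative, helper name hypothetical): measuring elapsed
 * GPU clocks around an operation with the free-running 64-bit counter read
 * above; unsigned math handles wraparound.
 */
static u64 __maybe_unused cik_gpu_clock_delta_example(struct radeon_device *rdev)
{
	u64 start = cik_get_gpu_clock_counter(rdev);

	/* ... work to be measured ... */

	return cik_get_gpu_clock_counter(rdev) - start;
}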
9427
9428static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9429			     u32 cntl_reg, u32 status_reg)
9430{
9431	int r, i;
9432	struct atom_clock_dividers dividers;
9433	uint32_t tmp;
9434
9435	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9436					   clock, false, &dividers);
9437	if (r)
9438		return r;
9439
9440	tmp = RREG32_SMC(cntl_reg);
9441	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9442	tmp |= dividers.post_divider;
9443	WREG32_SMC(cntl_reg, tmp);
9444
9445	for (i = 0; i < 100; i++) {
9446		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9447			break;
9448		mdelay(10);
9449	}
9450	if (i == 100)
9451		return -ETIMEDOUT;
9452
9453	return 0;
9454}
9455
9456int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9457{
9458	int r = 0;
9459
9460	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9461	if (r)
9462		return r;
9463
9464	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9465	return r;
9466}
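
/*
 * Usage sketch (helper name hypothetical): drop UVD to the low default
 * clocks used elsewhere in the radeon driver, e.g. around firmware load.
 * The clock arguments appear to be in 10 kHz units (53300 -> 533 MHz
 * vclk, 40000 -> 400 MHz dclk).
 */
static int __maybe_unused cik_uvd_low_clocks_example(struct radeon_device *rdev)
{
	return cik_set_uvd_clocks(rdev, 53300, 40000);
}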
9467
9468int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9469{
9470	int r, i;
9471	struct atom_clock_dividers dividers;
9472	u32 tmp;
9473
9474	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475					   ecclk, false, &dividers);
9476	if (r)
9477		return r;
9478
9479	for (i = 0; i < 100; i++) {
9480		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9481			break;
9482		mdelay(10);
9483	}
9484	if (i == 100)
9485		return -ETIMEDOUT;
9486
9487	tmp = RREG32_SMC(CG_ECLK_CNTL);
9488	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9489	tmp |= dividers.post_divider;
9490	WREG32_SMC(CG_ECLK_CNTL, tmp);
9491
9492	for (i = 0; i < 100; i++) {
9493		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9494			break;
9495		mdelay(10);
9496	}
9497	if (i == 100)
9498		return -ETIMEDOUT;
9499
9500	return 0;
9501}
9502
9503static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9504{
9505	struct pci_dev *root = rdev->pdev->bus->self;
9506	enum pci_bus_speed speed_cap;
9507	int bridge_pos, gpu_pos;
9508	u32 speed_cntl, current_data_rate;
9509	int i;
9510	u16 tmp16;
9511
9512	if (pci_is_root_bus(rdev->pdev->bus))
9513		return;
9514
9515	if (radeon_pcie_gen2 == 0)
9516		return;
9517
9518	if (rdev->flags & RADEON_IS_IGP)
9519		return;
9520
9521	if (!(rdev->flags & RADEON_IS_PCIE))
9522		return;
9523
9524	speed_cap = pcie_get_speed_cap(root);
9525	if (speed_cap == PCI_SPEED_UNKNOWN)
9526		return;
9527
9528	if ((speed_cap != PCIE_SPEED_8_0GT) &&
9529	    (speed_cap != PCIE_SPEED_5_0GT))
9530		return;
9531
9532	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9533	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9534		LC_CURRENT_DATA_RATE_SHIFT;
9535	if (speed_cap == PCIE_SPEED_8_0GT) {
9536		if (current_data_rate == 2) {
9537			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9538			return;
9539		}
9540		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9541	} else if (speed_cap == PCIE_SPEED_5_0GT) {
9542		if (current_data_rate == 1) {
9543			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9544			return;
9545		}
9546		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9547	}
9548
9549	bridge_pos = pci_pcie_cap(root);
9550	if (!bridge_pos)
9551		return;
9552
9553	gpu_pos = pci_pcie_cap(rdev->pdev);
9554	if (!gpu_pos)
9555		return;
9556
9557	if (speed_cap == PCIE_SPEED_8_0GT) {
9558		/* re-try equalization if gen3 is not already enabled */
9559		if (current_data_rate != 2) {
9560			u16 bridge_cfg, gpu_cfg;
9561			u16 bridge_cfg2, gpu_cfg2;
9562			u32 max_lw, current_lw, tmp;
9563
9564			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9565			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9566
9567			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9568			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9569
9570			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9571			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9572
9573			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9574			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9575			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9576
9577			if (current_lw < max_lw) {
9578				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9579				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9580					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9581					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9582					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9583					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9584				}
9585			}
9586
9587			for (i = 0; i < 10; i++) {
9588				/* check status */
9589				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9590				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9591					break;
9592
9593				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9594				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9595
9596				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9597				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9598
9599				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9600				tmp |= LC_SET_QUIESCE;
9601				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9602
9603				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9604				tmp |= LC_REDO_EQ;
9605				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9606
9607				msleep(100);
9608
9609				/* linkctl */
9610				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9611				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9612				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9613				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9614
9615				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9616				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9617				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9618				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9619
9620				/* linkctl2 */
9621				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9622				tmp16 &= ~((1 << 4) | (7 << 9));
9623				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9624				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9625
9626				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9627				tmp16 &= ~((1 << 4) | (7 << 9));
9628				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9629				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9630
9631				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9632				tmp &= ~LC_SET_QUIESCE;
9633				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9634			}
9635		}
9636	}
9637
9638	/* set the link speed */
9639	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9640	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9641	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9642
9643	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9644	tmp16 &= ~0xf;
9645	if (speed_cap == PCIE_SPEED_8_0GT)
9646		tmp16 |= 3; /* gen3 */
9647	else if (speed_cap == PCIE_SPEED_5_0GT)
9648		tmp16 |= 2; /* gen2 */
9649	else
9650		tmp16 |= 1; /* gen1 */
9651	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9652
9653	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9654	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9655	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9656
9657	for (i = 0; i < rdev->usec_timeout; i++) {
9658		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9659		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9660			break;
9661		udelay(1);
9662	}
9663}
9664
9665static void cik_program_aspm(struct radeon_device *rdev)
9666{
9667	u32 data, orig;
9668	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9669	bool disable_clkreq = false;
9670
9671	if (radeon_aspm == 0)
9672		return;
9673
9674	/* XXX double check IGPs */
9675	if (rdev->flags & RADEON_IS_IGP)
9676		return;
9677
9678	if (!(rdev->flags & RADEON_IS_PCIE))
9679		return;
9680
9681	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9682	data &= ~LC_XMIT_N_FTS_MASK;
9683	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9684	if (orig != data)
9685		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9686
9687	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9688	data |= LC_GO_TO_RECOVERY;
9689	if (orig != data)
9690		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9691
9692	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9693	data |= P_IGNORE_EDB_ERR;
9694	if (orig != data)
9695		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9696
9697	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9698	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9699	data |= LC_PMI_TO_L1_DIS;
9700	if (!disable_l0s)
9701		data |= LC_L0S_INACTIVITY(7);
9702
9703	if (!disable_l1) {
9704		data |= LC_L1_INACTIVITY(7);
9705		data &= ~LC_PMI_TO_L1_DIS;
9706		if (orig != data)
9707			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9708
9709		if (!disable_plloff_in_l1) {
9710			bool clk_req_support;
9711
9712			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9713			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9714			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9715			if (orig != data)
9716				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9717
9718			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9719			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9720			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9721			if (orig != data)
9722				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9723
9724			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9725			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9726			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9727			if (orig != data)
9728				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9729
9730			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9731			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9732			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9733			if (orig != data)
9734				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9735
9736			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9737			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9738			data |= LC_DYN_LANES_PWR_STATE(3);
9739			if (orig != data)
9740				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9741
9742			if (!disable_clkreq &&
9743			    !pci_is_root_bus(rdev->pdev->bus)) {
9744				struct pci_dev *root = rdev->pdev->bus->self;
9745				u32 lnkcap;
9746
9747				clk_req_support = false;
9748				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9749				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9750					clk_req_support = true;
9751			} else {
9752				clk_req_support = false;
9753			}
9754
9755			if (clk_req_support) {
9756				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9757				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9758				if (orig != data)
9759					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9760
9761				orig = data = RREG32_SMC(THM_CLK_CNTL);
9762				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9763				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9764				if (orig != data)
9765					WREG32_SMC(THM_CLK_CNTL, data);
9766
9767				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9768				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9769				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9770				if (orig != data)
9771					WREG32_SMC(MISC_CLK_CTRL, data);
9772
9773				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9774				data &= ~BCLK_AS_XCLK;
9775				if (orig != data)
9776					WREG32_SMC(CG_CLKPIN_CNTL, data);
9777
9778				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9779				data &= ~FORCE_BIF_REFCLK_EN;
9780				if (orig != data)
9781					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9782
9783				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9784				data &= ~MPLL_CLKOUT_SEL_MASK;
9785				data |= MPLL_CLKOUT_SEL(4);
9786				if (orig != data)
9787					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9788			}
9789		}
9790	} else {
9791		if (orig != data)
9792			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9793	}
9794
9795	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9796	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9797	if (orig != data)
9798		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9799
9800	if (!disable_l0s) {
9801		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9802		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9803			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9804			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9805				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9806				data &= ~LC_L0S_INACTIVITY_MASK;
9807				if (orig != data)
9808					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9809			}
9810		}
9811	}
9812}