/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
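
/*
 * Worked example for the conversion above (illustrative, not in the
 * original source): a raw SMC reading of 728 yields (728 / 8) - 49 = 42
 * degrees C, returned as 42000 millidegrees.
 */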

/*
 * Indirect register accessors
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* post the data write as well */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

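/*
 * RLC save/restore register lists.  A descriptive note, inferred from how
 * the RLC setup code copies these tables into the save/restore buffer:
 * each entry appears to pack an instance select into the upper 16 bits
 * and a register dword offset into the lower 16 bits, i.e.
 * (instance << 16) | (reg >> 2), with the 0x00000000 slot that follows
 * looking like a placeholder for the saved value.  Bare counts such as
 * 0x3 and 0x5 appear to introduce runs of related entries.
 */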
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

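/*
 * The "golden" register tables below are triples of
 * {offset, and_mask, or_value} consumed by
 * radeon_program_register_sequence(), which is called in
 * cik_init_golden_registers() further down.  Roughly (a sketch, not the
 * verbatim helper):
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= (or_value & and_mask);
 *	WREG32(reg, tmp);
 *
 * An and_mask of 0xffffffff writes or_value directly.
 */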
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
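
/*
 * Note on the two accessors above: rdev->doorbell.ptr is an ioremapped
 * u32 aperture, so the doorbell index is in dwords, and the bounds check
 * keeps accesses inside the mapped range.
 */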

#define BONAIRE_IO_MC_REGS_SIZE 36

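/*
 * The MC "io" register tables below are {index, data} pairs;
 * ci_mc_load_microcode() further down writes each pair through
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA before loading the MC ucode.
 */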
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
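
/*
 * Typical usage, a sketch of the pattern used by callers elsewhere in this
 * file: serialize on rdev->srbm_mutex and restore the default instance
 * when done.
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... access instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */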
1750
1751/* ucode loading */
1752/**
1753 * ci_mc_load_microcode - load MC ucode into the hw
1754 *
1755 * @rdev: radeon_device pointer
1756 *
1757 * Load the GDDR MC ucode into the hw (CIK).
1758 * Returns 0 on success, error on failure.
1759 */
1760int ci_mc_load_microcode(struct radeon_device *rdev)
1761{
1762	const __be32 *fw_data;
1763	u32 running, blackout = 0;
1764	u32 *io_mc_regs;
1765	int i, regs_size, ucode_size;
1766
1767	if (!rdev->mc_fw)
1768		return -EINVAL;
1769
1770	ucode_size = rdev->mc_fw->size / 4;
1771
1772	switch (rdev->family) {
1773	case CHIP_BONAIRE:
1774		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1775		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1776		break;
1777	case CHIP_HAWAII:
1778		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1779		regs_size = HAWAII_IO_MC_REGS_SIZE;
1780		break;
1781	default:
1782		return -EINVAL;
1783	}
1784
1785	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1786
1787	if (running == 0) {
1788		if (running) {
1789			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1790			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1791		}
1792
1793		/* reset the engine and set to writable */
1794		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1795		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1796
1797		/* load mc io regs */
1798		for (i = 0; i < regs_size; i++) {
1799			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1800			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1801		}
1802		/* load the MC ucode */
1803		fw_data = (const __be32 *)rdev->mc_fw->data;
1804		for (i = 0; i < ucode_size; i++)
1805			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1806
1807		/* put the engine back into the active state */
1808		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1809		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1810		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1811
1812		/* wait for training to complete */
1813		for (i = 0; i < rdev->usec_timeout; i++) {
1814			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1815				break;
1816			udelay(1);
1817		}
1818		for (i = 0; i < rdev->usec_timeout; i++) {
1819			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1820				break;
1821			udelay(1);
1822		}
1826	}
1827
1828	return 0;
1829}
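
/*
 * Illustrative call site: hw init runs this once the images have been
 * fetched by cik_init_microcode(), roughly:
 *
 *	r = ci_mc_load_microcode(rdev);
 *	if (r) {
 *		DRM_ERROR("Failed to load MC firmware!\n");
 *		return r;
 *	}
 */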
1830
1831/**
1832 * cik_init_microcode - load ucode images from disk
1833 *
1834 * @rdev: radeon_device pointer
1835 *
1836 * Use the firmware interface to load the ucode images into
1837 * the driver (not loaded into hw).
1838 * Returns 0 on success, error on failure.
1839 */
1840static int cik_init_microcode(struct radeon_device *rdev)
1841{
1842	const char *chip_name;
1843	size_t pfp_req_size, me_req_size, ce_req_size,
1844		mec_req_size, rlc_req_size, mc_req_size = 0,
1845		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1846	char fw_name[30];
1847	int err;
1848
1849	DRM_DEBUG("\n");
1850
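	/*
	 * The *_UCODE_SIZE constants from radeon_ucode.h count 32-bit words,
	 * hence the "* 4" below to get the expected file size in bytes; the
	 * SMC image size is already in bytes and is only aligned up to 4.
	 */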
1851	switch (rdev->family) {
1852	case CHIP_BONAIRE:
1853		chip_name = "BONAIRE";
1854		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1855		me_req_size = CIK_ME_UCODE_SIZE * 4;
1856		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1857		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1858		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1859		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1860		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1861		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1862		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1863		break;
1864	case CHIP_HAWAII:
1865		chip_name = "HAWAII";
1866		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1867		me_req_size = CIK_ME_UCODE_SIZE * 4;
1868		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1869		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1870		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1871		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1872		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1873		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1874		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1875		break;
1876	case CHIP_KAVERI:
1877		chip_name = "KAVERI";
1878		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1879		me_req_size = CIK_ME_UCODE_SIZE * 4;
1880		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1881		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1882		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1883		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1884		break;
1885	case CHIP_KABINI:
1886		chip_name = "KABINI";
1887		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1888		me_req_size = CIK_ME_UCODE_SIZE * 4;
1889		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1890		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1891		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1892		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1893		break;
1894	case CHIP_MULLINS:
1895		chip_name = "MULLINS";
1896		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1897		me_req_size = CIK_ME_UCODE_SIZE * 4;
1898		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1899		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1900		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1901		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1902		break;
1903	default: BUG();
1904	}
1905
1906	DRM_INFO("Loading %s Microcode\n", chip_name);
1907
1908	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1909	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1910	if (err)
1911		goto out;
1912	if (rdev->pfp_fw->size != pfp_req_size) {
1913		printk(KERN_ERR
1914		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1915		       rdev->pfp_fw->size, fw_name);
1916		err = -EINVAL;
1917		goto out;
1918	}
1919
1920	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1921	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1922	if (err)
1923		goto out;
1924	if (rdev->me_fw->size != me_req_size) {
1925		printk(KERN_ERR
1926		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1927		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1929	}
1930
1931	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1932	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1933	if (err)
1934		goto out;
1935	if (rdev->ce_fw->size != ce_req_size) {
1936		printk(KERN_ERR
1937		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1938		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1940	}
1941
1942	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1943	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1944	if (err)
1945		goto out;
1946	if (rdev->mec_fw->size != mec_req_size) {
1947		printk(KERN_ERR
1948		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1949		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1951	}
1952
1953	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1954	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1955	if (err)
1956		goto out;
1957	if (rdev->rlc_fw->size != rlc_req_size) {
1958		printk(KERN_ERR
1959		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1960		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1962	}
1963
1964	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1965	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1966	if (err)
1967		goto out;
1968	if (rdev->sdma_fw->size != sdma_req_size) {
1969		printk(KERN_ERR
1970		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1971		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
1973	}
1974
1975	/* No SMC, MC ucode on APUs */
1976	if (!(rdev->flags & RADEON_IS_IGP)) {
1977		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1978		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1979		if (err) {
1980			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1981			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1982			if (err)
1983				goto out;
1984		}
1985		if ((rdev->mc_fw->size != mc_req_size) &&
1986		    (rdev->mc_fw->size != mc2_req_size)){
1987			printk(KERN_ERR
1988			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1989			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
1991		}
1992		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1993
1994		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1995		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1996		if (err) {
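			/*
			 * SMC firmware is treated as optional: the GPU still
			 * functions without it, only dynamic power management
			 * is unavailable, so the error is cleared here.
			 */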
1997			printk(KERN_ERR
1998			       "smc: error loading firmware \"%s\"\n",
1999			       fw_name);
2000			release_firmware(rdev->smc_fw);
2001			rdev->smc_fw = NULL;
2002			err = 0;
2003		} else if (rdev->smc_fw->size != smc_req_size) {
2004			printk(KERN_ERR
2005			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2006			       rdev->smc_fw->size, fw_name);
2007			err = -EINVAL;
2008		}
2009	}
2010
2011out:
2012	if (err) {
2013		if (err != -EINVAL)
2014			printk(KERN_ERR
2015			       "cik_cp: Failed to load firmware \"%s\"\n",
2016			       fw_name);
2017		release_firmware(rdev->pfp_fw);
2018		rdev->pfp_fw = NULL;
2019		release_firmware(rdev->me_fw);
2020		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
2025		release_firmware(rdev->mc_fw);
2026		rdev->mc_fw = NULL;
2027		release_firmware(rdev->smc_fw);
2028		rdev->smc_fw = NULL;
2029	}
2030	return err;
2031}
2032
2033/*
2034 * Core functions
2035 */
2036/**
2037 * cik_tiling_mode_table_init - init the hw tiling table
2038 *
2039 * @rdev: radeon_device pointer
2040 *
2041 * Starting with SI, the tiling setup is done globally in a
2042 * set of 32 tiling modes.  Rather than selecting each set of
2043 * parameters per surface as on older asics, we just select
2044 * which index in the tiling table we want to use, and the
2045 * surface uses those parameters (CIK).
2046 */
2047static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2048{
2049	const u32 num_tile_mode_states = 32;
2050	const u32 num_secondary_tile_mode_states = 16;
2051	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2052	u32 num_pipe_configs;
2053	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2054		rdev->config.cik.max_shader_engines;
2055
2056	switch (rdev->config.cik.mem_row_size_in_kb) {
2057	case 1:
2058		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2059		break;
2060	case 2:
2061	default:
2062		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2063		break;
2064	case 4:
2065		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2066		break;
2067	}
2068
2069	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2070	if (num_pipe_configs > 8)
2071		num_pipe_configs = 16;
2072
2073	if (num_pipe_configs == 16) {
2074		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2075			switch (reg_offset) {
2076			case 0:
2077				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2079						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2080						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2081				break;
2082			case 1:
2083				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2084						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2085						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2086						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2087				break;
2088			case 2:
2089				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2091						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2092						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2093				break;
2094			case 3:
2095				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2096						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2097						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2098						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2099				break;
2100			case 4:
2101				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2102						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2103						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2104						 TILE_SPLIT(split_equal_to_row_size));
2105				break;
2106			case 5:
2107				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2108						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2109						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110				break;
2111			case 6:
2112				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2113						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2114						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2115						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2116				break;
2117			case 7:
2118				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2119						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2120						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2121						 TILE_SPLIT(split_equal_to_row_size));
2122				break;
2123			case 8:
2124				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2125						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2126				break;
2127			case 9:
2128				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2129						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2130						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2131				break;
2132			case 10:
2133				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2134						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2135						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2136						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2137				break;
2138			case 11:
2139				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2140						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2141						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2142						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2143				break;
2144			case 12:
2145				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2147						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2148						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149				break;
2150			case 13:
2151				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2152						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2153						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2154				break;
2155			case 14:
2156				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2158						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2159						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2160				break;
2161			case 16:
2162				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2163						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2165						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2166				break;
2167			case 17:
2168				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2169						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2170						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2171						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2172				break;
2173			case 27:
2174				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2175						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2176						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2177				break;
2178			case 28:
2179				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2180						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2181						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2182						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183				break;
2184			case 29:
2185				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2186						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2187						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2188						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189				break;
2190			case 30:
2191				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2192						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2193						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2194						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2195				break;
2196			default:
2197				gb_tile_moden = 0;
2198				break;
2199			}
2200			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2201			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2202		}
2203		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2204			switch (reg_offset) {
2205			case 0:
2206				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2208						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2209						 NUM_BANKS(ADDR_SURF_16_BANK));
2210				break;
2211			case 1:
2212				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2214						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2215						 NUM_BANKS(ADDR_SURF_16_BANK));
2216				break;
2217			case 2:
2218				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2219						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2220						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2221						 NUM_BANKS(ADDR_SURF_16_BANK));
2222				break;
2223			case 3:
2224				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2225						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2226						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2227						 NUM_BANKS(ADDR_SURF_16_BANK));
2228				break;
2229			case 4:
2230				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2231						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2232						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2233						 NUM_BANKS(ADDR_SURF_8_BANK));
2234				break;
2235			case 5:
2236				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2237						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2238						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2239						 NUM_BANKS(ADDR_SURF_4_BANK));
2240				break;
2241			case 6:
2242				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2245						 NUM_BANKS(ADDR_SURF_2_BANK));
2246				break;
2247			case 8:
2248				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2249						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2250						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2251						 NUM_BANKS(ADDR_SURF_16_BANK));
2252				break;
2253			case 9:
2254				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2255						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2256						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257						 NUM_BANKS(ADDR_SURF_16_BANK));
2258				break;
2259			case 10:
2260				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2261						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2262						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2263						 NUM_BANKS(ADDR_SURF_16_BANK));
2264				break;
2265			case 11:
2266				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2268						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2269						 NUM_BANKS(ADDR_SURF_8_BANK));
2270				break;
2271			case 12:
2272				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2275						 NUM_BANKS(ADDR_SURF_4_BANK));
2276				break;
2277			case 13:
2278				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2281						 NUM_BANKS(ADDR_SURF_2_BANK));
2282				break;
2283			case 14:
2284				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2285						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2286						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2287						 NUM_BANKS(ADDR_SURF_2_BANK));
2288				break;
2289			default:
2290				gb_tile_moden = 0;
2291				break;
2292			}
			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2294		}
2295	} else if (num_pipe_configs == 8) {
2296		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2297			switch (reg_offset) {
2298			case 0:
2299				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2301						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2302						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2303				break;
2304			case 1:
2305				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2307						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2308						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2309				break;
2310			case 2:
2311				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2313						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2314						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2315				break;
2316			case 3:
2317				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2319						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2320						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2321				break;
2322			case 4:
2323				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2325						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2326						 TILE_SPLIT(split_equal_to_row_size));
2327				break;
2328			case 5:
2329				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2330						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332				break;
2333			case 6:
2334				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2335						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2337						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2338				break;
2339			case 7:
2340				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2341						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2343						 TILE_SPLIT(split_equal_to_row_size));
2344				break;
2345			case 8:
2346				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2347						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2348				break;
2349			case 9:
2350				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2351						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2352						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2353				break;
2354			case 10:
2355				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2356						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2357						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2358						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359				break;
2360			case 11:
2361				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2362						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2364						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365				break;
2366			case 12:
2367				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2368						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2370						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2371				break;
2372			case 13:
2373				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2375						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2376				break;
2377			case 14:
2378				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2381						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382				break;
2383			case 16:
2384				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2385						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2387						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388				break;
2389			case 17:
2390				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2391						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2393						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2394				break;
2395			case 27:
2396				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2397						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2398						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2399				break;
2400			case 28:
2401				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2404						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405				break;
2406			case 29:
2407				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2409						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2410						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411				break;
2412			case 30:
2413				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2416						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417				break;
2418			default:
2419				gb_tile_moden = 0;
2420				break;
2421			}
2422			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2423			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2424		}
2425		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2426			switch (reg_offset) {
2427			case 0:
2428				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2430						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2431						 NUM_BANKS(ADDR_SURF_16_BANK));
2432				break;
2433			case 1:
2434				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2436						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437						 NUM_BANKS(ADDR_SURF_16_BANK));
2438				break;
2439			case 2:
2440				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2442						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443						 NUM_BANKS(ADDR_SURF_16_BANK));
2444				break;
2445			case 3:
2446				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2448						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449						 NUM_BANKS(ADDR_SURF_16_BANK));
2450				break;
2451			case 4:
2452				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455						 NUM_BANKS(ADDR_SURF_8_BANK));
2456				break;
2457			case 5:
2458				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2461						 NUM_BANKS(ADDR_SURF_4_BANK));
2462				break;
2463			case 6:
2464				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467						 NUM_BANKS(ADDR_SURF_2_BANK));
2468				break;
2469			case 8:
2470				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2472						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2473						 NUM_BANKS(ADDR_SURF_16_BANK));
2474				break;
2475			case 9:
2476				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2479						 NUM_BANKS(ADDR_SURF_16_BANK));
2480				break;
2481			case 10:
2482				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485						 NUM_BANKS(ADDR_SURF_16_BANK));
2486				break;
2487			case 11:
2488				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491						 NUM_BANKS(ADDR_SURF_16_BANK));
2492				break;
2493			case 12:
2494				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497						 NUM_BANKS(ADDR_SURF_8_BANK));
2498				break;
2499			case 13:
2500				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503						 NUM_BANKS(ADDR_SURF_4_BANK));
2504				break;
2505			case 14:
2506				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2509						 NUM_BANKS(ADDR_SURF_2_BANK));
2510				break;
2511			default:
2512				gb_tile_moden = 0;
2513				break;
2514			}
2515			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2516			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2517		}
2518	} else if (num_pipe_configs == 4) {
2519		if (num_rbs == 4) {
2520			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2521				switch (reg_offset) {
2522				case 0:
2523					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2526							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2527					break;
2528				case 1:
2529					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2532							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2533					break;
2534				case 2:
2535					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2539					break;
2540				case 3:
2541					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2544							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2545					break;
2546				case 4:
2547					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2549							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2550							 TILE_SPLIT(split_equal_to_row_size));
2551					break;
2552				case 5:
2553					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2554							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2556					break;
2557				case 6:
2558					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2559							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2560							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2561							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2562					break;
2563				case 7:
2564					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2565							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2567							 TILE_SPLIT(split_equal_to_row_size));
2568					break;
2569				case 8:
2570					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2571							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2572					break;
2573				case 9:
2574					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2575							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2577					break;
2578				case 10:
2579					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2581							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2582							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583					break;
2584				case 11:
2585					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2587							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2588							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589					break;
2590				case 12:
2591					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2592							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2595					break;
2596				case 13:
2597					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2598							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2600					break;
2601				case 14:
2602					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2604							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606					break;
2607				case 16:
2608					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2609							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2611							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612					break;
2613				case 17:
2614					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2615							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2616							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2617							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2618					break;
2619				case 27:
2620					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2621							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2623					break;
2624				case 28:
2625					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2626							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2627							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629					break;
2630				case 29:
2631					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2633							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2635					break;
2636				case 30:
2637					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2638							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2639							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2640							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2641					break;
2642				default:
2643					gb_tile_moden = 0;
2644					break;
2645				}
2646				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2647				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2648			}
2649		} else if (num_rbs < 4) {
2650			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2651				switch (reg_offset) {
2652				case 0:
2653					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2657					break;
2658				case 1:
2659					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2661							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2662							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2663					break;
2664				case 2:
2665					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2668							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669					break;
2670				case 3:
2671					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2675					break;
2676				case 4:
2677					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2679							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2680							 TILE_SPLIT(split_equal_to_row_size));
2681					break;
2682				case 5:
2683					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2684							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2685							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2686					break;
2687				case 6:
2688					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2692					break;
2693				case 7:
2694					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2695							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2696							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2697							 TILE_SPLIT(split_equal_to_row_size));
2698					break;
2699				case 8:
2700					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2702					break;
2703				case 9:
2704					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2707					break;
2708				case 10:
2709					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2711							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2712							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713					break;
2714				case 11:
2715					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2717							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2718							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719					break;
2720				case 12:
2721					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2722							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2723							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725					break;
2726				case 13:
2727					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2730					break;
2731				case 14:
2732					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736					break;
2737				case 16:
2738					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2740							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742					break;
2743				case 17:
2744					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2745							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2746							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2747							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748					break;
2749				case 27:
2750					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2751							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2753					break;
2754				case 28:
2755					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759					break;
2760				case 29:
2761					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2762							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2763							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765					break;
2766				case 30:
2767					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2768							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2769							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2771					break;
2772				default:
2773					gb_tile_moden = 0;
2774					break;
2775				}
2776				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2777				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2778			}
2779		}
2780		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2781			switch (reg_offset) {
2782			case 0:
2783				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786						 NUM_BANKS(ADDR_SURF_16_BANK));
2787				break;
2788			case 1:
2789				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2791						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792						 NUM_BANKS(ADDR_SURF_16_BANK));
2793				break;
2794			case 2:
2795				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798						 NUM_BANKS(ADDR_SURF_16_BANK));
2799				break;
2800			case 3:
2801				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2804						 NUM_BANKS(ADDR_SURF_16_BANK));
2805				break;
2806			case 4:
2807				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2809						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2810						 NUM_BANKS(ADDR_SURF_16_BANK));
2811				break;
2812			case 5:
2813				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2816						 NUM_BANKS(ADDR_SURF_8_BANK));
2817				break;
2818			case 6:
2819				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2821						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2822						 NUM_BANKS(ADDR_SURF_4_BANK));
2823				break;
2824			case 8:
2825				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2826						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2827						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2828						 NUM_BANKS(ADDR_SURF_16_BANK));
2829				break;
2830			case 9:
2831				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2832						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834						 NUM_BANKS(ADDR_SURF_16_BANK));
2835				break;
2836			case 10:
2837				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840						 NUM_BANKS(ADDR_SURF_16_BANK));
2841				break;
2842			case 11:
2843				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846						 NUM_BANKS(ADDR_SURF_16_BANK));
2847				break;
2848			case 12:
2849				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852						 NUM_BANKS(ADDR_SURF_16_BANK));
2853				break;
2854			case 13:
2855				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2857						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2858						 NUM_BANKS(ADDR_SURF_8_BANK));
2859				break;
2860			case 14:
2861				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2864						 NUM_BANKS(ADDR_SURF_4_BANK));
2865				break;
2866			default:
2867				gb_tile_moden = 0;
2868				break;
2869			}
2870			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2871			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2872		}
2873	} else if (num_pipe_configs == 2) {
2874		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2875			switch (reg_offset) {
2876			case 0:
2877				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879						 PIPE_CONFIG(ADDR_SURF_P2) |
2880						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2881				break;
2882			case 1:
2883				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2885						 PIPE_CONFIG(ADDR_SURF_P2) |
2886						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2887				break;
2888			case 2:
2889				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891						 PIPE_CONFIG(ADDR_SURF_P2) |
2892						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893				break;
2894			case 3:
2895				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897						 PIPE_CONFIG(ADDR_SURF_P2) |
2898						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2899				break;
2900			case 4:
2901				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2903						 PIPE_CONFIG(ADDR_SURF_P2) |
2904						 TILE_SPLIT(split_equal_to_row_size));
2905				break;
2906			case 5:
2907				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908						 PIPE_CONFIG(ADDR_SURF_P2) |
2909						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910				break;
2911			case 6:
2912				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914						 PIPE_CONFIG(ADDR_SURF_P2) |
2915						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2916				break;
2917			case 7:
2918				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2919						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2920						 PIPE_CONFIG(ADDR_SURF_P2) |
2921						 TILE_SPLIT(split_equal_to_row_size));
2922				break;
2923			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P2));
2926				break;
2927			case 9:
2928				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2929						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2930						 PIPE_CONFIG(ADDR_SURF_P2));
2931				break;
2932			case 10:
2933				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2934						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935						 PIPE_CONFIG(ADDR_SURF_P2) |
2936						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2937				break;
2938			case 11:
2939				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2940						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941						 PIPE_CONFIG(ADDR_SURF_P2) |
2942						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943				break;
2944			case 12:
2945				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2946						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947						 PIPE_CONFIG(ADDR_SURF_P2) |
2948						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949				break;
2950			case 13:
2951				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952						 PIPE_CONFIG(ADDR_SURF_P2) |
2953						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2954				break;
2955			case 14:
2956				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958						 PIPE_CONFIG(ADDR_SURF_P2) |
2959						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960				break;
2961			case 16:
2962				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964						 PIPE_CONFIG(ADDR_SURF_P2) |
2965						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966				break;
2967			case 17:
2968				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2969						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970						 PIPE_CONFIG(ADDR_SURF_P2) |
2971						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972				break;
2973			case 27:
2974				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2975						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976						 PIPE_CONFIG(ADDR_SURF_P2));
2977				break;
2978			case 28:
2979				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981						 PIPE_CONFIG(ADDR_SURF_P2) |
2982						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983				break;
2984			case 29:
2985				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2986						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2987						 PIPE_CONFIG(ADDR_SURF_P2) |
2988						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2989				break;
2990			case 30:
2991				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2992						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2993						 PIPE_CONFIG(ADDR_SURF_P2) |
2994						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2995				break;
2996			default:
2997				gb_tile_moden = 0;
2998				break;
2999			}
3000			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3001			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3002		}
3003		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3004			switch (reg_offset) {
3005			case 0:
3006				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3007						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3008						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009						 NUM_BANKS(ADDR_SURF_16_BANK));
3010				break;
3011			case 1:
3012				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3013						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3014						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015						 NUM_BANKS(ADDR_SURF_16_BANK));
3016				break;
3017			case 2:
3018				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3020						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021						 NUM_BANKS(ADDR_SURF_16_BANK));
3022				break;
3023			case 3:
3024				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027						 NUM_BANKS(ADDR_SURF_16_BANK));
3028				break;
3029			case 4:
3030				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3032						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033						 NUM_BANKS(ADDR_SURF_16_BANK));
3034				break;
3035			case 5:
3036				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039						 NUM_BANKS(ADDR_SURF_16_BANK));
3040				break;
3041			case 6:
3042				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3043						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3044						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3045						 NUM_BANKS(ADDR_SURF_8_BANK));
3046				break;
3047			case 8:
3048				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3049						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3050						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3051						 NUM_BANKS(ADDR_SURF_16_BANK));
3052				break;
3053			case 9:
3054				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3055						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3056						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057						 NUM_BANKS(ADDR_SURF_16_BANK));
3058				break;
3059			case 10:
3060				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3061						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3062						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3063						 NUM_BANKS(ADDR_SURF_16_BANK));
3064				break;
3065			case 11:
3066				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3067						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3068						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069						 NUM_BANKS(ADDR_SURF_16_BANK));
3070				break;
3071			case 12:
3072				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3073						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3074						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3075						 NUM_BANKS(ADDR_SURF_16_BANK));
3076				break;
3077			case 13:
3078				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3081						 NUM_BANKS(ADDR_SURF_16_BANK));
3082				break;
3083			case 14:
3084				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3087						 NUM_BANKS(ADDR_SURF_8_BANK));
3088				break;
3089			default:
3090				gb_tile_moden = 0;
3091				break;
3092			}
3093			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3094			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3095		}
3096	} else
3097		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3098}
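
/*
 * The tables programmed above are also visible to userspace: the copies
 * cached in rdev->config.cik are handed out through the RADEON_INFO
 * ioctl (RADEON_INFO_SI_TILE_MODE_ARRAY and
 * RADEON_INFO_CIK_MACROTILE_MODE_ARRAY), which is why each branch fills
 * the arrays in addition to writing the registers.
 */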
3099
3100/**
3101 * cik_select_se_sh - select which SE, SH to address
3102 *
3103 * @rdev: radeon_device pointer
3104 * @se_num: shader engine to address
3105 * @sh_num: sh block to address
3106 *
3107 * Select which SE, SH combinations to address. Certain
3108 * registers are instanced per SE or SH.  0xffffffff means
3109 * broadcast to all SEs or SHs (CIK).
3110 */
3111static void cik_select_se_sh(struct radeon_device *rdev,
3112			     u32 se_num, u32 sh_num)
3113{
3114	u32 data = INSTANCE_BROADCAST_WRITES;
3115
3116	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3117		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3118	else if (se_num == 0xffffffff)
3119		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3120	else if (sh_num == 0xffffffff)
3121		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3122	else
3123		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3124	WREG32(GRBM_GFX_INDEX, data);
3125}
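
/*
 * Typical usage (see cik_setup_rb() below): select one SE/SH instance,
 * access the instanced registers, then return to broadcast mode so that
 * subsequent writes reach every instance:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */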
3126
3127/**
3128 * cik_create_bitmask - create a bitmask
3129 *
3130 * @bit_width: length of the mask
3131 *
 * Create a variable length bit mask (CIK).
3133 * Returns the bitmask.
3134 */
3135static u32 cik_create_bitmask(u32 bit_width)
3136{
3137	u32 i, mask = 0;
3138
3139	for (i = 0; i < bit_width; i++) {
3140		mask <<= 1;
3141		mask |= 1;
3142	}
3143	return mask;
3144}
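
/*
 * Equivalent to (1 << bit_width) - 1 for bit_width < 32, e.g.
 * cik_create_bitmask(4) == 0xf; the loop form also yields 0xffffffff
 * for bit_width == 32 without relying on an undefined 32-bit shift.
 */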
3145
3146/**
3147 * cik_get_rb_disabled - computes the mask of disabled RBs
3148 *
3149 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine)
 * @sh_per_se: number of SH blocks per SE for the asic
3153 *
3154 * Calculates the bitmask of disabled RBs (CIK).
3155 * Returns the disabled RB bitmask.
3156 */
3157static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3158			      u32 max_rb_num_per_se,
3159			      u32 sh_per_se)
3160{
3161	u32 data, mask;
3162
3163	data = RREG32(CC_RB_BACKEND_DISABLE);
3164	if (data & 1)
3165		data &= BACKEND_DISABLE_MASK;
3166	else
3167		data = 0;
3168	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3169
3170	data >>= BACKEND_DISABLE_SHIFT;
3171
3172	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3173
3174	return data & mask;
3175}
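
/*
 * Worked example: on a part with 2 RBs per SE and one SH per SE
 * (e.g. Bonaire), mask = cik_create_bitmask(2) = 0x3; if the fuses mark
 * RB1 of the selected SE as disabled, the shifted BACKEND_DISABLE field
 * reads 0x2 and 0x2 is returned.
 */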
3176
3177/**
3178 * cik_setup_rb - setup the RBs on the asic
3179 *
3180 * @rdev: radeon_device pointer
3181 * @se_num: number of SEs (shader engines) for the asic
3182 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine)
3184 *
3185 * Configures per-SE/SH RB registers (CIK).
3186 */
3187static void cik_setup_rb(struct radeon_device *rdev,
3188			 u32 se_num, u32 sh_per_se,
3189			 u32 max_rb_num_per_se)
3190{
3191	int i, j;
3192	u32 data, mask;
3193	u32 disabled_rbs = 0;
3194	u32 enabled_rbs = 0;
3195
3196	for (i = 0; i < se_num; i++) {
3197		for (j = 0; j < sh_per_se; j++) {
3198			cik_select_se_sh(rdev, i, j);
3199			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3200			if (rdev->family == CHIP_HAWAII)
3201				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3202			else
3203				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3204		}
3205	}
3206	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3207
3208	mask = 1;
3209	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3210		if (!(disabled_rbs & mask))
3211			enabled_rbs |= mask;
3212		mask <<= 1;
3213	}
3214
3215	rdev->config.cik.backend_enable_mask = enabled_rbs;
3216
3217	for (i = 0; i < se_num; i++) {
3218		cik_select_se_sh(rdev, i, 0xffffffff);
3219		data = 0;
3220		for (j = 0; j < sh_per_se; j++) {
3221			switch (enabled_rbs & 3) {
3222			case 0:
3223				if (j == 0)
3224					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3225				else
3226					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3227				break;
3228			case 1:
3229				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3230				break;
3231			case 2:
3232				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3233				break;
3234			case 3:
3235			default:
3236				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3237				break;
3238			}
3239			enabled_rbs >>= 2;
3240		}
3241		WREG32(PA_SC_RASTER_CONFIG, data);
3242	}
3243	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3244}
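
/*
 * Note on the bookkeeping above: disabled_rbs packs one
 * CIK_RB_BITMAP_WIDTH_PER_SH-bit field per SH (4 bits per SH on
 * Hawaii), so a fully enabled Bonaire (2 SEs, 1 SH each, 2 RBs per SE)
 * ends up with enabled_rbs == 0xf and backend_enable_mask covering all
 * four RBs.
 */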
3245
3246/**
3247 * cik_gpu_init - setup the 3D engine
3248 *
3249 * @rdev: radeon_device pointer
3250 *
3251 * Configures the 3D engine and tiling configuration
3252 * registers so that the 3D engine is usable.
3253 */
3254static void cik_gpu_init(struct radeon_device *rdev)
3255{
3256	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3257	u32 mc_shared_chmap, mc_arb_ramcfg;
3258	u32 hdp_host_path_cntl;
3259	u32 tmp;
3260	int i, j;
3261
3262	switch (rdev->family) {
3263	case CHIP_BONAIRE:
3264		rdev->config.cik.max_shader_engines = 2;
3265		rdev->config.cik.max_tile_pipes = 4;
3266		rdev->config.cik.max_cu_per_sh = 7;
3267		rdev->config.cik.max_sh_per_se = 1;
3268		rdev->config.cik.max_backends_per_se = 2;
3269		rdev->config.cik.max_texture_channel_caches = 4;
3270		rdev->config.cik.max_gprs = 256;
3271		rdev->config.cik.max_gs_threads = 32;
3272		rdev->config.cik.max_hw_contexts = 8;
3273
3274		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3275		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3276		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3277		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3278		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3279		break;
3280	case CHIP_HAWAII:
3281		rdev->config.cik.max_shader_engines = 4;
3282		rdev->config.cik.max_tile_pipes = 16;
3283		rdev->config.cik.max_cu_per_sh = 11;
3284		rdev->config.cik.max_sh_per_se = 1;
3285		rdev->config.cik.max_backends_per_se = 4;
3286		rdev->config.cik.max_texture_channel_caches = 16;
3287		rdev->config.cik.max_gprs = 256;
3288		rdev->config.cik.max_gs_threads = 32;
3289		rdev->config.cik.max_hw_contexts = 8;
3290
3291		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3292		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3293		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3294		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3295		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3296		break;
3297	case CHIP_KAVERI:
3298		rdev->config.cik.max_shader_engines = 1;
3299		rdev->config.cik.max_tile_pipes = 4;
3300		if ((rdev->pdev->device == 0x1304) ||
3301		    (rdev->pdev->device == 0x1305) ||
3302		    (rdev->pdev->device == 0x130C) ||
3303		    (rdev->pdev->device == 0x130F) ||
3304		    (rdev->pdev->device == 0x1310) ||
3305		    (rdev->pdev->device == 0x1311) ||
3306		    (rdev->pdev->device == 0x131C)) {
3307			rdev->config.cik.max_cu_per_sh = 8;
3308			rdev->config.cik.max_backends_per_se = 2;
3309		} else if ((rdev->pdev->device == 0x1309) ||
3310			   (rdev->pdev->device == 0x130A) ||
3311			   (rdev->pdev->device == 0x130D) ||
3312			   (rdev->pdev->device == 0x1313) ||
3313			   (rdev->pdev->device == 0x131D)) {
3314			rdev->config.cik.max_cu_per_sh = 6;
3315			rdev->config.cik.max_backends_per_se = 2;
3316		} else if ((rdev->pdev->device == 0x1306) ||
3317			   (rdev->pdev->device == 0x1307) ||
3318			   (rdev->pdev->device == 0x130B) ||
3319			   (rdev->pdev->device == 0x130E) ||
3320			   (rdev->pdev->device == 0x1315) ||
3321			   (rdev->pdev->device == 0x131B)) {
3322			rdev->config.cik.max_cu_per_sh = 4;
3323			rdev->config.cik.max_backends_per_se = 1;
3324		} else {
3325			rdev->config.cik.max_cu_per_sh = 3;
3326			rdev->config.cik.max_backends_per_se = 1;
3327		}
3328		rdev->config.cik.max_sh_per_se = 1;
3329		rdev->config.cik.max_texture_channel_caches = 4;
3330		rdev->config.cik.max_gprs = 256;
3331		rdev->config.cik.max_gs_threads = 16;
3332		rdev->config.cik.max_hw_contexts = 8;
3333
3334		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3335		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3336		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3337		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3338		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3339		break;
3340	case CHIP_KABINI:
3341	case CHIP_MULLINS:
3342	default:
3343		rdev->config.cik.max_shader_engines = 1;
3344		rdev->config.cik.max_tile_pipes = 2;
3345		rdev->config.cik.max_cu_per_sh = 2;
3346		rdev->config.cik.max_sh_per_se = 1;
3347		rdev->config.cik.max_backends_per_se = 1;
3348		rdev->config.cik.max_texture_channel_caches = 2;
3349		rdev->config.cik.max_gprs = 256;
3350		rdev->config.cik.max_gs_threads = 16;
3351		rdev->config.cik.max_hw_contexts = 8;
3352
3353		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3354		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3355		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3356		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3357		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3358		break;
3359	}
3360
3361	/* Initialize HDP */
3362	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3363		WREG32((0x2c14 + j), 0x00000000);
3364		WREG32((0x2c18 + j), 0x00000000);
3365		WREG32((0x2c1c + j), 0x00000000);
3366		WREG32((0x2c20 + j), 0x00000000);
3367		WREG32((0x2c24 + j), 0x00000000);
3368	}
3369
3370	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3371
3372	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3373
3374	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3375	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3376
3377	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3378	rdev->config.cik.mem_max_burst_length_bytes = 256;
3379	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3380	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3381	if (rdev->config.cik.mem_row_size_in_kb > 4)
3382		rdev->config.cik.mem_row_size_in_kb = 4;
3383	/* XXX use MC settings? */
3384	rdev->config.cik.shader_engine_tile_size = 32;
3385	rdev->config.cik.num_gpus = 1;
3386	rdev->config.cik.multi_gpu_tile_size = 64;
3387
3388	/* fix up row size */
3389	gb_addr_config &= ~ROW_SIZE_MASK;
3390	switch (rdev->config.cik.mem_row_size_in_kb) {
3391	case 1:
3392	default:
3393		gb_addr_config |= ROW_SIZE(0);
3394		break;
3395	case 2:
3396		gb_addr_config |= ROW_SIZE(1);
3397		break;
3398	case 4:
3399		gb_addr_config |= ROW_SIZE(2);
3400		break;
3401	}
3402
3403	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3404	 * not have bank info, so create a custom tiling dword.
3405	 * bits 3:0   num_pipes
3406	 * bits 7:4   num_banks
3407	 * bits 11:8  group_size
3408	 * bits 15:12 row_size
3409	 */
3410	rdev->config.cik.tile_config = 0;
3411	switch (rdev->config.cik.num_tile_pipes) {
3412	case 1:
3413		rdev->config.cik.tile_config |= (0 << 0);
3414		break;
3415	case 2:
3416		rdev->config.cik.tile_config |= (1 << 0);
3417		break;
3418	case 4:
3419		rdev->config.cik.tile_config |= (2 << 0);
3420		break;
3421	case 8:
3422	default:
3423		/* XXX what about 12? */
3424		rdev->config.cik.tile_config |= (3 << 0);
3425		break;
3426	}
3427	rdev->config.cik.tile_config |=
3428		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3429	rdev->config.cik.tile_config |=
3430		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3431	rdev->config.cik.tile_config |=
3432		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
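	/* Worked example (all field values hypothetical): with 4 tile pipes
	 * (bits 3:0 = 2), a NOOFBANK field of 2, a pipe-interleave field of 0
	 * and a row-size field of 2, the dword assembled above reads
	 * tile_config = 2 | (2 << 4) | (0 << 8) | (2 << 12) = 0x2022.
	 */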
3433
3434	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3435	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3436	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3437	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3438	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3439	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3440	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3441	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3442
3443	cik_tiling_mode_table_init(rdev);
3444
3445	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3446		     rdev->config.cik.max_sh_per_se,
3447		     rdev->config.cik.max_backends_per_se);
3448
3449	/* set HW defaults for 3D engine */
3450	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3451
3452	WREG32(SX_DEBUG_1, 0x20);
3453
3454	WREG32(TA_CNTL_AUX, 0x00010000);
3455
3456	tmp = RREG32(SPI_CONFIG_CNTL);
3457	tmp |= 0x03000000;
3458	WREG32(SPI_CONFIG_CNTL, tmp);
3459
3460	WREG32(SQ_CONFIG, 1);
3461
3462	WREG32(DB_DEBUG, 0);
3463
3464	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3465	tmp |= 0x00000400;
3466	WREG32(DB_DEBUG2, tmp);
3467
3468	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3469	tmp |= 0x00020200;
3470	WREG32(DB_DEBUG3, tmp);
3471
3472	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3473	tmp |= 0x00018208;
3474	WREG32(CB_HW_CONTROL, tmp);
3475
3476	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3477
3478	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3479				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3480				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3481				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3482
3483	WREG32(VGT_NUM_INSTANCES, 1);
3484
3485	WREG32(CP_PERFMON_CNTL, 0);
3486
3487	WREG32(SQ_CONFIG, 0);
3488
3489	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3490					  FORCE_EOV_MAX_REZ_CNT(255)));
3491
3492	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3493	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3494
3495	WREG32(VGT_GS_VERTEX_REUSE, 16);
3496	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3497
3498	tmp = RREG32(HDP_MISC_CNTL);
3499	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3500	WREG32(HDP_MISC_CNTL, tmp);
3501
3502	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3503	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3504
3505	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3506	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3507
3508	udelay(50);
3509}
3510
3511/*
3512 * GPU scratch registers helpers function.
3513 */
3514/**
3515 * cik_scratch_init - setup driver info for CP scratch regs
3516 *
3517 * @rdev: radeon_device pointer
3518 *
3519 * Set up the number and offset of the CP scratch registers.
3520	 * NOTE: use of CP scratch registers is a legacy interface and
3521 * is not used by default on newer asics (r6xx+).  On newer asics,
3522 * memory buffers are used for fences rather than scratch regs.
3523 */
3524static void cik_scratch_init(struct radeon_device *rdev)
3525{
3526	int i;
3527
3528	rdev->scratch.num_reg = 7;
3529	rdev->scratch.reg_base = SCRATCH_REG0;
3530	for (i = 0; i < rdev->scratch.num_reg; i++) {
3531		rdev->scratch.free[i] = true;
3532		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3533	}
3534}
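/* A sketch of the resulting layout (offsets follow from the loop above):
 * reg[0] = SCRATCH_REG0, reg[1] = SCRATCH_REG0 + 4, ...,
 * reg[6] = SCRATCH_REG0 + 0x18, with all seven registers initially free.
 */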
3535
3536/**
3537 * cik_ring_test - basic gfx ring test
3538 *
3539 * @rdev: radeon_device pointer
3540 * @ring: radeon_ring structure holding ring information
3541 *
3542 * Allocate a scratch register and write to it using the gfx ring (CIK).
3543 * Provides a basic gfx ring test to verify that the ring is working.
3544	 * Used by cik_cp_gfx_resume().
3545 * Returns 0 on success, error on failure.
3546 */
3547int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3548{
3549	uint32_t scratch;
3550	uint32_t tmp = 0;
3551	unsigned i;
3552	int r;
3553
3554	r = radeon_scratch_get(rdev, &scratch);
3555	if (r) {
3556		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3557		return r;
3558	}
3559	WREG32(scratch, 0xCAFEDEAD);
3560	r = radeon_ring_lock(rdev, ring, 3);
3561	if (r) {
3562		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3563		radeon_scratch_free(rdev, scratch);
3564		return r;
3565	}
3566	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3567	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3568	radeon_ring_write(ring, 0xDEADBEEF);
3569	radeon_ring_unlock_commit(rdev, ring);
3570
3571	for (i = 0; i < rdev->usec_timeout; i++) {
3572		tmp = RREG32(scratch);
3573		if (tmp == 0xDEADBEEF)
3574			break;
3575		DRM_UDELAY(1);
3576	}
3577	if (i < rdev->usec_timeout) {
3578		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3579	} else {
3580		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3581			  ring->idx, scratch, tmp);
3582		r = -EINVAL;
3583	}
3584	radeon_scratch_free(rdev, scratch);
3585	return r;
3586}
3587
3588/**
3589 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3590 *
3591 * @rdev: radeon_device pointer
3592 * @ridx: radeon ring index
3593 *
3594 * Emits an hdp flush on the cp.
3595 */
3596static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3597				       int ridx)
3598{
3599	struct radeon_ring *ring = &rdev->ring[ridx];
3600	u32 ref_and_mask;
3601
3602	switch (ring->idx) {
3603	case CAYMAN_RING_TYPE_CP1_INDEX:
3604	case CAYMAN_RING_TYPE_CP2_INDEX:
3605	default:
3606		switch (ring->me) {
3607		case 0:
3608			ref_and_mask = CP2 << ring->pipe;
3609			break;
3610		case 1:
3611			ref_and_mask = CP6 << ring->pipe;
3612			break;
3613		default:
3614			return;
3615		}
3616		break;
3617	case RADEON_RING_TYPE_GFX_INDEX:
3618		ref_and_mask = CP0;
3619		break;
3620	}
3621
3622	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3623	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3624				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3625				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3626	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3627	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3628	radeon_ring_write(ring, ref_and_mask);
3629	radeon_ring_write(ring, ref_and_mask);
3630	radeon_ring_write(ring, 0x20); /* poll interval */
3631}
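/* Illustrative trace of the mask selection above: a compute queue with
 * ring->me == 1, pipe 2 uses ref_and_mask = CP6 << 2; the WAIT_REG_MEM
 * packet then polls GPU_HDP_FLUSH_DONE every 0x20 clocks until that bit
 * matches the request bit written to GPU_HDP_FLUSH_REQ.
 */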
3632
3633/**
3634 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3635 *
3636 * @rdev: radeon_device pointer
3637 * @fence: radeon fence object
3638 *
3639	 * Emits a fence sequence number on the gfx ring and flushes
3640 * GPU caches.
3641 */
3642void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3643			     struct radeon_fence *fence)
3644{
3645	struct radeon_ring *ring = &rdev->ring[fence->ring];
3646	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3647
3648	/* EVENT_WRITE_EOP - flush caches, send int */
3649	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3650	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3651				 EOP_TC_ACTION_EN |
3652				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3653				 EVENT_INDEX(5)));
3654	radeon_ring_write(ring, addr & 0xfffffffc);
3655	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3656	radeon_ring_write(ring, fence->seq);
3657	radeon_ring_write(ring, 0);
3658	/* HDP flush */
3659	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3660}
3661
3662/**
3663 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3664 *
3665 * @rdev: radeon_device pointer
3666 * @fence: radeon fence object
3667 *
3668	 * Emits a fence sequence number on the compute ring and flushes
3669 * GPU caches.
3670 */
3671void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3672				 struct radeon_fence *fence)
3673{
3674	struct radeon_ring *ring = &rdev->ring[fence->ring];
3675	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3676
3677	/* RELEASE_MEM - flush caches, send int */
3678	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3679	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3680				 EOP_TC_ACTION_EN |
3681				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3682				 EVENT_INDEX(5)));
3683	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3684	radeon_ring_write(ring, addr & 0xfffffffc);
3685	radeon_ring_write(ring, upper_32_bits(addr));
3686	radeon_ring_write(ring, fence->seq);
3687	radeon_ring_write(ring, 0);
3688	/* HDP flush */
3689	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3690}
3691
3692bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3693			     struct radeon_ring *ring,
3694			     struct radeon_semaphore *semaphore,
3695			     bool emit_wait)
3696{
3697	uint64_t addr = semaphore->gpu_addr;
3698	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3699
3700	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3701	radeon_ring_write(ring, addr & 0xffffffff);
3702	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3703
3704	return true;
3705}
3706
3707/**
3708 * cik_copy_cpdma - copy pages using the CP DMA engine
3709 *
3710 * @rdev: radeon_device pointer
3711 * @src_offset: src GPU address
3712 * @dst_offset: dst GPU address
3713 * @num_gpu_pages: number of GPU pages to xfer
3714 * @fence: radeon fence object
3715 *
3716	 * Copy GPU pages using the CP DMA engine (CIK+).
3717 * Used by the radeon ttm implementation to move pages if
3718 * registered as the asic copy callback.
3719 */
3720int cik_copy_cpdma(struct radeon_device *rdev,
3721		   uint64_t src_offset, uint64_t dst_offset,
3722		   unsigned num_gpu_pages,
3723		   struct radeon_fence **fence)
3724{
3725	struct radeon_semaphore *sem = NULL;
3726	int ring_index = rdev->asic->copy.blit_ring_index;
3727	struct radeon_ring *ring = &rdev->ring[ring_index];
3728	u32 size_in_bytes, cur_size_in_bytes, control;
3729	int i, num_loops;
3730	int r = 0;
3731
3732	r = radeon_semaphore_create(rdev, &sem);
3733	if (r) {
3734		DRM_ERROR("radeon: moving bo (%d).\n", r);
3735		return r;
3736	}
3737
3738	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3739	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3740	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3741	if (r) {
3742		DRM_ERROR("radeon: moving bo (%d).\n", r);
3743		radeon_semaphore_free(rdev, &sem, NULL);
3744		return r;
3745	}
3746
3747	radeon_semaphore_sync_to(sem, *fence);
3748	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3749
3750	for (i = 0; i < num_loops; i++) {
3751		cur_size_in_bytes = size_in_bytes;
3752		if (cur_size_in_bytes > 0x1fffff)
3753			cur_size_in_bytes = 0x1fffff;
3754		size_in_bytes -= cur_size_in_bytes;
3755		control = 0;
3756		if (size_in_bytes == 0)
3757			control |= PACKET3_DMA_DATA_CP_SYNC;
3758		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3759		radeon_ring_write(ring, control);
3760		radeon_ring_write(ring, lower_32_bits(src_offset));
3761		radeon_ring_write(ring, upper_32_bits(src_offset));
3762		radeon_ring_write(ring, lower_32_bits(dst_offset));
3763		radeon_ring_write(ring, upper_32_bits(dst_offset));
3764		radeon_ring_write(ring, cur_size_in_bytes);
3765		src_offset += cur_size_in_bytes;
3766		dst_offset += cur_size_in_bytes;
3767	}
3768
3769	r = radeon_fence_emit(rdev, fence, ring->idx);
3770	if (r) {
3771		radeon_ring_unlock_undo(rdev, ring);
3772		radeon_semaphore_free(rdev, &sem, NULL);
3773		return r;
3774	}
3775
3776	radeon_ring_unlock_commit(rdev, ring);
3777	radeon_semaphore_free(rdev, &sem, *fence);
3778
3779	return r;
3780}
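/* Worked example for the sizing above (assuming 4 KiB GPU pages): copying
 * 2048 pages gives size_in_bytes = 0x800000 and
 * num_loops = DIV_ROUND_UP(0x800000, 0x1fffff) = 5, so the ring lock
 * reserves 5 * 7 + 18 = 53 dwords.
 */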
3781
3782/*
3783 * IB stuff
3784 */
3785/**
3786 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3787 *
3788 * @rdev: radeon_device pointer
3789 * @ib: radeon indirect buffer object
3790 *
3791	 * Emits a DE (drawing engine) or CE (constant engine) IB
3792 * on the gfx ring.  IBs are usually generated by userspace
3793 * acceleration drivers and submitted to the kernel for
3794	 * scheduling on the ring.  This function schedules the IB
3795 * on the gfx ring for execution by the GPU.
3796 */
3797void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3798{
3799	struct radeon_ring *ring = &rdev->ring[ib->ring];
3800	u32 header, control = INDIRECT_BUFFER_VALID;
3801
3802	if (ib->is_const_ib) {
3803		/* set switch buffer packet before const IB */
3804		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3805		radeon_ring_write(ring, 0);
3806
3807		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3808	} else {
3809		u32 next_rptr;
3810		if (ring->rptr_save_reg) {
3811			next_rptr = ring->wptr + 3 + 4;
3812			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3813			radeon_ring_write(ring, ((ring->rptr_save_reg -
3814						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3815			radeon_ring_write(ring, next_rptr);
3816		} else if (rdev->wb.enabled) {
3817			next_rptr = ring->wptr + 5 + 4;
3818			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3819			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3820			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3821			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3822			radeon_ring_write(ring, next_rptr);
3823		}
3824
3825		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3826	}
3827
3828	control |= ib->length_dw |
3829		(ib->vm ? (ib->vm->id << 24) : 0);
3830
3831	radeon_ring_write(ring, header);
3832	radeon_ring_write(ring,
3833#ifdef __BIG_ENDIAN
3834			  (2 << 0) |
3835#endif
3836			  (ib->gpu_addr & 0xFFFFFFFC));
3837	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3838	radeon_ring_write(ring, control);
3839}
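/* Sketch of the control word built above (numbers hypothetical): a
 * 256-dword DE IB running in VM id 3 yields
 * control = INDIRECT_BUFFER_VALID | 256 | (3 << 24).
 */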
3840
3841/**
3842 * cik_ib_test - basic gfx ring IB test
3843 *
3844 * @rdev: radeon_device pointer
3845 * @ring: radeon_ring structure holding ring information
3846 *
3847 * Allocate an IB and execute it on the gfx ring (CIK).
3848 * Provides a basic gfx ring test to verify that IBs are working.
3849 * Returns 0 on success, error on failure.
3850 */
3851int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3852{
3853	struct radeon_ib ib;
3854	uint32_t scratch;
3855	uint32_t tmp = 0;
3856	unsigned i;
3857	int r;
3858
3859	r = radeon_scratch_get(rdev, &scratch);
3860	if (r) {
3861		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3862		return r;
3863	}
3864	WREG32(scratch, 0xCAFEDEAD);
3865	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3866	if (r) {
3867		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3868		radeon_scratch_free(rdev, scratch);
3869		return r;
3870	}
3871	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3872	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3873	ib.ptr[2] = 0xDEADBEEF;
3874	ib.length_dw = 3;
3875	r = radeon_ib_schedule(rdev, &ib, NULL);
3876	if (r) {
3877		radeon_scratch_free(rdev, scratch);
3878		radeon_ib_free(rdev, &ib);
3879		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3880		return r;
3881	}
3882	r = radeon_fence_wait(ib.fence, false);
3883	if (r) {
3884		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3885		radeon_scratch_free(rdev, scratch);
3886		radeon_ib_free(rdev, &ib);
3887		return r;
3888	}
3889	for (i = 0; i < rdev->usec_timeout; i++) {
3890		tmp = RREG32(scratch);
3891		if (tmp == 0xDEADBEEF)
3892			break;
3893		DRM_UDELAY(1);
3894	}
3895	if (i < rdev->usec_timeout) {
3896		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3897	} else {
3898		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3899			  scratch, tmp);
3900		r = -EINVAL;
3901	}
3902	radeon_scratch_free(rdev, scratch);
3903	radeon_ib_free(rdev, &ib);
3904	return r;
3905}
3906
3907/*
3908 * CP.
3909	 * On CIK, gfx and compute now have independent command processors.
3910 *
3911 * GFX
3912 * Gfx consists of a single ring and can process both gfx jobs and
3913 * compute jobs.  The gfx CP consists of three microengines (ME):
3914 * PFP - Pre-Fetch Parser
3915 * ME - Micro Engine
3916 * CE - Constant Engine
3917 * The PFP and ME make up what is considered the Drawing Engine (DE).
3918	 * The CE is an asynchronous engine used for updating buffer descriptors
3919 * used by the DE so that they can be loaded into cache in parallel
3920 * while the DE is processing state update packets.
3921 *
3922 * Compute
3923 * The compute CP consists of two microengines (ME):
3924 * MEC1 - Compute MicroEngine 1
3925 * MEC2 - Compute MicroEngine 2
3926 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3927 * The queues are exposed to userspace and are programmed directly
3928 * by the compute runtime.
3929 */
3930/**
3931 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3932 *
3933 * @rdev: radeon_device pointer
3934 * @enable: enable or disable the MEs
3935 *
3936 * Halts or unhalts the gfx MEs.
3937 */
3938static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3939{
3940	if (enable)
3941		WREG32(CP_ME_CNTL, 0);
3942	else {
3943		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3944			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3945		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3946		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3947	}
3948	udelay(50);
3949}
3950
3951/**
3952 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3953 *
3954 * @rdev: radeon_device pointer
3955 *
3956 * Loads the gfx PFP, ME, and CE ucode.
3957 * Returns 0 for success, -EINVAL if the ucode is not available.
3958 */
3959static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3960{
3961	const __be32 *fw_data;
3962	int i;
3963
3964	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3965		return -EINVAL;
3966
3967	cik_cp_gfx_enable(rdev, false);
3968
3969	/* PFP */
3970	fw_data = (const __be32 *)rdev->pfp_fw->data;
3971	WREG32(CP_PFP_UCODE_ADDR, 0);
3972	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3973		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3974	WREG32(CP_PFP_UCODE_ADDR, 0);
3975
3976	/* CE */
3977	fw_data = (const __be32 *)rdev->ce_fw->data;
3978	WREG32(CP_CE_UCODE_ADDR, 0);
3979	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3980		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3981	WREG32(CP_CE_UCODE_ADDR, 0);
3982
3983	/* ME */
3984	fw_data = (const __be32 *)rdev->me_fw->data;
3985	WREG32(CP_ME_RAM_WADDR, 0);
3986	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3987		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3988	WREG32(CP_ME_RAM_WADDR, 0);
3989
3990	WREG32(CP_PFP_UCODE_ADDR, 0);
3991	WREG32(CP_CE_UCODE_ADDR, 0);
3992	WREG32(CP_ME_RAM_WADDR, 0);
3993	WREG32(CP_ME_RAM_RADDR, 0);
3994	return 0;
3995}
3996
3997/**
3998 * cik_cp_gfx_start - start the gfx ring
3999 *
4000 * @rdev: radeon_device pointer
4001 *
4002 * Enables the ring and loads the clear state context and other
4003 * packets required to init the ring.
4004 * Returns 0 for success, error for failure.
4005 */
4006static int cik_cp_gfx_start(struct radeon_device *rdev)
4007{
4008	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4009	int r, i;
4010
4011	/* init the CP */
4012	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4013	WREG32(CP_ENDIAN_SWAP, 0);
4014	WREG32(CP_DEVICE_ID, 1);
4015
4016	cik_cp_gfx_enable(rdev, true);
4017
4018	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4019	if (r) {
4020		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4021		return r;
4022	}
4023
4024	/* init the CE partitions.  CE only used for gfx on CIK */
4025	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4026	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4027	radeon_ring_write(ring, 0xc000);
4028	radeon_ring_write(ring, 0xc000);
4029
4030	/* setup clear context state */
4031	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4032	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4033
4034	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4035	radeon_ring_write(ring, 0x80000000);
4036	radeon_ring_write(ring, 0x80000000);
4037
4038	for (i = 0; i < cik_default_size; i++)
4039		radeon_ring_write(ring, cik_default_state[i]);
4040
4041	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4042	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4043
4044	/* set clear context state */
4045	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4046	radeon_ring_write(ring, 0);
4047
4048	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4049	radeon_ring_write(ring, 0x00000316);
4050	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4051	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4052
4053	radeon_ring_unlock_commit(rdev, ring);
4054
4055	return 0;
4056}
4057
4058/**
4059 * cik_cp_gfx_fini - stop the gfx ring
4060 *
4061 * @rdev: radeon_device pointer
4062 *
4063 * Stop the gfx ring and tear down the driver ring
4064 * info.
4065 */
4066static void cik_cp_gfx_fini(struct radeon_device *rdev)
4067{
4068	cik_cp_gfx_enable(rdev, false);
4069	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4070}
4071
4072/**
4073 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4074 *
4075 * @rdev: radeon_device pointer
4076 *
4077 * Program the location and size of the gfx ring buffer
4078 * and test it to make sure it's working.
4079 * Returns 0 for success, error for failure.
4080 */
4081static int cik_cp_gfx_resume(struct radeon_device *rdev)
4082{
4083	struct radeon_ring *ring;
4084	u32 tmp;
4085	u32 rb_bufsz;
4086	u64 rb_addr;
4087	int r;
4088
4089	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4090	if (rdev->family != CHIP_HAWAII)
4091		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4092
4093	/* Set the write pointer delay */
4094	WREG32(CP_RB_WPTR_DELAY, 0);
4095
4096	/* set the RB to use vmid 0 */
4097	WREG32(CP_RB_VMID, 0);
4098
4099	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4100
4101	/* ring 0 - compute and gfx */
4102	/* Set ring buffer size */
4103	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4104	rb_bufsz = order_base_2(ring->ring_size / 8);
4105	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4106#ifdef __BIG_ENDIAN
4107	tmp |= BUF_SWAP_32BIT;
4108#endif
4109	WREG32(CP_RB0_CNTL, tmp);
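	/* Worked example (assuming a 1 MiB gfx ring and 4 KiB GPU pages):
	 * rb_bufsz = order_base_2(0x100000 / 8) = 17 and the rptr block
	 * size field is order_base_2(4096 / 8) = 9, so tmp = (9 << 8) | 17
	 * before any endian-swap bits.
	 */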
4110
4111	/* Initialize the ring buffer's read and write pointers */
4112	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4113	ring->wptr = 0;
4114	WREG32(CP_RB0_WPTR, ring->wptr);
4115
4116	/* set the wb address whether it's enabled or not */
4117	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4118	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4119
4120	/* scratch register shadowing is no longer supported */
4121	WREG32(SCRATCH_UMSK, 0);
4122
4123	if (!rdev->wb.enabled)
4124		tmp |= RB_NO_UPDATE;
4125
4126	mdelay(1);
4127	WREG32(CP_RB0_CNTL, tmp);
4128
4129	rb_addr = ring->gpu_addr >> 8;
4130	WREG32(CP_RB0_BASE, rb_addr);
4131	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4132
4133	/* start the ring */
4134	cik_cp_gfx_start(rdev);
4135	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4136	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4137	if (r) {
4138		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4139		return r;
4140	}
4141
4142	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4143		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4144
4145	return 0;
4146}
4147
4148u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4149		     struct radeon_ring *ring)
4150{
4151	u32 rptr;
4152
4153	if (rdev->wb.enabled)
4154		rptr = rdev->wb.wb[ring->rptr_offs/4];
4155	else
4156		rptr = RREG32(CP_RB0_RPTR);
4157
4158	return rptr;
4159}
4160
4161u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4162		     struct radeon_ring *ring)
4163{
4164	u32 wptr;
4165
4166	wptr = RREG32(CP_RB0_WPTR);
4167
4168	return wptr;
4169}
4170
4171void cik_gfx_set_wptr(struct radeon_device *rdev,
4172		      struct radeon_ring *ring)
4173{
4174	WREG32(CP_RB0_WPTR, ring->wptr);
4175	(void)RREG32(CP_RB0_WPTR);
4176}
4177
4178u32 cik_compute_get_rptr(struct radeon_device *rdev,
4179			 struct radeon_ring *ring)
4180{
4181	u32 rptr;
4182
4183	if (rdev->wb.enabled) {
4184		rptr = rdev->wb.wb[ring->rptr_offs/4];
4185	} else {
4186		mutex_lock(&rdev->srbm_mutex);
4187		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4188		rptr = RREG32(CP_HQD_PQ_RPTR);
4189		cik_srbm_select(rdev, 0, 0, 0, 0);
4190		mutex_unlock(&rdev->srbm_mutex);
4191	}
4192
4193	return rptr;
4194}
4195
4196u32 cik_compute_get_wptr(struct radeon_device *rdev,
4197			 struct radeon_ring *ring)
4198{
4199	u32 wptr;
4200
4201	if (rdev->wb.enabled) {
4202		/* XXX check if swapping is necessary on BE */
4203		wptr = rdev->wb.wb[ring->wptr_offs/4];
4204	} else {
4205		mutex_lock(&rdev->srbm_mutex);
4206		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4207		wptr = RREG32(CP_HQD_PQ_WPTR);
4208		cik_srbm_select(rdev, 0, 0, 0, 0);
4209		mutex_unlock(&rdev->srbm_mutex);
4210	}
4211
4212	return wptr;
4213}
4214
4215void cik_compute_set_wptr(struct radeon_device *rdev,
4216			  struct radeon_ring *ring)
4217{
4218	/* XXX check if swapping is necessary on BE */
4219	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4220	WDOORBELL32(ring->doorbell_index, ring->wptr);
4221}
4222
4223/**
4224 * cik_cp_compute_enable - enable/disable the compute CP MEs
4225 *
4226 * @rdev: radeon_device pointer
4227 * @enable: enable or disable the MEs
4228 *
4229 * Halts or unhalts the compute MEs.
4230 */
4231static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232{
4233	if (enable)
4234		WREG32(CP_MEC_CNTL, 0);
4235	else {
4236		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4237		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4238		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4239	}
4240	udelay(50);
4241}
4242
4243/**
4244 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4245 *
4246 * @rdev: radeon_device pointer
4247 *
4248 * Loads the compute MEC1&2 ucode.
4249 * Returns 0 for success, -EINVAL if the ucode is not available.
4250 */
4251static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4252{
4253	const __be32 *fw_data;
4254	int i;
4255
4256	if (!rdev->mec_fw)
4257		return -EINVAL;
4258
4259	cik_cp_compute_enable(rdev, false);
4260
4261	/* MEC1 */
4262	fw_data = (const __be32 *)rdev->mec_fw->data;
4263	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4264	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4265		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4266	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4267
4268	if (rdev->family == CHIP_KAVERI) {
4269		/* MEC2 */
4270		fw_data = (const __be32 *)rdev->mec_fw->data;
4271		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4272		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4273			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4274		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4275	}
4276
4277	return 0;
4278}
4279
4280/**
4281 * cik_cp_compute_start - start the compute queues
4282 *
4283 * @rdev: radeon_device pointer
4284 *
4285 * Enable the compute queues.
4286 * Returns 0 for success, error for failure.
4287 */
4288static int cik_cp_compute_start(struct radeon_device *rdev)
4289{
4290	cik_cp_compute_enable(rdev, true);
4291
4292	return 0;
4293}
4294
4295/**
4296 * cik_cp_compute_fini - stop the compute queues
4297 *
4298 * @rdev: radeon_device pointer
4299 *
4300 * Stop the compute queues and tear down the driver queue
4301 * info.
4302 */
4303static void cik_cp_compute_fini(struct radeon_device *rdev)
4304{
4305	int i, idx, r;
4306
4307	cik_cp_compute_enable(rdev, false);
4308
4309	for (i = 0; i < 2; i++) {
4310		if (i == 0)
4311			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4312		else
4313			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4314
4315		if (rdev->ring[idx].mqd_obj) {
4316			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4317			if (unlikely(r != 0))
4318				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4319
4320			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4321			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4322
4323			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4324			rdev->ring[idx].mqd_obj = NULL;
4325		}
4326	}
4327}
4328
4329static void cik_mec_fini(struct radeon_device *rdev)
4330{
4331	int r;
4332
4333	if (rdev->mec.hpd_eop_obj) {
4334		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4335		if (unlikely(r != 0))
4336			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4337		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4338		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4339
4340		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4341		rdev->mec.hpd_eop_obj = NULL;
4342	}
4343}
4344
4345#define MEC_HPD_SIZE 2048
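/* Size sanity check for the define above: the EOP control field programmed
 * in cik_cp_compute_resume() is order_base_2(MEC_HPD_SIZE / 8) =
 * order_base_2(256) = 8, which the hw interprets as 2^(8+1) = 512 dwords,
 * i.e. exactly MEC_HPD_SIZE bytes per pipe.
 */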
4346
4347static int cik_mec_init(struct radeon_device *rdev)
4348{
4349	int r;
4350	u32 *hpd;
4351
4352	/*
4353	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4354	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4355	 */
4356	if (rdev->family == CHIP_KAVERI)
4357		rdev->mec.num_mec = 2;
4358	else
4359		rdev->mec.num_mec = 1;
4360	rdev->mec.num_pipe = 4;
4361	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4362
4363	if (rdev->mec.hpd_eop_obj == NULL) {
4364		r = radeon_bo_create(rdev,
4365				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4366				     PAGE_SIZE, true,
4367				     RADEON_GEM_DOMAIN_GTT, NULL,
4368				     &rdev->mec.hpd_eop_obj);
4369		if (r) {
4370			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4371			return r;
4372		}
4373	}
4374
4375	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4376	if (unlikely(r != 0)) {
4377		cik_mec_fini(rdev);
4378		return r;
4379	}
4380	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4381			  &rdev->mec.hpd_eop_gpu_addr);
4382	if (r) {
4383		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4384		cik_mec_fini(rdev);
4385		return r;
4386	}
4387	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4388	if (r) {
4389		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4390		cik_mec_fini(rdev);
4391		return r;
4392	}
4393
4394	/* clear memory.  Not sure if this is required or not */
4395	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4396
4397	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4398	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4399
4400	return 0;
4401}
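/* Allocation math for the bo above: on Kaveri (2 MEC * 4 pipes) the HPD EOP
 * buffer is 2 * 4 * MEC_HPD_SIZE * 2 = 32 KiB; on the 1-MEC parts
 * (Bonaire/Kabini/Mullins) it is 16 KiB.
 */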
4402
4403struct hqd_registers
4404{
4405	u32 cp_mqd_base_addr;
4406	u32 cp_mqd_base_addr_hi;
4407	u32 cp_hqd_active;
4408	u32 cp_hqd_vmid;
4409	u32 cp_hqd_persistent_state;
4410	u32 cp_hqd_pipe_priority;
4411	u32 cp_hqd_queue_priority;
4412	u32 cp_hqd_quantum;
4413	u32 cp_hqd_pq_base;
4414	u32 cp_hqd_pq_base_hi;
4415	u32 cp_hqd_pq_rptr;
4416	u32 cp_hqd_pq_rptr_report_addr;
4417	u32 cp_hqd_pq_rptr_report_addr_hi;
4418	u32 cp_hqd_pq_wptr_poll_addr;
4419	u32 cp_hqd_pq_wptr_poll_addr_hi;
4420	u32 cp_hqd_pq_doorbell_control;
4421	u32 cp_hqd_pq_wptr;
4422	u32 cp_hqd_pq_control;
4423	u32 cp_hqd_ib_base_addr;
4424	u32 cp_hqd_ib_base_addr_hi;
4425	u32 cp_hqd_ib_rptr;
4426	u32 cp_hqd_ib_control;
4427	u32 cp_hqd_iq_timer;
4428	u32 cp_hqd_iq_rptr;
4429	u32 cp_hqd_dequeue_request;
4430	u32 cp_hqd_dma_offload;
4431	u32 cp_hqd_sema_cmd;
4432	u32 cp_hqd_msg_type;
4433	u32 cp_hqd_atomic0_preop_lo;
4434	u32 cp_hqd_atomic0_preop_hi;
4435	u32 cp_hqd_atomic1_preop_lo;
4436	u32 cp_hqd_atomic1_preop_hi;
4437	u32 cp_hqd_hq_scheduler0;
4438	u32 cp_hqd_hq_scheduler1;
4439	u32 cp_mqd_control;
4440};
4441
4442struct bonaire_mqd
4443{
4444	u32 header;
4445	u32 dispatch_initiator;
4446	u32 dimensions[3];
4447	u32 start_idx[3];
4448	u32 num_threads[3];
4449	u32 pipeline_stat_enable;
4450	u32 perf_counter_enable;
4451	u32 pgm[2];
4452	u32 tba[2];
4453	u32 tma[2];
4454	u32 pgm_rsrc[2];
4455	u32 vmid;
4456	u32 resource_limits;
4457	u32 static_thread_mgmt01[2];
4458	u32 tmp_ring_size;
4459	u32 static_thread_mgmt23[2];
4460	u32 restart[3];
4461	u32 thread_trace_enable;
4462	u32 reserved1;
4463	u32 user_data[16];
4464	u32 vgtcs_invoke_count[2];
4465	struct hqd_registers queue_state;
4466	u32 dequeue_cntr;
4467	u32 interrupt_queue[64];
4468};
4469
4470/**
4471 * cik_cp_compute_resume - setup the compute queue registers
4472 *
4473 * @rdev: radeon_device pointer
4474 *
4475 * Program the compute queues and test them to make sure they
4476 * are working.
4477 * Returns 0 for success, error for failure.
4478 */
4479static int cik_cp_compute_resume(struct radeon_device *rdev)
4480{
4481	int r, i, j, idx;
4482	u32 tmp;
4483	bool use_doorbell = true;
4484	u64 hqd_gpu_addr;
4485	u64 mqd_gpu_addr;
4486	u64 eop_gpu_addr;
4487	u64 wb_gpu_addr;
4488	u32 *buf;
4489	struct bonaire_mqd *mqd;
4490
4491	r = cik_cp_compute_start(rdev);
4492	if (r)
4493		return r;
4494
4495	/* fix up chicken bits */
4496	tmp = RREG32(CP_CPF_DEBUG);
4497	tmp |= (1 << 23);
4498	WREG32(CP_CPF_DEBUG, tmp);
4499
4500	/* init the pipes */
4501	mutex_lock(&rdev->srbm_mutex);
4502	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4503		int me = (i < 4) ? 1 : 2;
4504		int pipe = (i < 4) ? i : (i - 4);
4505
4506		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4507
4508		cik_srbm_select(rdev, me, pipe, 0, 0);
4509
4510		/* write the EOP addr */
4511		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4512		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4513
4514		/* set the VMID assigned */
4515		WREG32(CP_HPD_EOP_VMID, 0);
4516
4517		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4518		tmp = RREG32(CP_HPD_EOP_CONTROL);
4519		tmp &= ~EOP_SIZE_MASK;
4520		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4521		WREG32(CP_HPD_EOP_CONTROL, tmp);
4522	}
4523	cik_srbm_select(rdev, 0, 0, 0, 0);
4524	mutex_unlock(&rdev->srbm_mutex);
4525
4526	/* init the queues.  Just two for now. */
4527	for (i = 0; i < 2; i++) {
4528		if (i == 0)
4529			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4530		else
4531			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4532
4533		if (rdev->ring[idx].mqd_obj == NULL) {
4534			r = radeon_bo_create(rdev,
4535					     sizeof(struct bonaire_mqd),
4536					     PAGE_SIZE, true,
4537					     RADEON_GEM_DOMAIN_GTT, NULL,
4538					     &rdev->ring[idx].mqd_obj);
4539			if (r) {
4540				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4541				return r;
4542			}
4543		}
4544
4545		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4546		if (unlikely(r != 0)) {
4547			cik_cp_compute_fini(rdev);
4548			return r;
4549		}
4550		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4551				  &mqd_gpu_addr);
4552		if (r) {
4553			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4554			cik_cp_compute_fini(rdev);
4555			return r;
4556		}
4557		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4558		if (r) {
4559			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4560			cik_cp_compute_fini(rdev);
4561			return r;
4562		}
4563
4564		/* init the mqd struct */
4565		memset(buf, 0, sizeof(struct bonaire_mqd));
4566
4567		mqd = (struct bonaire_mqd *)buf;
4568		mqd->header = 0xC0310800;
4569		mqd->static_thread_mgmt01[0] = 0xffffffff;
4570		mqd->static_thread_mgmt01[1] = 0xffffffff;
4571		mqd->static_thread_mgmt23[0] = 0xffffffff;
4572		mqd->static_thread_mgmt23[1] = 0xffffffff;
4573
4574		mutex_lock(&rdev->srbm_mutex);
4575		cik_srbm_select(rdev, rdev->ring[idx].me,
4576				rdev->ring[idx].pipe,
4577				rdev->ring[idx].queue, 0);
4578
4579		/* disable wptr polling */
4580		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4581		tmp &= ~WPTR_POLL_EN;
4582		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4583
4584		/* enable doorbell? */
4585		mqd->queue_state.cp_hqd_pq_doorbell_control =
4586			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4587		if (use_doorbell)
4588			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4589		else
4590			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4591		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4592		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4593
4594		/* disable the queue if it's active */
4595		mqd->queue_state.cp_hqd_dequeue_request = 0;
4596		mqd->queue_state.cp_hqd_pq_rptr = 0;
4597		mqd->queue_state.cp_hqd_pq_wptr = 0;
4598		if (RREG32(CP_HQD_ACTIVE) & 1) {
4599			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4600			for (j = 0; j < rdev->usec_timeout; j++) { /* don't clobber the outer queue loop's i */
4601				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4602					break;
4603				udelay(1);
4604			}
4605			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4606			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4607			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4608		}
4609
4610		/* set the pointer to the MQD */
4611		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4612		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4613		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4614		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4615		/* set MQD vmid to 0 */
4616		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4617		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4618		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4619
4620		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4621		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4622		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4623		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4624		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4625		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4626
4627		/* set up the HQD, this is similar to CP_RB0_CNTL */
4628		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4629		mqd->queue_state.cp_hqd_pq_control &=
4630			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4631
4632		mqd->queue_state.cp_hqd_pq_control |=
4633			order_base_2(rdev->ring[idx].ring_size / 8);
4634		mqd->queue_state.cp_hqd_pq_control |=
4635			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4636#ifdef __BIG_ENDIAN
4637		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4638#endif
4639		mqd->queue_state.cp_hqd_pq_control &=
4640			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4641		mqd->queue_state.cp_hqd_pq_control |=
4642			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4643		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4644
4645		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4646		if (i == 0)
4647			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4648		else
4649			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4650		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4651		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4652		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4653		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4654		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4655
4656		/* set the wb address whether it's enabled or not */
4657		if (i == 0)
4658			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4659		else
4660			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4661		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4662		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4663			upper_32_bits(wb_gpu_addr) & 0xffff;
4664		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4665		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4666		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4667		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4668
4669		/* enable the doorbell if requested */
4670		if (use_doorbell) {
4671			mqd->queue_state.cp_hqd_pq_doorbell_control =
4672				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4673			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4674			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4675				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4676			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4677			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4678				~(DOORBELL_SOURCE | DOORBELL_HIT);
4679
4680		} else {
4681			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4682		}
4683		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4684		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4685
4686		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4687		rdev->ring[idx].wptr = 0;
4688		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4689		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4690		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4691
4692		/* set the vmid for the queue */
4693		mqd->queue_state.cp_hqd_vmid = 0;
4694		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4695
4696		/* activate the queue */
4697		mqd->queue_state.cp_hqd_active = 1;
4698		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4699
4700		cik_srbm_select(rdev, 0, 0, 0, 0);
4701		mutex_unlock(&rdev->srbm_mutex);
4702
4703		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4704		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4705
4706		rdev->ring[idx].ready = true;
4707		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4708		if (r)
4709			rdev->ring[idx].ready = false;
4710	}
4711
4712	return 0;
4713}
4714
4715static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4716{
4717	cik_cp_gfx_enable(rdev, enable);
4718	cik_cp_compute_enable(rdev, enable);
4719}
4720
4721static int cik_cp_load_microcode(struct radeon_device *rdev)
4722{
4723	int r;
4724
4725	r = cik_cp_gfx_load_microcode(rdev);
4726	if (r)
4727		return r;
4728	r = cik_cp_compute_load_microcode(rdev);
4729	if (r)
4730		return r;
4731
4732	return 0;
4733}
4734
4735static void cik_cp_fini(struct radeon_device *rdev)
4736{
4737	cik_cp_gfx_fini(rdev);
4738	cik_cp_compute_fini(rdev);
4739}
4740
4741static int cik_cp_resume(struct radeon_device *rdev)
4742{
4743	int r;
4744
4745	cik_enable_gui_idle_interrupt(rdev, false);
4746
4747	r = cik_cp_load_microcode(rdev);
4748	if (r)
4749		return r;
4750
4751	r = cik_cp_gfx_resume(rdev);
4752	if (r)
4753		return r;
4754	r = cik_cp_compute_resume(rdev);
4755	if (r)
4756		return r;
4757
4758	cik_enable_gui_idle_interrupt(rdev, true);
4759
4760	return 0;
4761}
4762
4763static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4764{
4765	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4766		RREG32(GRBM_STATUS));
4767	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4768		RREG32(GRBM_STATUS2));
4769	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4770		RREG32(GRBM_STATUS_SE0));
4771	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4772		RREG32(GRBM_STATUS_SE1));
4773	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4774		RREG32(GRBM_STATUS_SE2));
4775	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4776		RREG32(GRBM_STATUS_SE3));
4777	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4778		RREG32(SRBM_STATUS));
4779	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4780		RREG32(SRBM_STATUS2));
4781	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4782		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4783	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4784		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4785	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4786	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4787		 RREG32(CP_STALLED_STAT1));
4788	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4789		 RREG32(CP_STALLED_STAT2));
4790	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4791		 RREG32(CP_STALLED_STAT3));
4792	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4793		 RREG32(CP_CPF_BUSY_STAT));
4794	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4795		 RREG32(CP_CPF_STALLED_STAT1));
4796	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4797	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4798	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4799		 RREG32(CP_CPC_STALLED_STAT1));
4800	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4801}
4802
4803/**
4804 * cik_gpu_check_soft_reset - check which blocks are busy
4805 *
4806 * @rdev: radeon_device pointer
4807 *
4808 * Check which blocks are busy and return the relevant reset
4809 * mask to be used by cik_gpu_soft_reset().
4810 * Returns a mask of the blocks to be reset.
4811 */
4812u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4813{
4814	u32 reset_mask = 0;
4815	u32 tmp;
4816
4817	/* GRBM_STATUS */
4818	tmp = RREG32(GRBM_STATUS);
4819	if (tmp & (PA_BUSY | SC_BUSY |
4820		   BCI_BUSY | SX_BUSY |
4821		   TA_BUSY | VGT_BUSY |
4822		   DB_BUSY | CB_BUSY |
4823		   GDS_BUSY | SPI_BUSY |
4824		   IA_BUSY | IA_BUSY_NO_DMA))
4825		reset_mask |= RADEON_RESET_GFX;
4826
4827	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4828		reset_mask |= RADEON_RESET_CP;
4829
4830	/* GRBM_STATUS2 */
4831	tmp = RREG32(GRBM_STATUS2);
4832	if (tmp & RLC_BUSY)
4833		reset_mask |= RADEON_RESET_RLC;
4834
4835	/* SDMA0_STATUS_REG */
4836	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4837	if (!(tmp & SDMA_IDLE))
4838		reset_mask |= RADEON_RESET_DMA;
4839
4840	/* SDMA1_STATUS_REG */
4841	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4842	if (!(tmp & SDMA_IDLE))
4843		reset_mask |= RADEON_RESET_DMA1;
4844
4845	/* SRBM_STATUS2 */
4846	tmp = RREG32(SRBM_STATUS2);
4847	if (tmp & SDMA_BUSY)
4848		reset_mask |= RADEON_RESET_DMA;
4849
4850	if (tmp & SDMA1_BUSY)
4851		reset_mask |= RADEON_RESET_DMA1;
4852
4853	/* SRBM_STATUS */
4854	tmp = RREG32(SRBM_STATUS);
4855
4856	if (tmp & IH_BUSY)
4857		reset_mask |= RADEON_RESET_IH;
4858
4859	if (tmp & SEM_BUSY)
4860		reset_mask |= RADEON_RESET_SEM;
4861
4862	if (tmp & GRBM_RQ_PENDING)
4863		reset_mask |= RADEON_RESET_GRBM;
4864
4865	if (tmp & VMC_BUSY)
4866		reset_mask |= RADEON_RESET_VMC;
4867
4868	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4869		   MCC_BUSY | MCD_BUSY))
4870		reset_mask |= RADEON_RESET_MC;
4871
4872	if (evergreen_is_display_hung(rdev))
4873		reset_mask |= RADEON_RESET_DISPLAY;
4874
4875	/* Skip MC reset as it's most likely not hung, just busy */
4876	if (reset_mask & RADEON_RESET_MC) {
4877		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4878		reset_mask &= ~RADEON_RESET_MC;
4879	}
4880
4881	return reset_mask;
4882}
4883
4884/**
4885 * cik_gpu_soft_reset - soft reset GPU
4886 *
4887 * @rdev: radeon_device pointer
4888 * @reset_mask: mask of which blocks to reset
4889 *
4890 * Soft reset the blocks specified in @reset_mask.
4891 */
4892static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4893{
4894	struct evergreen_mc_save save;
4895	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4896	u32 tmp;
4897
4898	if (reset_mask == 0)
4899		return;
4900
4901	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4902
4903	cik_print_gpu_status_regs(rdev);
4904	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4905		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4906	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4907		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4908
4909	/* disable CG/PG */
4910	cik_fini_pg(rdev);
4911	cik_fini_cg(rdev);
4912
4913	/* stop the rlc */
4914	cik_rlc_stop(rdev);
4915
4916	/* Disable GFX parsing/prefetching */
4917	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4918
4919	/* Disable MEC parsing/prefetching */
4920	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4921
4922	if (reset_mask & RADEON_RESET_DMA) {
4923		/* sdma0 */
4924		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4925		tmp |= SDMA_HALT;
4926		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4927	}
4928	if (reset_mask & RADEON_RESET_DMA1) {
4929		/* sdma1 */
4930		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4931		tmp |= SDMA_HALT;
4932		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4933	}
4934
4935	evergreen_mc_stop(rdev, &save);
4936	if (evergreen_mc_wait_for_idle(rdev)) {
4937		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4938	}
4939
4940	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4941		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4942
4943	if (reset_mask & RADEON_RESET_CP) {
4944		grbm_soft_reset |= SOFT_RESET_CP;
4945
4946		srbm_soft_reset |= SOFT_RESET_GRBM;
4947	}
4948
4949	if (reset_mask & RADEON_RESET_DMA)
4950		srbm_soft_reset |= SOFT_RESET_SDMA;
4951
4952	if (reset_mask & RADEON_RESET_DMA1)
4953		srbm_soft_reset |= SOFT_RESET_SDMA1;
4954
4955	if (reset_mask & RADEON_RESET_DISPLAY)
4956		srbm_soft_reset |= SOFT_RESET_DC;
4957
4958	if (reset_mask & RADEON_RESET_RLC)
4959		grbm_soft_reset |= SOFT_RESET_RLC;
4960
4961	if (reset_mask & RADEON_RESET_SEM)
4962		srbm_soft_reset |= SOFT_RESET_SEM;
4963
4964	if (reset_mask & RADEON_RESET_IH)
4965		srbm_soft_reset |= SOFT_RESET_IH;
4966
4967	if (reset_mask & RADEON_RESET_GRBM)
4968		srbm_soft_reset |= SOFT_RESET_GRBM;
4969
4970	if (reset_mask & RADEON_RESET_VMC)
4971		srbm_soft_reset |= SOFT_RESET_VMC;
4972
4973	if (!(rdev->flags & RADEON_IS_IGP)) {
4974		if (reset_mask & RADEON_RESET_MC)
4975			srbm_soft_reset |= SOFT_RESET_MC;
4976	}
4977
4978	if (grbm_soft_reset) {
4979		tmp = RREG32(GRBM_SOFT_RESET);
4980		tmp |= grbm_soft_reset;
4981		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4982		WREG32(GRBM_SOFT_RESET, tmp);
4983		tmp = RREG32(GRBM_SOFT_RESET);
4984
4985		udelay(50);
4986
4987		tmp &= ~grbm_soft_reset;
4988		WREG32(GRBM_SOFT_RESET, tmp);
4989		tmp = RREG32(GRBM_SOFT_RESET);
4990	}
4991
4992	if (srbm_soft_reset) {
4993		tmp = RREG32(SRBM_SOFT_RESET);
4994		tmp |= srbm_soft_reset;
4995		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4996		WREG32(SRBM_SOFT_RESET, tmp);
4997		tmp = RREG32(SRBM_SOFT_RESET);
4998
4999		udelay(50);
5000
5001		tmp &= ~srbm_soft_reset;
5002		WREG32(SRBM_SOFT_RESET, tmp);
5003		tmp = RREG32(SRBM_SOFT_RESET);
5004	}
5005
5006	/* Wait a little for things to settle down */
5007	udelay(50);
5008
5009	evergreen_mc_resume(rdev, &save);
5010	udelay(50);
5011
5012	cik_print_gpu_status_regs(rdev);
5013}
5014
5015struct kv_reset_save_regs {
5016	u32 gmcon_reng_execute;
5017	u32 gmcon_misc;
5018	u32 gmcon_misc3;
5019};
5020
5021static void kv_save_regs_for_reset(struct radeon_device *rdev,
5022				   struct kv_reset_save_regs *save)
5023{
5024	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5025	save->gmcon_misc = RREG32(GMCON_MISC);
5026	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5027
5028	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5029	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5030						STCTRL_STUTTER_EN));
5031}
5032
5033static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5034				      struct kv_reset_save_regs *save)
5035{
5036	int i;
5037
5038	WREG32(GMCON_PGFSM_WRITE, 0);
5039	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5040
5041	for (i = 0; i < 5; i++)
5042		WREG32(GMCON_PGFSM_WRITE, 0);
5043
5044	WREG32(GMCON_PGFSM_WRITE, 0);
5045	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5046
5047	for (i = 0; i < 5; i++)
5048		WREG32(GMCON_PGFSM_WRITE, 0);
5049
5050	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5051	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5052
5053	for (i = 0; i < 5; i++)
5054		WREG32(GMCON_PGFSM_WRITE, 0);
5055
5056	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5057	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5058
5059	for (i = 0; i < 5; i++)
5060		WREG32(GMCON_PGFSM_WRITE, 0);
5061
5062	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5063	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5064
5065	for (i = 0; i < 5; i++)
5066		WREG32(GMCON_PGFSM_WRITE, 0);
5067
5068	WREG32(GMCON_PGFSM_WRITE, 0);
5069	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5070
5071	for (i = 0; i < 5; i++)
5072		WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5075	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5076
5077	for (i = 0; i < 5; i++)
5078		WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5081	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5082
5083	for (i = 0; i < 5; i++)
5084		WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5087	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5088
5089	for (i = 0; i < 5; i++)
5090		WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5093	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5094
5095	for (i = 0; i < 5; i++)
5096		WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5099	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5100
5101	WREG32(GMCON_MISC3, save->gmcon_misc3);
5102	WREG32(GMCON_MISC, save->gmcon_misc);
5103	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5104}
5105
5106static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5107{
5108	struct evergreen_mc_save save;
5109	struct kv_reset_save_regs kv_save = { 0 };
5110	u32 tmp, i;
5111
5112	dev_info(rdev->dev, "GPU pci config reset\n");
5113
5114	/* disable dpm? */
5115
5116	/* disable cg/pg */
5117	cik_fini_pg(rdev);
5118	cik_fini_cg(rdev);
5119
5120	/* Disable GFX parsing/prefetching */
5121	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5122
5123	/* Disable MEC parsing/prefetching */
5124	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5125
5126	/* sdma0 */
5127	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5128	tmp |= SDMA_HALT;
5129	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5130	/* sdma1 */
5131	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5132	tmp |= SDMA_HALT;
5133	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5134	/* XXX other engines? */
5135
5136	/* halt the rlc, disable cp internal ints */
5137	cik_rlc_stop(rdev);
5138
5139	udelay(50);
5140
5141	/* disable mem access */
5142	evergreen_mc_stop(rdev, &save);
5143	if (evergreen_mc_wait_for_idle(rdev)) {
5144		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5145	}
5146
5147	if (rdev->flags & RADEON_IS_IGP)
5148		kv_save_regs_for_reset(rdev, &kv_save);
5149
5150	/* disable BM */
5151	pci_clear_master(rdev->pdev);
5152	/* reset */
5153	radeon_pci_config_reset(rdev);
5154
5155	udelay(100);
5156
5157	/* wait for asic to come out of reset */
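	/* (MMIO reads return all ones until the device responds again) */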
5158	for (i = 0; i < rdev->usec_timeout; i++) {
5159		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5160			break;
5161		udelay(1);
5162	}
5163
5164	/* does asic init need to be run first??? */
5165	if (rdev->flags & RADEON_IS_IGP)
5166		kv_restore_regs_for_reset(rdev, &kv_save);
5167}
5168
5169/**
5170 * cik_asic_reset - soft reset GPU
5171 *
5172 * @rdev: radeon_device pointer
5173 *
5174 * Look up which blocks are hung and attempt
5175 * to reset them.
5176 * Returns 0 for success.
5177 */
5178int cik_asic_reset(struct radeon_device *rdev)
5179{
5180	u32 reset_mask;
5181
5182	reset_mask = cik_gpu_check_soft_reset(rdev);
5183
5184	if (reset_mask)
5185		r600_set_bios_scratch_engine_hung(rdev, true);
5186
5187	/* try soft reset */
5188	cik_gpu_soft_reset(rdev, reset_mask);
5189
5190	reset_mask = cik_gpu_check_soft_reset(rdev);
5191
5192	/* try pci config reset */
5193	if (reset_mask && radeon_hard_reset)
5194		cik_gpu_pci_config_reset(rdev);
5195
5196	reset_mask = cik_gpu_check_soft_reset(rdev);
5197
5198	if (!reset_mask)
5199		r600_set_bios_scratch_engine_hung(rdev, false);
5200
5201	return 0;
5202}
5203
5204/**
5205 * cik_gfx_is_lockup - check if the 3D engine is locked up
5206 *
5207 * @rdev: radeon_device pointer
5208 * @ring: radeon_ring structure holding ring information
5209 *
5210 * Check if the 3D engine is locked up (CIK).
5211 * Returns true if the engine is locked, false if not.
5212 */
5213bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5214{
5215	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5216
5217	if (!(reset_mask & (RADEON_RESET_GFX |
5218			    RADEON_RESET_COMPUTE |
5219			    RADEON_RESET_CP))) {
5220		radeon_ring_lockup_update(rdev, ring);
5221		return false;
5222	}
5223	return radeon_ring_test_lockup(rdev, ring);
5224}
5225
5226/* MC */
5227/**
5228 * cik_mc_program - program the GPU memory controller
5229 *
5230 * @rdev: radeon_device pointer
5231 *
5232 * Set the location of vram, gart, and AGP in the GPU's
5233 * physical address space (CIK).
5234 */
5235static void cik_mc_program(struct radeon_device *rdev)
5236{
5237	struct evergreen_mc_save save;
5238	u32 tmp;
5239	int i, j;
5240
5241	/* Initialize HDP */
5242	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5243		WREG32((0x2c14 + j), 0x00000000);
5244		WREG32((0x2c18 + j), 0x00000000);
5245		WREG32((0x2c1c + j), 0x00000000);
5246		WREG32((0x2c20 + j), 0x00000000);
5247		WREG32((0x2c24 + j), 0x00000000);
5248	}
5249	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5250
5251	evergreen_mc_stop(rdev, &save);
5252	if (radeon_mc_wait_for_idle(rdev)) {
5253		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5254	}
5255	/* Lockout access through VGA aperture*/
5256	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5257	/* Update configuration */
5258	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5259	       rdev->mc.vram_start >> 12);
5260	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5261	       rdev->mc.vram_end >> 12);
5262	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5263	       rdev->vram_scratch.gpu_addr >> 12);
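	/* FB_LOCATION packs the 24-bit-aligned top and base halves:
	 * e.g. 4GB of vram at base 0 gives (0xFF << 16) | 0x00 = 0x00FF0000
	 */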
5264	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5265	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5266	WREG32(MC_VM_FB_LOCATION, tmp);
5267	/* XXX double check these! */
5268	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5269	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5270	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5271	WREG32(MC_VM_AGP_BASE, 0);
5272	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5273	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5274	if (radeon_mc_wait_for_idle(rdev)) {
5275		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5276	}
5277	evergreen_mc_resume(rdev, &save);
5278	/* we need to own VRAM, so turn off the VGA renderer here
5279	 * to stop it overwriting our objects */
5280	rv515_vga_render_disable(rdev);
5281}
5282
5283/**
5284 * cik_mc_init - initialize the memory controller driver params
5285 *
5286 * @rdev: radeon_device pointer
5287 *
5288 * Look up the amount of vram, vram width, and decide how to place
5289 * vram and gart within the GPU's physical address space (CIK).
5290 * Returns 0 for success.
5291 */
5292static int cik_mc_init(struct radeon_device *rdev)
5293{
5294	u32 tmp;
5295	int chansize, numchan;
5296
5297	/* Get VRAM information */
5298	rdev->mc.vram_is_ddr = true;
5299	tmp = RREG32(MC_ARB_RAMCFG);
5300	if (tmp & CHANSIZE_MASK) {
5301		chansize = 64;
5302	} else {
5303		chansize = 32;
5304	}
5305	tmp = RREG32(MC_SHARED_CHMAP);
5306	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5307	case 0:
5308	default:
5309		numchan = 1;
5310		break;
5311	case 1:
5312		numchan = 2;
5313		break;
5314	case 2:
5315		numchan = 4;
5316		break;
5317	case 3:
5318		numchan = 8;
5319		break;
5320	case 4:
5321		numchan = 3;
5322		break;
5323	case 5:
5324		numchan = 6;
5325		break;
5326	case 6:
5327		numchan = 10;
5328		break;
5329	case 7:
5330		numchan = 12;
5331		break;
5332	case 8:
5333		numchan = 16;
5334		break;
5335	}
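	/* e.g. 8 channels of 64-bit chansize yields a 512-bit interface */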
5336	rdev->mc.vram_width = numchan * chansize;
5337	/* Could aperture size report 0? */
5338	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5339	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5340	/* size in MB on CIK, same as on SI */
5341	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5342	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5343	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5344	si_vram_gtt_location(rdev, &rdev->mc);
5345	radeon_update_bandwidth_info(rdev);
5346
5347	return 0;
5348}
5349
5350/*
5351 * GART
5352 * VMID 0 holds the physical GPU addresses used by the kernel.
5353 * VMIDs 1-15 are used for userspace clients and are handled
5354 * by the radeon vm/hsa code.
5355 */
5356/**
5357 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5358 *
5359 * @rdev: radeon_device pointer
5360 *
5361 * Flush the TLB for the VMID 0 page table (CIK).
5362 */
5363void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5364{
5365	/* flush hdp cache */
5366	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5367
5368	/* bits 0-15 are the VM contexts 0-15 */
5369	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5370}
5371
5372/**
5373 * cik_pcie_gart_enable - gart enable
5374 *
5375 * @rdev: radeon_device pointer
5376 *
5377 * This sets up the TLBs, programs the page tables for VMID0,
5378 * sets up the hw for VMIDs 1-15 which are allocated on
5379 * demand, and sets up the global locations for the LDS, GDS,
5380 * and GPUVM for FSA64 clients (CIK).
5381 * Returns 0 for success, errors for failure.
5382 */
5383static int cik_pcie_gart_enable(struct radeon_device *rdev)
5384{
5385	int r, i;
5386
5387	if (rdev->gart.robj == NULL) {
5388		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5389		return -EINVAL;
5390	}
5391	r = radeon_gart_table_vram_pin(rdev);
5392	if (r)
5393		return r;
5394	radeon_gart_restore(rdev);
5395	/* Setup TLB control */
5396	WREG32(MC_VM_MX_L1_TLB_CNTL,
5397	       (0xA << 7) |
5398	       ENABLE_L1_TLB |
5399	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5400	       ENABLE_ADVANCED_DRIVER_MODEL |
5401	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5402	/* Setup L2 cache */
5403	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5404	       ENABLE_L2_FRAGMENT_PROCESSING |
5405	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5406	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5407	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5408	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5409	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5410	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5411	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5412	/* setup context0 */
5413	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5414	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5415	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5416	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5417			(u32)(rdev->dummy_page.addr >> 12));
5418	WREG32(VM_CONTEXT0_CNTL2, 0);
5419	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5420				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5421
5422	WREG32(0x15D4, 0);
5423	WREG32(0x15D8, 0);
5424	WREG32(0x15DC, 0);
5425
5426	/* empty context1-15 */
5427	/* FIXME: start with 4GB; once 2-level page tables are in use,
5428	 * switch to the full vm size space
5429	 */
5430	/* set vm size, must be a multiple of 4 */
5431	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5432	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5433	for (i = 1; i < 16; i++) {
5434		if (i < 8)
5435			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5436			       rdev->gart.table_addr >> 12);
5437		else
5438			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5439			       rdev->gart.table_addr >> 12);
5440	}
5441
5442	/* enable context1-15 */
5443	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5444	       (u32)(rdev->dummy_page.addr >> 12));
5445	WREG32(VM_CONTEXT1_CNTL2, 4);
5446	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5447				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5448				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5449				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5450				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5451				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5452				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5453				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5454				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5455				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5456				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5457				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5458				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5459
5460	if (rdev->family == CHIP_KAVERI) {
5461		u32 tmp = RREG32(CHUB_CONTROL);
5462		tmp &= ~BYPASS_VM;
5463		WREG32(CHUB_CONTROL, tmp);
5464	}
5465
5466	/* XXX SH_MEM regs */
5467	/* where to put LDS, scratch, GPUVM in FSA64 space */
5468	mutex_lock(&rdev->srbm_mutex);
5469	for (i = 0; i < 16; i++) {
5470		cik_srbm_select(rdev, 0, 0, 0, i);
5471		/* CP and shaders */
5472		WREG32(SH_MEM_CONFIG, 0);
5473		WREG32(SH_MEM_APE1_BASE, 1);
5474		WREG32(SH_MEM_APE1_LIMIT, 0);
5475		WREG32(SH_MEM_BASES, 0);
5476		/* SDMA GFX */
5477		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5478		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5479		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5480		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5481		/* XXX SDMA RLC - todo */
5482	}
5483	cik_srbm_select(rdev, 0, 0, 0, 0);
5484	mutex_unlock(&rdev->srbm_mutex);
5485
5486	cik_pcie_gart_tlb_flush(rdev);
5487	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5488		 (unsigned)(rdev->mc.gtt_size >> 20),
5489		 (unsigned long long)rdev->gart.table_addr);
5490	rdev->gart.ready = true;
5491	return 0;
5492}
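
/* The CONTEXT0-7 and CONTEXT8-15 page table base registers sit in two
 * separate banks, so the register offset for a given VMID is computed
 * in two halves (see the loop above and cik_vm_flush() below).  A
 * minimal helper sketch (hypothetical, not part of the driver):
 */
static inline u32 cik_vm_pt_base_reg(unsigned int vmid)
{
	/* registers are 4 bytes apart within each bank */
	return (vmid < 8) ?
		(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vmid << 2)) :
		(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vmid - 8) << 2));
}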
5493
5494/**
5495 * cik_pcie_gart_disable - gart disable
5496 *
5497 * @rdev: radeon_device pointer
5498 *
5499 * This disables all VM page tables (CIK).
5500 */
5501static void cik_pcie_gart_disable(struct radeon_device *rdev)
5502{
5503	/* Disable all tables */
5504	WREG32(VM_CONTEXT0_CNTL, 0);
5505	WREG32(VM_CONTEXT1_CNTL, 0);
5506	/* Setup TLB control */
5507	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5508	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5509	/* Setup L2 cache */
5510	WREG32(VM_L2_CNTL,
5511	       ENABLE_L2_FRAGMENT_PROCESSING |
5512	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5513	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5514	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5515	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5516	WREG32(VM_L2_CNTL2, 0);
5517	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5518	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5519	radeon_gart_table_vram_unpin(rdev);
5520}
5521
5522/**
5523 * cik_pcie_gart_fini - vm fini callback
5524 *
5525 * @rdev: radeon_device pointer
5526 *
5527 * Tears down the driver GART/VM setup (CIK).
5528 */
5529static void cik_pcie_gart_fini(struct radeon_device *rdev)
5530{
5531	cik_pcie_gart_disable(rdev);
5532	radeon_gart_table_vram_free(rdev);
5533	radeon_gart_fini(rdev);
5534}
5535
5536/* vm parser */
5537/**
5538 * cik_ib_parse - vm ib_parse callback
5539 *
5540 * @rdev: radeon_device pointer
5541 * @ib: indirect buffer pointer
5542 *
5543 * CIK uses hw IB checking so this is a nop (CIK).
5544 */
5545int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5546{
5547	return 0;
5548}
5549
5550/*
5551 * vm
5552 * VMID 0 holds the physical GPU addresses used by the kernel.
5553 * VMIDs 1-15 are used for userspace clients and are handled
5554 * by the radeon vm/hsa code.
5555 */
5556/**
5557 * cik_vm_init - cik vm init callback
5558 *
5559 * @rdev: radeon_device pointer
5560 *
5561 * Inits cik specific vm parameters (number of VMs, base of vram for
5562 * VMIDs 1-15) (CIK).
5563 * Returns 0 for success.
5564 */
5565int cik_vm_init(struct radeon_device *rdev)
5566{
5567	/* number of VMs */
5568	rdev->vm_manager.nvm = 16;
5569	/* base offset of vram pages */
5570	if (rdev->flags & RADEON_IS_IGP) {
5571		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5572		tmp <<= 22;
5573		rdev->vm_manager.vram_base_offset = tmp;
5574	} else
5575		rdev->vm_manager.vram_base_offset = 0;
5576
5577	return 0;
5578}
5579
5580/**
5581 * cik_vm_fini - cik vm fini callback
5582 *
5583 * @rdev: radeon_device pointer
5584 *
5585 * Tear down any asic specific VM setup (CIK).
5586 */
5587void cik_vm_fini(struct radeon_device *rdev)
5588{
5589}
5590
5591/**
5592 * cik_vm_decode_fault - print human readable fault info
5593 *
5594 * @rdev: radeon_device pointer
5595 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5596 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5597 *
5598 * Print human readable fault information (CIK).
5599 */
5600static void cik_vm_decode_fault(struct radeon_device *rdev,
5601				u32 status, u32 addr, u32 mc_client)
5602{
5603	u32 mc_id;
5604	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5605	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5606	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5607		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
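	/* e.g. a (hypothetical) mc_client of 0x43503000 unpacks to "CP0" */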
5608
5609	if (rdev->family == CHIP_HAWAII)
5610		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5611	else
5612		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5613
5614	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5615	       protections, vmid, addr,
5616	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5617	       block, mc_client, mc_id);
5618}
5619
5620/**
5621 * cik_vm_flush - cik vm flush using the CP
5622 *
5623 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5624 *
5625 * Update the page table base and flush the VM TLB
5626 * using the CP (CIK).
5627 */
5628void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5629{
5630	struct radeon_ring *ring = &rdev->ring[ridx];
5631
5632	if (vm == NULL)
5633		return;
5634
5635	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5636	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5637				 WRITE_DATA_DST_SEL(0)));
5638	if (vm->id < 8) {
5639		radeon_ring_write(ring,
5640				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5641	} else {
5642		radeon_ring_write(ring,
5643				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5644	}
5645	radeon_ring_write(ring, 0);
5646	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5647
5648	/* update SH_MEM_* regs */
5649	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5650	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5651				 WRITE_DATA_DST_SEL(0)));
5652	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5653	radeon_ring_write(ring, 0);
5654	radeon_ring_write(ring, VMID(vm->id));
5655
5656	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5657	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5658				 WRITE_DATA_DST_SEL(0)));
5659	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5660	radeon_ring_write(ring, 0);
5661
5662	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5663	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5664	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5665	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5666
5667	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5668	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5669				 WRITE_DATA_DST_SEL(0)));
5670	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5671	radeon_ring_write(ring, 0);
5672	radeon_ring_write(ring, VMID(0));
5673
5674	/* HDP flush */
5675	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5676
5677	/* bits 0-15 are the VM contexts 0-15 */
5678	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5679	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5680				 WRITE_DATA_DST_SEL(0)));
5681	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5682	radeon_ring_write(ring, 0);
5683	radeon_ring_write(ring, 1 << vm->id);
5684
5685	/* compute doesn't have PFP */
5686	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5687		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5688		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5689		radeon_ring_write(ring, 0x0);
5690	}
5691}
5692
5693/*
5694 * RLC
5695 * The RLC is a multi-purpose microengine that handles a
5696 * variety of functions, the most important of which is
5697 * the interrupt controller.
5698 */
5699static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5700					  bool enable)
5701{
5702	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5703
5704	if (enable)
5705		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5706	else
5707		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5708	WREG32(CP_INT_CNTL_RING0, tmp);
5709}
5710
5711static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5712{
5713	u32 tmp;
5714
5715	tmp = RREG32(RLC_LB_CNTL);
5716	if (enable)
5717		tmp |= LOAD_BALANCE_ENABLE;
5718	else
5719		tmp &= ~LOAD_BALANCE_ENABLE;
5720	WREG32(RLC_LB_CNTL, tmp);
5721}
5722
5723static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5724{
5725	u32 i, j, k;
5726	u32 mask;
5727
5728	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5729		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5730			cik_select_se_sh(rdev, i, j);
5731			for (k = 0; k < rdev->usec_timeout; k++) {
5732				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5733					break;
5734				udelay(1);
5735			}
5736		}
5737	}
5738	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5739
5740	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5741	for (k = 0; k < rdev->usec_timeout; k++) {
5742		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5743			break;
5744		udelay(1);
5745	}
5746}
5747
5748static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5749{
5750	u32 tmp;
5751
5752	tmp = RREG32(RLC_CNTL);
5753	if (tmp != rlc)
5754		WREG32(RLC_CNTL, rlc);
5755}
5756
5757static u32 cik_halt_rlc(struct radeon_device *rdev)
5758{
5759	u32 data, orig;
5760
5761	orig = data = RREG32(RLC_CNTL);
5762
5763	if (data & RLC_ENABLE) {
5764		u32 i;
5765
5766		data &= ~RLC_ENABLE;
5767		WREG32(RLC_CNTL, data);
5768
5769		for (i = 0; i < rdev->usec_timeout; i++) {
5770			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5771				break;
5772			udelay(1);
5773		}
5774
5775		cik_wait_for_rlc_serdes(rdev);
5776	}
5777
5778	return orig;
5779}
5780
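/* Safe-mode handshake with the RLC: write REQ plus the enter message,
 * wait until GFX reports powered and clocked, then wait for the RLC
 * to clear REQ as the acknowledgement.
 */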
5781void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5782{
5783	u32 tmp, i, mask;
5784
5785	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5786	WREG32(RLC_GPR_REG2, tmp);
5787
5788	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5789	for (i = 0; i < rdev->usec_timeout; i++) {
5790		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5791			break;
5792		udelay(1);
5793	}
5794
5795	for (i = 0; i < rdev->usec_timeout; i++) {
5796		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5797			break;
5798		udelay(1);
5799	}
5800}
5801
5802void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5803{
5804	u32 tmp;
5805
5806	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5807	WREG32(RLC_GPR_REG2, tmp);
5808}
5809
5810/**
5811 * cik_rlc_stop - stop the RLC ME
5812 *
5813 * @rdev: radeon_device pointer
5814 *
5815 * Halt the RLC ME (MicroEngine) (CIK).
5816 */
5817static void cik_rlc_stop(struct radeon_device *rdev)
5818{
5819	WREG32(RLC_CNTL, 0);
5820
5821	cik_enable_gui_idle_interrupt(rdev, false);
5822
5823	cik_wait_for_rlc_serdes(rdev);
5824}
5825
5826/**
5827 * cik_rlc_start - start the RLC ME
5828 *
5829 * @rdev: radeon_device pointer
5830 *
5831 * Unhalt the RLC ME (MicroEngine) (CIK).
5832 */
5833static void cik_rlc_start(struct radeon_device *rdev)
5834{
5835	WREG32(RLC_CNTL, RLC_ENABLE);
5836
5837	cik_enable_gui_idle_interrupt(rdev, true);
5838
5839	udelay(50);
5840}
5841
5842/**
5843 * cik_rlc_resume - setup the RLC hw
5844 *
5845 * @rdev: radeon_device pointer
5846 *
5847 * Initialize the RLC registers, load the ucode,
5848 * and start the RLC (CIK).
5849 * Returns 0 for success, -EINVAL if the ucode is not available.
5850 */
5851static int cik_rlc_resume(struct radeon_device *rdev)
5852{
5853	u32 i, size, tmp;
5854	const __be32 *fw_data;
5855
5856	if (!rdev->rlc_fw)
5857		return -EINVAL;
5858
5859	switch (rdev->family) {
5860	case CHIP_BONAIRE:
5861	case CHIP_HAWAII:
5862	default:
5863		size = BONAIRE_RLC_UCODE_SIZE;
5864		break;
5865	case CHIP_KAVERI:
5866		size = KV_RLC_UCODE_SIZE;
5867		break;
5868	case CHIP_KABINI:
5869		size = KB_RLC_UCODE_SIZE;
5870		break;
5871	case CHIP_MULLINS:
5872		size = ML_RLC_UCODE_SIZE;
5873		break;
5874	}
5875
5876	cik_rlc_stop(rdev);
5877
5878	/* disable CG */
5879	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5880	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5881
5882	si_rlc_reset(rdev);
5883
5884	cik_init_pg(rdev);
5885
5886	cik_init_cg(rdev);
5887
5888	WREG32(RLC_LB_CNTR_INIT, 0);
5889	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5890
5891	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5892	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5893	WREG32(RLC_LB_PARAMS, 0x00600408);
5894	WREG32(RLC_LB_CNTL, 0x80000004);
5895
5896	WREG32(RLC_MC_CNTL, 0);
5897	WREG32(RLC_UCODE_CNTL, 0);
5898
5899	fw_data = (const __be32 *)rdev->rlc_fw->data;
5900	WREG32(RLC_GPM_UCODE_ADDR, 0);
5901	for (i = 0; i < size; i++)
5902		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5903	WREG32(RLC_GPM_UCODE_ADDR, 0);
5904
5905	/* XXX - find out what chips support lbpw */
5906	cik_enable_lbpw(rdev, false);
5907
5908	if (rdev->family == CHIP_BONAIRE)
5909		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5910
5911	cik_rlc_start(rdev);
5912
5913	return 0;
5914}
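
/* The ucode upload above uses the usual indexed-register pattern:
 * zero the auto-incrementing address register, stream the big-endian
 * words through the data register, then zero the address again.  A
 * minimal sketch of the same pattern (hypothetical helper):
 */
static void cik_rlc_load_ucode(struct radeon_device *rdev,
			       const __be32 *fw, u32 dwords)
{
	u32 i;

	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < dwords; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);
}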
5915
5916static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5917{
5918	u32 data, orig, tmp, tmp2;
5919
5920	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5921
5922	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5923		cik_enable_gui_idle_interrupt(rdev, true);
5924
5925		tmp = cik_halt_rlc(rdev);
5926
5927		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5928		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5929		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5930		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5931		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5932
5933		cik_update_rlc(rdev, tmp);
5934
5935		data |= CGCG_EN | CGLS_EN;
5936	} else {
5937		cik_enable_gui_idle_interrupt(rdev, false);
5938
5939		RREG32(CB_CGTT_SCLK_CTRL);
5940		RREG32(CB_CGTT_SCLK_CTRL);
5941		RREG32(CB_CGTT_SCLK_CTRL);
5942		RREG32(CB_CGTT_SCLK_CTRL);
5943
5944		data &= ~(CGCG_EN | CGLS_EN);
5945	}
5946
5947	if (orig != data)
5948		WREG32(RLC_CGCG_CGLS_CTRL, data);
5949
5950}
5951
5952static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5953{
5954	u32 data, orig, tmp = 0;
5955
5956	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5957		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5958			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5959				orig = data = RREG32(CP_MEM_SLP_CNTL);
5960				data |= CP_MEM_LS_EN;
5961				if (orig != data)
5962					WREG32(CP_MEM_SLP_CNTL, data);
5963			}
5964		}
5965
5966		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5967		data &= 0xfffffffd;
5968		if (orig != data)
5969			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5970
5971		tmp = cik_halt_rlc(rdev);
5972
5973		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5974		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5975		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5976		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5977		WREG32(RLC_SERDES_WR_CTRL, data);
5978
5979		cik_update_rlc(rdev, tmp);
5980
5981		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5982			orig = data = RREG32(CGTS_SM_CTRL_REG);
5983			data &= ~SM_MODE_MASK;
5984			data |= SM_MODE(0x2);
5985			data |= SM_MODE_ENABLE;
5986			data &= ~CGTS_OVERRIDE;
5987			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5988			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5989				data &= ~CGTS_LS_OVERRIDE;
5990			data &= ~ON_MONITOR_ADD_MASK;
5991			data |= ON_MONITOR_ADD_EN;
5992			data |= ON_MONITOR_ADD(0x96);
5993			if (orig != data)
5994				WREG32(CGTS_SM_CTRL_REG, data);
5995		}
5996	} else {
5997		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5998		data |= 0x00000002;
5999		if (orig != data)
6000			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6001
6002		data = RREG32(RLC_MEM_SLP_CNTL);
6003		if (data & RLC_MEM_LS_EN) {
6004			data &= ~RLC_MEM_LS_EN;
6005			WREG32(RLC_MEM_SLP_CNTL, data);
6006		}
6007
6008		data = RREG32(CP_MEM_SLP_CNTL);
6009		if (data & CP_MEM_LS_EN) {
6010			data &= ~CP_MEM_LS_EN;
6011			WREG32(CP_MEM_SLP_CNTL, data);
6012		}
6013
6014		orig = data = RREG32(CGTS_SM_CTRL_REG);
6015		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6016		if (orig != data)
6017			WREG32(CGTS_SM_CTRL_REG, data);
6018
6019		tmp = cik_halt_rlc(rdev);
6020
6021		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6022		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6023		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6024		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6025		WREG32(RLC_SERDES_WR_CTRL, data);
6026
6027		cik_update_rlc(rdev, tmp);
6028	}
6029}
6030
6031static const u32 mc_cg_registers[] =
6032{
6033	MC_HUB_MISC_HUB_CG,
6034	MC_HUB_MISC_SIP_CG,
6035	MC_HUB_MISC_VM_CG,
6036	MC_XPB_CLK_GAT,
6037	ATC_MISC_CG,
6038	MC_CITF_MISC_WR_CG,
6039	MC_CITF_MISC_RD_CG,
6040	MC_CITF_MISC_VM_CG,
6041	VM_L2_CG,
6042};
6043
6044static void cik_enable_mc_ls(struct radeon_device *rdev,
6045			     bool enable)
6046{
6047	int i;
6048	u32 orig, data;
6049
6050	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6051		orig = data = RREG32(mc_cg_registers[i]);
6052		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6053			data |= MC_LS_ENABLE;
6054		else
6055			data &= ~MC_LS_ENABLE;
6056		if (data != orig)
6057			WREG32(mc_cg_registers[i], data);
6058	}
6059}
6060
6061static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6062			       bool enable)
6063{
6064	int i;
6065	u32 orig, data;
6066
6067	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6068		orig = data = RREG32(mc_cg_registers[i]);
6069		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6070			data |= MC_CG_ENABLE;
6071		else
6072			data &= ~MC_CG_ENABLE;
6073		if (data != orig)
6074			WREG32(mc_cg_registers[i], data);
6075	}
6076}
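
/* cik_enable_mc_ls() and cik_enable_mc_mgcg() above differ only in the
 * bit they toggle; a (hypothetical) shared helper would look like:
 */
static void cik_mc_cg_rmw(struct radeon_device *rdev, u32 bit, bool set)
{
	int i;
	u32 orig, data;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		orig = data = RREG32(mc_cg_registers[i]);
		if (set)
			data |= bit;
		else
			data &= ~bit;
		if (data != orig)	/* skip redundant posted writes */
			WREG32(mc_cg_registers[i], data);
	}
}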
6077
6078static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6079				 bool enable)
6080{
6081	u32 orig, data;
6082
6083	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6084		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6085		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6086	} else {
6087		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6088		data |= 0xff000000;
6089		if (data != orig)
6090			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6091
6092		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6093		data |= 0xff000000;
6094		if (data != orig)
6095			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6096	}
6097}
6098
6099static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6100				 bool enable)
6101{
6102	u32 orig, data;
6103
6104	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6105		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6106		data |= 0x100;
6107		if (orig != data)
6108			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6109
6110		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6111		data |= 0x100;
6112		if (orig != data)
6113			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6114	} else {
6115		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6116		data &= ~0x100;
6117		if (orig != data)
6118			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6119
6120		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6121		data &= ~0x100;
6122		if (orig != data)
6123			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6124	}
6125}
6126
6127static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6128				bool enable)
6129{
6130	u32 orig, data;
6131
6132	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6133		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6134		data |= 0xfff;
6135		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6136
6137		orig = data = RREG32(UVD_CGC_CTRL);
6138		data |= DCM;
6139		if (orig != data)
6140			WREG32(UVD_CGC_CTRL, data);
6141	} else {
6142		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6143		data &= ~0xfff;
6144		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6145
6146		orig = data = RREG32(UVD_CGC_CTRL);
6147		data &= ~DCM;
6148		if (orig != data)
6149			WREG32(UVD_CGC_CTRL, data);
6150	}
6151}
6152
6153static void cik_enable_bif_mgls(struct radeon_device *rdev,
6154			       bool enable)
6155{
6156	u32 orig, data;
6157
6158	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6159
6160	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6161		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6162			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6163	else
6164		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6165			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6166
6167	if (orig != data)
6168		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6169}
6170
6171static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6172				bool enable)
6173{
6174	u32 orig, data;
6175
6176	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6177
6178	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6179		data &= ~CLOCK_GATING_DIS;
6180	else
6181		data |= CLOCK_GATING_DIS;
6182
6183	if (orig != data)
6184		WREG32(HDP_HOST_PATH_CNTL, data);
6185}
6186
6187static void cik_enable_hdp_ls(struct radeon_device *rdev,
6188			      bool enable)
6189{
6190	u32 orig, data;
6191
6192	orig = data = RREG32(HDP_MEM_POWER_LS);
6193
6194	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6195		data |= HDP_LS_ENABLE;
6196	else
6197		data &= ~HDP_LS_ENABLE;
6198
6199	if (orig != data)
6200		WREG32(HDP_MEM_POWER_LS, data);
6201}
6202
6203void cik_update_cg(struct radeon_device *rdev,
6204		   u32 block, bool enable)
6205{
6206
6207	if (block & RADEON_CG_BLOCK_GFX) {
6208		cik_enable_gui_idle_interrupt(rdev, false);
6209		/* order matters! */
6210		if (enable) {
6211			cik_enable_mgcg(rdev, true);
6212			cik_enable_cgcg(rdev, true);
6213		} else {
6214			cik_enable_cgcg(rdev, false);
6215			cik_enable_mgcg(rdev, false);
6216		}
6217		cik_enable_gui_idle_interrupt(rdev, true);
6218	}
6219
6220	if (block & RADEON_CG_BLOCK_MC) {
6221		if (!(rdev->flags & RADEON_IS_IGP)) {
6222			cik_enable_mc_mgcg(rdev, enable);
6223			cik_enable_mc_ls(rdev, enable);
6224		}
6225	}
6226
6227	if (block & RADEON_CG_BLOCK_SDMA) {
6228		cik_enable_sdma_mgcg(rdev, enable);
6229		cik_enable_sdma_mgls(rdev, enable);
6230	}
6231
6232	if (block & RADEON_CG_BLOCK_BIF) {
6233		cik_enable_bif_mgls(rdev, enable);
6234	}
6235
6236	if (block & RADEON_CG_BLOCK_UVD) {
6237		if (rdev->has_uvd)
6238			cik_enable_uvd_mgcg(rdev, enable);
6239	}
6240
6241	if (block & RADEON_CG_BLOCK_HDP) {
6242		cik_enable_hdp_mgcg(rdev, enable);
6243		cik_enable_hdp_ls(rdev, enable);
6244	}
6245
6246	if (block & RADEON_CG_BLOCK_VCE) {
6247		vce_v2_0_enable_mgcg(rdev, enable);
6248	}
6249}
6250
6251static void cik_init_cg(struct radeon_device *rdev)
6252{
6253
6254	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6255
6256	if (rdev->has_uvd)
6257		si_init_uvd_internal_cg(rdev);
6258
6259	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6260			     RADEON_CG_BLOCK_SDMA |
6261			     RADEON_CG_BLOCK_BIF |
6262			     RADEON_CG_BLOCK_UVD |
6263			     RADEON_CG_BLOCK_HDP), true);
6264}
6265
6266static void cik_fini_cg(struct radeon_device *rdev)
6267{
6268	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6269			     RADEON_CG_BLOCK_SDMA |
6270			     RADEON_CG_BLOCK_BIF |
6271			     RADEON_CG_BLOCK_UVD |
6272			     RADEON_CG_BLOCK_HDP), false);
6273
6274	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6275}
6276
6277static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6278					  bool enable)
6279{
6280	u32 data, orig;
6281
6282	orig = data = RREG32(RLC_PG_CNTL);
6283	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6284		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6285	else
6286		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6287	if (orig != data)
6288		WREG32(RLC_PG_CNTL, data);
6289}
6290
6291static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6292					  bool enable)
6293{
6294	u32 data, orig;
6295
6296	orig = data = RREG32(RLC_PG_CNTL);
6297	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6298		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6299	else
6300		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6301	if (orig != data)
6302		WREG32(RLC_PG_CNTL, data);
6303}
6304
6305static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6306{
6307	u32 data, orig;
6308
6309	orig = data = RREG32(RLC_PG_CNTL);
6310	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6311		data &= ~DISABLE_CP_PG;
6312	else
6313		data |= DISABLE_CP_PG;
6314	if (orig != data)
6315		WREG32(RLC_PG_CNTL, data);
6316}
6317
6318static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6319{
6320	u32 data, orig;
6321
6322	orig = data = RREG32(RLC_PG_CNTL);
6323	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6324		data &= ~DISABLE_GDS_PG;
6325	else
6326		data |= DISABLE_GDS_PG;
6327	if (orig != data)
6328		WREG32(RLC_PG_CNTL, data);
6329}
6330
6331#define CP_ME_TABLE_SIZE    96
6332#define CP_ME_TABLE_OFFSET  2048
6333#define CP_MEC_TABLE_OFFSET 4096
6334
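/* The CP power-gating table BO filled in below holds one
 * CP_ME_TABLE_SIZE-dword block per micro engine, packed back to back:
 * CE | PFP | ME | MEC (with a second MEC block on Kaveri).
 */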
6335void cik_init_cp_pg_table(struct radeon_device *rdev)
6336{
6337	const __be32 *fw_data;
6338	volatile u32 *dst_ptr;
6339	int me, i, max_me = 4;
6340	u32 bo_offset = 0;
6341	u32 table_offset;
6342
6343	if (rdev->family == CHIP_KAVERI)
6344		max_me = 5;
6345
6346	if (rdev->rlc.cp_table_ptr == NULL)
6347		return;
6348
6349	/* write the cp table buffer */
6350	dst_ptr = rdev->rlc.cp_table_ptr;
6351	for (me = 0; me < max_me; me++) {
6352		if (me == 0) {
6353			fw_data = (const __be32 *)rdev->ce_fw->data;
6354			table_offset = CP_ME_TABLE_OFFSET;
6355		} else if (me == 1) {
6356			fw_data = (const __be32 *)rdev->pfp_fw->data;
6357			table_offset = CP_ME_TABLE_OFFSET;
6358		} else if (me == 2) {
6359			fw_data = (const __be32 *)rdev->me_fw->data;
6360			table_offset = CP_ME_TABLE_OFFSET;
6361		} else {
6362			fw_data = (const __be32 *)rdev->mec_fw->data;
6363			table_offset = CP_MEC_TABLE_OFFSET;
6364		}
6365
6366		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6367			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6368		}
6369		bo_offset += CP_ME_TABLE_SIZE;
6370	}
6371}
6372
6373static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6374				bool enable)
6375{
6376	u32 data, orig;
6377
6378	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6379		orig = data = RREG32(RLC_PG_CNTL);
6380		data |= GFX_PG_ENABLE;
6381		if (orig != data)
6382			WREG32(RLC_PG_CNTL, data);
6383
6384		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6385		data |= AUTO_PG_EN;
6386		if (orig != data)
6387			WREG32(RLC_AUTO_PG_CTRL, data);
6388	} else {
6389		orig = data = RREG32(RLC_PG_CNTL);
6390		data &= ~GFX_PG_ENABLE;
6391		if (orig != data)
6392			WREG32(RLC_PG_CNTL, data);
6393
6394		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6395		data &= ~AUTO_PG_EN;
6396		if (orig != data)
6397			WREG32(RLC_AUTO_PG_CTRL, data);
6398
6399		data = RREG32(DB_RENDER_CONTROL);
6400	}
6401}
6402
6403static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6404{
6405	u32 mask = 0, tmp, tmp1;
6406	int i;
6407
6408	cik_select_se_sh(rdev, se, sh);
6409	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6410	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6411	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6412
6413	tmp &= 0xffff0000;
6414
6415	tmp |= tmp1;
6416	tmp >>= 16;
6417
6418	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6419		mask <<= 1;
6420		mask |= 1;
6421	}
6422
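	/* e.g. 8 CUs per SH gives mask 0xff; tmp = 0x3 (CUs 0-1 fused
	 * off) then yields an active bitmap of 0xfc */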
6423	return (~tmp) & mask;
6424}
6425
6426static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6427{
6428	u32 i, j, k, active_cu_number = 0;
6429	u32 mask, counter, cu_bitmap;
6430	u32 tmp = 0;
6431
6432	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6433		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6434			mask = 1;
6435			cu_bitmap = 0;
6436			counter = 0;
6437			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6438				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6439					if (counter < 2)
6440						cu_bitmap |= mask;
6441						counter++;
6442				}
6443				mask <<= 1;
6444			}
6445
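			/* each SH gets an 8-bit lane: SE i / SH j lands
			 * at bits [i * 16 + j * 8, i * 16 + j * 8 + 7] */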
6446			active_cu_number += counter;
6447			tmp |= (cu_bitmap << (i * 16 + j * 8));
6448		}
6449	}
6450
6451	WREG32(RLC_PG_AO_CU_MASK, tmp);
6452
6453	tmp = RREG32(RLC_MAX_PG_CU);
6454	tmp &= ~MAX_PU_CU_MASK;
6455	tmp |= MAX_PU_CU(active_cu_number);
6456	WREG32(RLC_MAX_PG_CU, tmp);
6457}
6458
6459static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6460				       bool enable)
6461{
6462	u32 data, orig;
6463
6464	orig = data = RREG32(RLC_PG_CNTL);
6465	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6466		data |= STATIC_PER_CU_PG_ENABLE;
6467	else
6468		data &= ~STATIC_PER_CU_PG_ENABLE;
6469	if (orig != data)
6470		WREG32(RLC_PG_CNTL, data);
6471}
6472
6473static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6474					bool enable)
6475{
6476	u32 data, orig;
6477
6478	orig = data = RREG32(RLC_PG_CNTL);
6479	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6480		data |= DYN_PER_CU_PG_ENABLE;
6481	else
6482		data &= ~DYN_PER_CU_PG_ENABLE;
6483	if (orig != data)
6484		WREG32(RLC_PG_CNTL, data);
6485}
6486
6487#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6488#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6489
6490static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6491{
6492	u32 data, orig;
6493	u32 i;
6494
6495	if (rdev->rlc.cs_data) {
6496		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6497		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6498		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6499		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6500	} else {
6501		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6502		for (i = 0; i < 3; i++)
6503			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6504	}
6505	if (rdev->rlc.reg_list) {
6506		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6507		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6508			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6509	}
6510
6511	orig = data = RREG32(RLC_PG_CNTL);
6512	data |= GFX_PG_SRC;
6513	if (orig != data)
6514		WREG32(RLC_PG_CNTL, data);
6515
6516	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6517	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6518
6519	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6520	data &= ~IDLE_POLL_COUNT_MASK;
6521	data |= IDLE_POLL_COUNT(0x60);
6522	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6523
6524	data = 0x10101010;
6525	WREG32(RLC_PG_DELAY, data);
6526
6527	data = RREG32(RLC_PG_DELAY_2);
6528	data &= ~0xff;
6529	data |= 0x3;
6530	WREG32(RLC_PG_DELAY_2, data);
6531
6532	data = RREG32(RLC_AUTO_PG_CTRL);
6533	data &= ~GRBM_REG_SGIT_MASK;
6534	data |= GRBM_REG_SGIT(0x700);
6535	WREG32(RLC_AUTO_PG_CTRL, data);
6536
6537}
6538
6539static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6540{
6541	cik_enable_gfx_cgpg(rdev, enable);
6542	cik_enable_gfx_static_mgpg(rdev, enable);
6543	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6544}
6545
6546u32 cik_get_csb_size(struct radeon_device *rdev)
6547{
6548	u32 count = 0;
6549	const struct cs_section_def *sect = NULL;
6550	const struct cs_extent_def *ext = NULL;
6551
6552	if (rdev->rlc.cs_data == NULL)
6553		return 0;
6554
6555	/* begin clear state */
6556	count += 2;
6557	/* context control state */
6558	count += 3;
6559
6560	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6561		for (ext = sect->section; ext->extent != NULL; ++ext) {
6562			if (sect->id == SECT_CONTEXT)
6563				count += 2 + ext->reg_count;
6564			else
6565				return 0;
6566		}
6567	}
6568	/* pa_sc_raster_config/pa_sc_raster_config1 */
6569	count += 4;
6570	/* end clear state */
6571	count += 2;
6572	/* clear state */
6573	count += 2;
6574
6575	return count;
6576}
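
/* The count above must match, dword for dword, what cik_get_csb_buffer()
 * emits below: 2 (begin clear state) + 3 (context control) + per
 * SECT_CONTEXT extent (2 + reg_count) + 4 (pa_sc_raster_config pair) +
 * 2 (end clear state) + 2 (clear state).
 */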
6577
6578void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6579{
6580	u32 count = 0, i;
6581	const struct cs_section_def *sect = NULL;
6582	const struct cs_extent_def *ext = NULL;
6583
6584	if (rdev->rlc.cs_data == NULL)
6585		return;
6586	if (buffer == NULL)
6587		return;
6588
6589	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6590	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6591
6592	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6593	buffer[count++] = cpu_to_le32(0x80000000);
6594	buffer[count++] = cpu_to_le32(0x80000000);
6595
6596	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6597		for (ext = sect->section; ext->extent != NULL; ++ext) {
6598			if (sect->id == SECT_CONTEXT) {
6599				buffer[count++] =
6600					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6601				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6602				for (i = 0; i < ext->reg_count; i++)
6603					buffer[count++] = cpu_to_le32(ext->extent[i]);
6604			} else {
6605				return;
6606			}
6607		}
6608	}
6609
6610	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6611	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6612	switch (rdev->family) {
6613	case CHIP_BONAIRE:
6614		buffer[count++] = cpu_to_le32(0x16000012);
6615		buffer[count++] = cpu_to_le32(0x00000000);
6616		break;
6617	case CHIP_KAVERI:
6618		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6619		buffer[count++] = cpu_to_le32(0x00000000);
6620		break;
6621	case CHIP_KABINI:
6622	case CHIP_MULLINS:
6623		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6624		buffer[count++] = cpu_to_le32(0x00000000);
6625		break;
6626	case CHIP_HAWAII:
6627		buffer[count++] = cpu_to_le32(0x3a00161a);
6628		buffer[count++] = cpu_to_le32(0x0000002e);
6629		break;
6630	default:
6631		buffer[count++] = cpu_to_le32(0x00000000);
6632		buffer[count++] = cpu_to_le32(0x00000000);
6633		break;
6634	}
6635
6636	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6637	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6638
6639	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6640	buffer[count++] = cpu_to_le32(0);
6641}
6642
6643static void cik_init_pg(struct radeon_device *rdev)
6644{
6645	if (rdev->pg_flags) {
6646		cik_enable_sck_slowdown_on_pu(rdev, true);
6647		cik_enable_sck_slowdown_on_pd(rdev, true);
6648		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6649			cik_init_gfx_cgpg(rdev);
6650			cik_enable_cp_pg(rdev, true);
6651			cik_enable_gds_pg(rdev, true);
6652		}
6653		cik_init_ao_cu_mask(rdev);
6654		cik_update_gfx_pg(rdev, true);
6655	}
6656}
6657
6658static void cik_fini_pg(struct radeon_device *rdev)
6659{
6660	if (rdev->pg_flags) {
6661		cik_update_gfx_pg(rdev, false);
6662		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6663			cik_enable_cp_pg(rdev, false);
6664			cik_enable_gds_pg(rdev, false);
6665		}
6666	}
6667}
6668
6669/*
6670 * Interrupts
6671 * Starting with r6xx, interrupts are handled via a ring buffer.
6672 * Ring buffers are areas of GPU accessible memory that the GPU
6673 * writes interrupt vectors into and the host reads vectors out of.
6674 * There is a rptr (read pointer) that determines where the
6675 * host is currently reading, and a wptr (write pointer)
6676 * which determines where the GPU has written.  When the
6677 * pointers are equal, the ring is idle.  When the GPU
6678 * writes vectors to the ring buffer, it increments the
6679 * wptr.  When there is an interrupt, the host then starts
6680 * fetching vectors and processing them until the pointers are
6681 * equal again, at which point it updates the rptr.
6682 */
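
/* A minimal sketch of the consumer side described above (hypothetical;
 * the real loop is cik_irq_process() elsewhere in this file).  Each IH
 * vector is 16 bytes (4 dwords), and the source id lives in the low
 * byte of the first dword.
 */
static void cik_ih_drain_sketch(struct radeon_device *rdev)
{
	u32 wptr = RREG32(IH_RB_WPTR);
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		u32 src_id = le32_to_cpu(rdev->ih.ring[rptr / 4]) & 0xff;

		(void)src_id;	/* dispatch on src_id here */
		rptr = (rptr + 16) % rdev->ih.ring_size;
	}
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);	/* hand the entries back to the GPU */
}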
6683
6684/**
6685 * cik_enable_interrupts - Enable the interrupt ring buffer
6686 *
6687 * @rdev: radeon_device pointer
6688 *
6689 * Enable the interrupt ring buffer (CIK).
6690 */
6691static void cik_enable_interrupts(struct radeon_device *rdev)
6692{
6693	u32 ih_cntl = RREG32(IH_CNTL);
6694	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6695
6696	ih_cntl |= ENABLE_INTR;
6697	ih_rb_cntl |= IH_RB_ENABLE;
6698	WREG32(IH_CNTL, ih_cntl);
6699	WREG32(IH_RB_CNTL, ih_rb_cntl);
6700	rdev->ih.enabled = true;
6701}
6702
6703/**
6704 * cik_disable_interrupts - Disable the interrupt ring buffer
6705 *
6706 * @rdev: radeon_device pointer
6707 *
6708 * Disable the interrupt ring buffer (CIK).
6709 */
6710static void cik_disable_interrupts(struct radeon_device *rdev)
6711{
6712	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6713	u32 ih_cntl = RREG32(IH_CNTL);
6714
6715	ih_rb_cntl &= ~IH_RB_ENABLE;
6716	ih_cntl &= ~ENABLE_INTR;
6717	WREG32(IH_RB_CNTL, ih_rb_cntl);
6718	WREG32(IH_CNTL, ih_cntl);
6719	/* set rptr, wptr to 0 */
6720	WREG32(IH_RB_RPTR, 0);
6721	WREG32(IH_RB_WPTR, 0);
6722	rdev->ih.enabled = false;
6723	rdev->ih.rptr = 0;
6724}
6725
6726/**
6727 * cik_disable_interrupt_state - Disable all interrupt sources
6728 *
6729 * @rdev: radeon_device pointer
6730 *
6731 * Clear all interrupt enable bits used by the driver (CIK).
6732 */
6733static void cik_disable_interrupt_state(struct radeon_device *rdev)
6734{
6735	u32 tmp;
6736
6737	/* gfx ring */
6738	tmp = RREG32(CP_INT_CNTL_RING0) &
6739		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6740	WREG32(CP_INT_CNTL_RING0, tmp);
6741	/* sdma */
6742	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6743	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6744	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6745	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6746	/* compute queues */
6747	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6748	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6749	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6750	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6751	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6752	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6753	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6754	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6755	/* grbm */
6756	WREG32(GRBM_INT_CNTL, 0);
6757	/* vline/vblank, etc. */
6758	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6759	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6760	if (rdev->num_crtc >= 4) {
6761		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6762		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6763	}
6764	if (rdev->num_crtc >= 6) {
6765		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6766		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6767	}
6768	/* pflip */
6769	if (rdev->num_crtc >= 2) {
6770		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6771		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6772	}
6773	if (rdev->num_crtc >= 4) {
6774		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6775		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6776	}
6777	if (rdev->num_crtc >= 6) {
6778		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6779		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6780	}
6781
6782	/* dac hotplug */
6783	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6784
6785	/* digital hotplug */
6786	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6787	WREG32(DC_HPD1_INT_CONTROL, tmp);
6788	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6789	WREG32(DC_HPD2_INT_CONTROL, tmp);
6790	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6791	WREG32(DC_HPD3_INT_CONTROL, tmp);
6792	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6793	WREG32(DC_HPD4_INT_CONTROL, tmp);
6794	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6795	WREG32(DC_HPD5_INT_CONTROL, tmp);
6796	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6797	WREG32(DC_HPD6_INT_CONTROL, tmp);
6798
6799}
6800
6801/**
6802 * cik_irq_init - init and enable the interrupt ring
6803 *
6804 * @rdev: radeon_device pointer
6805 *
6806 * Allocate a ring buffer for the interrupt controller,
6807 * enable the RLC, disable interrupts, enable the IH
6808 * ring buffer and enable it (CIK).
6809 * Called at device load and resume.
6810 * Returns 0 for success, errors for failure.
6811 */
6812static int cik_irq_init(struct radeon_device *rdev)
6813{
6814	int ret = 0;
6815	int rb_bufsz;
6816	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6817
6818	/* allocate ring */
6819	ret = r600_ih_ring_alloc(rdev);
6820	if (ret)
6821		return ret;
6822
6823	/* disable irqs */
6824	cik_disable_interrupts(rdev);
6825
6826	/* init rlc */
6827	ret = cik_rlc_resume(rdev);
6828	if (ret) {
6829		r600_ih_ring_fini(rdev);
6830		return ret;
6831	}
6832
6833	/* setup interrupt control */
6834	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6835	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6836	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6837	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6838	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6839	 */
6840	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6841	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6842	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6843	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6844
6845	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6846	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
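	/* e.g. a 64KB IH ring holds 16384 dwords, so rb_bufsz = 14 */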
6847
6848	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6849		      IH_WPTR_OVERFLOW_CLEAR |
6850		      (rb_bufsz << 1));
6851
6852	if (rdev->wb.enabled)
6853		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6854
6855	/* set the writeback address whether it's enabled or not */
6856	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6857	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6858
6859	WREG32(IH_RB_CNTL, ih_rb_cntl);
6860
6861	/* set rptr, wptr to 0 */
6862	WREG32(IH_RB_RPTR, 0);
6863	WREG32(IH_RB_WPTR, 0);
6864
6865	/* Default settings for IH_CNTL (disabled at first) */
6866	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6867	/* RPTR_REARM only works if msi's are enabled */
6868	if (rdev->msi_enabled)
6869		ih_cntl |= RPTR_REARM;
6870	WREG32(IH_CNTL, ih_cntl);
6871
6872	/* force the active interrupt state to all disabled */
6873	cik_disable_interrupt_state(rdev);
6874
6875	pci_set_master(rdev->pdev);
6876
6877	/* enable irqs */
6878	cik_enable_interrupts(rdev);
6879
6880	return ret;
6881}
6882
6883/**
6884 * cik_irq_set - enable/disable interrupt sources
6885 *
6886 * @rdev: radeon_device pointer
6887 *
6888 * Enable interrupt sources on the GPU (vblanks, hpd,
6889 * etc.) (CIK).
6890 * Returns 0 for success, errors for failure.
6891 */
6892int cik_irq_set(struct radeon_device *rdev)
6893{
6894	u32 cp_int_cntl;
6895	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6896	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6897	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6898	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6899	u32 grbm_int_cntl = 0;
6900	u32 dma_cntl, dma_cntl1;
6901	u32 thermal_int;
6902
6903	if (!rdev->irq.installed) {
6904		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6905		return -EINVAL;
6906	}
6907	/* don't enable anything if the ih is disabled */
6908	if (!rdev->ih.enabled) {
6909		cik_disable_interrupts(rdev);
6910		/* force the active interrupt state to all disabled */
6911		cik_disable_interrupt_state(rdev);
6912		return 0;
6913	}
6914
6915	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6916		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6917	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6918
6919	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6920	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6921	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6922	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6923	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6924	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6925
6926	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6927	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6928
6929	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6930	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6931	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6932	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6933	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6934	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6935	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6936	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6937
6938	if (rdev->flags & RADEON_IS_IGP)
6939		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6940			~(THERM_INTH_MASK | THERM_INTL_MASK);
6941	else
6942		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6943			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6944
6945	/* enable CP interrupts on all rings */
6946	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6947		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6948		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6949	}
6950	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6951		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6952		DRM_DEBUG("cik_irq_set: sw int cp1\n");
6953		if (ring->me == 1) {
6954			switch (ring->pipe) {
6955			case 0:
6956				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6957				break;
6958			case 1:
6959				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6960				break;
6961			case 2:
6962				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6963				break;
6964			case 3:
6965				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6966				break;
6967			default:
6968				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6969				break;
6970			}
6971		} else if (ring->me == 2) {
6972			switch (ring->pipe) {
6973			case 0:
6974				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6975				break;
6976			case 1:
6977				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6978				break;
6979			case 2:
6980				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6981				break;
6982			case 3:
6983				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6984				break;
6985			default:
6986				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6987				break;
6988			}
6989		} else {
6990			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6991		}
6992	}
6993	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6994		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6995		DRM_DEBUG("cik_irq_set: sw int cp2\n");
6996		if (ring->me == 1) {
6997			switch (ring->pipe) {
6998			case 0:
6999				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7000				break;
7001			case 1:
7002				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7003				break;
7004			case 2:
7005				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7006				break;
7007			case 3:
7008				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
7009				break;
7010			default:
7011				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7012				break;
7013			}
7014		} else if (ring->me == 2) {
7015			switch (ring->pipe) {
7016			case 0:
7017				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7018				break;
7019			case 1:
7020				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7021				break;
7022			case 2:
7023				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7024				break;
7025			case 3:
7026				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
7027				break;
7028			default:
7029				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7030				break;
7031			}
7032		} else {
7033			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7034		}
7035	}
7036
7037	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7038		DRM_DEBUG("cik_irq_set: sw int dma\n");
7039		dma_cntl |= TRAP_ENABLE;
7040	}
7041
7042	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7043		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7044		dma_cntl1 |= TRAP_ENABLE;
7045	}
7046
7047	if (rdev->irq.crtc_vblank_int[0] ||
7048	    atomic_read(&rdev->irq.pflip[0])) {
7049		DRM_DEBUG("cik_irq_set: vblank 0\n");
7050		crtc1 |= VBLANK_INTERRUPT_MASK;
7051	}
7052	if (rdev->irq.crtc_vblank_int[1] ||
7053	    atomic_read(&rdev->irq.pflip[1])) {
7054		DRM_DEBUG("cik_irq_set: vblank 1\n");
7055		crtc2 |= VBLANK_INTERRUPT_MASK;
7056	}
7057	if (rdev->irq.crtc_vblank_int[2] ||
7058	    atomic_read(&rdev->irq.pflip[2])) {
7059		DRM_DEBUG("cik_irq_set: vblank 2\n");
7060		crtc3 |= VBLANK_INTERRUPT_MASK;
7061	}
7062	if (rdev->irq.crtc_vblank_int[3] ||
7063	    atomic_read(&rdev->irq.pflip[3])) {
7064		DRM_DEBUG("cik_irq_set: vblank 3\n");
7065		crtc4 |= VBLANK_INTERRUPT_MASK;
7066	}
7067	if (rdev->irq.crtc_vblank_int[4] ||
7068	    atomic_read(&rdev->irq.pflip[4])) {
7069		DRM_DEBUG("cik_irq_set: vblank 4\n");
7070		crtc5 |= VBLANK_INTERRUPT_MASK;
7071	}
7072	if (rdev->irq.crtc_vblank_int[5] ||
7073	    atomic_read(&rdev->irq.pflip[5])) {
7074		DRM_DEBUG("cik_irq_set: vblank 5\n");
7075		crtc6 |= VBLANK_INTERRUPT_MASK;
7076	}
7077	if (rdev->irq.hpd[0]) {
7078		DRM_DEBUG("cik_irq_set: hpd 1\n");
7079		hpd1 |= DC_HPDx_INT_EN;
7080	}
7081	if (rdev->irq.hpd[1]) {
7082		DRM_DEBUG("cik_irq_set: hpd 2\n");
7083		hpd2 |= DC_HPDx_INT_EN;
7084	}
7085	if (rdev->irq.hpd[2]) {
7086		DRM_DEBUG("cik_irq_set: hpd 3\n");
7087		hpd3 |= DC_HPDx_INT_EN;
7088	}
7089	if (rdev->irq.hpd[3]) {
7090		DRM_DEBUG("cik_irq_set: hpd 4\n");
7091		hpd4 |= DC_HPDx_INT_EN;
7092	}
7093	if (rdev->irq.hpd[4]) {
7094		DRM_DEBUG("cik_irq_set: hpd 5\n");
7095		hpd5 |= DC_HPDx_INT_EN;
7096	}
7097	if (rdev->irq.hpd[5]) {
7098		DRM_DEBUG("cik_irq_set: hpd 6\n");
7099		hpd6 |= DC_HPDx_INT_EN;
7100	}
7101
7102	if (rdev->irq.dpm_thermal) {
7103		DRM_DEBUG("dpm thermal\n");
7104		if (rdev->flags & RADEON_IS_IGP)
7105			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7106		else
7107			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7108	}
7109
7110	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7111
7112	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7113	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7114
7115	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7116	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7117	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7118	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7119	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7120	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7121	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7122	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7123
7124	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7125
7126	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7127	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7128	if (rdev->num_crtc >= 4) {
7129		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7130		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7131	}
7132	if (rdev->num_crtc >= 6) {
7133		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7134		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7135	}
7136
7137	if (rdev->num_crtc >= 2) {
7138		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7139		       GRPH_PFLIP_INT_MASK);
7140		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7141		       GRPH_PFLIP_INT_MASK);
7142	}
7143	if (rdev->num_crtc >= 4) {
7144		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7145		       GRPH_PFLIP_INT_MASK);
7146		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7147		       GRPH_PFLIP_INT_MASK);
7148	}
7149	if (rdev->num_crtc >= 6) {
7150		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7151		       GRPH_PFLIP_INT_MASK);
7152		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7153		       GRPH_PFLIP_INT_MASK);
7154	}
7155
7156	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7157	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7158	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7159	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7160	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7161	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7162
7163	if (rdev->flags & RADEON_IS_IGP)
7164		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7165	else
7166		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7167
7168	return 0;
7169}
7170
7171/**
7172 * cik_irq_ack - ack interrupt sources
7173 *
7174 * @rdev: radeon_device pointer
7175 *
7176 * Ack interrupt sources on the GPU (vblanks, hpd,
7177 * etc.) (CIK).  Certain interrupt sources are sw
7178 * generated and do not require an explicit ack.
7179 */
7180static inline void cik_irq_ack(struct radeon_device *rdev)
7181{
7182	u32 tmp;
7183
7184	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7185	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7186	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7187	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7188	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7189	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7190	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7191
7192	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7193		EVERGREEN_CRTC0_REGISTER_OFFSET);
7194	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7195		EVERGREEN_CRTC1_REGISTER_OFFSET);
7196	if (rdev->num_crtc >= 4) {
7197		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7198			EVERGREEN_CRTC2_REGISTER_OFFSET);
7199		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7200			EVERGREEN_CRTC3_REGISTER_OFFSET);
7201	}
7202	if (rdev->num_crtc >= 6) {
7203		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7204			EVERGREEN_CRTC4_REGISTER_OFFSET);
7205		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7206			EVERGREEN_CRTC5_REGISTER_OFFSET);
7207	}
7208
7209	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7210		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7211		       GRPH_PFLIP_INT_CLEAR);
7212	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7213		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7214		       GRPH_PFLIP_INT_CLEAR);
7215	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7216		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7217	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7218		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7219	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7220		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7221	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7222		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7223
7224	if (rdev->num_crtc >= 4) {
7225		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7226			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7227			       GRPH_PFLIP_INT_CLEAR);
7228		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7229			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7230			       GRPH_PFLIP_INT_CLEAR);
7231		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7232			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7233		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7234			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7235		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7236			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7237		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7238			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7239	}
7240
7241	if (rdev->num_crtc >= 6) {
7242		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7243			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7244			       GRPH_PFLIP_INT_CLEAR);
7245		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7246			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7247			       GRPH_PFLIP_INT_CLEAR);
7248		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7249			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7250		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7251			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7252		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7253			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7254		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7255			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7256	}
7257
7258	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7259		tmp = RREG32(DC_HPD1_INT_CONTROL);
7260		tmp |= DC_HPDx_INT_ACK;
7261		WREG32(DC_HPD1_INT_CONTROL, tmp);
7262	}
7263	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7264		tmp = RREG32(DC_HPD2_INT_CONTROL);
7265		tmp |= DC_HPDx_INT_ACK;
7266		WREG32(DC_HPD2_INT_CONTROL, tmp);
7267	}
7268	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7269		tmp = RREG32(DC_HPD3_INT_CONTROL);
7270		tmp |= DC_HPDx_INT_ACK;
7271		WREG32(DC_HPD3_INT_CONTROL, tmp);
7272	}
7273	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7274		tmp = RREG32(DC_HPD4_INT_CONTROL);
7275		tmp |= DC_HPDx_INT_ACK;
7276		WREG32(DC_HPD4_INT_CONTROL, tmp);
7277	}
7278	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7279		tmp = RREG32(DC_HPD5_INT_CONTROL);
7280		tmp |= DC_HPDx_INT_ACK;
7281		WREG32(DC_HPD5_INT_CONTROL, tmp);
7282	}
7283	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7284		tmp = RREG32(DC_HPD6_INT_CONTROL);
7285		tmp |= DC_HPDx_INT_ACK;
7286		WREG32(DC_HPD6_INT_CONTROL, tmp);
7287	}
7288}
7289
7290/**
7291 * cik_irq_disable - disable interrupts
7292 *
7293 * @rdev: radeon_device pointer
7294 *
7295 * Disable interrupts on the hw (CIK).
7296 */
7297static void cik_irq_disable(struct radeon_device *rdev)
7298{
7299	cik_disable_interrupts(rdev);
7300	/* Wait and acknowledge irq */
7301	mdelay(1);
7302	cik_irq_ack(rdev);
7303	cik_disable_interrupt_state(rdev);
7304}
7305
7306/**
7307 * cik_irq_suspend - disable interrupts for suspend
7308 *
7309 * @rdev: radeon_device pointer
7310 *
7311 * Disable interrupts and stop the RLC (CIK).
7312 * Used for suspend.
7313 */
7314static void cik_irq_suspend(struct radeon_device *rdev)
7315{
7316	cik_irq_disable(rdev);
7317	cik_rlc_stop(rdev);
7318}
7319
7320/**
7321 * cik_irq_fini - tear down interrupt support
7322 *
7323 * @rdev: radeon_device pointer
7324 *
7325 * Disable interrupts on the hw and free the IH ring
7326 * buffer (CIK).
7327 * Used for driver unload.
7328 */
7329static void cik_irq_fini(struct radeon_device *rdev)
7330{
7331	cik_irq_suspend(rdev);
7332	r600_ih_ring_fini(rdev);
7333}
7334
7335/**
7336 * cik_get_ih_wptr - get the IH ring buffer wptr
7337 *
7338 * @rdev: radeon_device pointer
7339 *
7340 * Get the IH ring buffer wptr from either the register
7341 * or the writeback memory buffer (CIK).  Also check for
7342 * ring buffer overflow and deal with it.
7343 * Used by cik_irq_process().
7344 * Returns the value of the wptr.
7345 */
7346static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7347{
7348	u32 wptr, tmp;
7349
7350	if (rdev->wb.enabled)
7351		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7352	else
7353		wptr = RREG32(IH_RB_WPTR);
7354
7355	if (wptr & RB_OVERFLOW) {
7356		/* When a ring buffer overflow happens, start parsing interrupts
7357		 * from the last vector that was not overwritten (wptr + 16).
7358		 * Hopefully this allows us to catch up.
7359		 */
7360		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7361			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7362		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7363		tmp = RREG32(IH_RB_CNTL);
7364		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7365		WREG32(IH_RB_CNTL, tmp);
7366	}
7367	return (wptr & rdev->ih.ptr_mask);
7368}
7369
7370/*        CIK IV Ring
7371 * Each IV ring entry is 128 bits:
7372 * [7:0]    - interrupt source id
7373 * [31:8]   - reserved
7374 * [59:32]  - interrupt source data
7375 * [63:60]  - reserved
7376 * [71:64]  - RINGID
7377 *            CP:
7378 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7379 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7380 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7381 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7382 *            PIPE_ID - ME0 0=3D
7383 *                    - ME1&2 compute dispatcher (4 pipes each)
7384 *            SDMA:
7385 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7386 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7387 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7388 * [79:72]  - VMID
7389 * [95:80]  - PASID
7390 * [127:96] - reserved
7391 */
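/*
 * Illustrative sketch only, not part of the driver: unpacking one IV
 * ring entry according to the layout above.  The helper name is
 * hypothetical; the masks and shifts mirror the parsing done in
 * cik_irq_process() below.
 */
static inline void cik_iv_entry_decode_sketch(const u32 *dw, u8 *src_id,
					      u32 *src_data, u8 *ring_id,
					      u8 *vmid, u16 *pasid)
{
	*src_id = le32_to_cpu(dw[0]) & 0xff;		/* [7:0]   source id */
	*src_data = le32_to_cpu(dw[1]) & 0xfffffff;	/* [59:32] source data */
	*ring_id = le32_to_cpu(dw[2]) & 0xff;		/* [71:64] RINGID */
	*vmid = (le32_to_cpu(dw[2]) >> 8) & 0xff;	/* [79:72] VMID */
	*pasid = (le32_to_cpu(dw[2]) >> 16) & 0xffff;	/* [95:80] PASID */
}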
7392/**
7393 * cik_irq_process - interrupt handler
7394 *
7395 * @rdev: radeon_device pointer
7396 *
7397 * Interrupt handler (CIK).  Walk the IH ring,
7398 * ack interrupts and schedule work to handle
7399 * interrupt events.
7400 * Returns irq process return code.
7401 */
7402int cik_irq_process(struct radeon_device *rdev)
7403{
7404	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7405	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7406	u32 wptr;
7407	u32 rptr;
7408	u32 src_id, src_data, ring_id;
7409	u8 me_id, pipe_id, queue_id;
7410	u32 ring_index;
7411	bool queue_hotplug = false;
7412	bool queue_reset = false;
7413	u32 addr, status, mc_client;
7414	bool queue_thermal = false;
7415
7416	if (!rdev->ih.enabled || rdev->shutdown)
7417		return IRQ_NONE;
7418
7419	wptr = cik_get_ih_wptr(rdev);
7420
7421restart_ih:
7422	/* is somebody else already processing irqs? */
7423	if (atomic_xchg(&rdev->ih.lock, 1))
7424		return IRQ_NONE;
7425
7426	rptr = rdev->ih.rptr;
7427	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7428
7429	/* Order reading of wptr vs. reading of IH ring data */
7430	rmb();
7431
7432	/* display interrupts */
7433	cik_irq_ack(rdev);
7434
7435	while (rptr != wptr) {
7436		/* wptr/rptr are in bytes! */
7437		ring_index = rptr / 4;
7438		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7439		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7440		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7441
7442		switch (src_id) {
7443		case 1: /* D1 vblank/vline */
7444			switch (src_data) {
7445			case 0: /* D1 vblank */
7446				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7447					if (rdev->irq.crtc_vblank_int[0]) {
7448						drm_handle_vblank(rdev->ddev, 0);
7449						rdev->pm.vblank_sync = true;
7450						wake_up(&rdev->irq.vblank_queue);
7451					}
7452					if (atomic_read(&rdev->irq.pflip[0]))
7453						radeon_crtc_handle_flip(rdev, 0);
7454					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7455					DRM_DEBUG("IH: D1 vblank\n");
7456				}
7457				break;
7458			case 1: /* D1 vline */
7459				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7460					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7461					DRM_DEBUG("IH: D1 vline\n");
7462				}
7463				break;
7464			default:
7465				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7466				break;
7467			}
7468			break;
7469		case 2: /* D2 vblank/vline */
7470			switch (src_data) {
7471			case 0: /* D2 vblank */
7472				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7473					if (rdev->irq.crtc_vblank_int[1]) {
7474						drm_handle_vblank(rdev->ddev, 1);
7475						rdev->pm.vblank_sync = true;
7476						wake_up(&rdev->irq.vblank_queue);
7477					}
7478					if (atomic_read(&rdev->irq.pflip[1]))
7479						radeon_crtc_handle_flip(rdev, 1);
7480					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7481					DRM_DEBUG("IH: D2 vblank\n");
7482				}
7483				break;
7484			case 1: /* D2 vline */
7485				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7486					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7487					DRM_DEBUG("IH: D2 vline\n");
7488				}
7489				break;
7490			default:
7491				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7492				break;
7493			}
7494			break;
7495		case 3: /* D3 vblank/vline */
7496			switch (src_data) {
7497			case 0: /* D3 vblank */
7498				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7499					if (rdev->irq.crtc_vblank_int[2]) {
7500						drm_handle_vblank(rdev->ddev, 2);
7501						rdev->pm.vblank_sync = true;
7502						wake_up(&rdev->irq.vblank_queue);
7503					}
7504					if (atomic_read(&rdev->irq.pflip[2]))
7505						radeon_crtc_handle_flip(rdev, 2);
7506					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7507					DRM_DEBUG("IH: D3 vblank\n");
7508				}
7509				break;
7510			case 1: /* D3 vline */
7511				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7512					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7513					DRM_DEBUG("IH: D3 vline\n");
7514				}
7515				break;
7516			default:
7517				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7518				break;
7519			}
7520			break;
7521		case 4: /* D4 vblank/vline */
7522			switch (src_data) {
7523			case 0: /* D4 vblank */
7524				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7525					if (rdev->irq.crtc_vblank_int[3]) {
7526						drm_handle_vblank(rdev->ddev, 3);
7527						rdev->pm.vblank_sync = true;
7528						wake_up(&rdev->irq.vblank_queue);
7529					}
7530					if (atomic_read(&rdev->irq.pflip[3]))
7531						radeon_crtc_handle_flip(rdev, 3);
7532					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7533					DRM_DEBUG("IH: D4 vblank\n");
7534				}
7535				break;
7536			case 1: /* D4 vline */
7537				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7538					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7539					DRM_DEBUG("IH: D4 vline\n");
7540				}
7541				break;
7542			default:
7543				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7544				break;
7545			}
7546			break;
7547		case 5: /* D5 vblank/vline */
7548			switch (src_data) {
7549			case 0: /* D5 vblank */
7550				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7551					if (rdev->irq.crtc_vblank_int[4]) {
7552						drm_handle_vblank(rdev->ddev, 4);
7553						rdev->pm.vblank_sync = true;
7554						wake_up(&rdev->irq.vblank_queue);
7555					}
7556					if (atomic_read(&rdev->irq.pflip[4]))
7557						radeon_crtc_handle_flip(rdev, 4);
7558					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7559					DRM_DEBUG("IH: D5 vblank\n");
7560				}
7561				break;
7562			case 1: /* D5 vline */
7563				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7564					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7565					DRM_DEBUG("IH: D5 vline\n");
7566				}
7567				break;
7568			default:
7569				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7570				break;
7571			}
7572			break;
7573		case 6: /* D6 vblank/vline */
7574			switch (src_data) {
7575			case 0: /* D6 vblank */
7576				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7577					if (rdev->irq.crtc_vblank_int[5]) {
7578						drm_handle_vblank(rdev->ddev, 5);
7579						rdev->pm.vblank_sync = true;
7580						wake_up(&rdev->irq.vblank_queue);
7581					}
7582					if (atomic_read(&rdev->irq.pflip[5]))
7583						radeon_crtc_handle_flip(rdev, 5);
7584					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7585					DRM_DEBUG("IH: D6 vblank\n");
7586				}
7587				break;
7588			case 1: /* D6 vline */
7589				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7590					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7591					DRM_DEBUG("IH: D6 vline\n");
7592				}
7593				break;
7594			default:
7595				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7596				break;
7597			}
7598			break;
7599		case 8: /* D1 page flip */
7600		case 10: /* D2 page flip */
7601		case 12: /* D3 page flip */
7602		case 14: /* D4 page flip */
7603		case 16: /* D5 page flip */
7604		case 18: /* D6 page flip */
7605			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7606			radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7607			break;
7608		case 42: /* HPD hotplug */
7609			switch (src_data) {
7610			case 0:
7611				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7612					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7613					queue_hotplug = true;
7614					DRM_DEBUG("IH: HPD1\n");
7615				}
7616				break;
7617			case 1:
7618				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7619					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7620					queue_hotplug = true;
7621					DRM_DEBUG("IH: HPD2\n");
7622				}
7623				break;
7624			case 2:
7625				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7626					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7627					queue_hotplug = true;
7628					DRM_DEBUG("IH: HPD3\n");
7629				}
7630				break;
7631			case 3:
7632				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7633					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7634					queue_hotplug = true;
7635					DRM_DEBUG("IH: HPD4\n");
7636				}
7637				break;
7638			case 4:
7639				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7640					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7641					queue_hotplug = true;
7642					DRM_DEBUG("IH: HPD5\n");
7643				}
7644				break;
7645			case 5:
7646				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7647					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7648					queue_hotplug = true;
7649					DRM_DEBUG("IH: HPD6\n");
7650				}
7651				break;
7652			default:
7653				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7654				break;
7655			}
7656			break;
7657		case 124: /* UVD */
7658			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7659			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7660			break;
7661		case 146:
7662		case 147:
7663			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7664			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7665			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7666			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7667			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7668				addr);
7669			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7670				status);
7671			cik_vm_decode_fault(rdev, status, addr, mc_client);
7672			/* reset addr and status */
7673			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7674			break;
7675		case 167: /* VCE */
7676			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7677			switch (src_data) {
7678			case 0:
7679				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7680				break;
7681			case 1:
7682				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7683				break;
7684			default:
7685				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7686				break;
7687			}
7688			break;
7689		case 176: /* GFX RB CP_INT */
7690		case 177: /* GFX IB CP_INT */
7691			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7692			break;
7693		case 181: /* CP EOP event */
7694			DRM_DEBUG("IH: CP EOP\n");
7695			/* XXX check the bitfield order! */
7696			me_id = (ring_id & 0x60) >> 5;
7697			pipe_id = (ring_id & 0x18) >> 3;
7698			queue_id = (ring_id & 0x7) >> 0;
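			/* Worked example, assuming the bitfield order above
			 * is right: ring_id 0x29 (0b0101001) decodes to
			 * me_id 1 (bits [6:5]), pipe_id 1 (bits [4:3]) and
			 * queue_id 1 (bits [2:0]).
			 */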
7699			switch (me_id) {
7700			case 0:
7701				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7702				break;
7703			case 1:
7704			case 2:
7705				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7706					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7707				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7708					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7709				break;
7710			}
7711			break;
7712		case 184: /* CP Privileged reg access */
7713			DRM_ERROR("Illegal register access in command stream\n");
7714			/* XXX check the bitfield order! */
7715			me_id = (ring_id & 0x60) >> 5;
7716			pipe_id = (ring_id & 0x18) >> 3;
7717			queue_id = (ring_id & 0x7) >> 0;
7718			switch (me_id) {
7719			case 0:
7720				/* This results in a full GPU reset, but all we need to do is soft
7721				 * reset the CP for gfx
7722				 */
7723				queue_reset = true;
7724				break;
7725			case 1:
7726				/* XXX compute */
7727				queue_reset = true;
7728				break;
7729			case 2:
7730				/* XXX compute */
7731				queue_reset = true;
7732				break;
7733			}
7734			break;
7735		case 185: /* CP Privileged inst */
7736			DRM_ERROR("Illegal instruction in command stream\n");
7737			/* XXX check the bitfield order! */
7738			me_id = (ring_id & 0x60) >> 5;
7739			pipe_id = (ring_id & 0x18) >> 3;
7740			queue_id = (ring_id & 0x7) >> 0;
7741			switch (me_id) {
7742			case 0:
7743				/* This results in a full GPU reset, but all we need to do is soft
7744				 * reset the CP for gfx
7745				 */
7746				queue_reset = true;
7747				break;
7748			case 1:
7749				/* XXX compute */
7750				queue_reset = true;
7751				break;
7752			case 2:
7753				/* XXX compute */
7754				queue_reset = true;
7755				break;
7756			}
7757			break;
7758		case 224: /* SDMA trap event */
7759			/* XXX check the bitfield order! */
7760			me_id = (ring_id & 0x3) >> 0;
7761			queue_id = (ring_id & 0xc) >> 2;
7762			DRM_DEBUG("IH: SDMA trap\n");
7763			switch (me_id) {
7764			case 0:
7765				switch (queue_id) {
7766				case 0:
7767					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7768					break;
7769				case 1:
7770					/* XXX compute */
7771					break;
7772				case 2:
7773					/* XXX compute */
7774					break;
7775				}
7776				break;
7777			case 1:
7778				switch (queue_id) {
7779				case 0:
7780					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7781					break;
7782				case 1:
7783					/* XXX compute */
7784					break;
7785				case 2:
7786					/* XXX compute */
7787					break;
7788				}
7789				break;
7790			}
7791			break;
7792		case 230: /* thermal low to high */
7793			DRM_DEBUG("IH: thermal low to high\n");
7794			rdev->pm.dpm.thermal.high_to_low = false;
7795			queue_thermal = true;
7796			break;
7797		case 231: /* thermal high to low */
7798			DRM_DEBUG("IH: thermal high to low\n");
7799			rdev->pm.dpm.thermal.high_to_low = true;
7800			queue_thermal = true;
7801			break;
7802		case 233: /* GUI IDLE */
7803			DRM_DEBUG("IH: GUI idle\n");
7804			break;
7805		case 241: /* SDMA Privileged inst */
7806		case 247: /* SDMA Privileged inst */
7807			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7808			/* XXX check the bitfield order! */
7809			me_id = (ring_id & 0x3) >> 0;
7810			queue_id = (ring_id & 0xc) >> 2;
7811			switch (me_id) {
7812			case 0:
7813				switch (queue_id) {
7814				case 0:
7815					queue_reset = true;
7816					break;
7817				case 1:
7818					/* XXX compute */
7819					queue_reset = true;
7820					break;
7821				case 2:
7822					/* XXX compute */
7823					queue_reset = true;
7824					break;
7825				}
7826				break;
7827			case 1:
7828				switch (queue_id) {
7829				case 0:
7830					queue_reset = true;
7831					break;
7832				case 1:
7833					/* XXX compute */
7834					queue_reset = true;
7835					break;
7836				case 2:
7837					/* XXX compute */
7838					queue_reset = true;
7839					break;
7840				}
7841				break;
7842			}
7843			break;
7844		default:
7845			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7846			break;
7847		}
7848
7849		/* wptr/rptr are in bytes! */
7850		rptr += 16;
7851		rptr &= rdev->ih.ptr_mask;
7852	}
7853	if (queue_hotplug)
7854		schedule_work(&rdev->hotplug_work);
7855	if (queue_reset)
7856		schedule_work(&rdev->reset_work);
7857	if (queue_thermal)
7858		schedule_work(&rdev->pm.dpm.thermal.work);
7859	rdev->ih.rptr = rptr;
7860	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7861	atomic_set(&rdev->ih.lock, 0);
7862
7863	/* make sure wptr hasn't changed while processing */
7864	wptr = cik_get_ih_wptr(rdev);
7865	if (wptr != rptr)
7866		goto restart_ih;
7867
7868	return IRQ_HANDLED;
7869}
7870
7871/*
7872 * startup/shutdown callbacks
7873 */
7874/**
7875 * cik_startup - program the asic to a functional state
7876 *
7877 * @rdev: radeon_device pointer
7878 *
7879 * Programs the asic to a functional state (CIK).
7880 * Called by cik_init() and cik_resume().
7881 * Returns 0 for success, error for failure.
7882 */
7883static int cik_startup(struct radeon_device *rdev)
7884{
7885	struct radeon_ring *ring;
7886	int r;
7887
7888	/* enable pcie gen2/3 link */
7889	cik_pcie_gen3_enable(rdev);
7890	/* enable aspm */
7891	cik_program_aspm(rdev);
7892
7893	/* scratch needs to be initialized before MC */
7894	r = r600_vram_scratch_init(rdev);
7895	if (r)
7896		return r;
7897
7898	cik_mc_program(rdev);
7899
7900	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7901		r = ci_mc_load_microcode(rdev);
7902		if (r) {
7903			DRM_ERROR("Failed to load MC firmware!\n");
7904			return r;
7905		}
7906	}
7907
7908	r = cik_pcie_gart_enable(rdev);
7909	if (r)
7910		return r;
7911	cik_gpu_init(rdev);
7912
7913	/* allocate rlc buffers */
7914	if (rdev->flags & RADEON_IS_IGP) {
7915		if (rdev->family == CHIP_KAVERI) {
7916			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7917			rdev->rlc.reg_list_size =
7918				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7919		} else {
7920			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7921			rdev->rlc.reg_list_size =
7922				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7923		}
7924	}
7925	rdev->rlc.cs_data = ci_cs_data;
7926	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7927	r = sumo_rlc_init(rdev);
7928	if (r) {
7929		DRM_ERROR("Failed to init rlc BOs!\n");
7930		return r;
7931	}
7932
7933	/* allocate wb buffer */
7934	r = radeon_wb_init(rdev);
7935	if (r)
7936		return r;
7937
7938	/* allocate mec buffers */
7939	r = cik_mec_init(rdev);
7940	if (r) {
7941		DRM_ERROR("Failed to init MEC BOs!\n");
7942		return r;
7943	}
7944
7945	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7946	if (r) {
7947		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7948		return r;
7949	}
7950
7951	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7952	if (r) {
7953		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7954		return r;
7955	}
7956
7957	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7958	if (r) {
7959		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7960		return r;
7961	}
7962
7963	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7964	if (r) {
7965		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7966		return r;
7967	}
7968
7969	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7970	if (r) {
7971		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7972		return r;
7973	}
7974
7975	r = radeon_uvd_resume(rdev);
7976	if (!r) {
7977		r = uvd_v4_2_resume(rdev);
7978		if (!r) {
7979			r = radeon_fence_driver_start_ring(rdev,
7980							   R600_RING_TYPE_UVD_INDEX);
7981			if (r)
7982				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7983		}
7984	}
7985	if (r)
7986		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7987
7988	r = radeon_vce_resume(rdev);
7989	if (!r) {
7990		r = vce_v2_0_resume(rdev);
7991		if (!r)
7992			r = radeon_fence_driver_start_ring(rdev,
7993							   TN_RING_TYPE_VCE1_INDEX);
7994		if (!r)
7995			r = radeon_fence_driver_start_ring(rdev,
7996							   TN_RING_TYPE_VCE2_INDEX);
7997	}
7998	if (r) {
7999		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8000		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8001		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8002	}
8003
8004	/* Enable IRQ */
8005	if (!rdev->irq.installed) {
8006		r = radeon_irq_kms_init(rdev);
8007		if (r)
8008			return r;
8009	}
8010
8011	r = cik_irq_init(rdev);
8012	if (r) {
8013		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8014		radeon_irq_kms_fini(rdev);
8015		return r;
8016	}
8017	cik_irq_set(rdev);
8018
8019	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8020	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8021			     PACKET3(PACKET3_NOP, 0x3FFF));
8022	if (r)
8023		return r;
8024
8025	/* set up the compute queues */
8026	/* type-2 packets are deprecated on MEC, use type-3 instead */
8027	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8028	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8029			     PACKET3(PACKET3_NOP, 0x3FFF));
8030	if (r)
8031		return r;
8032	ring->me = 1; /* first MEC */
8033	ring->pipe = 0; /* first pipe */
8034	ring->queue = 0; /* first queue */
8035	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8036
8037	/* type-2 packets are deprecated on MEC, use type-3 instead */
8038	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8039	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8040			     PACKET3(PACKET3_NOP, 0x3FFF));
8041	if (r)
8042		return r;
8043	/* dGPUs only have 1 MEC */
8044	ring->me = 1; /* first MEC */
8045	ring->pipe = 0; /* first pipe */
8046	ring->queue = 1; /* second queue */
8047	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8048
8049	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8050	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8051			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8052	if (r)
8053		return r;
8054
8055	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8056	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8057			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8058	if (r)
8059		return r;
8060
8061	r = cik_cp_resume(rdev);
8062	if (r)
8063		return r;
8064
8065	r = cik_sdma_resume(rdev);
8066	if (r)
8067		return r;
8068
8069	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8070	if (ring->ring_size) {
8071		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8072				     RADEON_CP_PACKET2);
8073		if (!r)
8074			r = uvd_v1_0_init(rdev);
8075		if (r)
8076			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8077	}
8078
8079	r = -ENOENT;
8080
8081	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8082	if (ring->ring_size)
8083		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8084				     VCE_CMD_NO_OP);
8085
8086	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8087	if (ring->ring_size)
8088		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8089				     VCE_CMD_NO_OP);
8090
8091	if (!r)
8092		r = vce_v1_0_init(rdev);
8093	else if (r != -ENOENT)
8094		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8095
8096	r = radeon_ib_pool_init(rdev);
8097	if (r) {
8098		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8099		return r;
8100	}
8101
8102	r = radeon_vm_manager_init(rdev);
8103	if (r) {
8104		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8105		return r;
8106	}
8107
8108	r = dce6_audio_init(rdev);
8109	if (r)
8110		return r;
8111
8112	return 0;
8113}
8114
8115/**
8116 * cik_resume - resume the asic to a functional state
8117 *
8118 * @rdev: radeon_device pointer
8119 *
8120 * Programs the asic to a functional state (CIK).
8121 * Called at resume.
8122 * Returns 0 for success, error for failure.
8123 */
8124int cik_resume(struct radeon_device *rdev)
8125{
8126	int r;
8127
8128	/* post card */
8129	atom_asic_init(rdev->mode_info.atom_context);
8130
8131	/* init golden registers */
8132	cik_init_golden_registers(rdev);
8133
8134	if (rdev->pm.pm_method == PM_METHOD_DPM)
8135		radeon_pm_resume(rdev);
8136
8137	rdev->accel_working = true;
8138	r = cik_startup(rdev);
8139	if (r) {
8140		DRM_ERROR("cik startup failed on resume\n");
8141		rdev->accel_working = false;
8142		return r;
8143	}
8144
8145	return r;
8146
8147}
8148
8149/**
8150 * cik_suspend - suspend the asic
8151 *
8152 * @rdev: radeon_device pointer
8153 *
8154 * Bring the chip into a state suitable for suspend (CIK).
8155 * Called at suspend.
8156 * Returns 0 for success.
8157 */
8158int cik_suspend(struct radeon_device *rdev)
8159{
8160	radeon_pm_suspend(rdev);
8161	dce6_audio_fini(rdev);
8162	radeon_vm_manager_fini(rdev);
8163	cik_cp_enable(rdev, false);
8164	cik_sdma_enable(rdev, false);
8165	uvd_v1_0_fini(rdev);
8166	radeon_uvd_suspend(rdev);
8167	radeon_vce_suspend(rdev);
8168	cik_fini_pg(rdev);
8169	cik_fini_cg(rdev);
8170	cik_irq_suspend(rdev);
8171	radeon_wb_disable(rdev);
8172	cik_pcie_gart_disable(rdev);
8173	return 0;
8174}
8175
8176	/* The plan is to move initialization into this function and use
8177	 * helper functions so that radeon_device_init does little more
8178	 * than call asic specific functions. This should also allow us
8179	 * to remove a bunch of callback functions,
8180	 * like vram_info.
8181 */
8182/**
8183 * cik_init - asic specific driver and hw init
8184 *
8185 * @rdev: radeon_device pointer
8186 *
8187 * Setup asic specific driver variables and program the hw
8188 * to a functional state (CIK).
8189 * Called at driver startup.
8190 * Returns 0 for success, errors for failure.
8191 */
8192int cik_init(struct radeon_device *rdev)
8193{
8194	struct radeon_ring *ring;
8195	int r;
8196
8197	/* Read BIOS */
8198	if (!radeon_get_bios(rdev)) {
8199		if (ASIC_IS_AVIVO(rdev))
8200			return -EINVAL;
8201	}
8202	/* Must be an ATOMBIOS */
8203	if (!rdev->is_atom_bios) {
8204		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8205		return -EINVAL;
8206	}
8207	r = radeon_atombios_init(rdev);
8208	if (r)
8209		return r;
8210
8211	/* Post card if necessary */
8212	if (!radeon_card_posted(rdev)) {
8213		if (!rdev->bios) {
8214			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8215			return -EINVAL;
8216		}
8217		DRM_INFO("GPU not posted. posting now...\n");
8218		atom_asic_init(rdev->mode_info.atom_context);
8219	}
8220	/* init golden registers */
8221	cik_init_golden_registers(rdev);
8222	/* Initialize scratch registers */
8223	cik_scratch_init(rdev);
8224	/* Initialize surface registers */
8225	radeon_surface_init(rdev);
8226	/* Initialize clocks */
8227	radeon_get_clock_info(rdev->ddev);
8228
8229	/* Fence driver */
8230	r = radeon_fence_driver_init(rdev);
8231	if (r)
8232		return r;
8233
8234	/* initialize memory controller */
8235	r = cik_mc_init(rdev);
8236	if (r)
8237		return r;
8238	/* Memory manager */
8239	r = radeon_bo_init(rdev);
8240	if (r)
8241		return r;
8242
8243	if (rdev->flags & RADEON_IS_IGP) {
8244		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8245		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8246			r = cik_init_microcode(rdev);
8247			if (r) {
8248				DRM_ERROR("Failed to load firmware!\n");
8249				return r;
8250			}
8251		}
8252	} else {
8253		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8254		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8255		    !rdev->mc_fw) {
8256			r = cik_init_microcode(rdev);
8257			if (r) {
8258				DRM_ERROR("Failed to load firmware!\n");
8259				return r;
8260			}
8261		}
8262	}
8263
8264	/* Initialize power management */
8265	radeon_pm_init(rdev);
8266
8267	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8268	ring->ring_obj = NULL;
8269	r600_ring_init(rdev, ring, 1024 * 1024);
8270
8271	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8272	ring->ring_obj = NULL;
8273	r600_ring_init(rdev, ring, 1024 * 1024);
8274	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8275	if (r)
8276		return r;
8277
8278	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8279	ring->ring_obj = NULL;
8280	r600_ring_init(rdev, ring, 1024 * 1024);
8281	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8282	if (r)
8283		return r;
8284
8285	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8286	ring->ring_obj = NULL;
8287	r600_ring_init(rdev, ring, 256 * 1024);
8288
8289	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8290	ring->ring_obj = NULL;
8291	r600_ring_init(rdev, ring, 256 * 1024);
8292
8293	r = radeon_uvd_init(rdev);
8294	if (!r) {
8295		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8296		ring->ring_obj = NULL;
8297		r600_ring_init(rdev, ring, 4096);
8298	}
8299
8300	r = radeon_vce_init(rdev);
8301	if (!r) {
8302		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8303		ring->ring_obj = NULL;
8304		r600_ring_init(rdev, ring, 4096);
8305
8306		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8307		ring->ring_obj = NULL;
8308		r600_ring_init(rdev, ring, 4096);
8309	}
8310
8311	rdev->ih.ring_obj = NULL;
8312	r600_ih_ring_init(rdev, 64 * 1024);
8313
8314	r = r600_pcie_gart_init(rdev);
8315	if (r)
8316		return r;
8317
8318	rdev->accel_working = true;
8319	r = cik_startup(rdev);
8320	if (r) {
8321		dev_err(rdev->dev, "disabling GPU acceleration\n");
8322		cik_cp_fini(rdev);
8323		cik_sdma_fini(rdev);
8324		cik_irq_fini(rdev);
8325		sumo_rlc_fini(rdev);
8326		cik_mec_fini(rdev);
8327		radeon_wb_fini(rdev);
8328		radeon_ib_pool_fini(rdev);
8329		radeon_vm_manager_fini(rdev);
8330		radeon_irq_kms_fini(rdev);
8331		cik_pcie_gart_fini(rdev);
8332		rdev->accel_working = false;
8333	}
8334
8335	/* Don't start up if the MC ucode is missing.
8336	 * The default clocks and voltages before the MC ucode
8337	 * is loaded are not sufficient for advanced operations.
8338	 */
8339	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8340		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8341		return -EINVAL;
8342	}
8343
8344	return 0;
8345}
8346
8347/**
8348 * cik_fini - asic specific driver and hw fini
8349 *
8350 * @rdev: radeon_device pointer
8351 *
8352 * Tear down the asic specific driver variables and program the hw
8353 * to an idle state (CIK).
8354 * Called at driver unload.
8355 */
8356void cik_fini(struct radeon_device *rdev)
8357{
8358	radeon_pm_fini(rdev);
8359	cik_cp_fini(rdev);
8360	cik_sdma_fini(rdev);
8361	cik_fini_pg(rdev);
8362	cik_fini_cg(rdev);
8363	cik_irq_fini(rdev);
8364	sumo_rlc_fini(rdev);
8365	cik_mec_fini(rdev);
8366	radeon_wb_fini(rdev);
8367	radeon_vm_manager_fini(rdev);
8368	radeon_ib_pool_fini(rdev);
8369	radeon_irq_kms_fini(rdev);
8370	uvd_v1_0_fini(rdev);
8371	radeon_uvd_fini(rdev);
8372	radeon_vce_fini(rdev);
8373	cik_pcie_gart_fini(rdev);
8374	r600_vram_scratch_fini(rdev);
8375	radeon_gem_fini(rdev);
8376	radeon_fence_driver_fini(rdev);
8377	radeon_bo_fini(rdev);
8378	radeon_atombios_fini(rdev);
8379	kfree(rdev->bios);
8380	rdev->bios = NULL;
8381}
8382
8383void dce8_program_fmt(struct drm_encoder *encoder)
8384{
8385	struct drm_device *dev = encoder->dev;
8386	struct radeon_device *rdev = dev->dev_private;
8387	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8388	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8389	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8390	int bpc = 0;
8391	u32 tmp = 0;
8392	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8393
8394	if (connector) {
8395		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8396		bpc = radeon_get_monitor_bpc(connector);
8397		dither = radeon_connector->dither;
8398	}
8399
8400	/* LVDS/eDP FMT is set up by atom */
8401	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8402		return;
8403
8404	/* not needed for analog */
8405	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8406	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8407		return;
8408
8409	if (bpc == 0)
8410		return;
8411
8412	switch (bpc) {
8413	case 6:
8414		if (dither == RADEON_FMT_DITHER_ENABLE)
8415			/* XXX sort out optimal dither settings */
8416			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8417				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8418		else
8419			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8420		break;
8421	case 8:
8422		if (dither == RADEON_FMT_DITHER_ENABLE)
8423			/* XXX sort out optimal dither settings */
8424			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8425				FMT_RGB_RANDOM_ENABLE |
8426				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8427		else
8428			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8429		break;
8430	case 10:
8431		if (dither == RADEON_FMT_DITHER_ENABLE)
8432			/* XXX sort out optimal dither settings */
8433			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8434				FMT_RGB_RANDOM_ENABLE |
8435				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8436		else
8437			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8438		break;
8439	default:
8440		/* not needed */
8441		break;
8442	}
8443
8444	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8445}
8446
8447/* display watermark setup */
8448/**
8449 * dce8_line_buffer_adjust - Set up the line buffer
8450 *
8451 * @rdev: radeon_device pointer
8452 * @radeon_crtc: the selected display controller
8453 * @mode: the current display mode on the selected display
8454 * controller
8455 *
8456 * Set up the line buffer allocation for
8457 * the selected display controller (CIK).
8458 * Returns the line buffer size in pixels.
8459 */
8460static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8461				   struct radeon_crtc *radeon_crtc,
8462				   struct drm_display_mode *mode)
8463{
8464	u32 tmp, buffer_alloc, i;
8465	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8466	/*
8467	 * Line Buffer Setup
8468	 * There are 6 line buffers, one for each display controller.
8469	 * There are 3 partitions per LB. Select the number of partitions
8470	 * to enable based on the display width.  For display widths larger
8471	 * than 4096, you need to use 2 display controllers and combine
8472	 * them using the stereo blender.
8473	 */
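	/* Worked example: a mode 1920 pixels wide is not < 1920, so it
	 * takes the second branch below (tmp = 2, buffer_alloc = 2) and
	 * this CRTC ends up with a 2560 * 2 pixel line buffer.
	 */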
8474	if (radeon_crtc->base.enabled && mode) {
8475		if (mode->crtc_hdisplay < 1920) {
8476			tmp = 1;
8477			buffer_alloc = 2;
8478		} else if (mode->crtc_hdisplay < 2560) {
8479			tmp = 2;
8480			buffer_alloc = 2;
8481		} else if (mode->crtc_hdisplay < 4096) {
8482			tmp = 0;
8483			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8484		} else {
8485			DRM_DEBUG_KMS("Mode too big for LB!\n");
8486			tmp = 0;
8487			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8488		}
8489	} else {
8490		tmp = 1;
8491		buffer_alloc = 0;
8492	}
8493
8494	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8495	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8496
8497	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8498	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8499	for (i = 0; i < rdev->usec_timeout; i++) {
8500		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8501		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8502			break;
8503		udelay(1);
8504	}
8505
8506	if (radeon_crtc->base.enabled && mode) {
8507		switch (tmp) {
8508		case 0:
8509		default:
8510			return 4096 * 2;
8511		case 1:
8512			return 1920 * 2;
8513		case 2:
8514			return 2560 * 2;
8515		}
8516	}
8517
8518	/* controller not enabled, so no lb used */
8519	return 0;
8520}
8521
8522/**
8523 * cik_get_number_of_dram_channels - get the number of dram channels
8524 *
8525 * @rdev: radeon_device pointer
8526 *
8527 * Look up the number of video ram channels (CIK).
8528 * Used for display watermark bandwidth calculations
8529 * Returns the number of dram channels
8530 */
8531static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8532{
8533	u32 tmp = RREG32(MC_SHARED_CHMAP);
8534
8535	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8536	case 0:
8537	default:
8538		return 1;
8539	case 1:
8540		return 2;
8541	case 2:
8542		return 4;
8543	case 3:
8544		return 8;
8545	case 4:
8546		return 3;
8547	case 5:
8548		return 6;
8549	case 6:
8550		return 10;
8551	case 7:
8552		return 12;
8553	case 8:
8554		return 16;
8555	}
8556}
8557
8558struct dce8_wm_params {
8559	u32 dram_channels; /* number of dram channels */
8560	u32 yclk;          /* bandwidth per dram data pin in kHz */
8561	u32 sclk;          /* engine clock in kHz */
8562	u32 disp_clk;      /* display clock in kHz */
8563	u32 src_width;     /* viewport width */
8564	u32 active_time;   /* active display time in ns */
8565	u32 blank_time;    /* blank time in ns */
8566	bool interlaced;    /* mode is interlaced */
8567	fixed20_12 vsc;    /* vertical scale ratio */
8568	u32 num_heads;     /* number of active crtcs */
8569	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8570	u32 lb_size;       /* line buffer allocated to pipe */
8571	u32 vtaps;         /* vertical scaler taps */
8572};
8573
8574/**
8575 * dce8_dram_bandwidth - get the dram bandwidth
8576 *
8577 * @wm: watermark calculation data
8578 *
8579 * Calculate the raw dram bandwidth (CIK).
8580 * Used for display watermark bandwidth calculations
8581 * Returns the dram bandwidth in MBytes/s
8582 */
8583static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8584{
8585	/* Calculate raw DRAM Bandwidth */
8586	fixed20_12 dram_efficiency; /* 0.7 */
8587	fixed20_12 yclk, dram_channels, bandwidth;
8588	fixed20_12 a;
8589
8590	a.full = dfixed_const(1000);
8591	yclk.full = dfixed_const(wm->yclk);
8592	yclk.full = dfixed_div(yclk, a);
8593	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8594	a.full = dfixed_const(10);
8595	dram_efficiency.full = dfixed_const(7);
8596	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8597	bandwidth.full = dfixed_mul(dram_channels, yclk);
8598	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8599
8600	return dfixed_trunc(bandwidth);
8601}
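/* Numeric sketch of the formula above, with illustrative values:
 * yclk = 1000000 kHz and 2 dram channels give
 * bandwidth = (1000000 / 1000) * (2 * 4) * 0.7 = 5600 MBytes/s.
 */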
8602
8603/**
8604 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8605 *
8606 * @wm: watermark calculation data
8607 *
8608 * Calculate the dram bandwidth used for display (CIK).
8609 * Used for display watermark bandwidth calculations
8610 * Returns the dram bandwidth for display in MBytes/s
8611 */
8612static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8613{
8614	/* Calculate DRAM Bandwidth and the part allocated to display. */
8615	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8616	fixed20_12 yclk, dram_channels, bandwidth;
8617	fixed20_12 a;
8618
8619	a.full = dfixed_const(1000);
8620	yclk.full = dfixed_const(wm->yclk);
8621	yclk.full = dfixed_div(yclk, a);
8622	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8623	a.full = dfixed_const(10);
8624	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8625	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8626	bandwidth.full = dfixed_mul(dram_channels, yclk);
8627	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8628
8629	return dfixed_trunc(bandwidth);
8630}
8631
8632/**
8633 * dce8_data_return_bandwidth - get the data return bandwidth
8634 *
8635 * @wm: watermark calculation data
8636 *
8637 * Calculate the data return bandwidth used for display (CIK).
8638 * Used for display watermark bandwidth calculations
8639 * Returns the data return bandwidth in MBytes/s
8640 */
8641static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8642{
8643	/* Calculate the display Data return Bandwidth */
8644	fixed20_12 return_efficiency; /* 0.8 */
8645	fixed20_12 sclk, bandwidth;
8646	fixed20_12 a;
8647
8648	a.full = dfixed_const(1000);
8649	sclk.full = dfixed_const(wm->sclk);
8650	sclk.full = dfixed_div(sclk, a);
8651	a.full = dfixed_const(10);
8652	return_efficiency.full = dfixed_const(8);
8653	return_efficiency.full = dfixed_div(return_efficiency, a);
8654	a.full = dfixed_const(32);
8655	bandwidth.full = dfixed_mul(a, sclk);
8656	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8657
8658	return dfixed_trunc(bandwidth);
8659}
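/* Numeric sketch, with an illustrative sclk = 800000 kHz: the return
 * path moves 32 bytes per cycle at 0.8 efficiency, so
 * bandwidth = (800000 / 1000) * 32 * 0.8 = 20480 MBytes/s.
 * dce8_dmif_request_bandwidth() below is the same formula with
 * disp_clk in place of sclk.
 */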
8660
8661/**
8662 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8663 *
8664 * @wm: watermark calculation data
8665 *
8666 * Calculate the dmif bandwidth used for display (CIK).
8667 * Used for display watermark bandwidth calculations
8668 * Returns the dmif bandwidth in MBytes/s
8669 */
8670static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8671{
8672	/* Calculate the DMIF Request Bandwidth */
8673	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8674	fixed20_12 disp_clk, bandwidth;
8675	fixed20_12 a, b;
8676
8677	a.full = dfixed_const(1000);
8678	disp_clk.full = dfixed_const(wm->disp_clk);
8679	disp_clk.full = dfixed_div(disp_clk, a);
8680	a.full = dfixed_const(32);
8681	b.full = dfixed_mul(a, disp_clk);
8682
8683	a.full = dfixed_const(10);
8684	disp_clk_request_efficiency.full = dfixed_const(8);
8685	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8686
8687	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8688
8689	return dfixed_trunc(bandwidth);
8690}
8691
8692/**
8693 * dce8_available_bandwidth - get the min available bandwidth
8694 *
8695 * @wm: watermark calculation data
8696 *
8697 * Calculate the min available bandwidth used for display (CIK).
8698 * Used for display watermark bandwidth calculations
8699 * Returns the min available bandwidth in MBytes/s
8700 */
8701static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8702{
8703	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8704	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8705	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8706	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8707
8708	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8709}
8710
8711/**
8712 * dce8_average_bandwidth - get the average available bandwidth
8713 *
8714 * @wm: watermark calculation data
8715 *
8716 * Calculate the average available bandwidth used for display (CIK).
8717 * Used for display watermark bandwidth calculations
8718 * Returns the average available bandwidth in MBytes/s
8719 */
8720static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8721{
8722	/* Calculate the display mode Average Bandwidth
8723	 * DisplayMode should contain the source and destination dimensions,
8724	 * timing, etc.
8725	 */
8726	fixed20_12 bpp;
8727	fixed20_12 line_time;
8728	fixed20_12 src_width;
8729	fixed20_12 bandwidth;
8730	fixed20_12 a;
8731
8732	a.full = dfixed_const(1000);
8733	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8734	line_time.full = dfixed_div(line_time, a);
8735	bpp.full = dfixed_const(wm->bytes_per_pixel);
8736	src_width.full = dfixed_const(wm->src_width);
8737	bandwidth.full = dfixed_mul(src_width, bpp);
8738	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8739	bandwidth.full = dfixed_div(bandwidth, line_time);
8740
8741	return dfixed_trunc(bandwidth);
8742}
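/*
 * Worked example (editorial): active_time and blank_time are in ns, so
 * line_time is in microseconds after the divide by 1000, and bytes per
 * microsecond comes out as MBytes/s.  For a 1920-pixel-wide source at
 * 4 bytes/pixel, no vertical scaling (vsc == 1) and a 14.8 us line:
 *
 *	bandwidth = (1920 * 4 * 1) / 14.8 ~= 519 MBytes/s
 */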
8743
8744/**
8745 * dce8_latency_watermark - get the latency watermark
8746 *
8747 * @wm: watermark calculation data
8748 *
8749 * Calculate the latency watermark (CIK).
8750 * Used for display watermark bandwidth calculations
8751 * Returns the latency watermark in ns
8752 */
8753static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8754{
8755	/* First calculate the latency in ns */
8756	u32 mc_latency = 2000; /* 2000 ns. */
8757	u32 available_bandwidth = dce8_available_bandwidth(wm);
8758	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8759	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8760	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8761	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8762		(wm->num_heads * cursor_line_pair_return_time);
8763	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8764	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8765	u32 tmp, dmif_size = 12288;
8766	fixed20_12 a, b, c;
8767
8768	if (wm->num_heads == 0)
8769		return 0;
8770
8771	a.full = dfixed_const(2);
8772	b.full = dfixed_const(1);
8773	if ((wm->vsc.full > a.full) ||
8774	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8775	    (wm->vtaps >= 5) ||
8776	    ((wm->vsc.full >= a.full) && wm->interlaced))
8777		max_src_lines_per_dst_line = 4;
8778	else
8779		max_src_lines_per_dst_line = 2;
8780
8781	a.full = dfixed_const(available_bandwidth);
8782	b.full = dfixed_const(wm->num_heads);
8783	a.full = dfixed_div(a, b);
8784
8785	b.full = dfixed_const(mc_latency + 512);
8786	c.full = dfixed_const(wm->disp_clk);
8787	b.full = dfixed_div(b, c);
8788
8789	c.full = dfixed_const(dmif_size);
8790	b.full = dfixed_div(c, b);
8791
8792	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8793
8794	b.full = dfixed_const(1000);
8795	c.full = dfixed_const(wm->disp_clk);
8796	b.full = dfixed_div(c, b);
8797	c.full = dfixed_const(wm->bytes_per_pixel);
8798	b.full = dfixed_mul(b, c);
8799
8800	lb_fill_bw = min(tmp, dfixed_trunc(b));
8801
8802	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8803	b.full = dfixed_const(1000);
8804	c.full = dfixed_const(lb_fill_bw);
8805	b.full = dfixed_div(c, b);
8806	a.full = dfixed_div(a, b);
8807	line_fill_time = dfixed_trunc(a);
8808
8809	if (line_fill_time < wm->active_time)
8810		return latency;
8811	else
8812		return latency + (line_fill_time - wm->active_time);
8813
8814}
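/*
 * Editorial summary of the calculation above: the watermark is the sum
 * of the memory controller latency, the worst-case chunk and cursor
 * return time of the other heads, and the display pipe latency.  If
 * the line buffer cannot be refilled within the active portion of a
 * scanline (line_fill_time >= active_time), the shortfall is added on
 * top of that latency.
 */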
8815
8816/**
 8817 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 8818 * average bandwidth against the dram bandwidth available for display
8819 *
8820 * @wm: watermark calculation data
8821 *
8822 * Check if the display average bandwidth fits in the display
8823 * dram bandwidth (CIK).
8824 * Used for display watermark bandwidth calculations
8825 * Returns true if the display fits, false if not.
8826 */
8827static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8828{
8829	if (dce8_average_bandwidth(wm) <=
8830	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8831		return true;
8832	else
8833		return false;
8834}
8835
8836/**
 8837 * dce8_average_bandwidth_vs_available_bandwidth - check
 8838 * average bandwidth against the available display bandwidth
8839 *
8840 * @wm: watermark calculation data
8841 *
8842 * Check if the display average bandwidth fits in the display
8843 * available bandwidth (CIK).
8844 * Used for display watermark bandwidth calculations
8845 * Returns true if the display fits, false if not.
8846 */
8847static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8848{
8849	if (dce8_average_bandwidth(wm) <=
8850	    (dce8_available_bandwidth(wm) / wm->num_heads))
8851		return true;
8852	else
8853		return false;
8854}
8855
8856/**
8857 * dce8_check_latency_hiding - check latency hiding
8858 *
8859 * @wm: watermark calculation data
8860 *
8861 * Check latency hiding (CIK).
8862 * Used for display watermark bandwidth calculations
8863 * Returns true if the display fits, false if not.
8864 */
8865static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8866{
8867	u32 lb_partitions = wm->lb_size / wm->src_width;
8868	u32 line_time = wm->active_time + wm->blank_time;
8869	u32 latency_tolerant_lines;
8870	u32 latency_hiding;
8871	fixed20_12 a;
8872
8873	a.full = dfixed_const(1);
8874	if (wm->vsc.full > a.full)
8875		latency_tolerant_lines = 1;
8876	else {
8877		if (lb_partitions <= (wm->vtaps + 1))
8878			latency_tolerant_lines = 1;
8879		else
8880			latency_tolerant_lines = 2;
8881	}
8882
8883	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8884
8885	if (dce8_latency_watermark(wm) <= latency_hiding)
8886		return true;
8887	else
8888		return false;
8889}
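/*
 * Editorial note: the latency hiding budget is the number of tolerated
 * lines times the line time, plus the blank time.  Downscaled displays
 * (vsc > 1) and tightly partitioned line buffers only tolerate a
 * single line, which makes the comparison above correspondingly
 * stricter.
 */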
8890
8891/**
8892 * dce8_program_watermarks - program display watermarks
8893 *
8894 * @rdev: radeon_device pointer
8895 * @radeon_crtc: the selected display controller
8896 * @lb_size: line buffer size
8897 * @num_heads: number of display controllers in use
8898 *
8899 * Calculate and program the display watermarks for the
8900 * selected display controller (CIK).
8901 */
8902static void dce8_program_watermarks(struct radeon_device *rdev,
8903				    struct radeon_crtc *radeon_crtc,
8904				    u32 lb_size, u32 num_heads)
8905{
8906	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8907	struct dce8_wm_params wm_low, wm_high;
8908	u32 pixel_period;
8909	u32 line_time = 0;
8910	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8911	u32 tmp, wm_mask;
8912
8913	if (radeon_crtc->base.enabled && num_heads && mode) {
8914		pixel_period = 1000000 / (u32)mode->clock;
8915		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8916
8917		/* watermark for high clocks */
8918		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8919		    rdev->pm.dpm_enabled) {
8920			wm_high.yclk =
8921				radeon_dpm_get_mclk(rdev, false) * 10;
8922			wm_high.sclk =
8923				radeon_dpm_get_sclk(rdev, false) * 10;
8924		} else {
8925			wm_high.yclk = rdev->pm.current_mclk * 10;
8926			wm_high.sclk = rdev->pm.current_sclk * 10;
8927		}
8928
8929		wm_high.disp_clk = mode->clock;
8930		wm_high.src_width = mode->crtc_hdisplay;
8931		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8932		wm_high.blank_time = line_time - wm_high.active_time;
8933		wm_high.interlaced = false;
8934		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8935			wm_high.interlaced = true;
8936		wm_high.vsc = radeon_crtc->vsc;
8937		wm_high.vtaps = 1;
8938		if (radeon_crtc->rmx_type != RMX_OFF)
8939			wm_high.vtaps = 2;
8940		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8941		wm_high.lb_size = lb_size;
8942		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8943		wm_high.num_heads = num_heads;
8944
8945		/* set for high clocks */
8946		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8947
8948		/* possibly force display priority to high */
8949		/* should really do this at mode validation time... */
8950		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8951		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8952		    !dce8_check_latency_hiding(&wm_high) ||
8953		    (rdev->disp_priority == 2)) {
8954			DRM_DEBUG_KMS("force priority to high\n");
8955		}
8956
8957		/* watermark for low clocks */
8958		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8959		    rdev->pm.dpm_enabled) {
8960			wm_low.yclk =
8961				radeon_dpm_get_mclk(rdev, true) * 10;
8962			wm_low.sclk =
8963				radeon_dpm_get_sclk(rdev, true) * 10;
8964		} else {
8965			wm_low.yclk = rdev->pm.current_mclk * 10;
8966			wm_low.sclk = rdev->pm.current_sclk * 10;
8967		}
8968
8969		wm_low.disp_clk = mode->clock;
8970		wm_low.src_width = mode->crtc_hdisplay;
8971		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8972		wm_low.blank_time = line_time - wm_low.active_time;
8973		wm_low.interlaced = false;
8974		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8975			wm_low.interlaced = true;
8976		wm_low.vsc = radeon_crtc->vsc;
8977		wm_low.vtaps = 1;
8978		if (radeon_crtc->rmx_type != RMX_OFF)
8979			wm_low.vtaps = 2;
8980		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8981		wm_low.lb_size = lb_size;
8982		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8983		wm_low.num_heads = num_heads;
8984
8985		/* set for low clocks */
8986		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8987
8988		/* possibly force display priority to high */
8989		/* should really do this at mode validation time... */
8990		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8991		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8992		    !dce8_check_latency_hiding(&wm_low) ||
8993		    (rdev->disp_priority == 2)) {
8994			DRM_DEBUG_KMS("force priority to high\n");
8995		}
8996	}
8997
8998	/* select wm A */
8999	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9000	tmp = wm_mask;
9001	tmp &= ~LATENCY_WATERMARK_MASK(3);
9002	tmp |= LATENCY_WATERMARK_MASK(1);
9003	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9004	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9005	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9006		LATENCY_HIGH_WATERMARK(line_time)));
9007	/* select wm B */
9008	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9009	tmp &= ~LATENCY_WATERMARK_MASK(3);
9010	tmp |= LATENCY_WATERMARK_MASK(2);
9011	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9012	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9013	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9014		LATENCY_HIGH_WATERMARK(line_time)));
9015	/* restore original selection */
9016	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9017
9018	/* save values for DPM */
9019	radeon_crtc->line_time = line_time;
9020	radeon_crtc->wm_high = latency_watermark_a;
9021	radeon_crtc->wm_low = latency_watermark_b;
9022}
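/*
 * Editorial note on the register dance above: LATENCY_WATERMARK_MASK(3)
 * clears the watermark-select field; selecting 1 or 2 routes the
 * subsequent DPG_PIPE_LATENCY_CONTROL write to watermark set A or B
 * respectively, and the original selection is restored afterwards.
 * The values saved in radeon_crtc are for later use by the power
 * management code.
 */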
9023
9024/**
9025 * dce8_bandwidth_update - program display watermarks
9026 *
9027 * @rdev: radeon_device pointer
9028 *
9029 * Calculate and program the display watermarks and line
9030 * buffer allocation (CIK).
9031 */
9032void dce8_bandwidth_update(struct radeon_device *rdev)
9033{
9034	struct drm_display_mode *mode = NULL;
9035	u32 num_heads = 0, lb_size;
9036	int i;
9037
9038	radeon_update_display_priority(rdev);
9039
9040	for (i = 0; i < rdev->num_crtc; i++) {
9041		if (rdev->mode_info.crtcs[i]->base.enabled)
9042			num_heads++;
9043	}
9044	for (i = 0; i < rdev->num_crtc; i++) {
9045		mode = &rdev->mode_info.crtcs[i]->base.mode;
9046		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9047		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9048	}
9049}
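/*
 * Editorial note: this is the DCE8 bandwidth_update hook wired into
 * the radeon asic function table; it recounts the enabled heads on
 * every call so the per-head bandwidth shares used above stay current.
 */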
9050
9051/**
9052 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9053 *
9054 * @rdev: radeon_device pointer
9055 *
 9056 * Fetches a GPU clock counter snapshot (CIK).
9057 * Returns the 64 bit clock counter snapshot.
9058 */
9059uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9060{
9061	uint64_t clock;
9062
9063	mutex_lock(&rdev->gpu_clock_mutex);
9064	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9065	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9066	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9067	mutex_unlock(&rdev->gpu_clock_mutex);
9068	return clock;
9069}
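/*
 * Example usage (editorial sketch, not part of the driver): measuring
 * elapsed GPU clocks around an operation.  The capture write latches a
 * coherent 64-bit snapshot, and gpu_clock_mutex keeps the LSB/MSB read
 * pairs of concurrent callers from interleaving.
 *
 *	uint64_t start, delta;
 *
 *	start = cik_get_gpu_clock_counter(rdev);
 *	... work being timed ...
 *	delta = cik_get_gpu_clock_counter(rdev) - start;
 */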
9070
9071static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9072                              u32 cntl_reg, u32 status_reg)
9073{
9074	int r, i;
9075	struct atom_clock_dividers dividers;
9076	uint32_t tmp;
9077
9078	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9079					   clock, false, &dividers);
9080	if (r)
9081		return r;
9082
9083	tmp = RREG32_SMC(cntl_reg);
9084	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9085	tmp |= dividers.post_divider;
9086	WREG32_SMC(cntl_reg, tmp);
9087
9088	for (i = 0; i < 100; i++) {
9089		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9090			break;
9091		mdelay(10);
9092	}
9093	if (i == 100)
9094		return -ETIMEDOUT;
9095
9096	return 0;
9097}
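/*
 * Editorial note: the poll above allows 100 iterations of mdelay(10),
 * i.e. roughly one second, for DCLK_STATUS to signal that the newly
 * programmed divider is stable; otherwise -ETIMEDOUT is returned.
 */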
9098
9099int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9100{
9101	int r = 0;
9102
9103	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9104	if (r)
9105		return r;
9106
9107	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9108	return r;
9109}
9110
9111int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9112{
9113	int r, i;
9114	struct atom_clock_dividers dividers;
9115	u32 tmp;
9116
9117	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9118					   ecclk, false, &dividers);
9119	if (r)
9120		return r;
9121
9122	for (i = 0; i < 100; i++) {
9123		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9124			break;
9125		mdelay(10);
9126	}
9127	if (i == 100)
9128		return -ETIMEDOUT;
9129
9130	tmp = RREG32_SMC(CG_ECLK_CNTL);
9131	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9132	tmp |= dividers.post_divider;
9133	WREG32_SMC(CG_ECLK_CNTL, tmp);
9134
9135	for (i = 0; i < 100; i++) {
9136		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9137			break;
9138		mdelay(10);
9139	}
9140	if (i == 100)
9141		return -ETIMEDOUT;
9142
9143	return 0;
9144}
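/*
 * Editorial note: unlike the UVD helper, the ECLK path polls the
 * status bit twice -- once so the clock is stable before the divider
 * is touched, and once afterwards so callers never run VCE on a
 * half-switched clock.
 */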
9145
9146static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9147{
9148	struct pci_dev *root = rdev->pdev->bus->self;
9149	int bridge_pos, gpu_pos;
9150	u32 speed_cntl, mask, current_data_rate;
9151	int ret, i;
9152	u16 tmp16;
9153
9154	if (radeon_pcie_gen2 == 0)
9155		return;
9156
9157	if (rdev->flags & RADEON_IS_IGP)
9158		return;
9159
9160	if (!(rdev->flags & RADEON_IS_PCIE))
9161		return;
9162
9163	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9164	if (ret != 0)
9165		return;
9166
9167	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9168		return;
9169
9170	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9171	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9172		LC_CURRENT_DATA_RATE_SHIFT;
9173	if (mask & DRM_PCIE_SPEED_80) {
9174		if (current_data_rate == 2) {
9175			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9176			return;
9177		}
9178		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9179	} else if (mask & DRM_PCIE_SPEED_50) {
9180		if (current_data_rate == 1) {
9181			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9182			return;
9183		}
9184		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9185	}
9186
9187	bridge_pos = pci_pcie_cap(root);
9188	if (!bridge_pos)
9189		return;
9190
9191	gpu_pos = pci_pcie_cap(rdev->pdev);
9192	if (!gpu_pos)
9193		return;
9194
9195	if (mask & DRM_PCIE_SPEED_80) {
9196		/* re-try equalization if gen3 is not already enabled */
9197		if (current_data_rate != 2) {
9198			u16 bridge_cfg, gpu_cfg;
9199			u16 bridge_cfg2, gpu_cfg2;
9200			u32 max_lw, current_lw, tmp;
9201
9202			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9203			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9204
9205			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9206			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9207
9208			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9209			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9210
9211			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9212			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9213			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9214
9215			if (current_lw < max_lw) {
9216				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9217				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9218					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9219					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9220					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9221					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9222				}
9223			}
9224
9225			for (i = 0; i < 10; i++) {
9226				/* check status */
9227				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9228				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9229					break;
9230
9231				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9232				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9233
9234				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9235				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9236
9237				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9238				tmp |= LC_SET_QUIESCE;
9239				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9240
9241				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9242				tmp |= LC_REDO_EQ;
9243				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9244
9245				mdelay(100);
9246
9247				/* linkctl */
9248				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9249				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9250				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9251				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9252
9253				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9254				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9255				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9256				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9257
9258				/* linkctl2 */
9259				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9260				tmp16 &= ~((1 << 4) | (7 << 9));
9261				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9262				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9263
9264				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9265				tmp16 &= ~((1 << 4) | (7 << 9));
9266				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9267				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9268
9269				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9270				tmp &= ~LC_SET_QUIESCE;
9271				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9272			}
9273		}
9274	}
9275
9276	/* set the link speed */
9277	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9278	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9279	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9280
9281	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9282	tmp16 &= ~0xf;
9283	if (mask & DRM_PCIE_SPEED_80)
9284		tmp16 |= 3; /* gen3 */
9285	else if (mask & DRM_PCIE_SPEED_50)
9286		tmp16 |= 2; /* gen2 */
9287	else
9288		tmp16 |= 1; /* gen1 */
9289	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9290
9291	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9292	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9293	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9294
9295	for (i = 0; i < rdev->usec_timeout; i++) {
9296		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9297		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9298			break;
9299		udelay(1);
9300	}
9301}
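/*
 * Editorial note on the sequence above: for a gen3-capable link the
 * driver retries equalization up to 10 times (LC_SET_QUIESCE plus
 * LC_REDO_EQ), preserving each side's HAWD and link-control-2 bits
 * across the retrain, then forces a software-initiated speed change
 * and spins until LC_INITIATE_LINK_SPEED_CHANGE self-clears.
 */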
9302
9303static void cik_program_aspm(struct radeon_device *rdev)
9304{
9305	u32 data, orig;
9306	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9307	bool disable_clkreq = false;
9308
9309	if (radeon_aspm == 0)
9310		return;
9311
9312	/* XXX double check IGPs */
9313	if (rdev->flags & RADEON_IS_IGP)
9314		return;
9315
9316	if (!(rdev->flags & RADEON_IS_PCIE))
9317		return;
9318
9319	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9320	data &= ~LC_XMIT_N_FTS_MASK;
9321	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9322	if (orig != data)
9323		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9324
9325	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9326	data |= LC_GO_TO_RECOVERY;
9327	if (orig != data)
9328		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9329
9330	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9331	data |= P_IGNORE_EDB_ERR;
9332	if (orig != data)
9333		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9334
9335	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9336	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9337	data |= LC_PMI_TO_L1_DIS;
9338	if (!disable_l0s)
9339		data |= LC_L0S_INACTIVITY(7);
9340
9341	if (!disable_l1) {
9342		data |= LC_L1_INACTIVITY(7);
9343		data &= ~LC_PMI_TO_L1_DIS;
9344		if (orig != data)
9345			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9346
9347		if (!disable_plloff_in_l1) {
9348			bool clk_req_support;
9349
9350			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9351			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9352			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9353			if (orig != data)
9354				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9355
9356			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9357			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9358			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9359			if (orig != data)
9360				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9361
9362			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9363			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9364			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9365			if (orig != data)
9366				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9367
9368			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9369			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9370			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9371			if (orig != data)
9372				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9373
9374			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9375			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9376			data |= LC_DYN_LANES_PWR_STATE(3);
9377			if (orig != data)
9378				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9379
9380			if (!disable_clkreq) {
9381				struct pci_dev *root = rdev->pdev->bus->self;
9382				u32 lnkcap;
9383
9384				clk_req_support = false;
9385				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9386				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9387					clk_req_support = true;
9388			} else {
9389				clk_req_support = false;
9390			}
9391
9392			if (clk_req_support) {
9393				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9394				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9395				if (orig != data)
9396					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9397
9398				orig = data = RREG32_SMC(THM_CLK_CNTL);
9399				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9400				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9401				if (orig != data)
9402					WREG32_SMC(THM_CLK_CNTL, data);
9403
9404				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9405				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9406				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9407				if (orig != data)
9408					WREG32_SMC(MISC_CLK_CTRL, data);
9409
9410				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9411				data &= ~BCLK_AS_XCLK;
9412				if (orig != data)
9413					WREG32_SMC(CG_CLKPIN_CNTL, data);
9414
9415				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9416				data &= ~FORCE_BIF_REFCLK_EN;
9417				if (orig != data)
9418					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9419
9420				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9421				data &= ~MPLL_CLKOUT_SEL_MASK;
9422				data |= MPLL_CLKOUT_SEL(4);
9423				if (orig != data)
9424					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9425			}
9426		}
9427	} else {
9428		if (orig != data)
9429			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9430	}
9431
9432	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9433	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9434	if (orig != data)
9435		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9436
9437	if (!disable_l0s) {
9438		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
 9439	if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9440			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9441			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9442				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9443				data &= ~LC_L0S_INACTIVITY_MASK;
9444				if (orig != data)
9445					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9446			}
9447		}
9448	}
9449}