Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1/*
   2 * Copyright 2011 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24#include <linux/firmware.h>
  25#include <linux/slab.h>
  26#include <linux/module.h>
  27#include <drm/drmP.h>
  28#include "radeon.h"
  29#include "radeon_asic.h"
  30#include "radeon_audio.h"
  31#include <drm/radeon_drm.h>
  32#include "sid.h"
  33#include "atom.h"
  34#include "si_blit_shaders.h"
  35#include "clearstate_si.h"
  36#include "radeon_ucode.h"
  37
  38
  39MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
  40MODULE_FIRMWARE("radeon/TAHITI_me.bin");
  41MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
  42MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
  43MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
  44MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
  45MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
  46
  47MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
  48MODULE_FIRMWARE("radeon/tahiti_me.bin");
  49MODULE_FIRMWARE("radeon/tahiti_ce.bin");
  50MODULE_FIRMWARE("radeon/tahiti_mc.bin");
  51MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
  52MODULE_FIRMWARE("radeon/tahiti_smc.bin");
  53
  54MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
  55MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
  56MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
  57MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
  58MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
  59MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
  60MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
  61
  62MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
  63MODULE_FIRMWARE("radeon/pitcairn_me.bin");
  64MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
  65MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
  66MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
  67MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
  68MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
  69
  70MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
  71MODULE_FIRMWARE("radeon/VERDE_me.bin");
  72MODULE_FIRMWARE("radeon/VERDE_ce.bin");
  73MODULE_FIRMWARE("radeon/VERDE_mc.bin");
  74MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
  75MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
  76MODULE_FIRMWARE("radeon/VERDE_smc.bin");
  77
  78MODULE_FIRMWARE("radeon/verde_pfp.bin");
  79MODULE_FIRMWARE("radeon/verde_me.bin");
  80MODULE_FIRMWARE("radeon/verde_ce.bin");
  81MODULE_FIRMWARE("radeon/verde_mc.bin");
  82MODULE_FIRMWARE("radeon/verde_rlc.bin");
  83MODULE_FIRMWARE("radeon/verde_smc.bin");
  84MODULE_FIRMWARE("radeon/verde_k_smc.bin");
  85
  86MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
  87MODULE_FIRMWARE("radeon/OLAND_me.bin");
  88MODULE_FIRMWARE("radeon/OLAND_ce.bin");
  89MODULE_FIRMWARE("radeon/OLAND_mc.bin");
  90MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
  91MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
  92MODULE_FIRMWARE("radeon/OLAND_smc.bin");
  93
  94MODULE_FIRMWARE("radeon/oland_pfp.bin");
  95MODULE_FIRMWARE("radeon/oland_me.bin");
  96MODULE_FIRMWARE("radeon/oland_ce.bin");
  97MODULE_FIRMWARE("radeon/oland_mc.bin");
  98MODULE_FIRMWARE("radeon/oland_rlc.bin");
  99MODULE_FIRMWARE("radeon/oland_smc.bin");
 100MODULE_FIRMWARE("radeon/oland_k_smc.bin");
 101
 102MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
 103MODULE_FIRMWARE("radeon/HAINAN_me.bin");
 104MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
 105MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
 106MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
 107MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 108MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
 109
 110MODULE_FIRMWARE("radeon/hainan_pfp.bin");
 111MODULE_FIRMWARE("radeon/hainan_me.bin");
 112MODULE_FIRMWARE("radeon/hainan_ce.bin");
 113MODULE_FIRMWARE("radeon/hainan_mc.bin");
 114MODULE_FIRMWARE("radeon/hainan_rlc.bin");
 115MODULE_FIRMWARE("radeon/hainan_smc.bin");
 116MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
 117MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
 118
 119MODULE_FIRMWARE("radeon/si58_mc.bin");
 120
 121static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 122static void si_pcie_gen3_enable(struct radeon_device *rdev);
 123static void si_program_aspm(struct radeon_device *rdev);
 124extern void sumo_rlc_fini(struct radeon_device *rdev);
 125extern int sumo_rlc_init(struct radeon_device *rdev);
 126extern int r600_ih_ring_alloc(struct radeon_device *rdev);
 127extern void r600_ih_ring_fini(struct radeon_device *rdev);
 128extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
 129extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 130extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
 131extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
 132extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
 133extern bool evergreen_is_display_hung(struct radeon_device *rdev);
 134static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
 135					 bool enable);
 136static void si_init_pg(struct radeon_device *rdev);
 137static void si_init_cg(struct radeon_device *rdev);
 138static void si_fini_pg(struct radeon_device *rdev);
 139static void si_fini_cg(struct radeon_device *rdev);
 140static void si_rlc_stop(struct radeon_device *rdev);
 141
 142static const u32 crtc_offsets[] =
 143{
 144	EVERGREEN_CRTC0_REGISTER_OFFSET,
 145	EVERGREEN_CRTC1_REGISTER_OFFSET,
 146	EVERGREEN_CRTC2_REGISTER_OFFSET,
 147	EVERGREEN_CRTC3_REGISTER_OFFSET,
 148	EVERGREEN_CRTC4_REGISTER_OFFSET,
 149	EVERGREEN_CRTC5_REGISTER_OFFSET
 150};
 151
 152static const u32 si_disp_int_status[] =
 153{
 154	DISP_INTERRUPT_STATUS,
 155	DISP_INTERRUPT_STATUS_CONTINUE,
 156	DISP_INTERRUPT_STATUS_CONTINUE2,
 157	DISP_INTERRUPT_STATUS_CONTINUE3,
 158	DISP_INTERRUPT_STATUS_CONTINUE4,
 159	DISP_INTERRUPT_STATUS_CONTINUE5
 160};
 161
 162#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + (x * 0xc))
 163#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + (x * 0xc))
 164#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + (x * 0xc))
 165
 166static const u32 verde_rlc_save_restore_register_list[] =
 167{
 168	(0x8000 << 16) | (0x98f4 >> 2),
 169	0x00000000,
 170	(0x8040 << 16) | (0x98f4 >> 2),
 171	0x00000000,
 172	(0x8000 << 16) | (0xe80 >> 2),
 173	0x00000000,
 174	(0x8040 << 16) | (0xe80 >> 2),
 175	0x00000000,
 176	(0x8000 << 16) | (0x89bc >> 2),
 177	0x00000000,
 178	(0x8040 << 16) | (0x89bc >> 2),
 179	0x00000000,
 180	(0x8000 << 16) | (0x8c1c >> 2),
 181	0x00000000,
 182	(0x8040 << 16) | (0x8c1c >> 2),
 183	0x00000000,
 184	(0x9c00 << 16) | (0x98f0 >> 2),
 185	0x00000000,
 186	(0x9c00 << 16) | (0xe7c >> 2),
 187	0x00000000,
 188	(0x8000 << 16) | (0x9148 >> 2),
 189	0x00000000,
 190	(0x8040 << 16) | (0x9148 >> 2),
 191	0x00000000,
 192	(0x9c00 << 16) | (0x9150 >> 2),
 193	0x00000000,
 194	(0x9c00 << 16) | (0x897c >> 2),
 195	0x00000000,
 196	(0x9c00 << 16) | (0x8d8c >> 2),
 197	0x00000000,
 198	(0x9c00 << 16) | (0xac54 >> 2),
 199	0X00000000,
 200	0x3,
 201	(0x9c00 << 16) | (0x98f8 >> 2),
 202	0x00000000,
 203	(0x9c00 << 16) | (0x9910 >> 2),
 204	0x00000000,
 205	(0x9c00 << 16) | (0x9914 >> 2),
 206	0x00000000,
 207	(0x9c00 << 16) | (0x9918 >> 2),
 208	0x00000000,
 209	(0x9c00 << 16) | (0x991c >> 2),
 210	0x00000000,
 211	(0x9c00 << 16) | (0x9920 >> 2),
 212	0x00000000,
 213	(0x9c00 << 16) | (0x9924 >> 2),
 214	0x00000000,
 215	(0x9c00 << 16) | (0x9928 >> 2),
 216	0x00000000,
 217	(0x9c00 << 16) | (0x992c >> 2),
 218	0x00000000,
 219	(0x9c00 << 16) | (0x9930 >> 2),
 220	0x00000000,
 221	(0x9c00 << 16) | (0x9934 >> 2),
 222	0x00000000,
 223	(0x9c00 << 16) | (0x9938 >> 2),
 224	0x00000000,
 225	(0x9c00 << 16) | (0x993c >> 2),
 226	0x00000000,
 227	(0x9c00 << 16) | (0x9940 >> 2),
 228	0x00000000,
 229	(0x9c00 << 16) | (0x9944 >> 2),
 230	0x00000000,
 231	(0x9c00 << 16) | (0x9948 >> 2),
 232	0x00000000,
 233	(0x9c00 << 16) | (0x994c >> 2),
 234	0x00000000,
 235	(0x9c00 << 16) | (0x9950 >> 2),
 236	0x00000000,
 237	(0x9c00 << 16) | (0x9954 >> 2),
 238	0x00000000,
 239	(0x9c00 << 16) | (0x9958 >> 2),
 240	0x00000000,
 241	(0x9c00 << 16) | (0x995c >> 2),
 242	0x00000000,
 243	(0x9c00 << 16) | (0x9960 >> 2),
 244	0x00000000,
 245	(0x9c00 << 16) | (0x9964 >> 2),
 246	0x00000000,
 247	(0x9c00 << 16) | (0x9968 >> 2),
 248	0x00000000,
 249	(0x9c00 << 16) | (0x996c >> 2),
 250	0x00000000,
 251	(0x9c00 << 16) | (0x9970 >> 2),
 252	0x00000000,
 253	(0x9c00 << 16) | (0x9974 >> 2),
 254	0x00000000,
 255	(0x9c00 << 16) | (0x9978 >> 2),
 256	0x00000000,
 257	(0x9c00 << 16) | (0x997c >> 2),
 258	0x00000000,
 259	(0x9c00 << 16) | (0x9980 >> 2),
 260	0x00000000,
 261	(0x9c00 << 16) | (0x9984 >> 2),
 262	0x00000000,
 263	(0x9c00 << 16) | (0x9988 >> 2),
 264	0x00000000,
 265	(0x9c00 << 16) | (0x998c >> 2),
 266	0x00000000,
 267	(0x9c00 << 16) | (0x8c00 >> 2),
 268	0x00000000,
 269	(0x9c00 << 16) | (0x8c14 >> 2),
 270	0x00000000,
 271	(0x9c00 << 16) | (0x8c04 >> 2),
 272	0x00000000,
 273	(0x9c00 << 16) | (0x8c08 >> 2),
 274	0x00000000,
 275	(0x8000 << 16) | (0x9b7c >> 2),
 276	0x00000000,
 277	(0x8040 << 16) | (0x9b7c >> 2),
 278	0x00000000,
 279	(0x8000 << 16) | (0xe84 >> 2),
 280	0x00000000,
 281	(0x8040 << 16) | (0xe84 >> 2),
 282	0x00000000,
 283	(0x8000 << 16) | (0x89c0 >> 2),
 284	0x00000000,
 285	(0x8040 << 16) | (0x89c0 >> 2),
 286	0x00000000,
 287	(0x8000 << 16) | (0x914c >> 2),
 288	0x00000000,
 289	(0x8040 << 16) | (0x914c >> 2),
 290	0x00000000,
 291	(0x8000 << 16) | (0x8c20 >> 2),
 292	0x00000000,
 293	(0x8040 << 16) | (0x8c20 >> 2),
 294	0x00000000,
 295	(0x8000 << 16) | (0x9354 >> 2),
 296	0x00000000,
 297	(0x8040 << 16) | (0x9354 >> 2),
 298	0x00000000,
 299	(0x9c00 << 16) | (0x9060 >> 2),
 300	0x00000000,
 301	(0x9c00 << 16) | (0x9364 >> 2),
 302	0x00000000,
 303	(0x9c00 << 16) | (0x9100 >> 2),
 304	0x00000000,
 305	(0x9c00 << 16) | (0x913c >> 2),
 306	0x00000000,
 307	(0x8000 << 16) | (0x90e0 >> 2),
 308	0x00000000,
 309	(0x8000 << 16) | (0x90e4 >> 2),
 310	0x00000000,
 311	(0x8000 << 16) | (0x90e8 >> 2),
 312	0x00000000,
 313	(0x8040 << 16) | (0x90e0 >> 2),
 314	0x00000000,
 315	(0x8040 << 16) | (0x90e4 >> 2),
 316	0x00000000,
 317	(0x8040 << 16) | (0x90e8 >> 2),
 318	0x00000000,
 319	(0x9c00 << 16) | (0x8bcc >> 2),
 320	0x00000000,
 321	(0x9c00 << 16) | (0x8b24 >> 2),
 322	0x00000000,
 323	(0x9c00 << 16) | (0x88c4 >> 2),
 324	0x00000000,
 325	(0x9c00 << 16) | (0x8e50 >> 2),
 326	0x00000000,
 327	(0x9c00 << 16) | (0x8c0c >> 2),
 328	0x00000000,
 329	(0x9c00 << 16) | (0x8e58 >> 2),
 330	0x00000000,
 331	(0x9c00 << 16) | (0x8e5c >> 2),
 332	0x00000000,
 333	(0x9c00 << 16) | (0x9508 >> 2),
 334	0x00000000,
 335	(0x9c00 << 16) | (0x950c >> 2),
 336	0x00000000,
 337	(0x9c00 << 16) | (0x9494 >> 2),
 338	0x00000000,
 339	(0x9c00 << 16) | (0xac0c >> 2),
 340	0x00000000,
 341	(0x9c00 << 16) | (0xac10 >> 2),
 342	0x00000000,
 343	(0x9c00 << 16) | (0xac14 >> 2),
 344	0x00000000,
 345	(0x9c00 << 16) | (0xae00 >> 2),
 346	0x00000000,
 347	(0x9c00 << 16) | (0xac08 >> 2),
 348	0x00000000,
 349	(0x9c00 << 16) | (0x88d4 >> 2),
 350	0x00000000,
 351	(0x9c00 << 16) | (0x88c8 >> 2),
 352	0x00000000,
 353	(0x9c00 << 16) | (0x88cc >> 2),
 354	0x00000000,
 355	(0x9c00 << 16) | (0x89b0 >> 2),
 356	0x00000000,
 357	(0x9c00 << 16) | (0x8b10 >> 2),
 358	0x00000000,
 359	(0x9c00 << 16) | (0x8a14 >> 2),
 360	0x00000000,
 361	(0x9c00 << 16) | (0x9830 >> 2),
 362	0x00000000,
 363	(0x9c00 << 16) | (0x9834 >> 2),
 364	0x00000000,
 365	(0x9c00 << 16) | (0x9838 >> 2),
 366	0x00000000,
 367	(0x9c00 << 16) | (0x9a10 >> 2),
 368	0x00000000,
 369	(0x8000 << 16) | (0x9870 >> 2),
 370	0x00000000,
 371	(0x8000 << 16) | (0x9874 >> 2),
 372	0x00000000,
 373	(0x8001 << 16) | (0x9870 >> 2),
 374	0x00000000,
 375	(0x8001 << 16) | (0x9874 >> 2),
 376	0x00000000,
 377	(0x8040 << 16) | (0x9870 >> 2),
 378	0x00000000,
 379	(0x8040 << 16) | (0x9874 >> 2),
 380	0x00000000,
 381	(0x8041 << 16) | (0x9870 >> 2),
 382	0x00000000,
 383	(0x8041 << 16) | (0x9874 >> 2),
 384	0x00000000,
 385	0x00000000
 386};
 387
 388static const u32 tahiti_golden_rlc_registers[] =
 389{
 390	0xc424, 0xffffffff, 0x00601005,
 391	0xc47c, 0xffffffff, 0x10104040,
 392	0xc488, 0xffffffff, 0x0100000a,
 393	0xc314, 0xffffffff, 0x00000800,
 394	0xc30c, 0xffffffff, 0x800000f4,
 395	0xf4a8, 0xffffffff, 0x00000000
 396};
 397
 398static const u32 tahiti_golden_registers[] =
 399{
 400	0x9a10, 0x00010000, 0x00018208,
 401	0x9830, 0xffffffff, 0x00000000,
 402	0x9834, 0xf00fffff, 0x00000400,
 403	0x9838, 0x0002021c, 0x00020200,
 404	0xc78, 0x00000080, 0x00000000,
 405	0xd030, 0x000300c0, 0x00800040,
 406	0xd830, 0x000300c0, 0x00800040,
 407	0x5bb0, 0x000000f0, 0x00000070,
 408	0x5bc0, 0x00200000, 0x50100000,
 409	0x7030, 0x31000311, 0x00000011,
 410	0x277c, 0x00000003, 0x000007ff,
 411	0x240c, 0x000007ff, 0x00000000,
 412	0x8a14, 0xf000001f, 0x00000007,
 413	0x8b24, 0xffffffff, 0x00ffffff,
 414	0x8b10, 0x0000ff0f, 0x00000000,
 415	0x28a4c, 0x07ffffff, 0x4e000000,
 416	0x28350, 0x3f3f3fff, 0x2a00126a,
 417	0x30, 0x000000ff, 0x0040,
 418	0x34, 0x00000040, 0x00004040,
 419	0x9100, 0x07ffffff, 0x03000000,
 420	0x8e88, 0x01ff1f3f, 0x00000000,
 421	0x8e84, 0x01ff1f3f, 0x00000000,
 422	0x9060, 0x0000007f, 0x00000020,
 423	0x9508, 0x00010000, 0x00010000,
 424	0xac14, 0x00000200, 0x000002fb,
 425	0xac10, 0xffffffff, 0x0000543b,
 426	0xac0c, 0xffffffff, 0xa9210876,
 427	0x88d0, 0xffffffff, 0x000fff40,
 428	0x88d4, 0x0000001f, 0x00000010,
 429	0x1410, 0x20000000, 0x20fffed8,
 430	0x15c0, 0x000c0fc0, 0x000c0400
 431};
 432
 433static const u32 tahiti_golden_registers2[] =
 434{
 435	0xc64, 0x00000001, 0x00000001
 436};
 437
 438static const u32 pitcairn_golden_rlc_registers[] =
 439{
 440	0xc424, 0xffffffff, 0x00601004,
 441	0xc47c, 0xffffffff, 0x10102020,
 442	0xc488, 0xffffffff, 0x01000020,
 443	0xc314, 0xffffffff, 0x00000800,
 444	0xc30c, 0xffffffff, 0x800000a4
 445};
 446
 447static const u32 pitcairn_golden_registers[] =
 448{
 449	0x9a10, 0x00010000, 0x00018208,
 450	0x9830, 0xffffffff, 0x00000000,
 451	0x9834, 0xf00fffff, 0x00000400,
 452	0x9838, 0x0002021c, 0x00020200,
 453	0xc78, 0x00000080, 0x00000000,
 454	0xd030, 0x000300c0, 0x00800040,
 455	0xd830, 0x000300c0, 0x00800040,
 456	0x5bb0, 0x000000f0, 0x00000070,
 457	0x5bc0, 0x00200000, 0x50100000,
 458	0x7030, 0x31000311, 0x00000011,
 459	0x2ae4, 0x00073ffe, 0x000022a2,
 460	0x240c, 0x000007ff, 0x00000000,
 461	0x8a14, 0xf000001f, 0x00000007,
 462	0x8b24, 0xffffffff, 0x00ffffff,
 463	0x8b10, 0x0000ff0f, 0x00000000,
 464	0x28a4c, 0x07ffffff, 0x4e000000,
 465	0x28350, 0x3f3f3fff, 0x2a00126a,
 466	0x30, 0x000000ff, 0x0040,
 467	0x34, 0x00000040, 0x00004040,
 468	0x9100, 0x07ffffff, 0x03000000,
 469	0x9060, 0x0000007f, 0x00000020,
 470	0x9508, 0x00010000, 0x00010000,
 471	0xac14, 0x000003ff, 0x000000f7,
 472	0xac10, 0xffffffff, 0x00000000,
 473	0xac0c, 0xffffffff, 0x32761054,
 474	0x88d4, 0x0000001f, 0x00000010,
 475	0x15c0, 0x000c0fc0, 0x000c0400
 476};
 477
 478static const u32 verde_golden_rlc_registers[] =
 479{
 480	0xc424, 0xffffffff, 0x033f1005,
 481	0xc47c, 0xffffffff, 0x10808020,
 482	0xc488, 0xffffffff, 0x00800008,
 483	0xc314, 0xffffffff, 0x00001000,
 484	0xc30c, 0xffffffff, 0x80010014
 485};
 486
 487static const u32 verde_golden_registers[] =
 488{
 489	0x9a10, 0x00010000, 0x00018208,
 490	0x9830, 0xffffffff, 0x00000000,
 491	0x9834, 0xf00fffff, 0x00000400,
 492	0x9838, 0x0002021c, 0x00020200,
 493	0xc78, 0x00000080, 0x00000000,
 494	0xd030, 0x000300c0, 0x00800040,
 495	0xd030, 0x000300c0, 0x00800040,
 496	0xd830, 0x000300c0, 0x00800040,
 497	0xd830, 0x000300c0, 0x00800040,
 498	0x5bb0, 0x000000f0, 0x00000070,
 499	0x5bc0, 0x00200000, 0x50100000,
 500	0x7030, 0x31000311, 0x00000011,
 501	0x2ae4, 0x00073ffe, 0x000022a2,
 502	0x2ae4, 0x00073ffe, 0x000022a2,
 503	0x2ae4, 0x00073ffe, 0x000022a2,
 504	0x240c, 0x000007ff, 0x00000000,
 505	0x240c, 0x000007ff, 0x00000000,
 506	0x240c, 0x000007ff, 0x00000000,
 507	0x8a14, 0xf000001f, 0x00000007,
 508	0x8a14, 0xf000001f, 0x00000007,
 509	0x8a14, 0xf000001f, 0x00000007,
 510	0x8b24, 0xffffffff, 0x00ffffff,
 511	0x8b10, 0x0000ff0f, 0x00000000,
 512	0x28a4c, 0x07ffffff, 0x4e000000,
 513	0x28350, 0x3f3f3fff, 0x0000124a,
 514	0x28350, 0x3f3f3fff, 0x0000124a,
 515	0x28350, 0x3f3f3fff, 0x0000124a,
 516	0x30, 0x000000ff, 0x0040,
 517	0x34, 0x00000040, 0x00004040,
 518	0x9100, 0x07ffffff, 0x03000000,
 519	0x9100, 0x07ffffff, 0x03000000,
 520	0x8e88, 0x01ff1f3f, 0x00000000,
 521	0x8e88, 0x01ff1f3f, 0x00000000,
 522	0x8e88, 0x01ff1f3f, 0x00000000,
 523	0x8e84, 0x01ff1f3f, 0x00000000,
 524	0x8e84, 0x01ff1f3f, 0x00000000,
 525	0x8e84, 0x01ff1f3f, 0x00000000,
 526	0x9060, 0x0000007f, 0x00000020,
 527	0x9508, 0x00010000, 0x00010000,
 528	0xac14, 0x000003ff, 0x00000003,
 529	0xac14, 0x000003ff, 0x00000003,
 530	0xac14, 0x000003ff, 0x00000003,
 531	0xac10, 0xffffffff, 0x00000000,
 532	0xac10, 0xffffffff, 0x00000000,
 533	0xac10, 0xffffffff, 0x00000000,
 534	0xac0c, 0xffffffff, 0x00001032,
 535	0xac0c, 0xffffffff, 0x00001032,
 536	0xac0c, 0xffffffff, 0x00001032,
 537	0x88d4, 0x0000001f, 0x00000010,
 538	0x88d4, 0x0000001f, 0x00000010,
 539	0x88d4, 0x0000001f, 0x00000010,
 540	0x15c0, 0x000c0fc0, 0x000c0400
 541};
 542
 543static const u32 oland_golden_rlc_registers[] =
 544{
 545	0xc424, 0xffffffff, 0x00601005,
 546	0xc47c, 0xffffffff, 0x10104040,
 547	0xc488, 0xffffffff, 0x0100000a,
 548	0xc314, 0xffffffff, 0x00000800,
 549	0xc30c, 0xffffffff, 0x800000f4
 550};
 551
 552static const u32 oland_golden_registers[] =
 553{
 554	0x9a10, 0x00010000, 0x00018208,
 555	0x9830, 0xffffffff, 0x00000000,
 556	0x9834, 0xf00fffff, 0x00000400,
 557	0x9838, 0x0002021c, 0x00020200,
 558	0xc78, 0x00000080, 0x00000000,
 559	0xd030, 0x000300c0, 0x00800040,
 560	0xd830, 0x000300c0, 0x00800040,
 561	0x5bb0, 0x000000f0, 0x00000070,
 562	0x5bc0, 0x00200000, 0x50100000,
 563	0x7030, 0x31000311, 0x00000011,
 564	0x2ae4, 0x00073ffe, 0x000022a2,
 565	0x240c, 0x000007ff, 0x00000000,
 566	0x8a14, 0xf000001f, 0x00000007,
 567	0x8b24, 0xffffffff, 0x00ffffff,
 568	0x8b10, 0x0000ff0f, 0x00000000,
 569	0x28a4c, 0x07ffffff, 0x4e000000,
 570	0x28350, 0x3f3f3fff, 0x00000082,
 571	0x30, 0x000000ff, 0x0040,
 572	0x34, 0x00000040, 0x00004040,
 573	0x9100, 0x07ffffff, 0x03000000,
 574	0x9060, 0x0000007f, 0x00000020,
 575	0x9508, 0x00010000, 0x00010000,
 576	0xac14, 0x000003ff, 0x000000f3,
 577	0xac10, 0xffffffff, 0x00000000,
 578	0xac0c, 0xffffffff, 0x00003210,
 579	0x88d4, 0x0000001f, 0x00000010,
 580	0x15c0, 0x000c0fc0, 0x000c0400
 581};
 582
 583static const u32 hainan_golden_registers[] =
 584{
 585	0x9a10, 0x00010000, 0x00018208,
 586	0x9830, 0xffffffff, 0x00000000,
 587	0x9834, 0xf00fffff, 0x00000400,
 588	0x9838, 0x0002021c, 0x00020200,
 589	0xd0c0, 0xff000fff, 0x00000100,
 590	0xd030, 0x000300c0, 0x00800040,
 591	0xd8c0, 0xff000fff, 0x00000100,
 592	0xd830, 0x000300c0, 0x00800040,
 593	0x2ae4, 0x00073ffe, 0x000022a2,
 594	0x240c, 0x000007ff, 0x00000000,
 595	0x8a14, 0xf000001f, 0x00000007,
 596	0x8b24, 0xffffffff, 0x00ffffff,
 597	0x8b10, 0x0000ff0f, 0x00000000,
 598	0x28a4c, 0x07ffffff, 0x4e000000,
 599	0x28350, 0x3f3f3fff, 0x00000000,
 600	0x30, 0x000000ff, 0x0040,
 601	0x34, 0x00000040, 0x00004040,
 602	0x9100, 0x03e00000, 0x03600000,
 603	0x9060, 0x0000007f, 0x00000020,
 604	0x9508, 0x00010000, 0x00010000,
 605	0xac14, 0x000003ff, 0x000000f1,
 606	0xac10, 0xffffffff, 0x00000000,
 607	0xac0c, 0xffffffff, 0x00003210,
 608	0x88d4, 0x0000001f, 0x00000010,
 609	0x15c0, 0x000c0fc0, 0x000c0400
 610};
 611
 612static const u32 hainan_golden_registers2[] =
 613{
 614	0x98f8, 0xffffffff, 0x02010001
 615};
 616
 617static const u32 tahiti_mgcg_cgcg_init[] =
 618{
 619	0xc400, 0xffffffff, 0xfffffffc,
 620	0x802c, 0xffffffff, 0xe0000000,
 621	0x9a60, 0xffffffff, 0x00000100,
 622	0x92a4, 0xffffffff, 0x00000100,
 623	0xc164, 0xffffffff, 0x00000100,
 624	0x9774, 0xffffffff, 0x00000100,
 625	0x8984, 0xffffffff, 0x06000100,
 626	0x8a18, 0xffffffff, 0x00000100,
 627	0x92a0, 0xffffffff, 0x00000100,
 628	0xc380, 0xffffffff, 0x00000100,
 629	0x8b28, 0xffffffff, 0x00000100,
 630	0x9144, 0xffffffff, 0x00000100,
 631	0x8d88, 0xffffffff, 0x00000100,
 632	0x8d8c, 0xffffffff, 0x00000100,
 633	0x9030, 0xffffffff, 0x00000100,
 634	0x9034, 0xffffffff, 0x00000100,
 635	0x9038, 0xffffffff, 0x00000100,
 636	0x903c, 0xffffffff, 0x00000100,
 637	0xad80, 0xffffffff, 0x00000100,
 638	0xac54, 0xffffffff, 0x00000100,
 639	0x897c, 0xffffffff, 0x06000100,
 640	0x9868, 0xffffffff, 0x00000100,
 641	0x9510, 0xffffffff, 0x00000100,
 642	0xaf04, 0xffffffff, 0x00000100,
 643	0xae04, 0xffffffff, 0x00000100,
 644	0x949c, 0xffffffff, 0x00000100,
 645	0x802c, 0xffffffff, 0xe0000000,
 646	0x9160, 0xffffffff, 0x00010000,
 647	0x9164, 0xffffffff, 0x00030002,
 648	0x9168, 0xffffffff, 0x00040007,
 649	0x916c, 0xffffffff, 0x00060005,
 650	0x9170, 0xffffffff, 0x00090008,
 651	0x9174, 0xffffffff, 0x00020001,
 652	0x9178, 0xffffffff, 0x00040003,
 653	0x917c, 0xffffffff, 0x00000007,
 654	0x9180, 0xffffffff, 0x00060005,
 655	0x9184, 0xffffffff, 0x00090008,
 656	0x9188, 0xffffffff, 0x00030002,
 657	0x918c, 0xffffffff, 0x00050004,
 658	0x9190, 0xffffffff, 0x00000008,
 659	0x9194, 0xffffffff, 0x00070006,
 660	0x9198, 0xffffffff, 0x000a0009,
 661	0x919c, 0xffffffff, 0x00040003,
 662	0x91a0, 0xffffffff, 0x00060005,
 663	0x91a4, 0xffffffff, 0x00000009,
 664	0x91a8, 0xffffffff, 0x00080007,
 665	0x91ac, 0xffffffff, 0x000b000a,
 666	0x91b0, 0xffffffff, 0x00050004,
 667	0x91b4, 0xffffffff, 0x00070006,
 668	0x91b8, 0xffffffff, 0x0008000b,
 669	0x91bc, 0xffffffff, 0x000a0009,
 670	0x91c0, 0xffffffff, 0x000d000c,
 671	0x91c4, 0xffffffff, 0x00060005,
 672	0x91c8, 0xffffffff, 0x00080007,
 673	0x91cc, 0xffffffff, 0x0000000b,
 674	0x91d0, 0xffffffff, 0x000a0009,
 675	0x91d4, 0xffffffff, 0x000d000c,
 676	0x91d8, 0xffffffff, 0x00070006,
 677	0x91dc, 0xffffffff, 0x00090008,
 678	0x91e0, 0xffffffff, 0x0000000c,
 679	0x91e4, 0xffffffff, 0x000b000a,
 680	0x91e8, 0xffffffff, 0x000e000d,
 681	0x91ec, 0xffffffff, 0x00080007,
 682	0x91f0, 0xffffffff, 0x000a0009,
 683	0x91f4, 0xffffffff, 0x0000000d,
 684	0x91f8, 0xffffffff, 0x000c000b,
 685	0x91fc, 0xffffffff, 0x000f000e,
 686	0x9200, 0xffffffff, 0x00090008,
 687	0x9204, 0xffffffff, 0x000b000a,
 688	0x9208, 0xffffffff, 0x000c000f,
 689	0x920c, 0xffffffff, 0x000e000d,
 690	0x9210, 0xffffffff, 0x00110010,
 691	0x9214, 0xffffffff, 0x000a0009,
 692	0x9218, 0xffffffff, 0x000c000b,
 693	0x921c, 0xffffffff, 0x0000000f,
 694	0x9220, 0xffffffff, 0x000e000d,
 695	0x9224, 0xffffffff, 0x00110010,
 696	0x9228, 0xffffffff, 0x000b000a,
 697	0x922c, 0xffffffff, 0x000d000c,
 698	0x9230, 0xffffffff, 0x00000010,
 699	0x9234, 0xffffffff, 0x000f000e,
 700	0x9238, 0xffffffff, 0x00120011,
 701	0x923c, 0xffffffff, 0x000c000b,
 702	0x9240, 0xffffffff, 0x000e000d,
 703	0x9244, 0xffffffff, 0x00000011,
 704	0x9248, 0xffffffff, 0x0010000f,
 705	0x924c, 0xffffffff, 0x00130012,
 706	0x9250, 0xffffffff, 0x000d000c,
 707	0x9254, 0xffffffff, 0x000f000e,
 708	0x9258, 0xffffffff, 0x00100013,
 709	0x925c, 0xffffffff, 0x00120011,
 710	0x9260, 0xffffffff, 0x00150014,
 711	0x9264, 0xffffffff, 0x000e000d,
 712	0x9268, 0xffffffff, 0x0010000f,
 713	0x926c, 0xffffffff, 0x00000013,
 714	0x9270, 0xffffffff, 0x00120011,
 715	0x9274, 0xffffffff, 0x00150014,
 716	0x9278, 0xffffffff, 0x000f000e,
 717	0x927c, 0xffffffff, 0x00110010,
 718	0x9280, 0xffffffff, 0x00000014,
 719	0x9284, 0xffffffff, 0x00130012,
 720	0x9288, 0xffffffff, 0x00160015,
 721	0x928c, 0xffffffff, 0x0010000f,
 722	0x9290, 0xffffffff, 0x00120011,
 723	0x9294, 0xffffffff, 0x00000015,
 724	0x9298, 0xffffffff, 0x00140013,
 725	0x929c, 0xffffffff, 0x00170016,
 726	0x9150, 0xffffffff, 0x96940200,
 727	0x8708, 0xffffffff, 0x00900100,
 728	0xc478, 0xffffffff, 0x00000080,
 729	0xc404, 0xffffffff, 0x0020003f,
 730	0x30, 0xffffffff, 0x0000001c,
 731	0x34, 0x000f0000, 0x000f0000,
 732	0x160c, 0xffffffff, 0x00000100,
 733	0x1024, 0xffffffff, 0x00000100,
 734	0x102c, 0x00000101, 0x00000000,
 735	0x20a8, 0xffffffff, 0x00000104,
 736	0x264c, 0x000c0000, 0x000c0000,
 737	0x2648, 0x000c0000, 0x000c0000,
 738	0x55e4, 0xff000fff, 0x00000100,
 739	0x55e8, 0x00000001, 0x00000001,
 740	0x2f50, 0x00000001, 0x00000001,
 741	0x30cc, 0xc0000fff, 0x00000104,
 742	0xc1e4, 0x00000001, 0x00000001,
 743	0xd0c0, 0xfffffff0, 0x00000100,
 744	0xd8c0, 0xfffffff0, 0x00000100
 745};
 746
 747static const u32 pitcairn_mgcg_cgcg_init[] =
 748{
 749	0xc400, 0xffffffff, 0xfffffffc,
 750	0x802c, 0xffffffff, 0xe0000000,
 751	0x9a60, 0xffffffff, 0x00000100,
 752	0x92a4, 0xffffffff, 0x00000100,
 753	0xc164, 0xffffffff, 0x00000100,
 754	0x9774, 0xffffffff, 0x00000100,
 755	0x8984, 0xffffffff, 0x06000100,
 756	0x8a18, 0xffffffff, 0x00000100,
 757	0x92a0, 0xffffffff, 0x00000100,
 758	0xc380, 0xffffffff, 0x00000100,
 759	0x8b28, 0xffffffff, 0x00000100,
 760	0x9144, 0xffffffff, 0x00000100,
 761	0x8d88, 0xffffffff, 0x00000100,
 762	0x8d8c, 0xffffffff, 0x00000100,
 763	0x9030, 0xffffffff, 0x00000100,
 764	0x9034, 0xffffffff, 0x00000100,
 765	0x9038, 0xffffffff, 0x00000100,
 766	0x903c, 0xffffffff, 0x00000100,
 767	0xad80, 0xffffffff, 0x00000100,
 768	0xac54, 0xffffffff, 0x00000100,
 769	0x897c, 0xffffffff, 0x06000100,
 770	0x9868, 0xffffffff, 0x00000100,
 771	0x9510, 0xffffffff, 0x00000100,
 772	0xaf04, 0xffffffff, 0x00000100,
 773	0xae04, 0xffffffff, 0x00000100,
 774	0x949c, 0xffffffff, 0x00000100,
 775	0x802c, 0xffffffff, 0xe0000000,
 776	0x9160, 0xffffffff, 0x00010000,
 777	0x9164, 0xffffffff, 0x00030002,
 778	0x9168, 0xffffffff, 0x00040007,
 779	0x916c, 0xffffffff, 0x00060005,
 780	0x9170, 0xffffffff, 0x00090008,
 781	0x9174, 0xffffffff, 0x00020001,
 782	0x9178, 0xffffffff, 0x00040003,
 783	0x917c, 0xffffffff, 0x00000007,
 784	0x9180, 0xffffffff, 0x00060005,
 785	0x9184, 0xffffffff, 0x00090008,
 786	0x9188, 0xffffffff, 0x00030002,
 787	0x918c, 0xffffffff, 0x00050004,
 788	0x9190, 0xffffffff, 0x00000008,
 789	0x9194, 0xffffffff, 0x00070006,
 790	0x9198, 0xffffffff, 0x000a0009,
 791	0x919c, 0xffffffff, 0x00040003,
 792	0x91a0, 0xffffffff, 0x00060005,
 793	0x91a4, 0xffffffff, 0x00000009,
 794	0x91a8, 0xffffffff, 0x00080007,
 795	0x91ac, 0xffffffff, 0x000b000a,
 796	0x91b0, 0xffffffff, 0x00050004,
 797	0x91b4, 0xffffffff, 0x00070006,
 798	0x91b8, 0xffffffff, 0x0008000b,
 799	0x91bc, 0xffffffff, 0x000a0009,
 800	0x91c0, 0xffffffff, 0x000d000c,
 801	0x9200, 0xffffffff, 0x00090008,
 802	0x9204, 0xffffffff, 0x000b000a,
 803	0x9208, 0xffffffff, 0x000c000f,
 804	0x920c, 0xffffffff, 0x000e000d,
 805	0x9210, 0xffffffff, 0x00110010,
 806	0x9214, 0xffffffff, 0x000a0009,
 807	0x9218, 0xffffffff, 0x000c000b,
 808	0x921c, 0xffffffff, 0x0000000f,
 809	0x9220, 0xffffffff, 0x000e000d,
 810	0x9224, 0xffffffff, 0x00110010,
 811	0x9228, 0xffffffff, 0x000b000a,
 812	0x922c, 0xffffffff, 0x000d000c,
 813	0x9230, 0xffffffff, 0x00000010,
 814	0x9234, 0xffffffff, 0x000f000e,
 815	0x9238, 0xffffffff, 0x00120011,
 816	0x923c, 0xffffffff, 0x000c000b,
 817	0x9240, 0xffffffff, 0x000e000d,
 818	0x9244, 0xffffffff, 0x00000011,
 819	0x9248, 0xffffffff, 0x0010000f,
 820	0x924c, 0xffffffff, 0x00130012,
 821	0x9250, 0xffffffff, 0x000d000c,
 822	0x9254, 0xffffffff, 0x000f000e,
 823	0x9258, 0xffffffff, 0x00100013,
 824	0x925c, 0xffffffff, 0x00120011,
 825	0x9260, 0xffffffff, 0x00150014,
 826	0x9150, 0xffffffff, 0x96940200,
 827	0x8708, 0xffffffff, 0x00900100,
 828	0xc478, 0xffffffff, 0x00000080,
 829	0xc404, 0xffffffff, 0x0020003f,
 830	0x30, 0xffffffff, 0x0000001c,
 831	0x34, 0x000f0000, 0x000f0000,
 832	0x160c, 0xffffffff, 0x00000100,
 833	0x1024, 0xffffffff, 0x00000100,
 834	0x102c, 0x00000101, 0x00000000,
 835	0x20a8, 0xffffffff, 0x00000104,
 836	0x55e4, 0xff000fff, 0x00000100,
 837	0x55e8, 0x00000001, 0x00000001,
 838	0x2f50, 0x00000001, 0x00000001,
 839	0x30cc, 0xc0000fff, 0x00000104,
 840	0xc1e4, 0x00000001, 0x00000001,
 841	0xd0c0, 0xfffffff0, 0x00000100,
 842	0xd8c0, 0xfffffff0, 0x00000100
 843};
 844
 845static const u32 verde_mgcg_cgcg_init[] =
 846{
 847	0xc400, 0xffffffff, 0xfffffffc,
 848	0x802c, 0xffffffff, 0xe0000000,
 849	0x9a60, 0xffffffff, 0x00000100,
 850	0x92a4, 0xffffffff, 0x00000100,
 851	0xc164, 0xffffffff, 0x00000100,
 852	0x9774, 0xffffffff, 0x00000100,
 853	0x8984, 0xffffffff, 0x06000100,
 854	0x8a18, 0xffffffff, 0x00000100,
 855	0x92a0, 0xffffffff, 0x00000100,
 856	0xc380, 0xffffffff, 0x00000100,
 857	0x8b28, 0xffffffff, 0x00000100,
 858	0x9144, 0xffffffff, 0x00000100,
 859	0x8d88, 0xffffffff, 0x00000100,
 860	0x8d8c, 0xffffffff, 0x00000100,
 861	0x9030, 0xffffffff, 0x00000100,
 862	0x9034, 0xffffffff, 0x00000100,
 863	0x9038, 0xffffffff, 0x00000100,
 864	0x903c, 0xffffffff, 0x00000100,
 865	0xad80, 0xffffffff, 0x00000100,
 866	0xac54, 0xffffffff, 0x00000100,
 867	0x897c, 0xffffffff, 0x06000100,
 868	0x9868, 0xffffffff, 0x00000100,
 869	0x9510, 0xffffffff, 0x00000100,
 870	0xaf04, 0xffffffff, 0x00000100,
 871	0xae04, 0xffffffff, 0x00000100,
 872	0x949c, 0xffffffff, 0x00000100,
 873	0x802c, 0xffffffff, 0xe0000000,
 874	0x9160, 0xffffffff, 0x00010000,
 875	0x9164, 0xffffffff, 0x00030002,
 876	0x9168, 0xffffffff, 0x00040007,
 877	0x916c, 0xffffffff, 0x00060005,
 878	0x9170, 0xffffffff, 0x00090008,
 879	0x9174, 0xffffffff, 0x00020001,
 880	0x9178, 0xffffffff, 0x00040003,
 881	0x917c, 0xffffffff, 0x00000007,
 882	0x9180, 0xffffffff, 0x00060005,
 883	0x9184, 0xffffffff, 0x00090008,
 884	0x9188, 0xffffffff, 0x00030002,
 885	0x918c, 0xffffffff, 0x00050004,
 886	0x9190, 0xffffffff, 0x00000008,
 887	0x9194, 0xffffffff, 0x00070006,
 888	0x9198, 0xffffffff, 0x000a0009,
 889	0x919c, 0xffffffff, 0x00040003,
 890	0x91a0, 0xffffffff, 0x00060005,
 891	0x91a4, 0xffffffff, 0x00000009,
 892	0x91a8, 0xffffffff, 0x00080007,
 893	0x91ac, 0xffffffff, 0x000b000a,
 894	0x91b0, 0xffffffff, 0x00050004,
 895	0x91b4, 0xffffffff, 0x00070006,
 896	0x91b8, 0xffffffff, 0x0008000b,
 897	0x91bc, 0xffffffff, 0x000a0009,
 898	0x91c0, 0xffffffff, 0x000d000c,
 899	0x9200, 0xffffffff, 0x00090008,
 900	0x9204, 0xffffffff, 0x000b000a,
 901	0x9208, 0xffffffff, 0x000c000f,
 902	0x920c, 0xffffffff, 0x000e000d,
 903	0x9210, 0xffffffff, 0x00110010,
 904	0x9214, 0xffffffff, 0x000a0009,
 905	0x9218, 0xffffffff, 0x000c000b,
 906	0x921c, 0xffffffff, 0x0000000f,
 907	0x9220, 0xffffffff, 0x000e000d,
 908	0x9224, 0xffffffff, 0x00110010,
 909	0x9228, 0xffffffff, 0x000b000a,
 910	0x922c, 0xffffffff, 0x000d000c,
 911	0x9230, 0xffffffff, 0x00000010,
 912	0x9234, 0xffffffff, 0x000f000e,
 913	0x9238, 0xffffffff, 0x00120011,
 914	0x923c, 0xffffffff, 0x000c000b,
 915	0x9240, 0xffffffff, 0x000e000d,
 916	0x9244, 0xffffffff, 0x00000011,
 917	0x9248, 0xffffffff, 0x0010000f,
 918	0x924c, 0xffffffff, 0x00130012,
 919	0x9250, 0xffffffff, 0x000d000c,
 920	0x9254, 0xffffffff, 0x000f000e,
 921	0x9258, 0xffffffff, 0x00100013,
 922	0x925c, 0xffffffff, 0x00120011,
 923	0x9260, 0xffffffff, 0x00150014,
 924	0x9150, 0xffffffff, 0x96940200,
 925	0x8708, 0xffffffff, 0x00900100,
 926	0xc478, 0xffffffff, 0x00000080,
 927	0xc404, 0xffffffff, 0x0020003f,
 928	0x30, 0xffffffff, 0x0000001c,
 929	0x34, 0x000f0000, 0x000f0000,
 930	0x160c, 0xffffffff, 0x00000100,
 931	0x1024, 0xffffffff, 0x00000100,
 932	0x102c, 0x00000101, 0x00000000,
 933	0x20a8, 0xffffffff, 0x00000104,
 934	0x264c, 0x000c0000, 0x000c0000,
 935	0x2648, 0x000c0000, 0x000c0000,
 936	0x55e4, 0xff000fff, 0x00000100,
 937	0x55e8, 0x00000001, 0x00000001,
 938	0x2f50, 0x00000001, 0x00000001,
 939	0x30cc, 0xc0000fff, 0x00000104,
 940	0xc1e4, 0x00000001, 0x00000001,
 941	0xd0c0, 0xfffffff0, 0x00000100,
 942	0xd8c0, 0xfffffff0, 0x00000100
 943};
 944
 945static const u32 oland_mgcg_cgcg_init[] =
 946{
 947	0xc400, 0xffffffff, 0xfffffffc,
 948	0x802c, 0xffffffff, 0xe0000000,
 949	0x9a60, 0xffffffff, 0x00000100,
 950	0x92a4, 0xffffffff, 0x00000100,
 951	0xc164, 0xffffffff, 0x00000100,
 952	0x9774, 0xffffffff, 0x00000100,
 953	0x8984, 0xffffffff, 0x06000100,
 954	0x8a18, 0xffffffff, 0x00000100,
 955	0x92a0, 0xffffffff, 0x00000100,
 956	0xc380, 0xffffffff, 0x00000100,
 957	0x8b28, 0xffffffff, 0x00000100,
 958	0x9144, 0xffffffff, 0x00000100,
 959	0x8d88, 0xffffffff, 0x00000100,
 960	0x8d8c, 0xffffffff, 0x00000100,
 961	0x9030, 0xffffffff, 0x00000100,
 962	0x9034, 0xffffffff, 0x00000100,
 963	0x9038, 0xffffffff, 0x00000100,
 964	0x903c, 0xffffffff, 0x00000100,
 965	0xad80, 0xffffffff, 0x00000100,
 966	0xac54, 0xffffffff, 0x00000100,
 967	0x897c, 0xffffffff, 0x06000100,
 968	0x9868, 0xffffffff, 0x00000100,
 969	0x9510, 0xffffffff, 0x00000100,
 970	0xaf04, 0xffffffff, 0x00000100,
 971	0xae04, 0xffffffff, 0x00000100,
 972	0x949c, 0xffffffff, 0x00000100,
 973	0x802c, 0xffffffff, 0xe0000000,
 974	0x9160, 0xffffffff, 0x00010000,
 975	0x9164, 0xffffffff, 0x00030002,
 976	0x9168, 0xffffffff, 0x00040007,
 977	0x916c, 0xffffffff, 0x00060005,
 978	0x9170, 0xffffffff, 0x00090008,
 979	0x9174, 0xffffffff, 0x00020001,
 980	0x9178, 0xffffffff, 0x00040003,
 981	0x917c, 0xffffffff, 0x00000007,
 982	0x9180, 0xffffffff, 0x00060005,
 983	0x9184, 0xffffffff, 0x00090008,
 984	0x9188, 0xffffffff, 0x00030002,
 985	0x918c, 0xffffffff, 0x00050004,
 986	0x9190, 0xffffffff, 0x00000008,
 987	0x9194, 0xffffffff, 0x00070006,
 988	0x9198, 0xffffffff, 0x000a0009,
 989	0x919c, 0xffffffff, 0x00040003,
 990	0x91a0, 0xffffffff, 0x00060005,
 991	0x91a4, 0xffffffff, 0x00000009,
 992	0x91a8, 0xffffffff, 0x00080007,
 993	0x91ac, 0xffffffff, 0x000b000a,
 994	0x91b0, 0xffffffff, 0x00050004,
 995	0x91b4, 0xffffffff, 0x00070006,
 996	0x91b8, 0xffffffff, 0x0008000b,
 997	0x91bc, 0xffffffff, 0x000a0009,
 998	0x91c0, 0xffffffff, 0x000d000c,
 999	0x91c4, 0xffffffff, 0x00060005,
1000	0x91c8, 0xffffffff, 0x00080007,
1001	0x91cc, 0xffffffff, 0x0000000b,
1002	0x91d0, 0xffffffff, 0x000a0009,
1003	0x91d4, 0xffffffff, 0x000d000c,
1004	0x9150, 0xffffffff, 0x96940200,
1005	0x8708, 0xffffffff, 0x00900100,
1006	0xc478, 0xffffffff, 0x00000080,
1007	0xc404, 0xffffffff, 0x0020003f,
1008	0x30, 0xffffffff, 0x0000001c,
1009	0x34, 0x000f0000, 0x000f0000,
1010	0x160c, 0xffffffff, 0x00000100,
1011	0x1024, 0xffffffff, 0x00000100,
1012	0x102c, 0x00000101, 0x00000000,
1013	0x20a8, 0xffffffff, 0x00000104,
1014	0x264c, 0x000c0000, 0x000c0000,
1015	0x2648, 0x000c0000, 0x000c0000,
1016	0x55e4, 0xff000fff, 0x00000100,
1017	0x55e8, 0x00000001, 0x00000001,
1018	0x2f50, 0x00000001, 0x00000001,
1019	0x30cc, 0xc0000fff, 0x00000104,
1020	0xc1e4, 0x00000001, 0x00000001,
1021	0xd0c0, 0xfffffff0, 0x00000100,
1022	0xd8c0, 0xfffffff0, 0x00000100
1023};
1024
1025static const u32 hainan_mgcg_cgcg_init[] =
1026{
1027	0xc400, 0xffffffff, 0xfffffffc,
1028	0x802c, 0xffffffff, 0xe0000000,
1029	0x9a60, 0xffffffff, 0x00000100,
1030	0x92a4, 0xffffffff, 0x00000100,
1031	0xc164, 0xffffffff, 0x00000100,
1032	0x9774, 0xffffffff, 0x00000100,
1033	0x8984, 0xffffffff, 0x06000100,
1034	0x8a18, 0xffffffff, 0x00000100,
1035	0x92a0, 0xffffffff, 0x00000100,
1036	0xc380, 0xffffffff, 0x00000100,
1037	0x8b28, 0xffffffff, 0x00000100,
1038	0x9144, 0xffffffff, 0x00000100,
1039	0x8d88, 0xffffffff, 0x00000100,
1040	0x8d8c, 0xffffffff, 0x00000100,
1041	0x9030, 0xffffffff, 0x00000100,
1042	0x9034, 0xffffffff, 0x00000100,
1043	0x9038, 0xffffffff, 0x00000100,
1044	0x903c, 0xffffffff, 0x00000100,
1045	0xad80, 0xffffffff, 0x00000100,
1046	0xac54, 0xffffffff, 0x00000100,
1047	0x897c, 0xffffffff, 0x06000100,
1048	0x9868, 0xffffffff, 0x00000100,
1049	0x9510, 0xffffffff, 0x00000100,
1050	0xaf04, 0xffffffff, 0x00000100,
1051	0xae04, 0xffffffff, 0x00000100,
1052	0x949c, 0xffffffff, 0x00000100,
1053	0x802c, 0xffffffff, 0xe0000000,
1054	0x9160, 0xffffffff, 0x00010000,
1055	0x9164, 0xffffffff, 0x00030002,
1056	0x9168, 0xffffffff, 0x00040007,
1057	0x916c, 0xffffffff, 0x00060005,
1058	0x9170, 0xffffffff, 0x00090008,
1059	0x9174, 0xffffffff, 0x00020001,
1060	0x9178, 0xffffffff, 0x00040003,
1061	0x917c, 0xffffffff, 0x00000007,
1062	0x9180, 0xffffffff, 0x00060005,
1063	0x9184, 0xffffffff, 0x00090008,
1064	0x9188, 0xffffffff, 0x00030002,
1065	0x918c, 0xffffffff, 0x00050004,
1066	0x9190, 0xffffffff, 0x00000008,
1067	0x9194, 0xffffffff, 0x00070006,
1068	0x9198, 0xffffffff, 0x000a0009,
1069	0x919c, 0xffffffff, 0x00040003,
1070	0x91a0, 0xffffffff, 0x00060005,
1071	0x91a4, 0xffffffff, 0x00000009,
1072	0x91a8, 0xffffffff, 0x00080007,
1073	0x91ac, 0xffffffff, 0x000b000a,
1074	0x91b0, 0xffffffff, 0x00050004,
1075	0x91b4, 0xffffffff, 0x00070006,
1076	0x91b8, 0xffffffff, 0x0008000b,
1077	0x91bc, 0xffffffff, 0x000a0009,
1078	0x91c0, 0xffffffff, 0x000d000c,
1079	0x91c4, 0xffffffff, 0x00060005,
1080	0x91c8, 0xffffffff, 0x00080007,
1081	0x91cc, 0xffffffff, 0x0000000b,
1082	0x91d0, 0xffffffff, 0x000a0009,
1083	0x91d4, 0xffffffff, 0x000d000c,
1084	0x9150, 0xffffffff, 0x96940200,
1085	0x8708, 0xffffffff, 0x00900100,
1086	0xc478, 0xffffffff, 0x00000080,
1087	0xc404, 0xffffffff, 0x0020003f,
1088	0x30, 0xffffffff, 0x0000001c,
1089	0x34, 0x000f0000, 0x000f0000,
1090	0x160c, 0xffffffff, 0x00000100,
1091	0x1024, 0xffffffff, 0x00000100,
1092	0x20a8, 0xffffffff, 0x00000104,
1093	0x264c, 0x000c0000, 0x000c0000,
1094	0x2648, 0x000c0000, 0x000c0000,
1095	0x2f50, 0x00000001, 0x00000001,
1096	0x30cc, 0xc0000fff, 0x00000104,
1097	0xc1e4, 0x00000001, 0x00000001,
1098	0xd0c0, 0xfffffff0, 0x00000100,
1099	0xd8c0, 0xfffffff0, 0x00000100
1100};
1101
1102static u32 verde_pg_init[] =
1103{
1104	0x353c, 0xffffffff, 0x40000,
1105	0x3538, 0xffffffff, 0x200010ff,
1106	0x353c, 0xffffffff, 0x0,
1107	0x353c, 0xffffffff, 0x0,
1108	0x353c, 0xffffffff, 0x0,
1109	0x353c, 0xffffffff, 0x0,
1110	0x353c, 0xffffffff, 0x0,
1111	0x353c, 0xffffffff, 0x7007,
1112	0x3538, 0xffffffff, 0x300010ff,
1113	0x353c, 0xffffffff, 0x0,
1114	0x353c, 0xffffffff, 0x0,
1115	0x353c, 0xffffffff, 0x0,
1116	0x353c, 0xffffffff, 0x0,
1117	0x353c, 0xffffffff, 0x0,
1118	0x353c, 0xffffffff, 0x400000,
1119	0x3538, 0xffffffff, 0x100010ff,
1120	0x353c, 0xffffffff, 0x0,
1121	0x353c, 0xffffffff, 0x0,
1122	0x353c, 0xffffffff, 0x0,
1123	0x353c, 0xffffffff, 0x0,
1124	0x353c, 0xffffffff, 0x0,
1125	0x353c, 0xffffffff, 0x120200,
1126	0x3538, 0xffffffff, 0x500010ff,
1127	0x353c, 0xffffffff, 0x0,
1128	0x353c, 0xffffffff, 0x0,
1129	0x353c, 0xffffffff, 0x0,
1130	0x353c, 0xffffffff, 0x0,
1131	0x353c, 0xffffffff, 0x0,
1132	0x353c, 0xffffffff, 0x1e1e16,
1133	0x3538, 0xffffffff, 0x600010ff,
1134	0x353c, 0xffffffff, 0x0,
1135	0x353c, 0xffffffff, 0x0,
1136	0x353c, 0xffffffff, 0x0,
1137	0x353c, 0xffffffff, 0x0,
1138	0x353c, 0xffffffff, 0x0,
1139	0x353c, 0xffffffff, 0x171f1e,
1140	0x3538, 0xffffffff, 0x700010ff,
1141	0x353c, 0xffffffff, 0x0,
1142	0x353c, 0xffffffff, 0x0,
1143	0x353c, 0xffffffff, 0x0,
1144	0x353c, 0xffffffff, 0x0,
1145	0x353c, 0xffffffff, 0x0,
1146	0x353c, 0xffffffff, 0x0,
1147	0x3538, 0xffffffff, 0x9ff,
1148	0x3500, 0xffffffff, 0x0,
1149	0x3504, 0xffffffff, 0x10000800,
1150	0x3504, 0xffffffff, 0xf,
1151	0x3504, 0xffffffff, 0xf,
1152	0x3500, 0xffffffff, 0x4,
1153	0x3504, 0xffffffff, 0x1000051e,
1154	0x3504, 0xffffffff, 0xffff,
1155	0x3504, 0xffffffff, 0xffff,
1156	0x3500, 0xffffffff, 0x8,
1157	0x3504, 0xffffffff, 0x80500,
1158	0x3500, 0xffffffff, 0x12,
1159	0x3504, 0xffffffff, 0x9050c,
1160	0x3500, 0xffffffff, 0x1d,
1161	0x3504, 0xffffffff, 0xb052c,
1162	0x3500, 0xffffffff, 0x2a,
1163	0x3504, 0xffffffff, 0x1053e,
1164	0x3500, 0xffffffff, 0x2d,
1165	0x3504, 0xffffffff, 0x10546,
1166	0x3500, 0xffffffff, 0x30,
1167	0x3504, 0xffffffff, 0xa054e,
1168	0x3500, 0xffffffff, 0x3c,
1169	0x3504, 0xffffffff, 0x1055f,
1170	0x3500, 0xffffffff, 0x3f,
1171	0x3504, 0xffffffff, 0x10567,
1172	0x3500, 0xffffffff, 0x42,
1173	0x3504, 0xffffffff, 0x1056f,
1174	0x3500, 0xffffffff, 0x45,
1175	0x3504, 0xffffffff, 0x10572,
1176	0x3500, 0xffffffff, 0x48,
1177	0x3504, 0xffffffff, 0x20575,
1178	0x3500, 0xffffffff, 0x4c,
1179	0x3504, 0xffffffff, 0x190801,
1180	0x3500, 0xffffffff, 0x67,
1181	0x3504, 0xffffffff, 0x1082a,
1182	0x3500, 0xffffffff, 0x6a,
1183	0x3504, 0xffffffff, 0x1b082d,
1184	0x3500, 0xffffffff, 0x87,
1185	0x3504, 0xffffffff, 0x310851,
1186	0x3500, 0xffffffff, 0xba,
1187	0x3504, 0xffffffff, 0x891,
1188	0x3500, 0xffffffff, 0xbc,
1189	0x3504, 0xffffffff, 0x893,
1190	0x3500, 0xffffffff, 0xbe,
1191	0x3504, 0xffffffff, 0x20895,
1192	0x3500, 0xffffffff, 0xc2,
1193	0x3504, 0xffffffff, 0x20899,
1194	0x3500, 0xffffffff, 0xc6,
1195	0x3504, 0xffffffff, 0x2089d,
1196	0x3500, 0xffffffff, 0xca,
1197	0x3504, 0xffffffff, 0x8a1,
1198	0x3500, 0xffffffff, 0xcc,
1199	0x3504, 0xffffffff, 0x8a3,
1200	0x3500, 0xffffffff, 0xce,
1201	0x3504, 0xffffffff, 0x308a5,
1202	0x3500, 0xffffffff, 0xd3,
1203	0x3504, 0xffffffff, 0x6d08cd,
1204	0x3500, 0xffffffff, 0x142,
1205	0x3504, 0xffffffff, 0x2000095a,
1206	0x3504, 0xffffffff, 0x1,
1207	0x3500, 0xffffffff, 0x144,
1208	0x3504, 0xffffffff, 0x301f095b,
1209	0x3500, 0xffffffff, 0x165,
1210	0x3504, 0xffffffff, 0xc094d,
1211	0x3500, 0xffffffff, 0x173,
1212	0x3504, 0xffffffff, 0xf096d,
1213	0x3500, 0xffffffff, 0x184,
1214	0x3504, 0xffffffff, 0x15097f,
1215	0x3500, 0xffffffff, 0x19b,
1216	0x3504, 0xffffffff, 0xc0998,
1217	0x3500, 0xffffffff, 0x1a9,
1218	0x3504, 0xffffffff, 0x409a7,
1219	0x3500, 0xffffffff, 0x1af,
1220	0x3504, 0xffffffff, 0xcdc,
1221	0x3500, 0xffffffff, 0x1b1,
1222	0x3504, 0xffffffff, 0x800,
1223	0x3508, 0xffffffff, 0x6c9b2000,
1224	0x3510, 0xfc00, 0x2000,
1225	0x3544, 0xffffffff, 0xfc0,
1226	0x28d4, 0x00000100, 0x100
1227};
1228
1229static void si_init_golden_registers(struct radeon_device *rdev)
1230{
1231	switch (rdev->family) {
1232	case CHIP_TAHITI:
1233		radeon_program_register_sequence(rdev,
1234						 tahiti_golden_registers,
1235						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1236		radeon_program_register_sequence(rdev,
1237						 tahiti_golden_rlc_registers,
1238						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1239		radeon_program_register_sequence(rdev,
1240						 tahiti_mgcg_cgcg_init,
1241						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1242		radeon_program_register_sequence(rdev,
1243						 tahiti_golden_registers2,
1244						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1245		break;
1246	case CHIP_PITCAIRN:
1247		radeon_program_register_sequence(rdev,
1248						 pitcairn_golden_registers,
1249						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1250		radeon_program_register_sequence(rdev,
1251						 pitcairn_golden_rlc_registers,
1252						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1253		radeon_program_register_sequence(rdev,
1254						 pitcairn_mgcg_cgcg_init,
1255						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1256		break;
1257	case CHIP_VERDE:
1258		radeon_program_register_sequence(rdev,
1259						 verde_golden_registers,
1260						 (const u32)ARRAY_SIZE(verde_golden_registers));
1261		radeon_program_register_sequence(rdev,
1262						 verde_golden_rlc_registers,
1263						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1264		radeon_program_register_sequence(rdev,
1265						 verde_mgcg_cgcg_init,
1266						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1267		radeon_program_register_sequence(rdev,
1268						 verde_pg_init,
1269						 (const u32)ARRAY_SIZE(verde_pg_init));
1270		break;
1271	case CHIP_OLAND:
1272		radeon_program_register_sequence(rdev,
1273						 oland_golden_registers,
1274						 (const u32)ARRAY_SIZE(oland_golden_registers));
1275		radeon_program_register_sequence(rdev,
1276						 oland_golden_rlc_registers,
1277						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1278		radeon_program_register_sequence(rdev,
1279						 oland_mgcg_cgcg_init,
1280						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1281		break;
1282	case CHIP_HAINAN:
1283		radeon_program_register_sequence(rdev,
1284						 hainan_golden_registers,
1285						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1286		radeon_program_register_sequence(rdev,
1287						 hainan_golden_registers2,
1288						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1289		radeon_program_register_sequence(rdev,
1290						 hainan_mgcg_cgcg_init,
1291						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1292		break;
1293	default:
1294		break;
1295	}
1296}
1297
1298/**
1299 * si_get_allowed_info_register - fetch the register for the info ioctl
1300 *
1301 * @rdev: radeon_device pointer
1302 * @reg: register offset in bytes
1303 * @val: register value
1304 *
1305 * Returns 0 for success or -EINVAL for an invalid register
1306 *
1307 */
1308int si_get_allowed_info_register(struct radeon_device *rdev,
1309				 u32 reg, u32 *val)
1310{
1311	switch (reg) {
1312	case GRBM_STATUS:
1313	case GRBM_STATUS2:
1314	case GRBM_STATUS_SE0:
1315	case GRBM_STATUS_SE1:
1316	case SRBM_STATUS:
1317	case SRBM_STATUS2:
1318	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1319	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1320	case UVD_STATUS:
1321		*val = RREG32(reg);
1322		return 0;
1323	default:
1324		return -EINVAL;
1325	}
1326}
1327
1328#define PCIE_BUS_CLK                10000
1329#define TCLK                        (PCIE_BUS_CLK / 10)
1330
1331/**
1332 * si_get_xclk - get the xclk
1333 *
1334 * @rdev: radeon_device pointer
1335 *
1336 * Returns the reference clock used by the gfx engine
1337 * (SI).
1338 */
1339u32 si_get_xclk(struct radeon_device *rdev)
1340{
1341	u32 reference_clock = rdev->clock.spll.reference_freq;
1342	u32 tmp;
1343
1344	tmp = RREG32(CG_CLKPIN_CNTL_2);
1345	if (tmp & MUX_TCLK_TO_XCLK)
1346		return TCLK;
1347
1348	tmp = RREG32(CG_CLKPIN_CNTL);
1349	if (tmp & XTALIN_DIVIDE)
1350		return reference_clock / 4;
1351
1352	return reference_clock;
1353}
1354
1355/* get temperature in millidegrees */
1356int si_get_temp(struct radeon_device *rdev)
1357{
1358	u32 temp;
1359	int actual_temp = 0;
1360
1361	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1362		CTF_TEMP_SHIFT;
1363
1364	if (temp & 0x200)
1365		actual_temp = 255;
1366	else
1367		actual_temp = temp & 0x1ff;
1368
1369	actual_temp = (actual_temp * 1000);
1370
1371	return actual_temp;
1372}
1373
1374#define TAHITI_IO_MC_REGS_SIZE 36
1375
1376static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1377	{0x0000006f, 0x03044000},
1378	{0x00000070, 0x0480c018},
1379	{0x00000071, 0x00000040},
1380	{0x00000072, 0x01000000},
1381	{0x00000074, 0x000000ff},
1382	{0x00000075, 0x00143400},
1383	{0x00000076, 0x08ec0800},
1384	{0x00000077, 0x040000cc},
1385	{0x00000079, 0x00000000},
1386	{0x0000007a, 0x21000409},
1387	{0x0000007c, 0x00000000},
1388	{0x0000007d, 0xe8000000},
1389	{0x0000007e, 0x044408a8},
1390	{0x0000007f, 0x00000003},
1391	{0x00000080, 0x00000000},
1392	{0x00000081, 0x01000000},
1393	{0x00000082, 0x02000000},
1394	{0x00000083, 0x00000000},
1395	{0x00000084, 0xe3f3e4f4},
1396	{0x00000085, 0x00052024},
1397	{0x00000087, 0x00000000},
1398	{0x00000088, 0x66036603},
1399	{0x00000089, 0x01000000},
1400	{0x0000008b, 0x1c0a0000},
1401	{0x0000008c, 0xff010000},
1402	{0x0000008e, 0xffffefff},
1403	{0x0000008f, 0xfff3efff},
1404	{0x00000090, 0xfff3efbf},
1405	{0x00000094, 0x00101101},
1406	{0x00000095, 0x00000fff},
1407	{0x00000096, 0x00116fff},
1408	{0x00000097, 0x60010000},
1409	{0x00000098, 0x10010000},
1410	{0x00000099, 0x00006000},
1411	{0x0000009a, 0x00001000},
1412	{0x0000009f, 0x00a77400}
1413};
1414
1415static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1416	{0x0000006f, 0x03044000},
1417	{0x00000070, 0x0480c018},
1418	{0x00000071, 0x00000040},
1419	{0x00000072, 0x01000000},
1420	{0x00000074, 0x000000ff},
1421	{0x00000075, 0x00143400},
1422	{0x00000076, 0x08ec0800},
1423	{0x00000077, 0x040000cc},
1424	{0x00000079, 0x00000000},
1425	{0x0000007a, 0x21000409},
1426	{0x0000007c, 0x00000000},
1427	{0x0000007d, 0xe8000000},
1428	{0x0000007e, 0x044408a8},
1429	{0x0000007f, 0x00000003},
1430	{0x00000080, 0x00000000},
1431	{0x00000081, 0x01000000},
1432	{0x00000082, 0x02000000},
1433	{0x00000083, 0x00000000},
1434	{0x00000084, 0xe3f3e4f4},
1435	{0x00000085, 0x00052024},
1436	{0x00000087, 0x00000000},
1437	{0x00000088, 0x66036603},
1438	{0x00000089, 0x01000000},
1439	{0x0000008b, 0x1c0a0000},
1440	{0x0000008c, 0xff010000},
1441	{0x0000008e, 0xffffefff},
1442	{0x0000008f, 0xfff3efff},
1443	{0x00000090, 0xfff3efbf},
1444	{0x00000094, 0x00101101},
1445	{0x00000095, 0x00000fff},
1446	{0x00000096, 0x00116fff},
1447	{0x00000097, 0x60010000},
1448	{0x00000098, 0x10010000},
1449	{0x00000099, 0x00006000},
1450	{0x0000009a, 0x00001000},
1451	{0x0000009f, 0x00a47400}
1452};
1453
1454static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1455	{0x0000006f, 0x03044000},
1456	{0x00000070, 0x0480c018},
1457	{0x00000071, 0x00000040},
1458	{0x00000072, 0x01000000},
1459	{0x00000074, 0x000000ff},
1460	{0x00000075, 0x00143400},
1461	{0x00000076, 0x08ec0800},
1462	{0x00000077, 0x040000cc},
1463	{0x00000079, 0x00000000},
1464	{0x0000007a, 0x21000409},
1465	{0x0000007c, 0x00000000},
1466	{0x0000007d, 0xe8000000},
1467	{0x0000007e, 0x044408a8},
1468	{0x0000007f, 0x00000003},
1469	{0x00000080, 0x00000000},
1470	{0x00000081, 0x01000000},
1471	{0x00000082, 0x02000000},
1472	{0x00000083, 0x00000000},
1473	{0x00000084, 0xe3f3e4f4},
1474	{0x00000085, 0x00052024},
1475	{0x00000087, 0x00000000},
1476	{0x00000088, 0x66036603},
1477	{0x00000089, 0x01000000},
1478	{0x0000008b, 0x1c0a0000},
1479	{0x0000008c, 0xff010000},
1480	{0x0000008e, 0xffffefff},
1481	{0x0000008f, 0xfff3efff},
1482	{0x00000090, 0xfff3efbf},
1483	{0x00000094, 0x00101101},
1484	{0x00000095, 0x00000fff},
1485	{0x00000096, 0x00116fff},
1486	{0x00000097, 0x60010000},
1487	{0x00000098, 0x10010000},
1488	{0x00000099, 0x00006000},
1489	{0x0000009a, 0x00001000},
1490	{0x0000009f, 0x00a37400}
1491};
1492
1493static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1494	{0x0000006f, 0x03044000},
1495	{0x00000070, 0x0480c018},
1496	{0x00000071, 0x00000040},
1497	{0x00000072, 0x01000000},
1498	{0x00000074, 0x000000ff},
1499	{0x00000075, 0x00143400},
1500	{0x00000076, 0x08ec0800},
1501	{0x00000077, 0x040000cc},
1502	{0x00000079, 0x00000000},
1503	{0x0000007a, 0x21000409},
1504	{0x0000007c, 0x00000000},
1505	{0x0000007d, 0xe8000000},
1506	{0x0000007e, 0x044408a8},
1507	{0x0000007f, 0x00000003},
1508	{0x00000080, 0x00000000},
1509	{0x00000081, 0x01000000},
1510	{0x00000082, 0x02000000},
1511	{0x00000083, 0x00000000},
1512	{0x00000084, 0xe3f3e4f4},
1513	{0x00000085, 0x00052024},
1514	{0x00000087, 0x00000000},
1515	{0x00000088, 0x66036603},
1516	{0x00000089, 0x01000000},
1517	{0x0000008b, 0x1c0a0000},
1518	{0x0000008c, 0xff010000},
1519	{0x0000008e, 0xffffefff},
1520	{0x0000008f, 0xfff3efff},
1521	{0x00000090, 0xfff3efbf},
1522	{0x00000094, 0x00101101},
1523	{0x00000095, 0x00000fff},
1524	{0x00000096, 0x00116fff},
1525	{0x00000097, 0x60010000},
1526	{0x00000098, 0x10010000},
1527	{0x00000099, 0x00006000},
1528	{0x0000009a, 0x00001000},
1529	{0x0000009f, 0x00a17730}
1530};
1531
1532static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1533	{0x0000006f, 0x03044000},
1534	{0x00000070, 0x0480c018},
1535	{0x00000071, 0x00000040},
1536	{0x00000072, 0x01000000},
1537	{0x00000074, 0x000000ff},
1538	{0x00000075, 0x00143400},
1539	{0x00000076, 0x08ec0800},
1540	{0x00000077, 0x040000cc},
1541	{0x00000079, 0x00000000},
1542	{0x0000007a, 0x21000409},
1543	{0x0000007c, 0x00000000},
1544	{0x0000007d, 0xe8000000},
1545	{0x0000007e, 0x044408a8},
1546	{0x0000007f, 0x00000003},
1547	{0x00000080, 0x00000000},
1548	{0x00000081, 0x01000000},
1549	{0x00000082, 0x02000000},
1550	{0x00000083, 0x00000000},
1551	{0x00000084, 0xe3f3e4f4},
1552	{0x00000085, 0x00052024},
1553	{0x00000087, 0x00000000},
1554	{0x00000088, 0x66036603},
1555	{0x00000089, 0x01000000},
1556	{0x0000008b, 0x1c0a0000},
1557	{0x0000008c, 0xff010000},
1558	{0x0000008e, 0xffffefff},
1559	{0x0000008f, 0xfff3efff},
1560	{0x00000090, 0xfff3efbf},
1561	{0x00000094, 0x00101101},
1562	{0x00000095, 0x00000fff},
1563	{0x00000096, 0x00116fff},
1564	{0x00000097, 0x60010000},
1565	{0x00000098, 0x10010000},
1566	{0x00000099, 0x00006000},
1567	{0x0000009a, 0x00001000},
1568	{0x0000009f, 0x00a07730}
1569};
1570
1571/* ucode loading */
1572int si_mc_load_microcode(struct radeon_device *rdev)
1573{
1574	const __be32 *fw_data = NULL;
1575	const __le32 *new_fw_data = NULL;
1576	u32 running;
1577	u32 *io_mc_regs = NULL;
1578	const __le32 *new_io_mc_regs = NULL;
1579	int i, regs_size, ucode_size;
1580
1581	if (!rdev->mc_fw)
1582		return -EINVAL;
1583
1584	if (rdev->new_fw) {
1585		const struct mc_firmware_header_v1_0 *hdr =
1586			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1587
1588		radeon_ucode_print_mc_hdr(&hdr->header);
1589		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1590		new_io_mc_regs = (const __le32 *)
1591			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1592		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1593		new_fw_data = (const __le32 *)
1594			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1595	} else {
1596		ucode_size = rdev->mc_fw->size / 4;
1597
1598		switch (rdev->family) {
1599		case CHIP_TAHITI:
1600			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1601			regs_size = TAHITI_IO_MC_REGS_SIZE;
1602			break;
1603		case CHIP_PITCAIRN:
1604			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1605			regs_size = TAHITI_IO_MC_REGS_SIZE;
1606			break;
1607		case CHIP_VERDE:
1608		default:
1609			io_mc_regs = (u32 *)&verde_io_mc_regs;
1610			regs_size = TAHITI_IO_MC_REGS_SIZE;
1611			break;
1612		case CHIP_OLAND:
1613			io_mc_regs = (u32 *)&oland_io_mc_regs;
1614			regs_size = TAHITI_IO_MC_REGS_SIZE;
1615			break;
1616		case CHIP_HAINAN:
1617			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1618			regs_size = TAHITI_IO_MC_REGS_SIZE;
1619			break;
1620		}
1621		fw_data = (const __be32 *)rdev->mc_fw->data;
1622	}
1623
1624	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1625
1626	if (running == 0) {
1627		/* reset the engine and set to writable */
1628		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1629		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1630
1631		/* load mc io regs */
1632		for (i = 0; i < regs_size; i++) {
1633			if (rdev->new_fw) {
1634				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1635				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1636			} else {
1637				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1638				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1639			}
1640		}
1641		/* load the MC ucode */
1642		for (i = 0; i < ucode_size; i++) {
1643			if (rdev->new_fw)
1644				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1645			else
1646				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1647		}
1648
1649		/* put the engine back into the active state */
1650		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1651		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1652		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1653
1654		/* wait for training to complete */
1655		for (i = 0; i < rdev->usec_timeout; i++) {
1656			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1657				break;
1658			udelay(1);
1659		}
1660		for (i = 0; i < rdev->usec_timeout; i++) {
1661			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1662				break;
1663			udelay(1);
1664		}
1665	}
1666
1667	return 0;
1668}
1669
1670static int si_init_microcode(struct radeon_device *rdev)
1671{
1672	const char *chip_name;
1673	const char *new_chip_name;
1674	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1675	size_t smc_req_size, mc2_req_size;
1676	char fw_name[30];
1677	int err;
1678	int new_fw = 0;
1679	bool new_smc = false;
1680	bool si58_fw = false;
1681	bool banks2_fw = false;
1682
1683	DRM_DEBUG("\n");
1684
1685	switch (rdev->family) {
1686	case CHIP_TAHITI:
1687		chip_name = "TAHITI";
1688		new_chip_name = "tahiti";
1689		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1690		me_req_size = SI_PM4_UCODE_SIZE * 4;
1691		ce_req_size = SI_CE_UCODE_SIZE * 4;
1692		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1693		mc_req_size = SI_MC_UCODE_SIZE * 4;
1694		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1695		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1696		break;
1697	case CHIP_PITCAIRN:
1698		chip_name = "PITCAIRN";
1699		if ((rdev->pdev->revision == 0x81) &&
1700		    ((rdev->pdev->device == 0x6810) ||
1701		     (rdev->pdev->device == 0x6811)))
1702			new_smc = true;
1703		new_chip_name = "pitcairn";
1704		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1705		me_req_size = SI_PM4_UCODE_SIZE * 4;
1706		ce_req_size = SI_CE_UCODE_SIZE * 4;
1707		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1708		mc_req_size = SI_MC_UCODE_SIZE * 4;
1709		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1710		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1711		break;
1712	case CHIP_VERDE:
1713		chip_name = "VERDE";
1714		if (((rdev->pdev->device == 0x6820) &&
1715		     ((rdev->pdev->revision == 0x81) ||
1716		      (rdev->pdev->revision == 0x83))) ||
1717		    ((rdev->pdev->device == 0x6821) &&
1718		     ((rdev->pdev->revision == 0x83) ||
1719		      (rdev->pdev->revision == 0x87))) ||
1720		    ((rdev->pdev->revision == 0x87) &&
1721		     ((rdev->pdev->device == 0x6823) ||
1722		      (rdev->pdev->device == 0x682b))))
1723			new_smc = true;
1724		new_chip_name = "verde";
1725		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1726		me_req_size = SI_PM4_UCODE_SIZE * 4;
1727		ce_req_size = SI_CE_UCODE_SIZE * 4;
1728		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1729		mc_req_size = SI_MC_UCODE_SIZE * 4;
1730		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1731		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1732		break;
1733	case CHIP_OLAND:
1734		chip_name = "OLAND";
1735		if (((rdev->pdev->revision == 0x81) &&
1736		     ((rdev->pdev->device == 0x6600) ||
1737		      (rdev->pdev->device == 0x6604) ||
1738		      (rdev->pdev->device == 0x6605) ||
1739		      (rdev->pdev->device == 0x6610))) ||
1740		    ((rdev->pdev->revision == 0x83) &&
1741		     (rdev->pdev->device == 0x6610)))
1742			new_smc = true;
1743		new_chip_name = "oland";
1744		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1745		me_req_size = SI_PM4_UCODE_SIZE * 4;
1746		ce_req_size = SI_CE_UCODE_SIZE * 4;
1747		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1748		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1749		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1750		break;
1751	case CHIP_HAINAN:
1752		chip_name = "HAINAN";
1753		if (((rdev->pdev->revision == 0x81) &&
1754		     (rdev->pdev->device == 0x6660)) ||
1755		    ((rdev->pdev->revision == 0x83) &&
1756		     ((rdev->pdev->device == 0x6660) ||
1757		      (rdev->pdev->device == 0x6663) ||
1758		      (rdev->pdev->device == 0x6665) ||
1759		      (rdev->pdev->device == 0x6667))))
1760			new_smc = true;
1761		else if ((rdev->pdev->revision == 0xc3) &&
1762			 (rdev->pdev->device == 0x6665))
1763			banks2_fw = true;
1764		new_chip_name = "hainan";
1765		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1766		me_req_size = SI_PM4_UCODE_SIZE * 4;
1767		ce_req_size = SI_CE_UCODE_SIZE * 4;
1768		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1769		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1770		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1771		break;
1772	default: BUG();
1773	}
1774
1775	/* this memory configuration requires special firmware */
1776	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1777		si58_fw = true;
1778
1779	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1780
1781	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1782	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1783	if (err) {
1784		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1785		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1786		if (err)
1787			goto out;
1788		if (rdev->pfp_fw->size != pfp_req_size) {
1789			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1790			       rdev->pfp_fw->size, fw_name);
1791			err = -EINVAL;
1792			goto out;
1793		}
1794	} else {
1795		err = radeon_ucode_validate(rdev->pfp_fw);
1796		if (err) {
1797			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1798			       fw_name);
1799			goto out;
1800		} else {
1801			new_fw++;
1802		}
1803	}
1804
1805	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1806	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1807	if (err) {
1808		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1809		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1810		if (err)
1811			goto out;
1812		if (rdev->me_fw->size != me_req_size) {
1813			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1814			       rdev->me_fw->size, fw_name);
1815			err = -EINVAL;
1816		}
1817	} else {
1818		err = radeon_ucode_validate(rdev->me_fw);
1819		if (err) {
1820			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1821			       fw_name);
1822			goto out;
1823		} else {
1824			new_fw++;
1825		}
1826	}
1827
1828	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1829	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1830	if (err) {
1831		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1832		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1833		if (err)
1834			goto out;
1835		if (rdev->ce_fw->size != ce_req_size) {
1836			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1837			       rdev->ce_fw->size, fw_name);
1838			err = -EINVAL;
1839		}
1840	} else {
1841		err = radeon_ucode_validate(rdev->ce_fw);
1842		if (err) {
1843			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1844			       fw_name);
1845			goto out;
1846		} else {
1847			new_fw++;
1848		}
1849	}
1850
1851	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1852	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1853	if (err) {
1854		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1855		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1856		if (err)
1857			goto out;
1858		if (rdev->rlc_fw->size != rlc_req_size) {
1859			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1860			       rdev->rlc_fw->size, fw_name);
1861			err = -EINVAL;
1862		}
1863	} else {
1864		err = radeon_ucode_validate(rdev->rlc_fw);
1865		if (err) {
1866			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1867			       fw_name);
1868			goto out;
1869		} else {
1870			new_fw++;
1871		}
1872	}
1873
1874	if (si58_fw)
1875		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1876	else
1877		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1878	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1879	if (err) {
1880		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1881		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1882		if (err) {
1883			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1884			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1885			if (err)
1886				goto out;
1887		}
1888		if ((rdev->mc_fw->size != mc_req_size) &&
1889		    (rdev->mc_fw->size != mc2_req_size)) {
1890			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1891			       rdev->mc_fw->size, fw_name);
1892			err = -EINVAL;
1893		}
1894		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1895	} else {
1896		err = radeon_ucode_validate(rdev->mc_fw);
1897		if (err) {
1898			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1899			       fw_name);
1900			goto out;
1901		} else {
1902			new_fw++;
1903		}
1904	}
1905
1906	if (banks2_fw)
1907		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1908	else if (new_smc)
1909		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1910	else
1911		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1912	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1913	if (err) {
1914		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1915		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1916		if (err) {
1917			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1918			release_firmware(rdev->smc_fw);
1919			rdev->smc_fw = NULL;
1920			err = 0;
1921		} else if (rdev->smc_fw->size != smc_req_size) {
1922			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1923			       rdev->smc_fw->size, fw_name);
1924			err = -EINVAL;
1925		}
1926	} else {
1927		err = radeon_ucode_validate(rdev->smc_fw);
1928		if (err) {
1929			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1930			       fw_name);
1931			goto out;
1932		} else {
1933			new_fw++;
1934		}
1935	}
1936
1937	if (new_fw == 0) {
1938		rdev->new_fw = false;
1939	} else if (new_fw < 6) {
1940		pr_err("si_fw: mixing new and old firmware!\n");
1941		err = -EINVAL;
1942	} else {
1943		rdev->new_fw = true;
1944	}
1945out:
1946	if (err) {
1947		if (err != -EINVAL)
1948			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1949			       fw_name);
1950		release_firmware(rdev->pfp_fw);
1951		rdev->pfp_fw = NULL;
1952		release_firmware(rdev->me_fw);
1953		rdev->me_fw = NULL;
1954		release_firmware(rdev->ce_fw);
1955		rdev->ce_fw = NULL;
1956		release_firmware(rdev->rlc_fw);
1957		rdev->rlc_fw = NULL;
1958		release_firmware(rdev->mc_fw);
1959		rdev->mc_fw = NULL;
1960		release_firmware(rdev->smc_fw);
1961		rdev->smc_fw = NULL;
1962	}
1963	return err;
1964}
1965
1966/* watermark setup */
1967static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1968				   struct radeon_crtc *radeon_crtc,
1969				   struct drm_display_mode *mode,
1970				   struct drm_display_mode *other_mode)
1971{
1972	u32 tmp, buffer_alloc, i;
1973	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1974	/*
1975	 * Line Buffer Setup
1976	 * There are 3 line buffers, each one shared by 2 display controllers.
1977	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1978	 * the display controllers.  The paritioning is done via one of four
1979	 * preset allocations specified in bits 21:20:
1980	 *  0 - half lb
1981	 *  2 - whole lb, other crtc must be disabled
1982	 */
1983	/* this can get tricky if we have two large displays on a paired group
1984	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1985	 * non-linked crtcs for maximum line buffer allocation.
1986	 */
1987	if (radeon_crtc->base.enabled && mode) {
1988		if (other_mode) {
1989			tmp = 0; /* 1/2 */
1990			buffer_alloc = 1;
1991		} else {
1992			tmp = 2; /* whole */
1993			buffer_alloc = 2;
1994		}
1995	} else {
1996		tmp = 0;
1997		buffer_alloc = 0;
1998	}
1999
2000	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2001	       DC_LB_MEMORY_CONFIG(tmp));
2002
2003	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2004	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2005	for (i = 0; i < rdev->usec_timeout; i++) {
2006		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2007		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2008			break;
2009		udelay(1);
2010	}
2011
2012	if (radeon_crtc->base.enabled && mode) {
2013		switch (tmp) {
2014		case 0:
2015		default:
2016			return 4096 * 2;
2017		case 2:
2018			return 8192 * 2;
2019		}
2020	}
2021
2022	/* controller not enabled, so no lb used */
2023	return 0;
2024}
2025
2026static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2027{
2028	u32 tmp = RREG32(MC_SHARED_CHMAP);
2029
2030	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2031	case 0:
2032	default:
2033		return 1;
2034	case 1:
2035		return 2;
2036	case 2:
2037		return 4;
2038	case 3:
2039		return 8;
2040	case 4:
2041		return 3;
2042	case 5:
2043		return 6;
2044	case 6:
2045		return 10;
2046	case 7:
2047		return 12;
2048	case 8:
2049		return 16;
2050	}
2051}
2052
/*
 * Inputs to the DCE6 display watermark calculations.
 *
 * dce6_program_watermarks() fills in two instances per crtc, one using the
 * high clocks and one using the low clocks, and passes them to the
 * dce6_*_bandwidth() / dce6_latency_watermark() helpers.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
2068
2069static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2070{
2071	/* Calculate raw DRAM Bandwidth */
2072	fixed20_12 dram_efficiency; /* 0.7 */
2073	fixed20_12 yclk, dram_channels, bandwidth;
2074	fixed20_12 a;
2075
2076	a.full = dfixed_const(1000);
2077	yclk.full = dfixed_const(wm->yclk);
2078	yclk.full = dfixed_div(yclk, a);
2079	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2080	a.full = dfixed_const(10);
2081	dram_efficiency.full = dfixed_const(7);
2082	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2083	bandwidth.full = dfixed_mul(dram_channels, yclk);
2084	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2085
2086	return dfixed_trunc(bandwidth);
2087}
2088
2089static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2090{
2091	/* Calculate DRAM Bandwidth and the part allocated to display. */
2092	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2093	fixed20_12 yclk, dram_channels, bandwidth;
2094	fixed20_12 a;
2095
2096	a.full = dfixed_const(1000);
2097	yclk.full = dfixed_const(wm->yclk);
2098	yclk.full = dfixed_div(yclk, a);
2099	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2100	a.full = dfixed_const(10);
2101	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2102	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2103	bandwidth.full = dfixed_mul(dram_channels, yclk);
2104	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2105
2106	return dfixed_trunc(bandwidth);
2107}
2108
2109static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2110{
2111	/* Calculate the display Data return Bandwidth */
2112	fixed20_12 return_efficiency; /* 0.8 */
2113	fixed20_12 sclk, bandwidth;
2114	fixed20_12 a;
2115
2116	a.full = dfixed_const(1000);
2117	sclk.full = dfixed_const(wm->sclk);
2118	sclk.full = dfixed_div(sclk, a);
2119	a.full = dfixed_const(10);
2120	return_efficiency.full = dfixed_const(8);
2121	return_efficiency.full = dfixed_div(return_efficiency, a);
2122	a.full = dfixed_const(32);
2123	bandwidth.full = dfixed_mul(a, sclk);
2124	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2125
2126	return dfixed_trunc(bandwidth);
2127}
2128
2129static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2130{
2131	return 32;
2132}
2133
2134static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2135{
2136	/* Calculate the DMIF Request Bandwidth */
2137	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2138	fixed20_12 disp_clk, sclk, bandwidth;
2139	fixed20_12 a, b1, b2;
2140	u32 min_bandwidth;
2141
2142	a.full = dfixed_const(1000);
2143	disp_clk.full = dfixed_const(wm->disp_clk);
2144	disp_clk.full = dfixed_div(disp_clk, a);
2145	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2146	b1.full = dfixed_mul(a, disp_clk);
2147
2148	a.full = dfixed_const(1000);
2149	sclk.full = dfixed_const(wm->sclk);
2150	sclk.full = dfixed_div(sclk, a);
2151	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2152	b2.full = dfixed_mul(a, sclk);
2153
2154	a.full = dfixed_const(10);
2155	disp_clk_request_efficiency.full = dfixed_const(8);
2156	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2157
2158	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2159
2160	a.full = dfixed_const(min_bandwidth);
2161	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2162
2163	return dfixed_trunc(bandwidth);
2164}
2165
2166static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2167{
2168	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2169	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2170	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2171	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2172
2173	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2174}
2175
2176static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2177{
2178	/* Calculate the display mode Average Bandwidth
2179	 * DisplayMode should contain the source and destination dimensions,
2180	 * timing, etc.
2181	 */
2182	fixed20_12 bpp;
2183	fixed20_12 line_time;
2184	fixed20_12 src_width;
2185	fixed20_12 bandwidth;
2186	fixed20_12 a;
2187
2188	a.full = dfixed_const(1000);
2189	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2190	line_time.full = dfixed_div(line_time, a);
2191	bpp.full = dfixed_const(wm->bytes_per_pixel);
2192	src_width.full = dfixed_const(wm->src_width);
2193	bandwidth.full = dfixed_mul(src_width, bpp);
2194	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2195	bandwidth.full = dfixed_div(bandwidth, line_time);
2196
2197	return dfixed_trunc(bandwidth);
2198}
2199
2200static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2201{
2202	/* First calcualte the latency in ns */
2203	u32 mc_latency = 2000; /* 2000 ns. */
2204	u32 available_bandwidth = dce6_available_bandwidth(wm);
2205	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2206	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2207	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2208	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2209		(wm->num_heads * cursor_line_pair_return_time);
2210	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2211	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2212	u32 tmp, dmif_size = 12288;
2213	fixed20_12 a, b, c;
2214
2215	if (wm->num_heads == 0)
2216		return 0;
2217
2218	a.full = dfixed_const(2);
2219	b.full = dfixed_const(1);
2220	if ((wm->vsc.full > a.full) ||
2221	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2222	    (wm->vtaps >= 5) ||
2223	    ((wm->vsc.full >= a.full) && wm->interlaced))
2224		max_src_lines_per_dst_line = 4;
2225	else
2226		max_src_lines_per_dst_line = 2;
2227
2228	a.full = dfixed_const(available_bandwidth);
2229	b.full = dfixed_const(wm->num_heads);
2230	a.full = dfixed_div(a, b);
2231	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2232	tmp = min(dfixed_trunc(a), tmp);
2233
2234	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2235
2236	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2237	b.full = dfixed_const(1000);
2238	c.full = dfixed_const(lb_fill_bw);
2239	b.full = dfixed_div(c, b);
2240	a.full = dfixed_div(a, b);
2241	line_fill_time = dfixed_trunc(a);
2242
2243	if (line_fill_time < wm->active_time)
2244		return latency;
2245	else
2246		return latency + (line_fill_time - wm->active_time);
2247
2248}
2249
2250static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2251{
2252	if (dce6_average_bandwidth(wm) <=
2253	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2254		return true;
2255	else
2256		return false;
2257};
2258
2259static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2260{
2261	if (dce6_average_bandwidth(wm) <=
2262	    (dce6_available_bandwidth(wm) / wm->num_heads))
2263		return true;
2264	else
2265		return false;
2266};
2267
2268static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2269{
2270	u32 lb_partitions = wm->lb_size / wm->src_width;
2271	u32 line_time = wm->active_time + wm->blank_time;
2272	u32 latency_tolerant_lines;
2273	u32 latency_hiding;
2274	fixed20_12 a;
2275
2276	a.full = dfixed_const(1);
2277	if (wm->vsc.full > a.full)
2278		latency_tolerant_lines = 1;
2279	else {
2280		if (lb_partitions <= (wm->vtaps + 1))
2281			latency_tolerant_lines = 1;
2282		else
2283			latency_tolerant_lines = 2;
2284	}
2285
2286	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2287
2288	if (dce6_latency_watermark(wm) <= latency_hiding)
2289		return true;
2290	else
2291		return false;
2292}
2293
2294static void dce6_program_watermarks(struct radeon_device *rdev,
2295					 struct radeon_crtc *radeon_crtc,
2296					 u32 lb_size, u32 num_heads)
2297{
2298	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2299	struct dce6_wm_params wm_low, wm_high;
2300	u32 dram_channels;
2301	u32 active_time;
2302	u32 line_time = 0;
2303	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2304	u32 priority_a_mark = 0, priority_b_mark = 0;
2305	u32 priority_a_cnt = PRIORITY_OFF;
2306	u32 priority_b_cnt = PRIORITY_OFF;
2307	u32 tmp, arb_control3;
2308	fixed20_12 a, b, c;
2309
2310	if (radeon_crtc->base.enabled && num_heads && mode) {
2311		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2312					    (u32)mode->clock);
2313		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2314					  (u32)mode->clock);
2315		line_time = min(line_time, (u32)65535);
2316		priority_a_cnt = 0;
2317		priority_b_cnt = 0;
2318
2319		if (rdev->family == CHIP_ARUBA)
2320			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2321		else
2322			dram_channels = si_get_number_of_dram_channels(rdev);
2323
2324		/* watermark for high clocks */
2325		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2326			wm_high.yclk =
2327				radeon_dpm_get_mclk(rdev, false) * 10;
2328			wm_high.sclk =
2329				radeon_dpm_get_sclk(rdev, false) * 10;
2330		} else {
2331			wm_high.yclk = rdev->pm.current_mclk * 10;
2332			wm_high.sclk = rdev->pm.current_sclk * 10;
2333		}
2334
2335		wm_high.disp_clk = mode->clock;
2336		wm_high.src_width = mode->crtc_hdisplay;
2337		wm_high.active_time = active_time;
2338		wm_high.blank_time = line_time - wm_high.active_time;
2339		wm_high.interlaced = false;
2340		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2341			wm_high.interlaced = true;
2342		wm_high.vsc = radeon_crtc->vsc;
2343		wm_high.vtaps = 1;
2344		if (radeon_crtc->rmx_type != RMX_OFF)
2345			wm_high.vtaps = 2;
2346		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2347		wm_high.lb_size = lb_size;
2348		wm_high.dram_channels = dram_channels;
2349		wm_high.num_heads = num_heads;
2350
2351		/* watermark for low clocks */
2352		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2353			wm_low.yclk =
2354				radeon_dpm_get_mclk(rdev, true) * 10;
2355			wm_low.sclk =
2356				radeon_dpm_get_sclk(rdev, true) * 10;
2357		} else {
2358			wm_low.yclk = rdev->pm.current_mclk * 10;
2359			wm_low.sclk = rdev->pm.current_sclk * 10;
2360		}
2361
2362		wm_low.disp_clk = mode->clock;
2363		wm_low.src_width = mode->crtc_hdisplay;
2364		wm_low.active_time = active_time;
2365		wm_low.blank_time = line_time - wm_low.active_time;
2366		wm_low.interlaced = false;
2367		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2368			wm_low.interlaced = true;
2369		wm_low.vsc = radeon_crtc->vsc;
2370		wm_low.vtaps = 1;
2371		if (radeon_crtc->rmx_type != RMX_OFF)
2372			wm_low.vtaps = 2;
2373		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2374		wm_low.lb_size = lb_size;
2375		wm_low.dram_channels = dram_channels;
2376		wm_low.num_heads = num_heads;
2377
2378		/* set for high clocks */
2379		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2380		/* set for low clocks */
2381		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2382
2383		/* possibly force display priority to high */
2384		/* should really do this at mode validation time... */
2385		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2386		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2387		    !dce6_check_latency_hiding(&wm_high) ||
2388		    (rdev->disp_priority == 2)) {
2389			DRM_DEBUG_KMS("force priority to high\n");
2390			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2391			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2392		}
2393		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2394		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2395		    !dce6_check_latency_hiding(&wm_low) ||
2396		    (rdev->disp_priority == 2)) {
2397			DRM_DEBUG_KMS("force priority to high\n");
2398			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2399			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2400		}
2401
2402		a.full = dfixed_const(1000);
2403		b.full = dfixed_const(mode->clock);
2404		b.full = dfixed_div(b, a);
2405		c.full = dfixed_const(latency_watermark_a);
2406		c.full = dfixed_mul(c, b);
2407		c.full = dfixed_mul(c, radeon_crtc->hsc);
2408		c.full = dfixed_div(c, a);
2409		a.full = dfixed_const(16);
2410		c.full = dfixed_div(c, a);
2411		priority_a_mark = dfixed_trunc(c);
2412		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2413
2414		a.full = dfixed_const(1000);
2415		b.full = dfixed_const(mode->clock);
2416		b.full = dfixed_div(b, a);
2417		c.full = dfixed_const(latency_watermark_b);
2418		c.full = dfixed_mul(c, b);
2419		c.full = dfixed_mul(c, radeon_crtc->hsc);
2420		c.full = dfixed_div(c, a);
2421		a.full = dfixed_const(16);
2422		c.full = dfixed_div(c, a);
2423		priority_b_mark = dfixed_trunc(c);
2424		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2425
2426		/* Save number of lines the linebuffer leads before the scanout */
2427		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2428	}
2429
2430	/* select wm A */
2431	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2432	tmp = arb_control3;
2433	tmp &= ~LATENCY_WATERMARK_MASK(3);
2434	tmp |= LATENCY_WATERMARK_MASK(1);
2435	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2436	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2437	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2438		LATENCY_HIGH_WATERMARK(line_time)));
2439	/* select wm B */
2440	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2441	tmp &= ~LATENCY_WATERMARK_MASK(3);
2442	tmp |= LATENCY_WATERMARK_MASK(2);
2443	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2444	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2445	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2446		LATENCY_HIGH_WATERMARK(line_time)));
2447	/* restore original selection */
2448	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2449
2450	/* write the priority marks */
2451	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2452	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2453
2454	/* save values for DPM */
2455	radeon_crtc->line_time = line_time;
2456	radeon_crtc->wm_high = latency_watermark_a;
2457	radeon_crtc->wm_low = latency_watermark_b;
2458}
2459
2460void dce6_bandwidth_update(struct radeon_device *rdev)
2461{
2462	struct drm_display_mode *mode0 = NULL;
2463	struct drm_display_mode *mode1 = NULL;
2464	u32 num_heads = 0, lb_size;
2465	int i;
2466
2467	if (!rdev->mode_info.mode_config_initialized)
2468		return;
2469
2470	radeon_update_display_priority(rdev);
2471
2472	for (i = 0; i < rdev->num_crtc; i++) {
2473		if (rdev->mode_info.crtcs[i]->base.enabled)
2474			num_heads++;
2475	}
2476	for (i = 0; i < rdev->num_crtc; i += 2) {
2477		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2478		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2479		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2480		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2481		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2482		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2483	}
2484}
2485
2486/*
2487 * Core functions
2488 */
2489static void si_tiling_mode_table_init(struct radeon_device *rdev)
2490{
2491	u32 *tile = rdev->config.si.tile_mode_array;
2492	const u32 num_tile_mode_states =
2493			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2494	u32 reg_offset, split_equal_to_row_size;
2495
2496	switch (rdev->config.si.mem_row_size_in_kb) {
2497	case 1:
2498		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2499		break;
2500	case 2:
2501	default:
2502		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2503		break;
2504	case 4:
2505		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2506		break;
2507	}
2508
2509	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2510		tile[reg_offset] = 0;
2511
2512	switch(rdev->family) {
2513	case CHIP_TAHITI:
2514	case CHIP_PITCAIRN:
2515		/* non-AA compressed depth or any compressed stencil */
2516		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2518			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2519			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2520			   NUM_BANKS(ADDR_SURF_16_BANK) |
2521			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2523			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2524		/* 2xAA/4xAA compressed depth only */
2525		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2528			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2529			   NUM_BANKS(ADDR_SURF_16_BANK) |
2530			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533		/* 8xAA compressed depth only */
2534		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2537			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2538			   NUM_BANKS(ADDR_SURF_16_BANK) |
2539			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2543		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2545			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2547			   NUM_BANKS(ADDR_SURF_16_BANK) |
2548			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2550			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2552		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2553			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2554			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2556			   NUM_BANKS(ADDR_SURF_16_BANK) |
2557			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2559			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2560		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2561		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2564			   TILE_SPLIT(split_equal_to_row_size) |
2565			   NUM_BANKS(ADDR_SURF_16_BANK) |
2566			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2570		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2572			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573			   TILE_SPLIT(split_equal_to_row_size) |
2574			   NUM_BANKS(ADDR_SURF_16_BANK) |
2575			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2578		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2579		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2581			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582			   TILE_SPLIT(split_equal_to_row_size) |
2583			   NUM_BANKS(ADDR_SURF_16_BANK) |
2584			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2586			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2587		/* 1D and 1D Array Surfaces */
2588		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2589			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2590			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2591			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2592			   NUM_BANKS(ADDR_SURF_16_BANK) |
2593			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2595			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2596		/* Displayable maps. */
2597		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2598			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2599			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2600			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2601			   NUM_BANKS(ADDR_SURF_16_BANK) |
2602			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2604			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2605		/* Display 8bpp. */
2606		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2609			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2610			   NUM_BANKS(ADDR_SURF_16_BANK) |
2611			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2614		/* Display 16bpp. */
2615		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2617			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2618			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2619			   NUM_BANKS(ADDR_SURF_16_BANK) |
2620			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2623		/* Display 32bpp. */
2624		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2628			   NUM_BANKS(ADDR_SURF_16_BANK) |
2629			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2631			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2632		/* Thin. */
2633		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2634			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2637			   NUM_BANKS(ADDR_SURF_16_BANK) |
2638			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641		/* Thin 8 bpp. */
2642		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2644			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2645			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2646			   NUM_BANKS(ADDR_SURF_16_BANK) |
2647			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2650		/* Thin 16 bpp. */
2651		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2654			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655			   NUM_BANKS(ADDR_SURF_16_BANK) |
2656			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2659		/* Thin 32 bpp. */
2660		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2662			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2663			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2664			   NUM_BANKS(ADDR_SURF_16_BANK) |
2665			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2667			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2668		/* Thin 64 bpp. */
2669		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2671			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2672			   TILE_SPLIT(split_equal_to_row_size) |
2673			   NUM_BANKS(ADDR_SURF_16_BANK) |
2674			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2676			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2677		/* 8 bpp PRT. */
2678		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2680			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2681			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2682			   NUM_BANKS(ADDR_SURF_16_BANK) |
2683			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2684			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2685			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2686		/* 16 bpp PRT */
2687		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2689			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2690			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2691			   NUM_BANKS(ADDR_SURF_16_BANK) |
2692			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2694			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2695		/* 32 bpp PRT */
2696		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2698			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2699			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2700			   NUM_BANKS(ADDR_SURF_16_BANK) |
2701			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2703			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2704		/* 64 bpp PRT */
2705		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2707			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2709			   NUM_BANKS(ADDR_SURF_16_BANK) |
2710			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2713		/* 128 bpp PRT */
2714		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2716			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2717			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2718			   NUM_BANKS(ADDR_SURF_8_BANK) |
2719			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2722
2723		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2724			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2725		break;
2726
2727	case CHIP_VERDE:
2728	case CHIP_OLAND:
2729	case CHIP_HAINAN:
2730		/* non-AA compressed depth or any compressed stencil */
2731		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2733			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2734			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2735			   NUM_BANKS(ADDR_SURF_16_BANK) |
2736			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2739		/* 2xAA/4xAA compressed depth only */
2740		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2742			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2744			   NUM_BANKS(ADDR_SURF_16_BANK) |
2745			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2747			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2748		/* 8xAA compressed depth only */
2749		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2751			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2753			   NUM_BANKS(ADDR_SURF_16_BANK) |
2754			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2756			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2757		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2758		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2759			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2760			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2761			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2762			   NUM_BANKS(ADDR_SURF_16_BANK) |
2763			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2765			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2766		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2767		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2768			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2769			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2771			   NUM_BANKS(ADDR_SURF_16_BANK) |
2772			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2774			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2775		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2776		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2778			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779			   TILE_SPLIT(split_equal_to_row_size) |
2780			   NUM_BANKS(ADDR_SURF_16_BANK) |
2781			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2783			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2784		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2785		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2787			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788			   TILE_SPLIT(split_equal_to_row_size) |
2789			   NUM_BANKS(ADDR_SURF_16_BANK) |
2790			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2792			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2794		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2796			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797			   TILE_SPLIT(split_equal_to_row_size) |
2798			   NUM_BANKS(ADDR_SURF_16_BANK) |
2799			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2801			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2802		/* 1D and 1D Array Surfaces */
2803		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2804			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2805			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2807			   NUM_BANKS(ADDR_SURF_16_BANK) |
2808			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2811		/* Displayable maps. */
2812		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2814			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2816			   NUM_BANKS(ADDR_SURF_16_BANK) |
2817			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2820		/* Display 8bpp. */
2821		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2823			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2824			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2825			   NUM_BANKS(ADDR_SURF_16_BANK) |
2826			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2829		/* Display 16bpp. */
2830		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2833			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2834			   NUM_BANKS(ADDR_SURF_16_BANK) |
2835			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2836			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2837			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2838		/* Display 32bpp. */
2839		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2841			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2842			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2843			   NUM_BANKS(ADDR_SURF_16_BANK) |
2844			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2847		/* Thin. */
2848		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2849			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2850			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2851			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2852			   NUM_BANKS(ADDR_SURF_16_BANK) |
2853			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2856		/* Thin 8 bpp. */
2857		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2859			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2861			   NUM_BANKS(ADDR_SURF_16_BANK) |
2862			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2865		/* Thin 16 bpp. */
2866		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2868			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2869			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2870			   NUM_BANKS(ADDR_SURF_16_BANK) |
2871			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2873			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2874		/* Thin 32 bpp. */
2875		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2878			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2879			   NUM_BANKS(ADDR_SURF_16_BANK) |
2880			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883		/* Thin 64 bpp. */
2884		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2886			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2887			   TILE_SPLIT(split_equal_to_row_size) |
2888			   NUM_BANKS(ADDR_SURF_16_BANK) |
2889			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2892		/* 8 bpp PRT. */
2893		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2895			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2896			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2897			   NUM_BANKS(ADDR_SURF_16_BANK) |
2898			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2899			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2900			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2901		/* 16 bpp PRT */
2902		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2904			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2905			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2906			   NUM_BANKS(ADDR_SURF_16_BANK) |
2907			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2908			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2909			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2910		/* 32 bpp PRT */
2911		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2913			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2914			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2915			   NUM_BANKS(ADDR_SURF_16_BANK) |
2916			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2918			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2919		/* 64 bpp PRT */
2920		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2921			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2922			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2923			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2924			   NUM_BANKS(ADDR_SURF_16_BANK) |
2925			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2927			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2928		/* 128 bpp PRT */
2929		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2930			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2931			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2932			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2933			   NUM_BANKS(ADDR_SURF_8_BANK) |
2934			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2935			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2936			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2937
2938		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2939			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2940		break;
2941
2942	default:
2943		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2944	}
2945}
2946
2947static void si_select_se_sh(struct radeon_device *rdev,
2948			    u32 se_num, u32 sh_num)
2949{
2950	u32 data = INSTANCE_BROADCAST_WRITES;
2951
2952	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2953		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2954	else if (se_num == 0xffffffff)
2955		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2956	else if (sh_num == 0xffffffff)
2957		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2958	else
2959		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2960	WREG32(GRBM_GFX_INDEX, data);
2961}
2962
/**
 * si_create_bitmask - build a mask with the low @bit_width bits set
 * @bit_width: number of low-order bits to set
 *
 * Returns 0 for a width of 0 and 0xffffffff for any width of 32 or more.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by 32 or more is undefined, so saturate. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
2973
2974static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2975{
2976	u32 data, mask;
2977
2978	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2979	if (data & 1)
2980		data &= INACTIVE_CUS_MASK;
2981	else
2982		data = 0;
2983	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2984
2985	data >>= INACTIVE_CUS_SHIFT;
2986
2987	mask = si_create_bitmask(cu_per_sh);
2988
2989	return ~data & mask;
2990}
2991
2992static void si_setup_spi(struct radeon_device *rdev,
2993			 u32 se_num, u32 sh_per_se,
2994			 u32 cu_per_sh)
2995{
2996	int i, j, k;
2997	u32 data, mask, active_cu;
2998
2999	for (i = 0; i < se_num; i++) {
3000		for (j = 0; j < sh_per_se; j++) {
3001			si_select_se_sh(rdev, i, j);
3002			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3003			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3004
3005			mask = 1;
3006			for (k = 0; k < 16; k++) {
3007				mask <<= k;
3008				if (active_cu & mask) {
3009					data &= ~mask;
3010					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3011					break;
3012				}
3013			}
3014		}
3015	}
3016	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3017}
3018
3019static u32 si_get_rb_disabled(struct radeon_device *rdev,
3020			      u32 max_rb_num_per_se,
3021			      u32 sh_per_se)
3022{
3023	u32 data, mask;
3024
3025	data = RREG32(CC_RB_BACKEND_DISABLE);
3026	if (data & 1)
3027		data &= BACKEND_DISABLE_MASK;
3028	else
3029		data = 0;
3030	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3031
3032	data >>= BACKEND_DISABLE_SHIFT;
3033
3034	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3035
3036	return data & mask;
3037}
3038
3039static void si_setup_rb(struct radeon_device *rdev,
3040			u32 se_num, u32 sh_per_se,
3041			u32 max_rb_num_per_se)
3042{
3043	int i, j;
3044	u32 data, mask;
3045	u32 disabled_rbs = 0;
3046	u32 enabled_rbs = 0;
3047
3048	for (i = 0; i < se_num; i++) {
3049		for (j = 0; j < sh_per_se; j++) {
3050			si_select_se_sh(rdev, i, j);
3051			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3052			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3053		}
3054	}
3055	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3056
3057	mask = 1;
3058	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3059		if (!(disabled_rbs & mask))
3060			enabled_rbs |= mask;
3061		mask <<= 1;
3062	}
3063
3064	rdev->config.si.backend_enable_mask = enabled_rbs;
3065
3066	for (i = 0; i < se_num; i++) {
3067		si_select_se_sh(rdev, i, 0xffffffff);
3068		data = 0;
3069		for (j = 0; j < sh_per_se; j++) {
3070			switch (enabled_rbs & 3) {
3071			case 1:
3072				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3073				break;
3074			case 2:
3075				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3076				break;
3077			case 3:
3078			default:
3079				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3080				break;
3081			}
3082			enabled_rbs >>= 2;
3083		}
3084		WREG32(PA_SC_RASTER_CONFIG, data);
3085	}
3086	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3087}
3088
3089static void si_gpu_init(struct radeon_device *rdev)
3090{
3091	u32 gb_addr_config = 0;
3092	u32 mc_shared_chmap, mc_arb_ramcfg;
3093	u32 sx_debug_1;
3094	u32 hdp_host_path_cntl;
3095	u32 tmp;
3096	int i, j;
3097
3098	switch (rdev->family) {
3099	case CHIP_TAHITI:
3100		rdev->config.si.max_shader_engines = 2;
3101		rdev->config.si.max_tile_pipes = 12;
3102		rdev->config.si.max_cu_per_sh = 8;
3103		rdev->config.si.max_sh_per_se = 2;
3104		rdev->config.si.max_backends_per_se = 4;
3105		rdev->config.si.max_texture_channel_caches = 12;
3106		rdev->config.si.max_gprs = 256;
3107		rdev->config.si.max_gs_threads = 32;
3108		rdev->config.si.max_hw_contexts = 8;
3109
3110		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3111		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3112		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3113		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3114		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3115		break;
3116	case CHIP_PITCAIRN:
3117		rdev->config.si.max_shader_engines = 2;
3118		rdev->config.si.max_tile_pipes = 8;
3119		rdev->config.si.max_cu_per_sh = 5;
3120		rdev->config.si.max_sh_per_se = 2;
3121		rdev->config.si.max_backends_per_se = 4;
3122		rdev->config.si.max_texture_channel_caches = 8;
3123		rdev->config.si.max_gprs = 256;
3124		rdev->config.si.max_gs_threads = 32;
3125		rdev->config.si.max_hw_contexts = 8;
3126
3127		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3128		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3129		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3130		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3131		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3132		break;
3133	case CHIP_VERDE:
3134	default:
3135		rdev->config.si.max_shader_engines = 1;
3136		rdev->config.si.max_tile_pipes = 4;
3137		rdev->config.si.max_cu_per_sh = 5;
3138		rdev->config.si.max_sh_per_se = 2;
3139		rdev->config.si.max_backends_per_se = 4;
3140		rdev->config.si.max_texture_channel_caches = 4;
3141		rdev->config.si.max_gprs = 256;
3142		rdev->config.si.max_gs_threads = 32;
3143		rdev->config.si.max_hw_contexts = 8;
3144
3145		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3146		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3147		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3148		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3149		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3150		break;
3151	case CHIP_OLAND:
3152		rdev->config.si.max_shader_engines = 1;
3153		rdev->config.si.max_tile_pipes = 4;
3154		rdev->config.si.max_cu_per_sh = 6;
3155		rdev->config.si.max_sh_per_se = 1;
3156		rdev->config.si.max_backends_per_se = 2;
3157		rdev->config.si.max_texture_channel_caches = 4;
3158		rdev->config.si.max_gprs = 256;
3159		rdev->config.si.max_gs_threads = 16;
3160		rdev->config.si.max_hw_contexts = 8;
3161
3162		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3163		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3164		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3165		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3166		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3167		break;
3168	case CHIP_HAINAN:
3169		rdev->config.si.max_shader_engines = 1;
3170		rdev->config.si.max_tile_pipes = 4;
3171		rdev->config.si.max_cu_per_sh = 5;
3172		rdev->config.si.max_sh_per_se = 1;
3173		rdev->config.si.max_backends_per_se = 1;
3174		rdev->config.si.max_texture_channel_caches = 2;
3175		rdev->config.si.max_gprs = 256;
3176		rdev->config.si.max_gs_threads = 16;
3177		rdev->config.si.max_hw_contexts = 8;
3178
3179		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3180		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3181		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3182		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3183		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3184		break;
3185	}
3186
3187	/* Initialize HDP */
3188	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3189		WREG32((0x2c14 + j), 0x00000000);
3190		WREG32((0x2c18 + j), 0x00000000);
3191		WREG32((0x2c1c + j), 0x00000000);
3192		WREG32((0x2c20 + j), 0x00000000);
3193		WREG32((0x2c24 + j), 0x00000000);
3194	}
3195
3196	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3197	WREG32(SRBM_INT_CNTL, 1);
3198	WREG32(SRBM_INT_ACK, 1);
3199
3200	evergreen_fix_pci_max_read_req_size(rdev);
3201
3202	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3203
3204	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3205	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3206
3207	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3208	rdev->config.si.mem_max_burst_length_bytes = 256;
3209	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3210	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3211	if (rdev->config.si.mem_row_size_in_kb > 4)
3212		rdev->config.si.mem_row_size_in_kb = 4;
3213	/* XXX use MC settings? */
3214	rdev->config.si.shader_engine_tile_size = 32;
3215	rdev->config.si.num_gpus = 1;
3216	rdev->config.si.multi_gpu_tile_size = 64;
3217
3218	/* fix up row size */
3219	gb_addr_config &= ~ROW_SIZE_MASK;
3220	switch (rdev->config.si.mem_row_size_in_kb) {
3221	case 1:
3222	default:
3223		gb_addr_config |= ROW_SIZE(0);
3224		break;
3225	case 2:
3226		gb_addr_config |= ROW_SIZE(1);
3227		break;
3228	case 4:
3229		gb_addr_config |= ROW_SIZE(2);
3230		break;
3231	}
3232
3233	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3234	 * not have bank info, so create a custom tiling dword.
3235	 * bits 3:0   num_pipes
3236	 * bits 7:4   num_banks
3237	 * bits 11:8  group_size
3238	 * bits 15:12 row_size
3239	 */
3240	rdev->config.si.tile_config = 0;
3241	switch (rdev->config.si.num_tile_pipes) {
3242	case 1:
3243		rdev->config.si.tile_config |= (0 << 0);
3244		break;
3245	case 2:
3246		rdev->config.si.tile_config |= (1 << 0);
3247		break;
3248	case 4:
3249		rdev->config.si.tile_config |= (2 << 0);
3250		break;
3251	case 8:
3252	default:
3253		/* XXX what about 12? */
3254		rdev->config.si.tile_config |= (3 << 0);
3255		break;
3256	}	
3257	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3258	case 0: /* four banks */
3259		rdev->config.si.tile_config |= 0 << 4;
3260		break;
3261	case 1: /* eight banks */
3262		rdev->config.si.tile_config |= 1 << 4;
3263		break;
3264	case 2: /* sixteen banks */
3265	default:
3266		rdev->config.si.tile_config |= 2 << 4;
3267		break;
3268	}
3269	rdev->config.si.tile_config |=
3270		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3271	rdev->config.si.tile_config |=
3272		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3273
3274	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3275	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3276	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3277	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3278	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3279	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3280	if (rdev->has_uvd) {
3281		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3282		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3283		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3284	}
3285
3286	si_tiling_mode_table_init(rdev);
3287
3288	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3289		    rdev->config.si.max_sh_per_se,
3290		    rdev->config.si.max_backends_per_se);
3291
3292	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3293		     rdev->config.si.max_sh_per_se,
3294		     rdev->config.si.max_cu_per_sh);
3295
3296	rdev->config.si.active_cus = 0;
3297	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3298		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3299			rdev->config.si.active_cus +=
3300				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3301		}
3302	}
3303
3304	/* set HW defaults for 3D engine */
3305	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3306				     ROQ_IB2_START(0x2b)));
3307	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3308
3309	sx_debug_1 = RREG32(SX_DEBUG_1);
3310	WREG32(SX_DEBUG_1, sx_debug_1);
3311
3312	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3313
3314	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3315				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3316				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3317				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3318
3319	WREG32(VGT_NUM_INSTANCES, 1);
3320
3321	WREG32(CP_PERFMON_CNTL, 0);
3322
3323	WREG32(SQ_CONFIG, 0);
3324
3325	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3326					  FORCE_EOV_MAX_REZ_CNT(255)));
3327
3328	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3329	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3330
3331	WREG32(VGT_GS_VERTEX_REUSE, 16);
3332	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3333
3334	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3335	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3336	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3337	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3338	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3339	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3340	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3341	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3342
3343	tmp = RREG32(HDP_MISC_CNTL);
3344	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3345	WREG32(HDP_MISC_CNTL, tmp);
3346
3347	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3348	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3349
3350	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3351
3352	udelay(50);
3353}
3354
3355/*
3356 * GPU scratch registers helpers function.
3357 */
3358static void si_scratch_init(struct radeon_device *rdev)
3359{
3360	int i;
3361
3362	rdev->scratch.num_reg = 7;
3363	rdev->scratch.reg_base = SCRATCH_REG0;
3364	for (i = 0; i < rdev->scratch.num_reg; i++) {
3365		rdev->scratch.free[i] = true;
3366		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3367	}
3368}
3369
3370void si_fence_ring_emit(struct radeon_device *rdev,
3371			struct radeon_fence *fence)
3372{
3373	struct radeon_ring *ring = &rdev->ring[fence->ring];
3374	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3375
3376	/* flush read cache over gart */
3377	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3378	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3379	radeon_ring_write(ring, 0);
3380	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3381	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3382			  PACKET3_TC_ACTION_ENA |
3383			  PACKET3_SH_KCACHE_ACTION_ENA |
3384			  PACKET3_SH_ICACHE_ACTION_ENA);
3385	radeon_ring_write(ring, 0xFFFFFFFF);
3386	radeon_ring_write(ring, 0);
3387	radeon_ring_write(ring, 10); /* poll interval */
3388	/* EVENT_WRITE_EOP - flush caches, send int */
3389	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3390	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3391	radeon_ring_write(ring, lower_32_bits(addr));
3392	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3393	radeon_ring_write(ring, fence->seq);
3394	radeon_ring_write(ring, 0);
3395}
3396
3397/*
3398 * IB stuff
3399 */
3400void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3401{
3402	struct radeon_ring *ring = &rdev->ring[ib->ring];
3403	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3404	u32 header;
3405
3406	if (ib->is_const_ib) {
3407		/* set switch buffer packet before const IB */
3408		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3409		radeon_ring_write(ring, 0);
3410
3411		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3412	} else {
3413		u32 next_rptr;
3414		if (ring->rptr_save_reg) {
3415			next_rptr = ring->wptr + 3 + 4 + 8;
3416			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3417			radeon_ring_write(ring, ((ring->rptr_save_reg -
3418						  PACKET3_SET_CONFIG_REG_START) >> 2));
3419			radeon_ring_write(ring, next_rptr);
3420		} else if (rdev->wb.enabled) {
3421			next_rptr = ring->wptr + 5 + 4 + 8;
3422			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3423			radeon_ring_write(ring, (1 << 8));
3424			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3425			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3426			radeon_ring_write(ring, next_rptr);
3427		}
3428
3429		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3430	}
3431
3432	radeon_ring_write(ring, header);
3433	radeon_ring_write(ring,
3434#ifdef __BIG_ENDIAN
3435			  (2 << 0) |
3436#endif
3437			  (ib->gpu_addr & 0xFFFFFFFC));
3438	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3439	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3440
3441	if (!ib->is_const_ib) {
3442		/* flush read cache over gart for this vmid */
3443		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3444		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3445		radeon_ring_write(ring, vm_id);
3446		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3447		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3448				  PACKET3_TC_ACTION_ENA |
3449				  PACKET3_SH_KCACHE_ACTION_ENA |
3450				  PACKET3_SH_ICACHE_ACTION_ENA);
3451		radeon_ring_write(ring, 0xFFFFFFFF);
3452		radeon_ring_write(ring, 0);
3453		radeon_ring_write(ring, 10); /* poll interval */
3454	}
3455}
3456
3457/*
3458 * CP.
3459 */
3460static void si_cp_enable(struct radeon_device *rdev, bool enable)
3461{
3462	if (enable)
3463		WREG32(CP_ME_CNTL, 0);
3464	else {
3465		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3466			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3467		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3468		WREG32(SCRATCH_UMSK, 0);
3469		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3470		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3471		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3472	}
3473	udelay(50);
3474}
3475
3476static int si_cp_load_microcode(struct radeon_device *rdev)
3477{
3478	int i;
3479
3480	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3481		return -EINVAL;
3482
3483	si_cp_enable(rdev, false);
3484
3485	if (rdev->new_fw) {
3486		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3487			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3488		const struct gfx_firmware_header_v1_0 *ce_hdr =
3489			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3490		const struct gfx_firmware_header_v1_0 *me_hdr =
3491			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3492		const __le32 *fw_data;
3493		u32 fw_size;
3494
3495		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3496		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3497		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3498
3499		/* PFP */
3500		fw_data = (const __le32 *)
3501			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3502		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3503		WREG32(CP_PFP_UCODE_ADDR, 0);
3504		for (i = 0; i < fw_size; i++)
3505			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3506		WREG32(CP_PFP_UCODE_ADDR, 0);
3507
3508		/* CE */
3509		fw_data = (const __le32 *)
3510			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3511		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3512		WREG32(CP_CE_UCODE_ADDR, 0);
3513		for (i = 0; i < fw_size; i++)
3514			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3515		WREG32(CP_CE_UCODE_ADDR, 0);
3516
3517		/* ME */
3518		fw_data = (const __be32 *)
3519			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3520		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3521		WREG32(CP_ME_RAM_WADDR, 0);
3522		for (i = 0; i < fw_size; i++)
3523			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3524		WREG32(CP_ME_RAM_WADDR, 0);
3525	} else {
3526		const __be32 *fw_data;
3527
3528		/* PFP */
3529		fw_data = (const __be32 *)rdev->pfp_fw->data;
3530		WREG32(CP_PFP_UCODE_ADDR, 0);
3531		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3532			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3533		WREG32(CP_PFP_UCODE_ADDR, 0);
3534
3535		/* CE */
3536		fw_data = (const __be32 *)rdev->ce_fw->data;
3537		WREG32(CP_CE_UCODE_ADDR, 0);
3538		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3539			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3540		WREG32(CP_CE_UCODE_ADDR, 0);
3541
3542		/* ME */
3543		fw_data = (const __be32 *)rdev->me_fw->data;
3544		WREG32(CP_ME_RAM_WADDR, 0);
3545		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3546			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3547		WREG32(CP_ME_RAM_WADDR, 0);
3548	}
3549
3550	WREG32(CP_PFP_UCODE_ADDR, 0);
3551	WREG32(CP_CE_UCODE_ADDR, 0);
3552	WREG32(CP_ME_RAM_WADDR, 0);
3553	WREG32(CP_ME_RAM_RADDR, 0);
3554	return 0;
3555}
3556
3557static int si_cp_start(struct radeon_device *rdev)
3558{
3559	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3560	int r, i;
3561
3562	r = radeon_ring_lock(rdev, ring, 7 + 4);
3563	if (r) {
3564		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3565		return r;
3566	}
3567	/* init the CP */
3568	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3569	radeon_ring_write(ring, 0x1);
3570	radeon_ring_write(ring, 0x0);
3571	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3572	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3573	radeon_ring_write(ring, 0);
3574	radeon_ring_write(ring, 0);
3575
3576	/* init the CE partitions */
3577	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3578	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3579	radeon_ring_write(ring, 0xc000);
3580	radeon_ring_write(ring, 0xe000);
3581	radeon_ring_unlock_commit(rdev, ring, false);
3582
3583	si_cp_enable(rdev, true);
3584
3585	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3586	if (r) {
3587		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3588		return r;
3589	}
3590
3591	/* setup clear context state */
3592	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3593	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3594
3595	for (i = 0; i < si_default_size; i++)
3596		radeon_ring_write(ring, si_default_state[i]);
3597
3598	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3599	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3600
3601	/* set clear context state */
3602	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3603	radeon_ring_write(ring, 0);
3604
3605	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3606	radeon_ring_write(ring, 0x00000316);
3607	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3608	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3609
3610	radeon_ring_unlock_commit(rdev, ring, false);
3611
3612	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3613		ring = &rdev->ring[i];
3614		r = radeon_ring_lock(rdev, ring, 2);
3615
3616		/* clear the compute context state */
3617		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3618		radeon_ring_write(ring, 0);
3619
3620		radeon_ring_unlock_commit(rdev, ring, false);
3621	}
3622
3623	return 0;
3624}
3625
3626static void si_cp_fini(struct radeon_device *rdev)
3627{
3628	struct radeon_ring *ring;
3629	si_cp_enable(rdev, false);
3630
3631	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3632	radeon_ring_fini(rdev, ring);
3633	radeon_scratch_free(rdev, ring->rptr_save_reg);
3634
3635	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3636	radeon_ring_fini(rdev, ring);
3637	radeon_scratch_free(rdev, ring->rptr_save_reg);
3638
3639	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3640	radeon_ring_fini(rdev, ring);
3641	radeon_scratch_free(rdev, ring->rptr_save_reg);
3642}
3643
3644static int si_cp_resume(struct radeon_device *rdev)
3645{
3646	struct radeon_ring *ring;
3647	u32 tmp;
3648	u32 rb_bufsz;
3649	int r;
3650
3651	si_enable_gui_idle_interrupt(rdev, false);
3652
3653	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3654	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3655
3656	/* Set the write pointer delay */
3657	WREG32(CP_RB_WPTR_DELAY, 0);
3658
3659	WREG32(CP_DEBUG, 0);
3660	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3661
3662	/* ring 0 - compute and gfx */
3663	/* Set ring buffer size */
3664	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3665	rb_bufsz = order_base_2(ring->ring_size / 8);
3666	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3667#ifdef __BIG_ENDIAN
3668	tmp |= BUF_SWAP_32BIT;
3669#endif
3670	WREG32(CP_RB0_CNTL, tmp);
3671
3672	/* Initialize the ring buffer's read and write pointers */
3673	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3674	ring->wptr = 0;
3675	WREG32(CP_RB0_WPTR, ring->wptr);
3676
3677	/* set the wb address whether it's enabled or not */
3678	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3679	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3680
3681	if (rdev->wb.enabled)
3682		WREG32(SCRATCH_UMSK, 0xff);
3683	else {
3684		tmp |= RB_NO_UPDATE;
3685		WREG32(SCRATCH_UMSK, 0);
3686	}
3687
3688	mdelay(1);
3689	WREG32(CP_RB0_CNTL, tmp);
3690
3691	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3692
3693	/* ring1  - compute only */
3694	/* Set ring buffer size */
3695	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3696	rb_bufsz = order_base_2(ring->ring_size / 8);
3697	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3698#ifdef __BIG_ENDIAN
3699	tmp |= BUF_SWAP_32BIT;
3700#endif
3701	WREG32(CP_RB1_CNTL, tmp);
3702
3703	/* Initialize the ring buffer's read and write pointers */
3704	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3705	ring->wptr = 0;
3706	WREG32(CP_RB1_WPTR, ring->wptr);
3707
3708	/* set the wb address whether it's enabled or not */
3709	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3710	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3711
3712	mdelay(1);
3713	WREG32(CP_RB1_CNTL, tmp);
3714
3715	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3716
3717	/* ring2 - compute only */
3718	/* Set ring buffer size */
3719	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3720	rb_bufsz = order_base_2(ring->ring_size / 8);
3721	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3722#ifdef __BIG_ENDIAN
3723	tmp |= BUF_SWAP_32BIT;
3724#endif
3725	WREG32(CP_RB2_CNTL, tmp);
3726
3727	/* Initialize the ring buffer's read and write pointers */
3728	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3729	ring->wptr = 0;
3730	WREG32(CP_RB2_WPTR, ring->wptr);
3731
3732	/* set the wb address whether it's enabled or not */
3733	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3734	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3735
3736	mdelay(1);
3737	WREG32(CP_RB2_CNTL, tmp);
3738
3739	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3740
3741	/* start the rings */
3742	si_cp_start(rdev);
3743	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3744	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3745	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3746	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3747	if (r) {
3748		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3749		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3750		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3751		return r;
3752	}
3753	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3754	if (r) {
3755		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3756	}
3757	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3758	if (r) {
3759		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3760	}
3761
3762	si_enable_gui_idle_interrupt(rdev, true);
3763
3764	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3765		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3766
3767	return 0;
3768}
3769
3770u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3771{
3772	u32 reset_mask = 0;
3773	u32 tmp;
3774
3775	/* GRBM_STATUS */
3776	tmp = RREG32(GRBM_STATUS);
3777	if (tmp & (PA_BUSY | SC_BUSY |
3778		   BCI_BUSY | SX_BUSY |
3779		   TA_BUSY | VGT_BUSY |
3780		   DB_BUSY | CB_BUSY |
3781		   GDS_BUSY | SPI_BUSY |
3782		   IA_BUSY | IA_BUSY_NO_DMA))
3783		reset_mask |= RADEON_RESET_GFX;
3784
3785	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3786		   CP_BUSY | CP_COHERENCY_BUSY))
3787		reset_mask |= RADEON_RESET_CP;
3788
3789	if (tmp & GRBM_EE_BUSY)
3790		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3791
3792	/* GRBM_STATUS2 */
3793	tmp = RREG32(GRBM_STATUS2);
3794	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3795		reset_mask |= RADEON_RESET_RLC;
3796
3797	/* DMA_STATUS_REG 0 */
3798	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3799	if (!(tmp & DMA_IDLE))
3800		reset_mask |= RADEON_RESET_DMA;
3801
3802	/* DMA_STATUS_REG 1 */
3803	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3804	if (!(tmp & DMA_IDLE))
3805		reset_mask |= RADEON_RESET_DMA1;
3806
3807	/* SRBM_STATUS2 */
3808	tmp = RREG32(SRBM_STATUS2);
3809	if (tmp & DMA_BUSY)
3810		reset_mask |= RADEON_RESET_DMA;
3811
3812	if (tmp & DMA1_BUSY)
3813		reset_mask |= RADEON_RESET_DMA1;
3814
3815	/* SRBM_STATUS */
3816	tmp = RREG32(SRBM_STATUS);
3817
3818	if (tmp & IH_BUSY)
3819		reset_mask |= RADEON_RESET_IH;
3820
3821	if (tmp & SEM_BUSY)
3822		reset_mask |= RADEON_RESET_SEM;
3823
3824	if (tmp & GRBM_RQ_PENDING)
3825		reset_mask |= RADEON_RESET_GRBM;
3826
3827	if (tmp & VMC_BUSY)
3828		reset_mask |= RADEON_RESET_VMC;
3829
3830	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3831		   MCC_BUSY | MCD_BUSY))
3832		reset_mask |= RADEON_RESET_MC;
3833
3834	if (evergreen_is_display_hung(rdev))
3835		reset_mask |= RADEON_RESET_DISPLAY;
3836
3837	/* VM_L2_STATUS */
3838	tmp = RREG32(VM_L2_STATUS);
3839	if (tmp & L2_BUSY)
3840		reset_mask |= RADEON_RESET_VMC;
3841
3842	/* Skip MC reset as it's mostly likely not hung, just busy */
3843	if (reset_mask & RADEON_RESET_MC) {
3844		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3845		reset_mask &= ~RADEON_RESET_MC;
3846	}
3847
3848	return reset_mask;
3849}
3850
3851static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3852{
3853	struct evergreen_mc_save save;
3854	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3855	u32 tmp;
3856
3857	if (reset_mask == 0)
3858		return;
3859
3860	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3861
3862	evergreen_print_gpu_status_regs(rdev);
3863	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3864		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3865	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3866		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3867
3868	/* disable PG/CG */
3869	si_fini_pg(rdev);
3870	si_fini_cg(rdev);
3871
3872	/* stop the rlc */
3873	si_rlc_stop(rdev);
3874
3875	/* Disable CP parsing/prefetching */
3876	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3877
3878	if (reset_mask & RADEON_RESET_DMA) {
3879		/* dma0 */
3880		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3881		tmp &= ~DMA_RB_ENABLE;
3882		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3883	}
3884	if (reset_mask & RADEON_RESET_DMA1) {
3885		/* dma1 */
3886		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3887		tmp &= ~DMA_RB_ENABLE;
3888		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3889	}
3890
3891	udelay(50);
3892
3893	evergreen_mc_stop(rdev, &save);
3894	if (evergreen_mc_wait_for_idle(rdev)) {
3895		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3896	}
3897
3898	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3899		grbm_soft_reset = SOFT_RESET_CB |
3900			SOFT_RESET_DB |
3901			SOFT_RESET_GDS |
3902			SOFT_RESET_PA |
3903			SOFT_RESET_SC |
3904			SOFT_RESET_BCI |
3905			SOFT_RESET_SPI |
3906			SOFT_RESET_SX |
3907			SOFT_RESET_TC |
3908			SOFT_RESET_TA |
3909			SOFT_RESET_VGT |
3910			SOFT_RESET_IA;
3911	}
3912
3913	if (reset_mask & RADEON_RESET_CP) {
3914		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3915
3916		srbm_soft_reset |= SOFT_RESET_GRBM;
3917	}
3918
3919	if (reset_mask & RADEON_RESET_DMA)
3920		srbm_soft_reset |= SOFT_RESET_DMA;
3921
3922	if (reset_mask & RADEON_RESET_DMA1)
3923		srbm_soft_reset |= SOFT_RESET_DMA1;
3924
3925	if (reset_mask & RADEON_RESET_DISPLAY)
3926		srbm_soft_reset |= SOFT_RESET_DC;
3927
3928	if (reset_mask & RADEON_RESET_RLC)
3929		grbm_soft_reset |= SOFT_RESET_RLC;
3930
3931	if (reset_mask & RADEON_RESET_SEM)
3932		srbm_soft_reset |= SOFT_RESET_SEM;
3933
3934	if (reset_mask & RADEON_RESET_IH)
3935		srbm_soft_reset |= SOFT_RESET_IH;
3936
3937	if (reset_mask & RADEON_RESET_GRBM)
3938		srbm_soft_reset |= SOFT_RESET_GRBM;
3939
3940	if (reset_mask & RADEON_RESET_VMC)
3941		srbm_soft_reset |= SOFT_RESET_VMC;
3942
3943	if (reset_mask & RADEON_RESET_MC)
3944		srbm_soft_reset |= SOFT_RESET_MC;
3945
3946	if (grbm_soft_reset) {
3947		tmp = RREG32(GRBM_SOFT_RESET);
3948		tmp |= grbm_soft_reset;
3949		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3950		WREG32(GRBM_SOFT_RESET, tmp);
3951		tmp = RREG32(GRBM_SOFT_RESET);
3952
3953		udelay(50);
3954
3955		tmp &= ~grbm_soft_reset;
3956		WREG32(GRBM_SOFT_RESET, tmp);
3957		tmp = RREG32(GRBM_SOFT_RESET);
3958	}
3959
3960	if (srbm_soft_reset) {
3961		tmp = RREG32(SRBM_SOFT_RESET);
3962		tmp |= srbm_soft_reset;
3963		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3964		WREG32(SRBM_SOFT_RESET, tmp);
3965		tmp = RREG32(SRBM_SOFT_RESET);
3966
3967		udelay(50);
3968
3969		tmp &= ~srbm_soft_reset;
3970		WREG32(SRBM_SOFT_RESET, tmp);
3971		tmp = RREG32(SRBM_SOFT_RESET);
3972	}
3973
3974	/* Wait a little for things to settle down */
3975	udelay(50);
3976
3977	evergreen_mc_resume(rdev, &save);
3978	udelay(50);
3979
3980	evergreen_print_gpu_status_regs(rdev);
3981}
3982
3983static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3984{
3985	u32 tmp, i;
3986
3987	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3988	tmp |= SPLL_BYPASS_EN;
3989	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3990
3991	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3992	tmp |= SPLL_CTLREQ_CHG;
3993	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3994
3995	for (i = 0; i < rdev->usec_timeout; i++) {
3996		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3997			break;
3998		udelay(1);
3999	}
4000
4001	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4002	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4003	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4004
4005	tmp = RREG32(MPLL_CNTL_MODE);
4006	tmp &= ~MPLL_MCLK_SEL;
4007	WREG32(MPLL_CNTL_MODE, tmp);
4008}
4009
4010static void si_spll_powerdown(struct radeon_device *rdev)
4011{
4012	u32 tmp;
4013
4014	tmp = RREG32(SPLL_CNTL_MODE);
4015	tmp |= SPLL_SW_DIR_CONTROL;
4016	WREG32(SPLL_CNTL_MODE, tmp);
4017
4018	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4019	tmp |= SPLL_RESET;
4020	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4021
4022	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4023	tmp |= SPLL_SLEEP;
4024	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4025
4026	tmp = RREG32(SPLL_CNTL_MODE);
4027	tmp &= ~SPLL_SW_DIR_CONTROL;
4028	WREG32(SPLL_CNTL_MODE, tmp);
4029}
4030
4031static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4032{
4033	struct evergreen_mc_save save;
4034	u32 tmp, i;
4035
4036	dev_info(rdev->dev, "GPU pci config reset\n");
4037
4038	/* disable dpm? */
4039
4040	/* disable cg/pg */
4041	si_fini_pg(rdev);
4042	si_fini_cg(rdev);
4043
4044	/* Disable CP parsing/prefetching */
4045	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4046	/* dma0 */
4047	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4048	tmp &= ~DMA_RB_ENABLE;
4049	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4050	/* dma1 */
4051	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4052	tmp &= ~DMA_RB_ENABLE;
4053	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4054	/* XXX other engines? */
4055
4056	/* halt the rlc, disable cp internal ints */
4057	si_rlc_stop(rdev);
4058
4059	udelay(50);
4060
4061	/* disable mem access */
4062	evergreen_mc_stop(rdev, &save);
4063	if (evergreen_mc_wait_for_idle(rdev)) {
4064		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4065	}
4066
4067	/* set mclk/sclk to bypass */
4068	si_set_clk_bypass_mode(rdev);
4069	/* powerdown spll */
4070	si_spll_powerdown(rdev);
4071	/* disable BM */
4072	pci_clear_master(rdev->pdev);
4073	/* reset */
4074	radeon_pci_config_reset(rdev);
4075	/* wait for asic to come out of reset */
4076	for (i = 0; i < rdev->usec_timeout; i++) {
4077		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4078			break;
4079		udelay(1);
4080	}
4081}
4082
4083int si_asic_reset(struct radeon_device *rdev, bool hard)
4084{
4085	u32 reset_mask;
4086
4087	if (hard) {
4088		si_gpu_pci_config_reset(rdev);
4089		return 0;
4090	}
4091
4092	reset_mask = si_gpu_check_soft_reset(rdev);
4093
4094	if (reset_mask)
4095		r600_set_bios_scratch_engine_hung(rdev, true);
4096
4097	/* try soft reset */
4098	si_gpu_soft_reset(rdev, reset_mask);
4099
4100	reset_mask = si_gpu_check_soft_reset(rdev);
4101
4102	/* try pci config reset */
4103	if (reset_mask && radeon_hard_reset)
4104		si_gpu_pci_config_reset(rdev);
4105
4106	reset_mask = si_gpu_check_soft_reset(rdev);
4107
4108	if (!reset_mask)
4109		r600_set_bios_scratch_engine_hung(rdev, false);
4110
4111	return 0;
4112}
4113
4114/**
4115 * si_gfx_is_lockup - Check if the GFX engine is locked up
4116 *
4117 * @rdev: radeon_device pointer
4118 * @ring: radeon_ring structure holding ring information
4119 *
4120 * Check if the GFX engine is locked up.
4121 * Returns true if the engine appears to be locked up, false if not.
4122 */
4123bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4124{
4125	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4126
4127	if (!(reset_mask & (RADEON_RESET_GFX |
4128			    RADEON_RESET_COMPUTE |
4129			    RADEON_RESET_CP))) {
4130		radeon_ring_lockup_update(rdev, ring);
4131		return false;
4132	}
4133	return radeon_ring_test_lockup(rdev, ring);
4134}
4135
4136/* MC */
4137static void si_mc_program(struct radeon_device *rdev)
4138{
4139	struct evergreen_mc_save save;
4140	u32 tmp;
4141	int i, j;
4142
4143	/* Initialize HDP */
4144	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4145		WREG32((0x2c14 + j), 0x00000000);
4146		WREG32((0x2c18 + j), 0x00000000);
4147		WREG32((0x2c1c + j), 0x00000000);
4148		WREG32((0x2c20 + j), 0x00000000);
4149		WREG32((0x2c24 + j), 0x00000000);
4150	}
4151	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4152
4153	evergreen_mc_stop(rdev, &save);
4154	if (radeon_mc_wait_for_idle(rdev)) {
4155		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4156	}
4157	if (!ASIC_IS_NODCE(rdev))
4158		/* Lockout access through VGA aperture*/
4159		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4160	/* Update configuration */
4161	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4162	       rdev->mc.vram_start >> 12);
4163	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4164	       rdev->mc.vram_end >> 12);
4165	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4166	       rdev->vram_scratch.gpu_addr >> 12);
4167	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4168	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4169	WREG32(MC_VM_FB_LOCATION, tmp);
4170	/* XXX double check these! */
4171	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4172	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4173	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4174	WREG32(MC_VM_AGP_BASE, 0);
4175	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4176	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4177	if (radeon_mc_wait_for_idle(rdev)) {
4178		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4179	}
4180	evergreen_mc_resume(rdev, &save);
4181	if (!ASIC_IS_NODCE(rdev)) {
4182		/* we need to own VRAM, so turn off the VGA renderer here
4183		 * to stop it overwriting our objects */
4184		rv515_vga_render_disable(rdev);
4185	}
4186}
4187
4188void si_vram_gtt_location(struct radeon_device *rdev,
4189			  struct radeon_mc *mc)
4190{
4191	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4192		/* leave room for at least 1024M GTT */
4193		dev_warn(rdev->dev, "limiting VRAM\n");
4194		mc->real_vram_size = 0xFFC0000000ULL;
4195		mc->mc_vram_size = 0xFFC0000000ULL;
4196	}
4197	radeon_vram_location(rdev, &rdev->mc, 0);
4198	rdev->mc.gtt_base_align = 0;
4199	radeon_gtt_location(rdev, mc);
4200}
4201
4202static int si_mc_init(struct radeon_device *rdev)
4203{
4204	u32 tmp;
4205	int chansize, numchan;
4206
4207	/* Get VRAM informations */
4208	rdev->mc.vram_is_ddr = true;
4209	tmp = RREG32(MC_ARB_RAMCFG);
4210	if (tmp & CHANSIZE_OVERRIDE) {
4211		chansize = 16;
4212	} else if (tmp & CHANSIZE_MASK) {
4213		chansize = 64;
4214	} else {
4215		chansize = 32;
4216	}
4217	tmp = RREG32(MC_SHARED_CHMAP);
4218	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4219	case 0:
4220	default:
4221		numchan = 1;
4222		break;
4223	case 1:
4224		numchan = 2;
4225		break;
4226	case 2:
4227		numchan = 4;
4228		break;
4229	case 3:
4230		numchan = 8;
4231		break;
4232	case 4:
4233		numchan = 3;
4234		break;
4235	case 5:
4236		numchan = 6;
4237		break;
4238	case 6:
4239		numchan = 10;
4240		break;
4241	case 7:
4242		numchan = 12;
4243		break;
4244	case 8:
4245		numchan = 16;
4246		break;
4247	}
4248	rdev->mc.vram_width = numchan * chansize;
4249	/* Could aper size report 0 ? */
4250	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4251	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4252	/* size in MB on si */
4253	tmp = RREG32(CONFIG_MEMSIZE);
4254	/* some boards may have garbage in the upper 16 bits */
4255	if (tmp & 0xffff0000) {
4256		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4257		if (tmp & 0xffff)
4258			tmp &= 0xffff;
4259	}
4260	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4261	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4262	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4263	si_vram_gtt_location(rdev, &rdev->mc);
4264	radeon_update_bandwidth_info(rdev);
4265
4266	return 0;
4267}
4268
4269/*
4270 * GART
4271 */
4272void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4273{
4274	/* flush hdp cache */
4275	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4276
4277	/* bits 0-15 are the VM contexts0-15 */
4278	WREG32(VM_INVALIDATE_REQUEST, 1);
4279}
4280
4281static int si_pcie_gart_enable(struct radeon_device *rdev)
4282{
4283	int r, i;
4284
4285	if (rdev->gart.robj == NULL) {
4286		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4287		return -EINVAL;
4288	}
4289	r = radeon_gart_table_vram_pin(rdev);
4290	if (r)
4291		return r;
4292	/* Setup TLB control */
4293	WREG32(MC_VM_MX_L1_TLB_CNTL,
4294	       (0xA << 7) |
4295	       ENABLE_L1_TLB |
4296	       ENABLE_L1_FRAGMENT_PROCESSING |
4297	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4298	       ENABLE_ADVANCED_DRIVER_MODEL |
4299	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4300	/* Setup L2 cache */
4301	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4302	       ENABLE_L2_FRAGMENT_PROCESSING |
4303	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4304	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4305	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4306	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4307	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4308	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4309	       BANK_SELECT(4) |
4310	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4311	/* setup context0 */
4312	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4313	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4314	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4315	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4316			(u32)(rdev->dummy_page.addr >> 12));
4317	WREG32(VM_CONTEXT0_CNTL2, 0);
4318	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4319				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4320
4321	WREG32(0x15D4, 0);
4322	WREG32(0x15D8, 0);
4323	WREG32(0x15DC, 0);
4324
4325	/* empty context1-15 */
4326	/* set vm size, must be a multiple of 4 */
4327	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4328	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4329	/* Assign the pt base to something valid for now; the pts used for
4330	 * the VMs are determined by the application and setup and assigned
4331	 * on the fly in the vm part of radeon_gart.c
4332	 */
4333	for (i = 1; i < 16; i++) {
4334		if (i < 8)
4335			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4336			       rdev->vm_manager.saved_table_addr[i]);
4337		else
4338			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4339			       rdev->vm_manager.saved_table_addr[i]);
4340	}
4341
4342	/* enable context1-15 */
4343	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4344	       (u32)(rdev->dummy_page.addr >> 12));
4345	WREG32(VM_CONTEXT1_CNTL2, 4);
4346	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4347				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4348				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4349				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4350				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4351				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4352				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4353				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4354				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4355				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4356				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4357				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4358				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4359				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4360
4361	si_pcie_gart_tlb_flush(rdev);
4362	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4363		 (unsigned)(rdev->mc.gtt_size >> 20),
4364		 (unsigned long long)rdev->gart.table_addr);
4365	rdev->gart.ready = true;
4366	return 0;
4367}
4368
4369static void si_pcie_gart_disable(struct radeon_device *rdev)
4370{
4371	unsigned i;
4372
4373	for (i = 1; i < 16; ++i) {
4374		uint32_t reg;
4375		if (i < 8)
4376			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4377		else
4378			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4379		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4380	}
4381
4382	/* Disable all tables */
4383	WREG32(VM_CONTEXT0_CNTL, 0);
4384	WREG32(VM_CONTEXT1_CNTL, 0);
4385	/* Setup TLB control */
4386	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4387	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4388	/* Setup L2 cache */
4389	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4390	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4391	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4392	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4393	WREG32(VM_L2_CNTL2, 0);
4394	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4395	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4396	radeon_gart_table_vram_unpin(rdev);
4397}
4398
/*
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * Disables the GART, frees the GART table and then calls the common
 * radeon_gart_fini().
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable before the table backing it is freed */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4405
4406/* vm parser */
4407static bool si_vm_reg_valid(u32 reg)
4408{
4409	/* context regs are fine */
4410	if (reg >= 0x28000)
4411		return true;
4412
4413	/* shader regs are also fine */
4414	if (reg >= 0xB000 && reg < 0xC000)
4415		return true;
4416
4417	/* check config regs */
4418	switch (reg) {
4419	case GRBM_GFX_INDEX:
4420	case CP_STRMOUT_CNTL:
4421	case VGT_VTX_VECT_EJECT_REG:
4422	case VGT_CACHE_INVALIDATION:
4423	case VGT_ESGS_RING_SIZE:
4424	case VGT_GSVS_RING_SIZE:
4425	case VGT_GS_VERTEX_REUSE:
4426	case VGT_PRIMITIVE_TYPE:
4427	case VGT_INDEX_TYPE:
4428	case VGT_NUM_INDICES:
4429	case VGT_NUM_INSTANCES:
4430	case VGT_TF_RING_SIZE:
4431	case VGT_HS_OFFCHIP_PARAM:
4432	case VGT_TF_MEMORY_BASE:
4433	case PA_CL_ENHANCE:
4434	case PA_SU_LINE_STIPPLE_VALUE:
4435	case PA_SC_LINE_STIPPLE_STATE:
4436	case PA_SC_ENHANCE:
4437	case SQC_CACHES:
4438	case SPI_STATIC_THREAD_MGMT_1:
4439	case SPI_STATIC_THREAD_MGMT_2:
4440	case SPI_STATIC_THREAD_MGMT_3:
4441	case SPI_PS_MAX_WAVE_ID:
4442	case SPI_CONFIG_CNTL:
4443	case SPI_CONFIG_CNTL_1:
4444	case TA_CNTL_AUX:
4445	case TA_CS_BC_BASE_ADDR:
4446		return true;
4447	default:
4448		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4449		return false;
4450	}
4451}
4452
4453static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4454				  u32 *ib, struct radeon_cs_packet *pkt)
4455{
4456	switch (pkt->opcode) {
4457	case PACKET3_NOP:
4458	case PACKET3_SET_BASE:
4459	case PACKET3_SET_CE_DE_COUNTERS:
4460	case PACKET3_LOAD_CONST_RAM:
4461	case PACKET3_WRITE_CONST_RAM:
4462	case PACKET3_WRITE_CONST_RAM_OFFSET:
4463	case PACKET3_DUMP_CONST_RAM:
4464	case PACKET3_INCREMENT_CE_COUNTER:
4465	case PACKET3_WAIT_ON_DE_COUNTER:
4466	case PACKET3_CE_WRITE:
4467		break;
4468	default:
4469		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4470		return -EINVAL;
4471	}
4472	return 0;
4473}
4474
4475static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4476{
4477	u32 start_reg, reg, i;
4478	u32 command = ib[idx + 4];
4479	u32 info = ib[idx + 1];
4480	u32 idx_value = ib[idx];
4481	if (command & PACKET3_CP_DMA_CMD_SAS) {
4482		/* src address space is register */
4483		if (((info & 0x60000000) >> 29) == 0) {
4484			start_reg = idx_value << 2;
4485			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4486				reg = start_reg;
4487				if (!si_vm_reg_valid(reg)) {
4488					DRM_ERROR("CP DMA Bad SRC register\n");
4489					return -EINVAL;
4490				}
4491			} else {
4492				for (i = 0; i < (command & 0x1fffff); i++) {
4493					reg = start_reg + (4 * i);
4494					if (!si_vm_reg_valid(reg)) {
4495						DRM_ERROR("CP DMA Bad SRC register\n");
4496						return -EINVAL;
4497					}
4498				}
4499			}
4500		}
4501	}
4502	if (command & PACKET3_CP_DMA_CMD_DAS) {
4503		/* dst address space is register */
4504		if (((info & 0x00300000) >> 20) == 0) {
4505			start_reg = ib[idx + 2];
4506			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4507				reg = start_reg;
4508				if (!si_vm_reg_valid(reg)) {
4509					DRM_ERROR("CP DMA Bad DST register\n");
4510					return -EINVAL;
4511				}
4512			} else {
4513				for (i = 0; i < (command & 0x1fffff); i++) {
4514					reg = start_reg + (4 * i);
4515				if (!si_vm_reg_valid(reg)) {
4516						DRM_ERROR("CP DMA Bad DST register\n");
4517						return -EINVAL;
4518					}
4519				}
4520			}
4521		}
4522	}
4523	return 0;
4524}
4525
4526static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4527				   u32 *ib, struct radeon_cs_packet *pkt)
4528{
4529	int r;
4530	u32 idx = pkt->idx + 1;
4531	u32 idx_value = ib[idx];
4532	u32 start_reg, end_reg, reg, i;
4533
4534	switch (pkt->opcode) {
4535	case PACKET3_NOP:
4536	case PACKET3_SET_BASE:
4537	case PACKET3_CLEAR_STATE:
4538	case PACKET3_INDEX_BUFFER_SIZE:
4539	case PACKET3_DISPATCH_DIRECT:
4540	case PACKET3_DISPATCH_INDIRECT:
4541	case PACKET3_ALLOC_GDS:
4542	case PACKET3_WRITE_GDS_RAM:
4543	case PACKET3_ATOMIC_GDS:
4544	case PACKET3_ATOMIC:
4545	case PACKET3_OCCLUSION_QUERY:
4546	case PACKET3_SET_PREDICATION:
4547	case PACKET3_COND_EXEC:
4548	case PACKET3_PRED_EXEC:
4549	case PACKET3_DRAW_INDIRECT:
4550	case PACKET3_DRAW_INDEX_INDIRECT:
4551	case PACKET3_INDEX_BASE:
4552	case PACKET3_DRAW_INDEX_2:
4553	case PACKET3_CONTEXT_CONTROL:
4554	case PACKET3_INDEX_TYPE:
4555	case PACKET3_DRAW_INDIRECT_MULTI:
4556	case PACKET3_DRAW_INDEX_AUTO:
4557	case PACKET3_DRAW_INDEX_IMMD:
4558	case PACKET3_NUM_INSTANCES:
4559	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4560	case PACKET3_STRMOUT_BUFFER_UPDATE:
4561	case PACKET3_DRAW_INDEX_OFFSET_2:
4562	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4563	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4564	case PACKET3_MPEG_INDEX:
4565	case PACKET3_WAIT_REG_MEM:
4566	case PACKET3_MEM_WRITE:
4567	case PACKET3_PFP_SYNC_ME:
4568	case PACKET3_SURFACE_SYNC:
4569	case PACKET3_EVENT_WRITE:
4570	case PACKET3_EVENT_WRITE_EOP:
4571	case PACKET3_EVENT_WRITE_EOS:
4572	case PACKET3_SET_CONTEXT_REG:
4573	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4574	case PACKET3_SET_SH_REG:
4575	case PACKET3_SET_SH_REG_OFFSET:
4576	case PACKET3_INCREMENT_DE_COUNTER:
4577	case PACKET3_WAIT_ON_CE_COUNTER:
4578	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4579	case PACKET3_ME_WRITE:
4580		break;
4581	case PACKET3_COPY_DATA:
4582		if ((idx_value & 0xf00) == 0) {
4583			reg = ib[idx + 3] * 4;
4584			if (!si_vm_reg_valid(reg))
4585				return -EINVAL;
4586		}
4587		break;
4588	case PACKET3_WRITE_DATA:
4589		if ((idx_value & 0xf00) == 0) {
4590			start_reg = ib[idx + 1] * 4;
4591			if (idx_value & 0x10000) {
4592				if (!si_vm_reg_valid(start_reg))
4593					return -EINVAL;
4594			} else {
4595				for (i = 0; i < (pkt->count - 2); i++) {
4596					reg = start_reg + (4 * i);
4597					if (!si_vm_reg_valid(reg))
4598						return -EINVAL;
4599				}
4600			}
4601		}
4602		break;
4603	case PACKET3_COND_WRITE:
4604		if (idx_value & 0x100) {
4605			reg = ib[idx + 5] * 4;
4606			if (!si_vm_reg_valid(reg))
4607				return -EINVAL;
4608		}
4609		break;
4610	case PACKET3_COPY_DW:
4611		if (idx_value & 0x2) {
4612			reg = ib[idx + 3] * 4;
4613			if (!si_vm_reg_valid(reg))
4614				return -EINVAL;
4615		}
4616		break;
4617	case PACKET3_SET_CONFIG_REG:
4618		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4619		end_reg = 4 * pkt->count + start_reg - 4;
4620		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4621		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4622		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4623			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4624			return -EINVAL;
4625		}
4626		for (i = 0; i < pkt->count; i++) {
4627			reg = start_reg + (4 * i);
4628			if (!si_vm_reg_valid(reg))
4629				return -EINVAL;
4630		}
4631		break;
4632	case PACKET3_CP_DMA:
4633		r = si_vm_packet3_cp_dma_check(ib, idx);
4634		if (r)
4635			return r;
4636		break;
4637	default:
4638		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4639		return -EINVAL;
4640	}
4641	return 0;
4642}
4643
4644static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4645				       u32 *ib, struct radeon_cs_packet *pkt)
4646{
4647	int r;
4648	u32 idx = pkt->idx + 1;
4649	u32 idx_value = ib[idx];
4650	u32 start_reg, reg, i;
4651
4652	switch (pkt->opcode) {
4653	case PACKET3_NOP:
4654	case PACKET3_SET_BASE:
4655	case PACKET3_CLEAR_STATE:
4656	case PACKET3_DISPATCH_DIRECT:
4657	case PACKET3_DISPATCH_INDIRECT:
4658	case PACKET3_ALLOC_GDS:
4659	case PACKET3_WRITE_GDS_RAM:
4660	case PACKET3_ATOMIC_GDS:
4661	case PACKET3_ATOMIC:
4662	case PACKET3_OCCLUSION_QUERY:
4663	case PACKET3_SET_PREDICATION:
4664	case PACKET3_COND_EXEC:
4665	case PACKET3_PRED_EXEC:
4666	case PACKET3_CONTEXT_CONTROL:
4667	case PACKET3_STRMOUT_BUFFER_UPDATE:
4668	case PACKET3_WAIT_REG_MEM:
4669	case PACKET3_MEM_WRITE:
4670	case PACKET3_PFP_SYNC_ME:
4671	case PACKET3_SURFACE_SYNC:
4672	case PACKET3_EVENT_WRITE:
4673	case PACKET3_EVENT_WRITE_EOP:
4674	case PACKET3_EVENT_WRITE_EOS:
4675	case PACKET3_SET_CONTEXT_REG:
4676	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4677	case PACKET3_SET_SH_REG:
4678	case PACKET3_SET_SH_REG_OFFSET:
4679	case PACKET3_INCREMENT_DE_COUNTER:
4680	case PACKET3_WAIT_ON_CE_COUNTER:
4681	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4682	case PACKET3_ME_WRITE:
4683		break;
4684	case PACKET3_COPY_DATA:
4685		if ((idx_value & 0xf00) == 0) {
4686			reg = ib[idx + 3] * 4;
4687			if (!si_vm_reg_valid(reg))
4688				return -EINVAL;
4689		}
4690		break;
4691	case PACKET3_WRITE_DATA:
4692		if ((idx_value & 0xf00) == 0) {
4693			start_reg = ib[idx + 1] * 4;
4694			if (idx_value & 0x10000) {
4695				if (!si_vm_reg_valid(start_reg))
4696					return -EINVAL;
4697			} else {
4698				for (i = 0; i < (pkt->count - 2); i++) {
4699					reg = start_reg + (4 * i);
4700					if (!si_vm_reg_valid(reg))
4701						return -EINVAL;
4702				}
4703			}
4704		}
4705		break;
4706	case PACKET3_COND_WRITE:
4707		if (idx_value & 0x100) {
4708			reg = ib[idx + 5] * 4;
4709			if (!si_vm_reg_valid(reg))
4710				return -EINVAL;
4711		}
4712		break;
4713	case PACKET3_COPY_DW:
4714		if (idx_value & 0x2) {
4715			reg = ib[idx + 3] * 4;
4716			if (!si_vm_reg_valid(reg))
4717				return -EINVAL;
4718		}
4719		break;
4720	case PACKET3_CP_DMA:
4721		r = si_vm_packet3_cp_dma_check(ib, idx);
4722		if (r)
4723			return r;
4724		break;
4725	default:
4726		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4727		return -EINVAL;
4728	}
4729	return 0;
4730}
4731
4732int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4733{
4734	int ret = 0;
4735	u32 idx = 0, i;
4736	struct radeon_cs_packet pkt;
4737
4738	do {
4739		pkt.idx = idx;
4740		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4741		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4742		pkt.one_reg_wr = 0;
4743		switch (pkt.type) {
4744		case RADEON_PACKET_TYPE0:
4745			dev_err(rdev->dev, "Packet0 not allowed!\n");
4746			ret = -EINVAL;
4747			break;
4748		case RADEON_PACKET_TYPE2:
4749			idx += 1;
4750			break;
4751		case RADEON_PACKET_TYPE3:
4752			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4753			if (ib->is_const_ib)
4754				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4755			else {
4756				switch (ib->ring) {
4757				case RADEON_RING_TYPE_GFX_INDEX:
4758					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4759					break;
4760				case CAYMAN_RING_TYPE_CP1_INDEX:
4761				case CAYMAN_RING_TYPE_CP2_INDEX:
4762					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4763					break;
4764				default:
4765					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4766					ret = -EINVAL;
4767					break;
4768				}
4769			}
4770			idx += pkt.count + 2;
4771			break;
4772		default:
4773			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4774			ret = -EINVAL;
4775			break;
4776		}
4777		if (ret) {
4778			for (i = 0; i < ib->length_dw; i++) {
4779				if (i == idx)
4780					printk("\t0x%08x <---\n", ib->ptr[i]);
4781				else
4782					printk("\t0x%08x\n", ib->ptr[i]);
4783			}
4784			break;
4785		}
4786	} while (idx < ib->length_dw);
4787
4788	return ret;
4789}
4790
4791/*
4792 * vm
4793 */
4794int si_vm_init(struct radeon_device *rdev)
4795{
4796	/* number of VMs */
4797	rdev->vm_manager.nvm = 16;
4798	/* base offset of vram pages */
4799	rdev->vm_manager.vram_base_offset = 0;
4800
4801	return 0;
4802}
4803
/**
 * si_vm_fini - VM teardown hook (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: this function performs no work.
 */
void si_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
4807
4808/**
4809 * si_vm_decode_fault - print human readable fault info
4810 *
4811 * @rdev: radeon_device pointer
4812 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4813 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4814 *
4815 * Print human readable fault information (SI).
4816 */
4817static void si_vm_decode_fault(struct radeon_device *rdev,
4818			       u32 status, u32 addr)
4819{
4820	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4821	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4822	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4823	char *block;
4824
4825	if (rdev->family == CHIP_TAHITI) {
4826		switch (mc_id) {
4827		case 160:
4828		case 144:
4829		case 96:
4830		case 80:
4831		case 224:
4832		case 208:
4833		case 32:
4834		case 16:
4835			block = "CB";
4836			break;
4837		case 161:
4838		case 145:
4839		case 97:
4840		case 81:
4841		case 225:
4842		case 209:
4843		case 33:
4844		case 17:
4845			block = "CB_FMASK";
4846			break;
4847		case 162:
4848		case 146:
4849		case 98:
4850		case 82:
4851		case 226:
4852		case 210:
4853		case 34:
4854		case 18:
4855			block = "CB_CMASK";
4856			break;
4857		case 163:
4858		case 147:
4859		case 99:
4860		case 83:
4861		case 227:
4862		case 211:
4863		case 35:
4864		case 19:
4865			block = "CB_IMMED";
4866			break;
4867		case 164:
4868		case 148:
4869		case 100:
4870		case 84:
4871		case 228:
4872		case 212:
4873		case 36:
4874		case 20:
4875			block = "DB";
4876			break;
4877		case 165:
4878		case 149:
4879		case 101:
4880		case 85:
4881		case 229:
4882		case 213:
4883		case 37:
4884		case 21:
4885			block = "DB_HTILE";
4886			break;
4887		case 167:
4888		case 151:
4889		case 103:
4890		case 87:
4891		case 231:
4892		case 215:
4893		case 39:
4894		case 23:
4895			block = "DB_STEN";
4896			break;
4897		case 72:
4898		case 68:
4899		case 64:
4900		case 8:
4901		case 4:
4902		case 0:
4903		case 136:
4904		case 132:
4905		case 128:
4906		case 200:
4907		case 196:
4908		case 192:
4909			block = "TC";
4910			break;
4911		case 112:
4912		case 48:
4913			block = "CP";
4914			break;
4915		case 49:
4916		case 177:
4917		case 50:
4918		case 178:
4919			block = "SH";
4920			break;
4921		case 53:
4922		case 190:
4923			block = "VGT";
4924			break;
4925		case 117:
4926			block = "IH";
4927			break;
4928		case 51:
4929		case 115:
4930			block = "RLC";
4931			break;
4932		case 119:
4933		case 183:
4934			block = "DMA0";
4935			break;
4936		case 61:
4937			block = "DMA1";
4938			break;
4939		case 248:
4940		case 120:
4941			block = "HDP";
4942			break;
4943		default:
4944			block = "unknown";
4945			break;
4946		}
4947	} else {
4948		switch (mc_id) {
4949		case 32:
4950		case 16:
4951		case 96:
4952		case 80:
4953		case 160:
4954		case 144:
4955		case 224:
4956		case 208:
4957			block = "CB";
4958			break;
4959		case 33:
4960		case 17:
4961		case 97:
4962		case 81:
4963		case 161:
4964		case 145:
4965		case 225:
4966		case 209:
4967			block = "CB_FMASK";
4968			break;
4969		case 34:
4970		case 18:
4971		case 98:
4972		case 82:
4973		case 162:
4974		case 146:
4975		case 226:
4976		case 210:
4977			block = "CB_CMASK";
4978			break;
4979		case 35:
4980		case 19:
4981		case 99:
4982		case 83:
4983		case 163:
4984		case 147:
4985		case 227:
4986		case 211:
4987			block = "CB_IMMED";
4988			break;
4989		case 36:
4990		case 20:
4991		case 100:
4992		case 84:
4993		case 164:
4994		case 148:
4995		case 228:
4996		case 212:
4997			block = "DB";
4998			break;
4999		case 37:
5000		case 21:
5001		case 101:
5002		case 85:
5003		case 165:
5004		case 149:
5005		case 229:
5006		case 213:
5007			block = "DB_HTILE";
5008			break;
5009		case 39:
5010		case 23:
5011		case 103:
5012		case 87:
5013		case 167:
5014		case 151:
5015		case 231:
5016		case 215:
5017			block = "DB_STEN";
5018			break;
5019		case 72:
5020		case 68:
5021		case 8:
5022		case 4:
5023		case 136:
5024		case 132:
5025		case 200:
5026		case 196:
5027			block = "TC";
5028			break;
5029		case 112:
5030		case 48:
5031			block = "CP";
5032			break;
5033		case 49:
5034		case 177:
5035		case 50:
5036		case 178:
5037			block = "SH";
5038			break;
5039		case 53:
5040			block = "VGT";
5041			break;
5042		case 117:
5043			block = "IH";
5044			break;
5045		case 51:
5046		case 115:
5047			block = "RLC";
5048			break;
5049		case 119:
5050		case 183:
5051			block = "DMA0";
5052			break;
5053		case 61:
5054			block = "DMA1";
5055			break;
5056		case 248:
5057		case 120:
5058			block = "HDP";
5059			break;
5060		default:
5061			block = "unknown";
5062			break;
5063		}
5064	}
5065
5066	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5067	       protections, vmid, addr,
5068	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5069	       block, mc_id);
5070}
5071
5072void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5073		 unsigned vm_id, uint64_t pd_addr)
5074{
5075	/* write new base address */
5076	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5077	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5078				 WRITE_DATA_DST_SEL(0)));
5079
5080	if (vm_id < 8) {
5081		radeon_ring_write(ring,
5082				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5083	} else {
5084		radeon_ring_write(ring,
5085				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5086	}
5087	radeon_ring_write(ring, 0);
5088	radeon_ring_write(ring, pd_addr >> 12);
5089
5090	/* flush hdp cache */
5091	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5092	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5093				 WRITE_DATA_DST_SEL(0)));
5094	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5095	radeon_ring_write(ring, 0);
5096	radeon_ring_write(ring, 0x1);
5097
5098	/* bits 0-15 are the VM contexts0-15 */
5099	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5100	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5101				 WRITE_DATA_DST_SEL(0)));
5102	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5103	radeon_ring_write(ring, 0);
5104	radeon_ring_write(ring, 1 << vm_id);
5105
5106	/* wait for the invalidate to complete */
5107	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5108	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5109				 WAIT_REG_MEM_ENGINE(0))); /* me */
5110	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5111	radeon_ring_write(ring, 0);
5112	radeon_ring_write(ring, 0); /* ref */
5113	radeon_ring_write(ring, 0); /* mask */
5114	radeon_ring_write(ring, 0x20); /* poll interval */
5115
5116	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5117	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5118	radeon_ring_write(ring, 0x0);
5119}
5120
5121/*
5122 *  Power and clock gating
5123 */
5124static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5125{
5126	int i;
5127
5128	for (i = 0; i < rdev->usec_timeout; i++) {
5129		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5130			break;
5131		udelay(1);
5132	}
5133
5134	for (i = 0; i < rdev->usec_timeout; i++) {
5135		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5136			break;
5137		udelay(1);
5138	}
5139}
5140
5141static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5142					 bool enable)
5143{
5144	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5145	u32 mask;
5146	int i;
5147
5148	if (enable)
5149		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5150	else
5151		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5152	WREG32(CP_INT_CNTL_RING0, tmp);
5153
5154	if (!enable) {
5155		/* read a gfx register */
5156		tmp = RREG32(DB_DEPTH_INFO);
5157
5158		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5159		for (i = 0; i < rdev->usec_timeout; i++) {
5160			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5161				break;
5162			udelay(1);
5163		}
5164	}
5165}
5166
5167static void si_set_uvd_dcm(struct radeon_device *rdev,
5168			   bool sw_mode)
5169{
5170	u32 tmp, tmp2;
5171
5172	tmp = RREG32(UVD_CGC_CTRL);
5173	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5174	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5175
5176	if (sw_mode) {
5177		tmp &= ~0x7ffff800;
5178		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5179	} else {
5180		tmp |= 0x7ffff800;
5181		tmp2 = 0;
5182	}
5183
5184	WREG32(UVD_CGC_CTRL, tmp);
5185	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5186}
5187
5188void si_init_uvd_internal_cg(struct radeon_device *rdev)
5189{
5190	bool hw_mode = true;
5191
5192	if (hw_mode) {
5193		si_set_uvd_dcm(rdev, false);
5194	} else {
5195		u32 tmp = RREG32(UVD_CGC_CTRL);
5196		tmp &= ~DCM;
5197		WREG32(UVD_CGC_CTRL, tmp);
5198	}
5199}
5200
5201static u32 si_halt_rlc(struct radeon_device *rdev)
5202{
5203	u32 data, orig;
5204
5205	orig = data = RREG32(RLC_CNTL);
5206
5207	if (data & RLC_ENABLE) {
5208		data &= ~RLC_ENABLE;
5209		WREG32(RLC_CNTL, data);
5210
5211		si_wait_for_rlc_serdes(rdev);
5212	}
5213
5214	return orig;
5215}
5216
5217static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5218{
5219	u32 tmp;
5220
5221	tmp = RREG32(RLC_CNTL);
5222	if (tmp != rlc)
5223		WREG32(RLC_CNTL, rlc);
5224}
5225
5226static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5227{
5228	u32 data, orig;
5229
5230	orig = data = RREG32(DMA_PG);
5231	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5232		data |= PG_CNTL_ENABLE;
5233	else
5234		data &= ~PG_CNTL_ENABLE;
5235	if (orig != data)
5236		WREG32(DMA_PG, data);
5237}
5238
5239static void si_init_dma_pg(struct radeon_device *rdev)
5240{
5241	u32 tmp;
5242
5243	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5244	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5245
5246	for (tmp = 0; tmp < 5; tmp++)
5247		WREG32(DMA_PGFSM_WRITE, 0);
5248}
5249
5250static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5251			       bool enable)
5252{
5253	u32 tmp;
5254
5255	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5256		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5257		WREG32(RLC_TTOP_D, tmp);
5258
5259		tmp = RREG32(RLC_PG_CNTL);
5260		tmp |= GFX_PG_ENABLE;
5261		WREG32(RLC_PG_CNTL, tmp);
5262
5263		tmp = RREG32(RLC_AUTO_PG_CTRL);
5264		tmp |= AUTO_PG_EN;
5265		WREG32(RLC_AUTO_PG_CTRL, tmp);
5266	} else {
5267		tmp = RREG32(RLC_AUTO_PG_CTRL);
5268		tmp &= ~AUTO_PG_EN;
5269		WREG32(RLC_AUTO_PG_CTRL, tmp);
5270
5271		tmp = RREG32(DB_RENDER_CONTROL);
5272	}
5273}
5274
5275static void si_init_gfx_cgpg(struct radeon_device *rdev)
5276{
5277	u32 tmp;
5278
5279	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5280
5281	tmp = RREG32(RLC_PG_CNTL);
5282	tmp |= GFX_PG_SRC;
5283	WREG32(RLC_PG_CNTL, tmp);
5284
5285	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5286
5287	tmp = RREG32(RLC_AUTO_PG_CTRL);
5288
5289	tmp &= ~GRBM_REG_SGIT_MASK;
5290	tmp |= GRBM_REG_SGIT(0x700);
5291	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5292	WREG32(RLC_AUTO_PG_CTRL, tmp);
5293}
5294
5295static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5296{
5297	u32 mask = 0, tmp, tmp1;
5298	int i;
5299
5300	si_select_se_sh(rdev, se, sh);
5301	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5302	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5303	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5304
5305	tmp &= 0xffff0000;
5306
5307	tmp |= tmp1;
5308	tmp >>= 16;
5309
5310	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5311		mask <<= 1;
5312		mask |= 1;
5313	}
5314
5315	return (~tmp) & mask;
5316}
5317
5318static void si_init_ao_cu_mask(struct radeon_device *rdev)
5319{
5320	u32 i, j, k, active_cu_number = 0;
5321	u32 mask, counter, cu_bitmap;
5322	u32 tmp = 0;
5323
5324	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5325		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5326			mask = 1;
5327			cu_bitmap = 0;
5328			counter  = 0;
5329			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5330				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5331					if (counter < 2)
5332						cu_bitmap |= mask;
5333					counter++;
5334				}
5335				mask <<= 1;
5336			}
5337
5338			active_cu_number += counter;
5339			tmp |= (cu_bitmap << (i * 16 + j * 8));
5340		}
5341	}
5342
5343	WREG32(RLC_PG_AO_CU_MASK, tmp);
5344
5345	tmp = RREG32(RLC_MAX_PG_CU);
5346	tmp &= ~MAX_PU_CU_MASK;
5347	tmp |= MAX_PU_CU(active_cu_number);
5348	WREG32(RLC_MAX_PG_CU, tmp);
5349}
5350
5351static void si_enable_cgcg(struct radeon_device *rdev,
5352			   bool enable)
5353{
5354	u32 data, orig, tmp;
5355
5356	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5357
5358	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5359		si_enable_gui_idle_interrupt(rdev, true);
5360
5361		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5362
5363		tmp = si_halt_rlc(rdev);
5364
5365		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5366		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5367		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5368
5369		si_wait_for_rlc_serdes(rdev);
5370
5371		si_update_rlc(rdev, tmp);
5372
5373		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5374
5375		data |= CGCG_EN | CGLS_EN;
5376	} else {
5377		si_enable_gui_idle_interrupt(rdev, false);
5378
5379		RREG32(CB_CGTT_SCLK_CTRL);
5380		RREG32(CB_CGTT_SCLK_CTRL);
5381		RREG32(CB_CGTT_SCLK_CTRL);
5382		RREG32(CB_CGTT_SCLK_CTRL);
5383
5384		data &= ~(CGCG_EN | CGLS_EN);
5385	}
5386
5387	if (orig != data)
5388		WREG32(RLC_CGCG_CGLS_CTRL, data);
5389}
5390
5391static void si_enable_mgcg(struct radeon_device *rdev,
5392			   bool enable)
5393{
5394	u32 data, orig, tmp = 0;
5395
5396	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5397		orig = data = RREG32(CGTS_SM_CTRL_REG);
5398		data = 0x96940200;
5399		if (orig != data)
5400			WREG32(CGTS_SM_CTRL_REG, data);
5401
5402		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5403			orig = data = RREG32(CP_MEM_SLP_CNTL);
5404			data |= CP_MEM_LS_EN;
5405			if (orig != data)
5406				WREG32(CP_MEM_SLP_CNTL, data);
5407		}
5408
5409		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5410		data &= 0xffffffc0;
5411		if (orig != data)
5412			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5413
5414		tmp = si_halt_rlc(rdev);
5415
5416		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5417		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5418		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5419
5420		si_update_rlc(rdev, tmp);
5421	} else {
5422		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5423		data |= 0x00000003;
5424		if (orig != data)
5425			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5426
5427		data = RREG32(CP_MEM_SLP_CNTL);
5428		if (data & CP_MEM_LS_EN) {
5429			data &= ~CP_MEM_LS_EN;
5430			WREG32(CP_MEM_SLP_CNTL, data);
5431		}
5432		orig = data = RREG32(CGTS_SM_CTRL_REG);
5433		data |= LS_OVERRIDE | OVERRIDE;
5434		if (orig != data)
5435			WREG32(CGTS_SM_CTRL_REG, data);
5436
5437		tmp = si_halt_rlc(rdev);
5438
5439		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5440		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5441		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5442
5443		si_update_rlc(rdev, tmp);
5444	}
5445}
5446
5447static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5448			       bool enable)
5449{
5450	u32 orig, data, tmp;
5451
5452	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5453		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5454		tmp |= 0x3fff;
5455		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5456
5457		orig = data = RREG32(UVD_CGC_CTRL);
5458		data |= DCM;
5459		if (orig != data)
5460			WREG32(UVD_CGC_CTRL, data);
5461
5462		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5463		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5464	} else {
5465		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5466		tmp &= ~0x3fff;
5467		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5468
5469		orig = data = RREG32(UVD_CGC_CTRL);
5470		data &= ~DCM;
5471		if (orig != data)
5472			WREG32(UVD_CGC_CTRL, data);
5473
5474		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5475		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5476	}
5477}
5478
5479static const u32 mc_cg_registers[] =
5480{
5481	MC_HUB_MISC_HUB_CG,
5482	MC_HUB_MISC_SIP_CG,
5483	MC_HUB_MISC_VM_CG,
5484	MC_XPB_CLK_GAT,
5485	ATC_MISC_CG,
5486	MC_CITF_MISC_WR_CG,
5487	MC_CITF_MISC_RD_CG,
5488	MC_CITF_MISC_VM_CG,
5489	VM_L2_CG,
5490};
5491
5492static void si_enable_mc_ls(struct radeon_device *rdev,
5493			    bool enable)
5494{
5495	int i;
5496	u32 orig, data;
5497
5498	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5499		orig = data = RREG32(mc_cg_registers[i]);
5500		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5501			data |= MC_LS_ENABLE;
5502		else
5503			data &= ~MC_LS_ENABLE;
5504		if (data != orig)
5505			WREG32(mc_cg_registers[i], data);
5506	}
5507}
5508
5509static void si_enable_mc_mgcg(struct radeon_device *rdev,
5510			       bool enable)
5511{
5512	int i;
5513	u32 orig, data;
5514
5515	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5516		orig = data = RREG32(mc_cg_registers[i]);
5517		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5518			data |= MC_CG_ENABLE;
5519		else
5520			data &= ~MC_CG_ENABLE;
5521		if (data != orig)
5522			WREG32(mc_cg_registers[i], data);
5523	}
5524}
5525
5526static void si_enable_dma_mgcg(struct radeon_device *rdev,
5527			       bool enable)
5528{
5529	u32 orig, data, offset;
5530	int i;
5531
5532	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5533		for (i = 0; i < 2; i++) {
5534			if (i == 0)
5535				offset = DMA0_REGISTER_OFFSET;
5536			else
5537				offset = DMA1_REGISTER_OFFSET;
5538			orig = data = RREG32(DMA_POWER_CNTL + offset);
5539			data &= ~MEM_POWER_OVERRIDE;
5540			if (data != orig)
5541				WREG32(DMA_POWER_CNTL + offset, data);
5542			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5543		}
5544	} else {
5545		for (i = 0; i < 2; i++) {
5546			if (i == 0)
5547				offset = DMA0_REGISTER_OFFSET;
5548			else
5549				offset = DMA1_REGISTER_OFFSET;
5550			orig = data = RREG32(DMA_POWER_CNTL + offset);
5551			data |= MEM_POWER_OVERRIDE;
5552			if (data != orig)
5553				WREG32(DMA_POWER_CNTL + offset, data);
5554
5555			orig = data = RREG32(DMA_CLK_CTRL + offset);
5556			data = 0xff000000;
5557			if (data != orig)
5558				WREG32(DMA_CLK_CTRL + offset, data);
5559		}
5560	}
5561}
5562
5563static void si_enable_bif_mgls(struct radeon_device *rdev,
5564			       bool enable)
5565{
5566	u32 orig, data;
5567
5568	orig = data = RREG32_PCIE(PCIE_CNTL2);
5569
5570	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5571		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5572			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5573	else
5574		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5575			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5576
5577	if (orig != data)
5578		WREG32_PCIE(PCIE_CNTL2, data);
5579}
5580
5581static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5582			       bool enable)
5583{
5584	u32 orig, data;
5585
5586	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5587
5588	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5589		data &= ~CLOCK_GATING_DIS;
5590	else
5591		data |= CLOCK_GATING_DIS;
5592
5593	if (orig != data)
5594		WREG32(HDP_HOST_PATH_CNTL, data);
5595}
5596
5597static void si_enable_hdp_ls(struct radeon_device *rdev,
5598			     bool enable)
5599{
5600	u32 orig, data;
5601
5602	orig = data = RREG32(HDP_MEM_POWER_LS);
5603
5604	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5605		data |= HDP_LS_ENABLE;
5606	else
5607		data &= ~HDP_LS_ENABLE;
5608
5609	if (orig != data)
5610		WREG32(HDP_MEM_POWER_LS, data);
5611}
5612
5613static void si_update_cg(struct radeon_device *rdev,
5614			 u32 block, bool enable)
5615{
5616	if (block & RADEON_CG_BLOCK_GFX) {
5617		si_enable_gui_idle_interrupt(rdev, false);
5618		/* order matters! */
5619		if (enable) {
5620			si_enable_mgcg(rdev, true);
5621			si_enable_cgcg(rdev, true);
5622		} else {
5623			si_enable_cgcg(rdev, false);
5624			si_enable_mgcg(rdev, false);
5625		}
5626		si_enable_gui_idle_interrupt(rdev, true);
5627	}
5628
5629	if (block & RADEON_CG_BLOCK_MC) {
5630		si_enable_mc_mgcg(rdev, enable);
5631		si_enable_mc_ls(rdev, enable);
5632	}
5633
5634	if (block & RADEON_CG_BLOCK_SDMA) {
5635		si_enable_dma_mgcg(rdev, enable);
5636	}
5637
5638	if (block & RADEON_CG_BLOCK_BIF) {
5639		si_enable_bif_mgls(rdev, enable);
5640	}
5641
5642	if (block & RADEON_CG_BLOCK_UVD) {
5643		if (rdev->has_uvd) {
5644			si_enable_uvd_mgcg(rdev, enable);
5645		}
5646	}
5647
5648	if (block & RADEON_CG_BLOCK_HDP) {
5649		si_enable_hdp_mgcg(rdev, enable);
5650		si_enable_hdp_ls(rdev, enable);
5651	}
5652}
5653
5654static void si_init_cg(struct radeon_device *rdev)
5655{
5656	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5657			    RADEON_CG_BLOCK_MC |
5658			    RADEON_CG_BLOCK_SDMA |
5659			    RADEON_CG_BLOCK_BIF |
5660			    RADEON_CG_BLOCK_HDP), true);
5661	if (rdev->has_uvd) {
5662		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5663		si_init_uvd_internal_cg(rdev);
5664	}
5665}
5666
5667static void si_fini_cg(struct radeon_device *rdev)
5668{
5669	if (rdev->has_uvd) {
5670		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5671	}
5672	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5673			    RADEON_CG_BLOCK_MC |
5674			    RADEON_CG_BLOCK_SDMA |
5675			    RADEON_CG_BLOCK_BIF |
5676			    RADEON_CG_BLOCK_HDP), false);
5677}
5678
5679u32 si_get_csb_size(struct radeon_device *rdev)
5680{
5681	u32 count = 0;
5682	const struct cs_section_def *sect = NULL;
5683	const struct cs_extent_def *ext = NULL;
5684
5685	if (rdev->rlc.cs_data == NULL)
5686		return 0;
5687
5688	/* begin clear state */
5689	count += 2;
5690	/* context control state */
5691	count += 3;
5692
5693	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5694		for (ext = sect->section; ext->extent != NULL; ++ext) {
5695			if (sect->id == SECT_CONTEXT)
5696				count += 2 + ext->reg_count;
5697			else
5698				return 0;
5699		}
5700	}
5701	/* pa_sc_raster_config */
5702	count += 3;
5703	/* end clear state */
5704	count += 2;
5705	/* clear state */
5706	count += 2;
5707
5708	return count;
5709}
5710
5711void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5712{
5713	u32 count = 0, i;
5714	const struct cs_section_def *sect = NULL;
5715	const struct cs_extent_def *ext = NULL;
5716
5717	if (rdev->rlc.cs_data == NULL)
5718		return;
5719	if (buffer == NULL)
5720		return;
5721
5722	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5723	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5724
5725	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5726	buffer[count++] = cpu_to_le32(0x80000000);
5727	buffer[count++] = cpu_to_le32(0x80000000);
5728
5729	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5730		for (ext = sect->section; ext->extent != NULL; ++ext) {
5731			if (sect->id == SECT_CONTEXT) {
5732				buffer[count++] =
5733					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5734				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5735				for (i = 0; i < ext->reg_count; i++)
5736					buffer[count++] = cpu_to_le32(ext->extent[i]);
5737			} else {
5738				return;
5739			}
5740		}
5741	}
5742
5743	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5744	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5745	switch (rdev->family) {
5746	case CHIP_TAHITI:
5747	case CHIP_PITCAIRN:
5748		buffer[count++] = cpu_to_le32(0x2a00126a);
5749		break;
5750	case CHIP_VERDE:
5751		buffer[count++] = cpu_to_le32(0x0000124a);
5752		break;
5753	case CHIP_OLAND:
5754		buffer[count++] = cpu_to_le32(0x00000082);
5755		break;
5756	case CHIP_HAINAN:
5757		buffer[count++] = cpu_to_le32(0x00000000);
5758		break;
5759	default:
5760		buffer[count++] = cpu_to_le32(0x00000000);
5761		break;
5762	}
5763
5764	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5765	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5766
5767	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5768	buffer[count++] = cpu_to_le32(0);
5769}
5770
5771static void si_init_pg(struct radeon_device *rdev)
5772{
5773	if (rdev->pg_flags) {
5774		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5775			si_init_dma_pg(rdev);
5776		}
5777		si_init_ao_cu_mask(rdev);
5778		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5779			si_init_gfx_cgpg(rdev);
5780		} else {
5781			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5782			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5783		}
5784		si_enable_dma_pg(rdev, true);
5785		si_enable_gfx_cgpg(rdev, true);
5786	} else {
5787		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5788		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5789	}
5790}
5791
5792static void si_fini_pg(struct radeon_device *rdev)
5793{
5794	if (rdev->pg_flags) {
5795		si_enable_dma_pg(rdev, false);
5796		si_enable_gfx_cgpg(rdev, false);
5797	}
5798}
5799
5800/*
5801 * RLC
5802 */
5803void si_rlc_reset(struct radeon_device *rdev)
5804{
5805	u32 tmp = RREG32(GRBM_SOFT_RESET);
5806
5807	tmp |= SOFT_RESET_RLC;
5808	WREG32(GRBM_SOFT_RESET, tmp);
5809	udelay(50);
5810	tmp &= ~SOFT_RESET_RLC;
5811	WREG32(GRBM_SOFT_RESET, tmp);
5812	udelay(50);
5813}
5814
5815static void si_rlc_stop(struct radeon_device *rdev)
5816{
5817	WREG32(RLC_CNTL, 0);
5818
5819	si_enable_gui_idle_interrupt(rdev, false);
5820
5821	si_wait_for_rlc_serdes(rdev);
5822}
5823
5824static void si_rlc_start(struct radeon_device *rdev)
5825{
5826	WREG32(RLC_CNTL, RLC_ENABLE);
5827
5828	si_enable_gui_idle_interrupt(rdev, true);
5829
5830	udelay(50);
5831}
5832
5833static bool si_lbpw_supported(struct radeon_device *rdev)
5834{
5835	u32 tmp;
5836
5837	/* Enable LBPW only for DDR3 */
5838	tmp = RREG32(MC_SEQ_MISC0);
5839	if ((tmp & 0xF0000000) == 0xB0000000)
5840		return true;
5841	return false;
5842}
5843
5844static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5845{
5846	u32 tmp;
5847
5848	tmp = RREG32(RLC_LB_CNTL);
5849	if (enable)
5850		tmp |= LOAD_BALANCE_ENABLE;
5851	else
5852		tmp &= ~LOAD_BALANCE_ENABLE;
5853	WREG32(RLC_LB_CNTL, tmp);
5854
5855	if (!enable) {
5856		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5857		WREG32(SPI_LB_CU_MASK, 0x00ff);
5858	}
5859}
5860
5861static int si_rlc_resume(struct radeon_device *rdev)
5862{
5863	u32 i;
5864
5865	if (!rdev->rlc_fw)
5866		return -EINVAL;
5867
5868	si_rlc_stop(rdev);
5869
5870	si_rlc_reset(rdev);
5871
5872	si_init_pg(rdev);
5873
5874	si_init_cg(rdev);
5875
5876	WREG32(RLC_RL_BASE, 0);
5877	WREG32(RLC_RL_SIZE, 0);
5878	WREG32(RLC_LB_CNTL, 0);
5879	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5880	WREG32(RLC_LB_CNTR_INIT, 0);
5881	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5882
5883	WREG32(RLC_MC_CNTL, 0);
5884	WREG32(RLC_UCODE_CNTL, 0);
5885
5886	if (rdev->new_fw) {
5887		const struct rlc_firmware_header_v1_0 *hdr =
5888			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5889		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5890		const __le32 *fw_data = (const __le32 *)
5891			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5892
5893		radeon_ucode_print_rlc_hdr(&hdr->header);
5894
5895		for (i = 0; i < fw_size; i++) {
5896			WREG32(RLC_UCODE_ADDR, i);
5897			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5898		}
5899	} else {
5900		const __be32 *fw_data =
5901			(const __be32 *)rdev->rlc_fw->data;
5902		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5903			WREG32(RLC_UCODE_ADDR, i);
5904			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5905		}
5906	}
5907	WREG32(RLC_UCODE_ADDR, 0);
5908
5909	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5910
5911	si_rlc_start(rdev);
5912
5913	return 0;
5914}
5915
5916static void si_enable_interrupts(struct radeon_device *rdev)
5917{
5918	u32 ih_cntl = RREG32(IH_CNTL);
5919	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5920
5921	ih_cntl |= ENABLE_INTR;
5922	ih_rb_cntl |= IH_RB_ENABLE;
5923	WREG32(IH_CNTL, ih_cntl);
5924	WREG32(IH_RB_CNTL, ih_rb_cntl);
5925	rdev->ih.enabled = true;
5926}
5927
5928static void si_disable_interrupts(struct radeon_device *rdev)
5929{
5930	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5931	u32 ih_cntl = RREG32(IH_CNTL);
5932
5933	ih_rb_cntl &= ~IH_RB_ENABLE;
5934	ih_cntl &= ~ENABLE_INTR;
5935	WREG32(IH_RB_CNTL, ih_rb_cntl);
5936	WREG32(IH_CNTL, ih_cntl);
5937	/* set rptr, wptr to 0 */
5938	WREG32(IH_RB_RPTR, 0);
5939	WREG32(IH_RB_WPTR, 0);
5940	rdev->ih.enabled = false;
5941	rdev->ih.rptr = 0;
5942}
5943
5944static void si_disable_interrupt_state(struct radeon_device *rdev)
5945{
5946	int i;
5947	u32 tmp;
5948
5949	tmp = RREG32(CP_INT_CNTL_RING0) &
5950		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5951	WREG32(CP_INT_CNTL_RING0, tmp);
5952	WREG32(CP_INT_CNTL_RING1, 0);
5953	WREG32(CP_INT_CNTL_RING2, 0);
5954	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5955	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5956	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5957	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5958	WREG32(GRBM_INT_CNTL, 0);
5959	WREG32(SRBM_INT_CNTL, 0);
5960	for (i = 0; i < rdev->num_crtc; i++)
5961		WREG32(INT_MASK + crtc_offsets[i], 0);
5962	for (i = 0; i < rdev->num_crtc; i++)
5963		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5964
5965	if (!ASIC_IS_NODCE(rdev)) {
5966		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5967
5968		for (i = 0; i < 6; i++)
5969			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5970				   DC_HPDx_INT_POLARITY);
5971	}
5972}
5973
/*
 * si_irq_init - allocate and program the IH ring, then enable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Allocates the IH ring, disables interrupts, resumes the RLC, programs the
 * interrupt controller and IH ring buffer registers, forces every interrupt
 * source off, enables PCI bus mastering and finally enables the IH.
 * Returns 0 on success, or the error from r600_ih_ring_alloc() or
 * si_rlc_resume() (in the latter case the IH ring is released again).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation done above */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6044
/*
 * si_irq_set - program the interrupt enable registers from rdev->irq
 *
 * @rdev: radeon_device pointer
 *
 * Builds the CP, DMA, thermal, vblank/pflip and HPD enable masks from the
 * software state in rdev->irq and writes them to the hardware.  If the IH
 * itself is disabled, everything is turned off instead.
 * Returns 0 on success, -EINVAL if no interrupt handler is installed.
 *
 * The order we write back each register here is important
 */
int si_irq_set(struct radeon_device *rdev)
{
	int i;
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register values with the enables cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	/* grbm_int_cntl is never modified above, so this always writes 0 */
	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	/* vblank is enabled when either vblank or a pending pflip needs it */
	for (i = 0; i < rdev->num_crtc; i++) {
		radeon_irq_kms_set_irq_n_enabled(
		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
		    rdev->irq.crtc_vblank_int[i] ||
		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
	}

	/* GRPH_PFLIP_INT_MASK is written unconditionally for every crtc */
	for (i = 0; i < rdev->num_crtc; i++)
		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);

	if (!ASIC_IS_NODCE(rdev)) {
		for (i = 0; i < 6; i++) {
			radeon_irq_kms_set_irq_n_enabled(
			    rdev, DC_HPDx_INT_CONTROL(i),
			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
			    rdev->irq.hpd[i], "HPD", i);
		}
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
6139
/*
 * si_irq_ack - snapshot and acknowledge pending display interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Reads the display and graphics interrupt status registers into
 * rdev->irq.stat_regs.evergreen and acknowledges every pending page flip,
 * vblank, vline, HPD and HPD RX interrupt found there.  Does nothing when
 * the ASIC has no display engine (ASIC_IS_NODCE).
 *
 * The order we write back each register here is important
 */
static inline void si_irq_ack(struct radeon_device *rdev)
{
	int i, j;
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;

	if (ASIC_IS_NODCE(rdev))
		return;

	/* latch the current status; grph status only exists per real crtc */
	for (i = 0; i < 6; i++) {
		disp_int[i] = RREG32(si_disp_int_status[i]);
		if (i < rdev->num_crtc)
			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
	}

	/* We write back each interrupt register in pairs of two */
	/* NOTE(review): the inner loops run to i + 2 without checking
	 * num_crtc, so this presumably relies on num_crtc being even - confirm.
	 */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		for (j = i; j < (i + 2); j++) {
			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
				       GRPH_PFLIP_INT_CLEAR);
		}

		for (j = i; j < (i + 2); j++) {
			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
				WREG32(VBLANK_STATUS + crtc_offsets[j],
				       VBLANK_ACK);
			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
				WREG32(VLINE_STATUS + crtc_offsets[j],
				       VLINE_ACK);
		}
	}

	/* hotplug detect: all HPD acks first, then all HPD RX acks */
	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
	}

	for (i = 0; i < 6; i++) {
		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
	}
}
6184
/*
 * si_irq_disable - disable the IH and quiesce all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Disables the IH, waits 1 ms, acknowledges whatever display interrupts are
 * still pending and then masks every interrupt source.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6193
/*
 * si_irq_suspend - disable interrupts and stop the RLC
 *
 * @rdev: radeon_device pointer
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6199
/*
 * si_irq_fini - suspend interrupt handling and release the IH ring
 *
 * @rdev: radeon_device pointer
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6205
6206static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6207{
6208	u32 wptr, tmp;
6209
6210	if (rdev->wb.enabled)
6211		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6212	else
6213		wptr = RREG32(IH_RB_WPTR);
6214
6215	if (wptr & RB_OVERFLOW) {
6216		wptr &= ~RB_OVERFLOW;
6217		/* When a ring buffer overflow happen start parsing interrupt
6218		 * from the last not overwritten vector (wptr + 16). Hopefully
6219		 * this should allow us to catchup.
6220		 */
6221		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6222			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6223		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6224		tmp = RREG32(IH_RB_CNTL);
6225		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6226		WREG32(IH_RB_CNTL, tmp);
6227	}
6228	return (wptr & rdev->ih.ptr_mask);
6229}
6230
/*        SI IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
/*
 * si_irq_process - drain the IH ring and dispatch each interrupt vector
 *
 * @rdev: radeon_device pointer
 *
 * Walks the IH ring from the software read pointer to the hardware write
 * pointer, 16 bytes per entry, and handles each entry according to its
 * source id.  Hotplug, DP and thermal handling is deferred to work items
 * that are scheduled once the walk is done.  The walk is repeated if the
 * write pointer moved in the meantime.
 * Returns IRQ_NONE if the IH is disabled, the device is shutting down or
 * another context already holds rdev->ih.lock; IRQ_HANDLED otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
	u32 crtc_idx, hpd_idx;
	u32 mask;
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_thermal = false;
	u32 status, addr;
	const char *event_name;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		/* decode the three fields used from the 128-bit entry */
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
		case 2: /* D2 vblank/vline */
		case 3: /* D3 vblank/vline */
		case 4: /* D4 vblank/vline */
		case 5: /* D5 vblank/vline */
		case 6: /* D6 vblank/vline */
			crtc_idx = src_id - 1;

			if (src_data == 0) { /* vblank */
				mask = LB_D1_VBLANK_INTERRUPT;
				event_name = "vblank";

				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
					drm_handle_vblank(rdev->ddev, crtc_idx);
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
				}
				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
					radeon_crtc_handle_vblank(rdev,
								  crtc_idx);
				}

			} else if (src_data == 1) { /* vline */
				mask = LB_D1_VLINE_INTERRUPT;
				event_name = "vline";
			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			/* cross-check against the status latched by si_irq_ack() */
			if (!(disp_int[crtc_idx] & mask)) {
				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
					  crtc_idx + 1, event_name);
			}

			disp_int[crtc_idx] &= ~mask;
			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);

			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			/* source ids 8, 10, ... 18 map to crtc 0..5 */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			/* src_data 0..5 is HPD1..6, 6..11 is HPD_RX1..6 */
			if (src_data <= 5) {
				hpd_idx = src_data;
				mask = DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				event_name = "HPD";

			} else if (src_data <= 11) {
				hpd_idx = src_data - 6;
				mask = DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				event_name = "HPD_RX";

			} else {
				DRM_DEBUG("Unhandled interrupt: %d %d\n",
					  src_id, src_data);
				break;
			}

			if (!(disp_int[hpd_idx] & mask))
				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

			disp_int[hpd_idx] &= ~mask;
			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
			break;
		case 96:
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM protection fault: capture address/status first */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* the RINGID field selects which CP ring's fences to process */
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		/* publish the new read pointer to the hardware after every entry */
		WREG32(IH_RB_RPTR, rptr);
	}
	/* kick off the deferred work collected while walking the ring */
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6451
6452/*
6453 * startup/shutdown callbacks
6454 */
6455static void si_uvd_init(struct radeon_device *rdev)
6456{
6457	int r;
6458
6459	if (!rdev->has_uvd)
6460		return;
6461
6462	r = radeon_uvd_init(rdev);
6463	if (r) {
6464		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6465		/*
6466		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6467		 * to early fails uvd_v2_2_resume() and thus nothing happens
6468		 * there. So it is pointless to try to go through that code
6469		 * hence why we disable uvd here.
6470		 */
6471		rdev->has_uvd = 0;
6472		return;
6473	}
6474	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6475	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6476}
6477
6478static void si_uvd_start(struct radeon_device *rdev)
6479{
6480	int r;
6481
6482	if (!rdev->has_uvd)
6483		return;
6484
6485	r = uvd_v2_2_resume(rdev);
6486	if (r) {
6487		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6488		goto error;
6489	}
6490	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6491	if (r) {
6492		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6493		goto error;
6494	}
6495	return;
6496
6497error:
6498	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6499}
6500
6501static void si_uvd_resume(struct radeon_device *rdev)
6502{
6503	struct radeon_ring *ring;
6504	int r;
6505
6506	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6507		return;
6508
6509	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6510	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6511	if (r) {
6512		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6513		return;
6514	}
6515	r = uvd_v1_0_init(rdev);
6516	if (r) {
6517		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6518		return;
6519	}
6520}
6521
6522static void si_vce_init(struct radeon_device *rdev)
6523{
6524	int r;
6525
6526	if (!rdev->has_vce)
6527		return;
6528
6529	r = radeon_vce_init(rdev);
6530	if (r) {
6531		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6532		/*
6533		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6534		 * to early fails si_vce_start() and thus nothing happens
6535		 * there. So it is pointless to try to go through that code
6536		 * hence why we disable vce here.
6537		 */
6538		rdev->has_vce = 0;
6539		return;
6540	}
6541	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6542	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6543	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6544	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6545}
6546
6547static void si_vce_start(struct radeon_device *rdev)
6548{
6549	int r;
6550
6551	if (!rdev->has_vce)
6552		return;
6553
6554	r = radeon_vce_resume(rdev);
6555	if (r) {
6556		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6557		goto error;
6558	}
6559	r = vce_v1_0_resume(rdev);
6560	if (r) {
6561		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6562		goto error;
6563	}
6564	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6565	if (r) {
6566		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6567		goto error;
6568	}
6569	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6570	if (r) {
6571		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6572		goto error;
6573	}
6574	return;
6575
6576error:
6577	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6578	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6579}
6580
6581static void si_vce_resume(struct radeon_device *rdev)
6582{
6583	struct radeon_ring *ring;
6584	int r;
6585
6586	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6587		return;
6588
6589	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6590	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6591	if (r) {
6592		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6593		return;
6594	}
6595	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6596	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6597	if (r) {
6598		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6599		return;
6600	}
6601	r = vce_v1_0_init(rdev);
6602	if (r) {
6603		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6604		return;
6605	}
6606}
6607
/*
 * si_startup - program the hardware and bring up all rings
 *
 * @rdev: radeon_device pointer
 *
 * Used by both si_init() and si_resume().  Programs the PCIE link and ASPM,
 * the memory controller, GART and GPU core, sets up the RLC and writeback
 * buffers, starts the fence rings, initializes interrupts, initializes the
 * CP/DMA rings, loads the CP microcode and finally sets up the IB pool, VM
 * manager and audio.
 * Returns 0 on success or the first error encountered; nothing set up
 * before the failing step is undone here.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* the MC microcode is only loaded here when dpm is not enabled */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on the three CP rings and two DMA rings */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are handled inside these helpers and are not fatal */
	si_uvd_start(rdev);
	si_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	/* initialize the CP rings, each with RADEON_CP_PACKET2 as nop */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	/* initialize the DMA rings, each with a DMA NOP packet as nop */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	si_uvd_resume(rdev);
	si_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6766
6767int si_resume(struct radeon_device *rdev)
6768{
6769	int r;
6770
6771	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6772	 * posting will perform necessary task to bring back GPU into good
6773	 * shape.
6774	 */
6775	/* post card */
6776	atom_asic_init(rdev->mode_info.atom_context);
6777
6778	/* init golden registers */
6779	si_init_golden_registers(rdev);
6780
6781	if (rdev->pm.pm_method == PM_METHOD_DPM)
6782		radeon_pm_resume(rdev);
6783
6784	rdev->accel_working = true;
6785	r = si_startup(rdev);
6786	if (r) {
6787		DRM_ERROR("si startup failed on resume\n");
6788		rdev->accel_working = false;
6789		return r;
6790	}
6791
6792	return r;
6793
6794}
6795
/*
 * si_suspend - quiesce the hardware before suspend
 *
 * @rdev: radeon_device pointer
 *
 * Tears down power management, audio, the VM manager, CP, DMA, UVD/VCE
 * (when present), PG/CG, interrupts, writeback and the GART, in that order.
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6816
6817/* Plan is to move initialization in that function and use
6818 * helper function so that radeon_device_init pretty much
6819 * do nothing more than calling asic specific function. This
6820 * should also allow to remove a bunch of callback function
6821 * like vram_info.
6822 */
6823int si_init(struct radeon_device *rdev)
6824{
6825	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6826	int r;
6827
6828	/* Read BIOS */
6829	if (!radeon_get_bios(rdev)) {
6830		if (ASIC_IS_AVIVO(rdev))
6831			return -EINVAL;
6832	}
6833	/* Must be an ATOMBIOS */
6834	if (!rdev->is_atom_bios) {
6835		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6836		return -EINVAL;
6837	}
6838	r = radeon_atombios_init(rdev);
6839	if (r)
6840		return r;
6841
6842	/* Post card if necessary */
6843	if (!radeon_card_posted(rdev)) {
6844		if (!rdev->bios) {
6845			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6846			return -EINVAL;
6847		}
6848		DRM_INFO("GPU not posted. posting now...\n");
6849		atom_asic_init(rdev->mode_info.atom_context);
6850	}
6851	/* init golden registers */
6852	si_init_golden_registers(rdev);
6853	/* Initialize scratch registers */
6854	si_scratch_init(rdev);
6855	/* Initialize surface registers */
6856	radeon_surface_init(rdev);
6857	/* Initialize clocks */
6858	radeon_get_clock_info(rdev->ddev);
6859
6860	/* Fence driver */
6861	r = radeon_fence_driver_init(rdev);
6862	if (r)
6863		return r;
6864
6865	/* initialize memory controller */
6866	r = si_mc_init(rdev);
6867	if (r)
6868		return r;
6869	/* Memory manager */
6870	r = radeon_bo_init(rdev);
6871	if (r)
6872		return r;
6873
6874	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6875	    !rdev->rlc_fw || !rdev->mc_fw) {
6876		r = si_init_microcode(rdev);
6877		if (r) {
6878			DRM_ERROR("Failed to load firmware!\n");
6879			return r;
6880		}
6881	}
6882
6883	/* Initialize power management */
6884	radeon_pm_init(rdev);
6885
6886	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6887	ring->ring_obj = NULL;
6888	r600_ring_init(rdev, ring, 1024 * 1024);
6889
6890	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6891	ring->ring_obj = NULL;
6892	r600_ring_init(rdev, ring, 1024 * 1024);
6893
6894	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6895	ring->ring_obj = NULL;
6896	r600_ring_init(rdev, ring, 1024 * 1024);
6897
6898	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6899	ring->ring_obj = NULL;
6900	r600_ring_init(rdev, ring, 64 * 1024);
6901
6902	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6903	ring->ring_obj = NULL;
6904	r600_ring_init(rdev, ring, 64 * 1024);
6905
6906	si_uvd_init(rdev);
6907	si_vce_init(rdev);
6908
6909	rdev->ih.ring_obj = NULL;
6910	r600_ih_ring_init(rdev, 64 * 1024);
6911
6912	r = r600_pcie_gart_init(rdev);
6913	if (r)
6914		return r;
6915
6916	rdev->accel_working = true;
6917	r = si_startup(rdev);
6918	if (r) {
6919		dev_err(rdev->dev, "disabling GPU acceleration\n");
6920		si_cp_fini(rdev);
6921		cayman_dma_fini(rdev);
6922		si_irq_fini(rdev);
6923		sumo_rlc_fini(rdev);
6924		radeon_wb_fini(rdev);
6925		radeon_ib_pool_fini(rdev);
6926		radeon_vm_manager_fini(rdev);
6927		radeon_irq_kms_fini(rdev);
6928		si_pcie_gart_fini(rdev);
6929		rdev->accel_working = false;
6930	}
6931
6932	/* Don't start up if the MC ucode is missing.
6933	 * The default clocks and voltages before the MC ucode
6934	 * is loaded are not suffient for advanced operations.
6935	 */
6936	if (!rdev->mc_fw) {
6937		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6938		return -EINVAL;
6939	}
6940
6941	return 0;
6942}
6943
/*
 * si_fini - asic specific driver and hardware teardown
 *
 * @rdev: radeon_device pointer
 *
 * Releases what si_init()/si_startup() set up: power management, CP, DMA,
 * PG/CG, interrupts, RLC, writeback, VM manager, IB pool, UVD/VCE (when
 * present), GART, VRAM scratch, GEM, fence driver, memory manager and
 * atombios state.  The BIOS image is freed last.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	/* clear the pointer so the freed image cannot be reused */
	rdev->bios = NULL;
}
6972
6973/**
6974 * si_get_gpu_clock_counter - return GPU clock counter snapshot
6975 *
6976 * @rdev: radeon_device pointer
6977 *
6978 * Fetches a GPU clock counter snapshot (SI).
6979 * Returns the 64 bit clock counter snapshot.
6980 */
6981uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6982{
6983	uint64_t clock;
6984
6985	mutex_lock(&rdev->gpu_clock_mutex);
6986	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6987	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6988		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6989	mutex_unlock(&rdev->gpu_clock_mutex);
6990	return clock;
6991}
6992
/*
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK; 0 leaves the PLL in bypass
 * @dclk: requested DCLK; 0 leaves the PLL in bypass
 *
 * Switches VCLK/DCLK to the bypass clock, puts the PLL in bypass mode and,
 * unless either clock is 0, computes new dividers, reprograms the PLL and
 * switches VCLK/DCLK back to the PLL output.
 * Returns 0 on success, or the error from radeon_uvd_calc_upll_dividers()
 * or radeon_uvd_send_upll_ctlreq().
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode */
		return 0;
	}

	/* compute the feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* disable sleep mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* UPLL_SPARE_ISPARE9 is set only for feedback dividers >= 307200 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7081
/**
 * si_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Returns without changing anything when the GPU sits directly on a root
 * bus, when radeon_pcie_gen2 is 0, for IGP or non-PCIE devices, when
 * drm_pcie_get_speed_cap_mask() fails or reports neither DRM_PCIE_SPEED_50
 * nor DRM_PCIE_SPEED_80, when the link already runs at the best reported
 * rate, or when either the upstream bridge (rdev->pdev->bus->self) or the
 * GPU has no PCIE capability.
 *
 * For gen3 the link equalization is redone (up to 10 attempts) before the
 * target link speed is written to the GPU's Link Control 2 register and a
 * software speed change is initiated.  The final wait for the speed change
 * is bounded by rdev->usec_timeout iterations of 1 us; a timeout is not
 * reported to the caller.
 */
7082static void si_pcie_gen3_enable(struct radeon_device *rdev)
7083{
7084	struct pci_dev *root = rdev->pdev->bus->self;
7085	int bridge_pos, gpu_pos;
7086	u32 speed_cntl, mask, current_data_rate;
7087	int ret, i;
7088	u16 tmp16;
7089
7090	if (pci_is_root_bus(rdev->pdev->bus))
7091		return;
7092
7093	if (radeon_pcie_gen2 == 0)
7094		return;
7095
7096	if (rdev->flags & RADEON_IS_IGP)
7097		return;
7098
7099	if (!(rdev->flags & RADEON_IS_PCIE))
7100		return;
7101
7102	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7103	if (ret != 0)
7104		return;
7105
7106	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7107		return;
7108
	/* current_data_rate: 1 is reported below as gen2, 2 as gen3 */
7109	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7110	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7111		LC_CURRENT_DATA_RATE_SHIFT;
7112	if (mask & DRM_PCIE_SPEED_80) {
7113		if (current_data_rate == 2) {
7114			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7115			return;
7116		}
7117		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7118	} else if (mask & DRM_PCIE_SPEED_50) {
7119		if (current_data_rate == 1) {
7120			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7121			return;
7122		}
7123		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7124	}
7125
7126	bridge_pos = pci_pcie_cap(root);
7127	if (!bridge_pos)
7128		return;
7129
7130	gpu_pos = pci_pcie_cap(rdev->pdev);
7131	if (!gpu_pos)
7132		return;
7133
7134	if (mask & DRM_PCIE_SPEED_80) {
7135		/* re-try equalization if gen3 is not already enabled */
7136		if (current_data_rate != 2) {
7137			u16 bridge_cfg, gpu_cfg;
7138			u16 bridge_cfg2, gpu_cfg2;
7139			u32 max_lw, current_lw, tmp;
7140
7141			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7142			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7143
			/* set Hardware Autonomous Width Disable on both link partners */
7144			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7145			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7146
7147			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7148			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7149
7150			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7151			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7152			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7153
			/* running narrower than detected: request the detected width */
7154			if (current_lw < max_lw) {
7155				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7156				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7157					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7158					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7159					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7160					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7161				}
7162			}
7163
7164			for (i = 0; i < 10; i++) {
7165				/* check status */
7166				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7167				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7168					break;
7169
				/* snapshot both link control registers so the bits
				 * below can be restored after the redo
				 */
7170				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7171				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7172
7173				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7174				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7175
7176				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7177				tmp |= LC_SET_QUIESCE;
7178				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7179
7180				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7181				tmp |= LC_REDO_EQ;
7182				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7183
7184				mdelay(100);
7185
7186				/* linkctl */
7187				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7188				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7189				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7190				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7191
7192				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7193				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7194				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7195				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7196
7197				/* linkctl2 */
				/* restore Enter Compliance (bit 4) and Transmit Margin
				 * (bits 9:7).  The mask used to be (7 << 9), which
				 * covered bits 11:9 instead of the Transmit Margin field.
				 */
7198				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7199				tmp16 &= ~((1 << 4) | (7 << 7));
7200				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 7)));
7201				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7202
7203				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7204				tmp16 &= ~((1 << 4) | (7 << 7));
7205				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 7)));
7206				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7207
7208				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7209				tmp &= ~LC_SET_QUIESCE;
7210				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7211			}
7212		}
7213	}
7214
7215	/* set the link speed */
7216	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7217	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7218	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7219
	/* Target Link Speed lives in bits 3:0 of Link Control 2 */
7220	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7221	tmp16 &= ~0xf;
7222	if (mask & DRM_PCIE_SPEED_80)
7223		tmp16 |= 3; /* gen3 */
7224	else if (mask & DRM_PCIE_SPEED_50)
7225		tmp16 |= 2; /* gen2 */
7226	else
7227		tmp16 |= 1; /* gen1 */
7228	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7229
7230	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7231	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7232	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7233
	/* wait for LC_INITIATE_LINK_SPEED_CHANGE to read back as 0 */
7234	for (i = 0; i < rdev->usec_timeout; i++) {
7235		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7236		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7237			break;
7238		udelay(1);
7239	}
7240}
7241
/**
 * si_program_aspm - program the PCIE ASPM (L0s/L1) related registers
 *
 * @rdev: radeon_device pointer
 *
 * Returns without touching the hardware when radeon_aspm is 0 or the
 * device is not PCIE.  Every register update is a read-modify-write that
 * skips the write when the value would not change.
 *
 * The disable_* locals act as build-time switches; all of them are false
 * here, so the L0s, L1, PLL-off-in-L1 and CLKREQ paths are all taken.  The
 * CLKREQ path additionally requires that the GPU is not on a root bus and
 * that the upstream bridge reports PCI_EXP_LNKCAP_CLKPM.
 */
7242static void si_program_aspm(struct radeon_device *rdev)
7243{
7244	u32 data, orig;
7245	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7246	bool disable_clkreq = false;
7247
7248	if (radeon_aspm == 0)
7249		return;
7250
7251	if (!(rdev->flags & RADEON_IS_PCIE))
7252		return;
7253
	/* override the transmitted N_FTS value with 0x24 */
7254	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7255	data &= ~LC_XMIT_N_FTS_MASK;
7256	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7257	if (orig != data)
7258		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7259
7260	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7261	data |= LC_GO_TO_RECOVERY;
7262	if (orig != data)
7263		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7264
7265	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7266	data |= P_IGNORE_EDB_ERR;
7267	if (orig != data)
7268		WREG32_PCIE(PCIE_P_CNTL, data);
7269
	/* build the new PCIE_LC_CNTL value: start with both inactivity fields
	 * cleared and PMI-to-L1 disabled, then enable what is not switched off
	 */
7270	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7271	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7272	data |= LC_PMI_TO_L1_DIS;
7273	if (!disable_l0s)
7274		data |= LC_L0S_INACTIVITY(7);
7275
7276	if (!disable_l1) {
7277		data |= LC_L1_INACTIVITY(7);
7278		data &= ~LC_PMI_TO_L1_DIS;
7279		if (orig != data)
7280			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7281
7282		if (!disable_plloff_in_l1) {
7283			bool clk_req_support;
7284
			/* set the PLL power state fields for the OFF and TXS2
			 * states to 7 on both PIF PHY blocks
			 */
7285			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7286			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7287			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7288			if (orig != data)
7289				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7290
7291			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7292			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7293			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7294			if (orig != data)
7295				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7296
7297			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7298			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7299			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7300			if (orig != data)
7301				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7302
7303			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7304			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7305			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7306			if (orig != data)
7307				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7308
			/* on every family except OLAND and HAINAN, clear the PLL
			 * ramp-up time fields in PWRDOWN_0..3 of both PHY blocks
			 */
7309			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7310				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7311				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7312				if (orig != data)
7313					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7314
7315				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7316				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7317				if (orig != data)
7318					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7319
7320				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7321				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7322				if (orig != data)
7323					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7324
7325				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7326				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7327				if (orig != data)
7328					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7329
7330				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7331				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7332				if (orig != data)
7333					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7334
7335				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7336				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7337				if (orig != data)
7338					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7339
7340				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7341				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7342				if (orig != data)
7343					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7344
7345				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7346				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7347				if (orig != data)
7348					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7349			}
7350			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7351			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7352			data |= LC_DYN_LANES_PWR_STATE(3);
7353			if (orig != data)
7354				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7355
			/* LS2 exit time: 5 on OLAND/HAINAN, 0 on all other families */
7356			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7357			data &= ~LS2_EXIT_TIME_MASK;
7358			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7359				data |= LS2_EXIT_TIME(5);
7360			if (orig != data)
7361				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7362
7363			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7364			data &= ~LS2_EXIT_TIME_MASK;
7365			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7366				data |= LS2_EXIT_TIME(5);
7367			if (orig != data)
7368				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7369
			/* CLKREQ is usable only if the upstream bridge advertises
			 * clock power management in its link capabilities
			 */
7370			if (!disable_clkreq &&
7371			    !pci_is_root_bus(rdev->pdev->bus)) {
7372				struct pci_dev *root = rdev->pdev->bus->self;
7373				u32 lnkcap;
7374
7375				clk_req_support = false;
7376				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7377				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7378					clk_req_support = true;
7379			} else {
7380				clk_req_support = false;
7381			}
7382
7383			if (clk_req_support) {
				/* allow power-down in L1 and L2/L3, then reprogram
				 * the clock selections listed below
				 */
7384				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7385				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7386				if (orig != data)
7387					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7388
7389				orig = data = RREG32(THM_CLK_CNTL);
7390				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7391				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7392				if (orig != data)
7393					WREG32(THM_CLK_CNTL, data);
7394
7395				orig = data = RREG32(MISC_CLK_CNTL);
7396				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7397				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7398				if (orig != data)
7399					WREG32(MISC_CLK_CNTL, data);
7400
7401				orig = data = RREG32(CG_CLKPIN_CNTL);
7402				data &= ~BCLK_AS_XCLK;
7403				if (orig != data)
7404					WREG32(CG_CLKPIN_CNTL, data);
7405
7406				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7407				data &= ~FORCE_BIF_REFCLK_EN;
7408				if (orig != data)
7409					WREG32(CG_CLKPIN_CNTL_2, data);
7410
7411				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7412				data &= ~MPLL_CLKOUT_SEL_MASK;
7413				data |= MPLL_CLKOUT_SEL(4);
7414				if (orig != data)
7415					WREG32(MPLL_BYPASSCLK_SEL, data);
7416
7417				orig = data = RREG32(SPLL_CNTL_MODE);
7418				data &= ~SPLL_REFCLK_SEL_MASK;
7419				if (orig != data)
7420					WREG32(SPLL_CNTL_MODE, data);
7421			}
7422		}
7423	} else {
		/* L1 disabled: write PCIE_LC_CNTL with PMI-to-L1 left disabled */
7424		if (orig != data)
7425			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7426	}
7427
7428	orig = data = RREG32_PCIE(PCIE_CNTL2);
7429	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7430	if (orig != data)
7431		WREG32_PCIE(PCIE_CNTL2, data);
7432
	/* if every LC_N_FTS bit is set and both lane-reversal flags are
	 * reported, clear the L0s inactivity field again (presumably this
	 * turns L0s back off for such links - TODO confirm)
	 */
7433	if (!disable_l0s) {
7434		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7435		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7436			data = RREG32_PCIE(PCIE_LC_STATUS1);
7437			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7438				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7439				data &= ~LC_L0S_INACTIVITY_MASK;
7440				if (orig != data)
7441					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7442			}
7443		}
7444	}
7445}
7446
/**
 * si_vce_send_vcepll_ctlreq - send a control request to the VCE PLL
 *
 * @rdev: radeon_device pointer
 *
 * Pulses the CTLREQ bit of CG_VCEPLL_FUNC_CNTL (accessed through the SMC
 * register macros) and polls for both acknowledge bits.  The UPLL_* mask
 * names are reused for the VCE PLL register.
 *
 * Returns 0 on success, -ETIMEDOUT if CTLACK and CTLACK2 were not both set
 * after 100 polls spaced 10 ms apart.  CTLREQ is deasserted again in both
 * cases.
 */
7447static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7448{
7449	unsigned i;
7450
7451	/* make sure VCEPLL_CTLREQ is deasserted */
7452	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7453
7454	mdelay(10);
7455
7456	/* assert VCEPLL_CTLREQ */
7457	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7458
7459	/* wait for CTLACK and CTLACK2 to get asserted */
7460	for (i = 0; i < 100; ++i) {
7461		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7462		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7463			break;
7464		mdelay(10);
7465	}
7466
7467	/* deassert VCEPLL_CTLREQ */
7468	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7469
	/* i reaches 100 only when the poll loop never saw both ACK bits */
7470	if (i == 100) {
7471		DRM_ERROR("Timeout setting VCE clocks!\n");
7472		return -ETIMEDOUT;
7473	}
7474
7475	return 0;
7476}
7477
/**
 * si_set_vce_clocks - program the VCE PLL for the requested VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested EVCLK; 0 leaves the PLL bypassed and asleep
 * @ecclk: requested ECCLK; 0 leaves the PLL bypassed and asleep
 *
 * Selects the bypass source for EVCLK/ECCLK and puts the VCE PLL into
 * bypass mode.  If either clock is 0 the PLL is additionally put to sleep
 * and the function returns.  Otherwise the dividers computed by
 * radeon_uvd_calc_upll_dividers() are programmed and EVCLK/ECCLK are
 * switched to source selection 16.  The unit of @evclk/@ecclk is whatever
 * radeon_uvd_calc_upll_dividers() expects; it is not visible here.
 *
 * On the success path the function spends 131 ms in mdelay() calls, in
 * addition to the waiting done by si_vce_send_vcepll_ctlreq().
 *
 * Returns 0 on success (including the bypass-only case), otherwise the
 * error returned by radeon_uvd_calc_upll_dividers() or
 * si_vce_send_vcepll_ctlreq().
 */
7478int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7479{
7480	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7481	int r;
7482
7483	/* bypass evclk and ecclk with bclk */
7484	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7485		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7486		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7487
7488	/* put PLL in bypass mode */
7489	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7490		     ~VCEPLL_BYPASS_EN_MASK);
7491
7492	if (!evclk || !ecclk) {
7493		/* keep the Bypass mode, put PLL to sleep */
7494		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7495			     ~VCEPLL_SLEEP_MASK);
7496		return 0;
7497	}
7498
	/* compute the feedback divider and the two post dividers */
7499	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7500					  16384, 0x03FFFFFF, 0, 128, 5,
7501					  &fb_div, &evclk_div, &ecclk_div);
7502	if (r)
7503		return r;
7504
7505	/* set RESET_ANTI_MUX to 0 */
7506	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7507
7508	/* set VCO_MODE to 1 */
7509	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7510		     ~VCEPLL_VCO_MODE_MASK);
7511
7512	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7513	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7514		     ~VCEPLL_SLEEP_MASK);
7515	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7516
7517	/* deassert VCEPLL_RESET */
7518	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7519
7520	mdelay(1);
7521
	/* first control request: issued with the PLL still in bypass mode */
7522	r = si_vce_send_vcepll_ctlreq(rdev);
7523	if (r)
7524		return r;
7525
7526	/* assert VCEPLL_RESET again */
7527	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7528
7529	/* disable spread spectrum. */
7530	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7531
7532	/* set feedback divider */
7533	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7534
7535	/* set ref divider to 0 */
7536	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7537
7538	/* set PDIV_A and PDIV_B */
7539	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7540		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7541		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7542
7543	/* give the PLL some time to settle */
7544	mdelay(15);
7545
7546	/* deassert PLL_RESET */
7547	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7548
7549	mdelay(15);
7550
7551	/* switch from bypass mode to normal mode */
7552	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7553
	/* second control request: issued after leaving bypass mode */
7554	r = si_vce_send_vcepll_ctlreq(rdev);
7555	if (r)
7556		return r;
7557
7558	/* switch EVCLK and ECCLK selection */
7559	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7560		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7561		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7562
7563	mdelay(100);
7564
7565	return 0;
7566}