   1/*
   2 * Copyright 2011 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 * Authors: Alex Deucher
  23 */
  24#include <linux/firmware.h>
  25#include <linux/platform_device.h>
  26#include <linux/slab.h>
  27#include <linux/module.h>
  28#include "drmP.h"
  29#include "radeon.h"
  30#include "radeon_asic.h"
  31#include "radeon_drm.h"
  32#include "sid.h"
  33#include "atom.h"
  34#include "si_blit_shaders.h"
  35
  36#define SI_PFP_UCODE_SIZE 2144
  37#define SI_PM4_UCODE_SIZE 2144
  38#define SI_CE_UCODE_SIZE 2144
  39#define SI_RLC_UCODE_SIZE 2048
  40#define SI_MC_UCODE_SIZE 7769
  41
  42MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
  43MODULE_FIRMWARE("radeon/TAHITI_me.bin");
  44MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
  45MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
  46MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
  47MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
  48MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
  49MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
  50MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
  51MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
  52MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
  53MODULE_FIRMWARE("radeon/VERDE_me.bin");
  54MODULE_FIRMWARE("radeon/VERDE_ce.bin");
  55MODULE_FIRMWARE("radeon/VERDE_mc.bin");
  56MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
  57
  58extern int r600_ih_ring_alloc(struct radeon_device *rdev);
  59extern void r600_ih_ring_fini(struct radeon_device *rdev);
  60extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
  61extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
  62extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
  63extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
  64
  65/* get temperature in millidegrees */
  66int si_get_temp(struct radeon_device *rdev)
  67{
  68	u32 temp;
  69	int actual_temp = 0;
  70
  71	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
  72		CTF_TEMP_SHIFT;
  73
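	/* Bit 9 of the CTF temperature field appears to indicate an out-of-range
	 * reading, in which case the value is clamped to 255; otherwise the low
	 * 9 bits are taken as degrees and scaled to millidegrees. */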
  74	if (temp & 0x200)
  75		actual_temp = 255;
  76	else
  77		actual_temp = temp & 0x1ff;
  78
  79	actual_temp = (actual_temp * 1000);
  80
  81	return actual_temp;
  82}
  83
  84#define TAHITI_IO_MC_REGS_SIZE 36
  85
  86static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
  87	{0x0000006f, 0x03044000},
  88	{0x00000070, 0x0480c018},
  89	{0x00000071, 0x00000040},
  90	{0x00000072, 0x01000000},
  91	{0x00000074, 0x000000ff},
  92	{0x00000075, 0x00143400},
  93	{0x00000076, 0x08ec0800},
  94	{0x00000077, 0x040000cc},
  95	{0x00000079, 0x00000000},
  96	{0x0000007a, 0x21000409},
  97	{0x0000007c, 0x00000000},
  98	{0x0000007d, 0xe8000000},
  99	{0x0000007e, 0x044408a8},
 100	{0x0000007f, 0x00000003},
 101	{0x00000080, 0x00000000},
 102	{0x00000081, 0x01000000},
 103	{0x00000082, 0x02000000},
 104	{0x00000083, 0x00000000},
 105	{0x00000084, 0xe3f3e4f4},
 106	{0x00000085, 0x00052024},
 107	{0x00000087, 0x00000000},
 108	{0x00000088, 0x66036603},
 109	{0x00000089, 0x01000000},
 110	{0x0000008b, 0x1c0a0000},
 111	{0x0000008c, 0xff010000},
 112	{0x0000008e, 0xffffefff},
 113	{0x0000008f, 0xfff3efff},
 114	{0x00000090, 0xfff3efbf},
 115	{0x00000094, 0x00101101},
 116	{0x00000095, 0x00000fff},
 117	{0x00000096, 0x00116fff},
 118	{0x00000097, 0x60010000},
 119	{0x00000098, 0x10010000},
 120	{0x00000099, 0x00006000},
 121	{0x0000009a, 0x00001000},
 122	{0x0000009f, 0x00a77400}
 123};
 124
 125static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
 126	{0x0000006f, 0x03044000},
 127	{0x00000070, 0x0480c018},
 128	{0x00000071, 0x00000040},
 129	{0x00000072, 0x01000000},
 130	{0x00000074, 0x000000ff},
 131	{0x00000075, 0x00143400},
 132	{0x00000076, 0x08ec0800},
 133	{0x00000077, 0x040000cc},
 134	{0x00000079, 0x00000000},
 135	{0x0000007a, 0x21000409},
 136	{0x0000007c, 0x00000000},
 137	{0x0000007d, 0xe8000000},
 138	{0x0000007e, 0x044408a8},
 139	{0x0000007f, 0x00000003},
 140	{0x00000080, 0x00000000},
 141	{0x00000081, 0x01000000},
 142	{0x00000082, 0x02000000},
 143	{0x00000083, 0x00000000},
 144	{0x00000084, 0xe3f3e4f4},
 145	{0x00000085, 0x00052024},
 146	{0x00000087, 0x00000000},
 147	{0x00000088, 0x66036603},
 148	{0x00000089, 0x01000000},
 149	{0x0000008b, 0x1c0a0000},
 150	{0x0000008c, 0xff010000},
 151	{0x0000008e, 0xffffefff},
 152	{0x0000008f, 0xfff3efff},
 153	{0x00000090, 0xfff3efbf},
 154	{0x00000094, 0x00101101},
 155	{0x00000095, 0x00000fff},
 156	{0x00000096, 0x00116fff},
 157	{0x00000097, 0x60010000},
 158	{0x00000098, 0x10010000},
 159	{0x00000099, 0x00006000},
 160	{0x0000009a, 0x00001000},
 161	{0x0000009f, 0x00a47400}
 162};
 163
 164static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
 165	{0x0000006f, 0x03044000},
 166	{0x00000070, 0x0480c018},
 167	{0x00000071, 0x00000040},
 168	{0x00000072, 0x01000000},
 169	{0x00000074, 0x000000ff},
 170	{0x00000075, 0x00143400},
 171	{0x00000076, 0x08ec0800},
 172	{0x00000077, 0x040000cc},
 173	{0x00000079, 0x00000000},
 174	{0x0000007a, 0x21000409},
 175	{0x0000007c, 0x00000000},
 176	{0x0000007d, 0xe8000000},
 177	{0x0000007e, 0x044408a8},
 178	{0x0000007f, 0x00000003},
 179	{0x00000080, 0x00000000},
 180	{0x00000081, 0x01000000},
 181	{0x00000082, 0x02000000},
 182	{0x00000083, 0x00000000},
 183	{0x00000084, 0xe3f3e4f4},
 184	{0x00000085, 0x00052024},
 185	{0x00000087, 0x00000000},
 186	{0x00000088, 0x66036603},
 187	{0x00000089, 0x01000000},
 188	{0x0000008b, 0x1c0a0000},
 189	{0x0000008c, 0xff010000},
 190	{0x0000008e, 0xffffefff},
 191	{0x0000008f, 0xfff3efff},
 192	{0x00000090, 0xfff3efbf},
 193	{0x00000094, 0x00101101},
 194	{0x00000095, 0x00000fff},
 195	{0x00000096, 0x00116fff},
 196	{0x00000097, 0x60010000},
 197	{0x00000098, 0x10010000},
 198	{0x00000099, 0x00006000},
 199	{0x0000009a, 0x00001000},
 200	{0x0000009f, 0x00a37400}
 201};
 202
 203/* ucode loading */
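/*
 * Flow of si_mc_load_microcode() below (descriptive summary, not a spec):
 * if the MC sequencer is not already running, halt it and make it writable,
 * program the per-ASIC index/data pairs from the tables above through
 * MC_SEQ_IO_DEBUG_INDEX/DATA, stream the big-endian ucode words into
 * MC_SEQ_SUP_PGM, restart the sequencer, and poll MC_SEQ_TRAIN_WAKEUP_CNTL
 * for the TRAIN_DONE_D0/D1 bits.
 */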
 204static int si_mc_load_microcode(struct radeon_device *rdev)
 205{
 206	const __be32 *fw_data;
 207	u32 running, blackout = 0;
 208	u32 *io_mc_regs;
 209	int i, ucode_size, regs_size;
 210
 211	if (!rdev->mc_fw)
 212		return -EINVAL;
 213
 214	switch (rdev->family) {
 215	case CHIP_TAHITI:
 216		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
 217		ucode_size = SI_MC_UCODE_SIZE;
 218		regs_size = TAHITI_IO_MC_REGS_SIZE;
 219		break;
 220	case CHIP_PITCAIRN:
 221		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
 222		ucode_size = SI_MC_UCODE_SIZE;
 223		regs_size = TAHITI_IO_MC_REGS_SIZE;
 224		break;
 225	case CHIP_VERDE:
 226	default:
 227		io_mc_regs = (u32 *)&verde_io_mc_regs;
 228		ucode_size = SI_MC_UCODE_SIZE;
 229		regs_size = TAHITI_IO_MC_REGS_SIZE;
 230		break;
 231	}
 232
 233	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
 234
 235	if (running == 0) {
 236		if (running) {
 237			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
 238			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
 239		}
 240
 241		/* reset the engine and set to writable */
 242		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
 243		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
 244
 245		/* load mc io regs */
 246		for (i = 0; i < regs_size; i++) {
 247			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
 248			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
 249		}
 250		/* load the MC ucode */
 251		fw_data = (const __be32 *)rdev->mc_fw->data;
 252		for (i = 0; i < ucode_size; i++)
 253			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
 254
 255		/* put the engine back into the active state */
 256		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
 257		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
 258		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
 259
 260		/* wait for training to complete */
 261		for (i = 0; i < rdev->usec_timeout; i++) {
 262			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
 263				break;
 264			udelay(1);
 265		}
 266		for (i = 0; i < rdev->usec_timeout; i++) {
 267			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
 268				break;
 269			udelay(1);
 270		}
 271
 272		if (running)
 273			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
 274	}
 275
 276	return 0;
 277}
 278
 279static int si_init_microcode(struct radeon_device *rdev)
 280{
 281	struct platform_device *pdev;
 282	const char *chip_name;
 283	const char *rlc_chip_name;
 284	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
 285	char fw_name[30];
 286	int err;
 287
 288	DRM_DEBUG("\n");
 289
 290	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
 291	err = IS_ERR(pdev);
 292	if (err) {
 293		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
 294		return -EINVAL;
 295	}
 296
 297	switch (rdev->family) {
 298	case CHIP_TAHITI:
 299		chip_name = "TAHITI";
 300		rlc_chip_name = "TAHITI";
 301		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 302		me_req_size = SI_PM4_UCODE_SIZE * 4;
 303		ce_req_size = SI_CE_UCODE_SIZE * 4;
 304		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
 305		mc_req_size = SI_MC_UCODE_SIZE * 4;
 306		break;
 307	case CHIP_PITCAIRN:
 308		chip_name = "PITCAIRN";
 309		rlc_chip_name = "PITCAIRN";
 310		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 311		me_req_size = SI_PM4_UCODE_SIZE * 4;
 312		ce_req_size = SI_CE_UCODE_SIZE * 4;
 313		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
 314		mc_req_size = SI_MC_UCODE_SIZE * 4;
 315		break;
 316	case CHIP_VERDE:
 317		chip_name = "VERDE";
 318		rlc_chip_name = "VERDE";
 319		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 320		me_req_size = SI_PM4_UCODE_SIZE * 4;
 321		ce_req_size = SI_CE_UCODE_SIZE * 4;
 322		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
 323		mc_req_size = SI_MC_UCODE_SIZE * 4;
 324		break;
 325	default: BUG();
 326	}
 327
 328	DRM_INFO("Loading %s Microcode\n", chip_name);
 329
 330	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
 331	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
 332	if (err)
 333		goto out;
 334	if (rdev->pfp_fw->size != pfp_req_size) {
 335		printk(KERN_ERR
 336		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
 337		       rdev->pfp_fw->size, fw_name);
 338		err = -EINVAL;
 339		goto out;
 340	}
 341
 342	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
 343	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
 344	if (err)
 345		goto out;
 346	if (rdev->me_fw->size != me_req_size) {
 347		printk(KERN_ERR
 348		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
 349		       rdev->me_fw->size, fw_name);
 350		err = -EINVAL;
 351	}
 352
 353	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
 354	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
 355	if (err)
 356		goto out;
 357	if (rdev->ce_fw->size != ce_req_size) {
 358		printk(KERN_ERR
 359		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
 360		       rdev->ce_fw->size, fw_name);
 361		err = -EINVAL;
 362	}
 363
 364	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
 365	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
 366	if (err)
 367		goto out;
 368	if (rdev->rlc_fw->size != rlc_req_size) {
 369		printk(KERN_ERR
 370		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
 371		       rdev->rlc_fw->size, fw_name);
 372		err = -EINVAL;
 373	}
 374
 375	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
 376	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
 377	if (err)
 378		goto out;
 379	if (rdev->mc_fw->size != mc_req_size) {
 380		printk(KERN_ERR
 381		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
 382		       rdev->mc_fw->size, fw_name);
 383		err = -EINVAL;
 384	}
 385
 386out:
 387	platform_device_unregister(pdev);
 388
 389	if (err) {
 390		if (err != -EINVAL)
 391			printk(KERN_ERR
 392			       "si_cp: Failed to load firmware \"%s\"\n",
 393			       fw_name);
 394		release_firmware(rdev->pfp_fw);
 395		rdev->pfp_fw = NULL;
 396		release_firmware(rdev->me_fw);
 397		rdev->me_fw = NULL;
 398		release_firmware(rdev->ce_fw);
 399		rdev->ce_fw = NULL;
 400		release_firmware(rdev->rlc_fw);
 401		rdev->rlc_fw = NULL;
 402		release_firmware(rdev->mc_fw);
 403		rdev->mc_fw = NULL;
 404	}
 405	return err;
 406}
 407
 408/* watermark setup */
 409static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
 410				   struct radeon_crtc *radeon_crtc,
 411				   struct drm_display_mode *mode,
 412				   struct drm_display_mode *other_mode)
 413{
 414	u32 tmp;
 415	/*
 416	 * Line Buffer Setup
 417	 * There are 3 line buffers, each one shared by 2 display controllers.
 418	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
  419	 * the display controllers.  The partitioning is done via one of four
 420	 * preset allocations specified in bits 21:20:
 421	 *  0 - half lb
 422	 *  2 - whole lb, other crtc must be disabled
 423	 */
 424	/* this can get tricky if we have two large displays on a paired group
 425	 * of crtcs.  Ideally for multiple large displays we'd assign them to
 426	 * non-linked crtcs for maximum line buffer allocation.
 427	 */
 428	if (radeon_crtc->base.enabled && mode) {
 429		if (other_mode)
 430			tmp = 0; /* 1/2 */
 431		else
 432			tmp = 2; /* whole */
 433	} else
 434		tmp = 0;
 435
 436	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
 437	       DC_LB_MEMORY_CONFIG(tmp));
 438
 439	if (radeon_crtc->base.enabled && mode) {
 440		switch (tmp) {
 441		case 0:
 442		default:
 443			return 4096 * 2;
 444		case 2:
 445			return 8192 * 2;
 446		}
 447	}
 448
 449	/* controller not enabled, so no lb used */
 450	return 0;
 451}
 452
 453static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
 454{
 455	u32 tmp = RREG32(MC_SHARED_CHMAP);
 456
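	/* NOOFCHAN is an encoded field rather than a literal count; the switch
	 * below maps each encoding to the actual number of DRAM channels. */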
 457	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
 458	case 0:
 459	default:
 460		return 1;
 461	case 1:
 462		return 2;
 463	case 2:
 464		return 4;
 465	case 3:
 466		return 8;
 467	case 4:
 468		return 3;
 469	case 5:
 470		return 6;
 471	case 6:
 472		return 10;
 473	case 7:
 474		return 12;
 475	case 8:
 476		return 16;
 477	}
 478}
 479
 480struct dce6_wm_params {
 481	u32 dram_channels; /* number of dram channels */
 482	u32 yclk;          /* bandwidth per dram data pin in kHz */
 483	u32 sclk;          /* engine clock in kHz */
 484	u32 disp_clk;      /* display clock in kHz */
 485	u32 src_width;     /* viewport width */
 486	u32 active_time;   /* active display time in ns */
 487	u32 blank_time;    /* blank time in ns */
 488	bool interlaced;    /* mode is interlaced */
 489	fixed20_12 vsc;    /* vertical scale ratio */
 490	u32 num_heads;     /* number of active crtcs */
 491	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
 492	u32 lb_size;       /* line buffer allocated to pipe */
 493	u32 vtaps;         /* vertical scaler taps */
 494};
 495
 496static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
 497{
 498	/* Calculate raw DRAM Bandwidth */
 499	fixed20_12 dram_efficiency; /* 0.7 */
 500	fixed20_12 yclk, dram_channels, bandwidth;
 501	fixed20_12 a;
 502
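	/* The fixed20_12 math below effectively computes
	 *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7
	 * i.e. effective memory clock times bus width in bytes, derated by an
	 * assumed 70% DRAM efficiency and truncated back to an integer. */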
 503	a.full = dfixed_const(1000);
 504	yclk.full = dfixed_const(wm->yclk);
 505	yclk.full = dfixed_div(yclk, a);
 506	dram_channels.full = dfixed_const(wm->dram_channels * 4);
 507	a.full = dfixed_const(10);
 508	dram_efficiency.full = dfixed_const(7);
 509	dram_efficiency.full = dfixed_div(dram_efficiency, a);
 510	bandwidth.full = dfixed_mul(dram_channels, yclk);
 511	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
 512
 513	return dfixed_trunc(bandwidth);
 514}
 515
 516static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
 517{
 518	/* Calculate DRAM Bandwidth and the part allocated to display. */
 519	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
 520	fixed20_12 yclk, dram_channels, bandwidth;
 521	fixed20_12 a;
 522
 523	a.full = dfixed_const(1000);
 524	yclk.full = dfixed_const(wm->yclk);
 525	yclk.full = dfixed_div(yclk, a);
 526	dram_channels.full = dfixed_const(wm->dram_channels * 4);
 527	a.full = dfixed_const(10);
  528		disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
 529	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
 530	bandwidth.full = dfixed_mul(dram_channels, yclk);
 531	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
 532
 533	return dfixed_trunc(bandwidth);
 534}
 535
 536static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
 537{
 538	/* Calculate the display Data return Bandwidth */
 539	fixed20_12 return_efficiency; /* 0.8 */
 540	fixed20_12 sclk, bandwidth;
 541	fixed20_12 a;
 542
 543	a.full = dfixed_const(1000);
 544	sclk.full = dfixed_const(wm->sclk);
 545	sclk.full = dfixed_div(sclk, a);
 546	a.full = dfixed_const(10);
 547	return_efficiency.full = dfixed_const(8);
 548	return_efficiency.full = dfixed_div(return_efficiency, a);
 549	a.full = dfixed_const(32);
 550	bandwidth.full = dfixed_mul(a, sclk);
 551	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
 552
 553	return dfixed_trunc(bandwidth);
 554}
 555
 556static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
 557{
 558	return 32;
 559}
 560
 561static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
 562{
 563	/* Calculate the DMIF Request Bandwidth */
 564	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
 565	fixed20_12 disp_clk, sclk, bandwidth;
 566	fixed20_12 a, b1, b2;
 567	u32 min_bandwidth;
 568
 569	a.full = dfixed_const(1000);
 570	disp_clk.full = dfixed_const(wm->disp_clk);
 571	disp_clk.full = dfixed_div(disp_clk, a);
 572	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
 573	b1.full = dfixed_mul(a, disp_clk);
 574
 575	a.full = dfixed_const(1000);
 576	sclk.full = dfixed_const(wm->sclk);
 577	sclk.full = dfixed_div(sclk, a);
 578	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
 579	b2.full = dfixed_mul(a, sclk);
 580
 581	a.full = dfixed_const(10);
 582	disp_clk_request_efficiency.full = dfixed_const(8);
 583	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
 584
 585	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
 586
 587	a.full = dfixed_const(min_bandwidth);
 588	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
 589
 590	return dfixed_trunc(bandwidth);
 591}
 592
 593static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
 594{
  595	/* Calculate the available bandwidth. The display can use this much temporarily, but not on average. */
 596	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
 597	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
 598	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
 599
 600	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
 601}
 602
 603static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
 604{
 605	/* Calculate the display mode Average Bandwidth
 606	 * DisplayMode should contain the source and destination dimensions,
 607	 * timing, etc.
 608	 */
 609	fixed20_12 bpp;
 610	fixed20_12 line_time;
 611	fixed20_12 src_width;
 612	fixed20_12 bandwidth;
 613	fixed20_12 a;
 614
 615	a.full = dfixed_const(1000);
 616	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
 617	line_time.full = dfixed_div(line_time, a);
 618	bpp.full = dfixed_const(wm->bytes_per_pixel);
 619	src_width.full = dfixed_const(wm->src_width);
 620	bandwidth.full = dfixed_mul(src_width, bpp);
 621	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
 622	bandwidth.full = dfixed_div(bandwidth, line_time);
 623
 624	return dfixed_trunc(bandwidth);
 625}
 626
 627static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
 628{
  629	/* First calculate the latency in ns */
 630	u32 mc_latency = 2000; /* 2000 ns. */
 631	u32 available_bandwidth = dce6_available_bandwidth(wm);
 632	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
 633	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
 634	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
 635	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
 636		(wm->num_heads * cursor_line_pair_return_time);
 637	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
 638	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
 639	u32 tmp, dmif_size = 12288;
 640	fixed20_12 a, b, c;
 641
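	/* Rough summary of the estimate below: total latency is the MC latency,
	 * plus a DC pipe latency term, plus the time other heads may spend
	 * returning chunk and cursor data; if the line buffer cannot be refilled
	 * within one active line time, the shortfall is added at the end. */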
 642	if (wm->num_heads == 0)
 643		return 0;
 644
 645	a.full = dfixed_const(2);
 646	b.full = dfixed_const(1);
 647	if ((wm->vsc.full > a.full) ||
 648	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
 649	    (wm->vtaps >= 5) ||
 650	    ((wm->vsc.full >= a.full) && wm->interlaced))
 651		max_src_lines_per_dst_line = 4;
 652	else
 653		max_src_lines_per_dst_line = 2;
 654
 655	a.full = dfixed_const(available_bandwidth);
 656	b.full = dfixed_const(wm->num_heads);
 657	a.full = dfixed_div(a, b);
 658
 659	b.full = dfixed_const(mc_latency + 512);
 660	c.full = dfixed_const(wm->disp_clk);
 661	b.full = dfixed_div(b, c);
 662
 663	c.full = dfixed_const(dmif_size);
 664	b.full = dfixed_div(c, b);
 665
 666	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
 667
 668	b.full = dfixed_const(1000);
 669	c.full = dfixed_const(wm->disp_clk);
 670	b.full = dfixed_div(c, b);
 671	c.full = dfixed_const(wm->bytes_per_pixel);
 672	b.full = dfixed_mul(b, c);
 673
 674	lb_fill_bw = min(tmp, dfixed_trunc(b));
 675
 676	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 677	b.full = dfixed_const(1000);
 678	c.full = dfixed_const(lb_fill_bw);
 679	b.full = dfixed_div(c, b);
 680	a.full = dfixed_div(a, b);
 681	line_fill_time = dfixed_trunc(a);
 682
 683	if (line_fill_time < wm->active_time)
 684		return latency;
 685	else
 686		return latency + (line_fill_time - wm->active_time);
 687
 688}
 689
 690static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
 691{
 692	if (dce6_average_bandwidth(wm) <=
 693	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
 694		return true;
 695	else
 696		return false;
  697}
 698
 699static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
 700{
 701	if (dce6_average_bandwidth(wm) <=
 702	    (dce6_available_bandwidth(wm) / wm->num_heads))
 703		return true;
 704	else
 705		return false;
  706}
 707
 708static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
 709{
 710	u32 lb_partitions = wm->lb_size / wm->src_width;
 711	u32 line_time = wm->active_time + wm->blank_time;
 712	u32 latency_tolerant_lines;
 713	u32 latency_hiding;
 714	fixed20_12 a;
 715
 716	a.full = dfixed_const(1);
 717	if (wm->vsc.full > a.full)
 718		latency_tolerant_lines = 1;
 719	else {
 720		if (lb_partitions <= (wm->vtaps + 1))
 721			latency_tolerant_lines = 1;
 722		else
 723			latency_tolerant_lines = 2;
 724	}
 725
 726	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
 727
 728	if (dce6_latency_watermark(wm) <= latency_hiding)
 729		return true;
 730	else
 731		return false;
 732}
 733
 734static void dce6_program_watermarks(struct radeon_device *rdev,
 735					 struct radeon_crtc *radeon_crtc,
 736					 u32 lb_size, u32 num_heads)
 737{
 738	struct drm_display_mode *mode = &radeon_crtc->base.mode;
 739	struct dce6_wm_params wm;
 740	u32 pixel_period;
 741	u32 line_time = 0;
 742	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 743	u32 priority_a_mark = 0, priority_b_mark = 0;
 744	u32 priority_a_cnt = PRIORITY_OFF;
 745	u32 priority_b_cnt = PRIORITY_OFF;
 746	u32 tmp, arb_control3;
 747	fixed20_12 a, b, c;
 748
 749	if (radeon_crtc->base.enabled && num_heads && mode) {
 750		pixel_period = 1000000 / (u32)mode->clock;
 751		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
 752		priority_a_cnt = 0;
 753		priority_b_cnt = 0;
 754
 755		wm.yclk = rdev->pm.current_mclk * 10;
 756		wm.sclk = rdev->pm.current_sclk * 10;
 757		wm.disp_clk = mode->clock;
 758		wm.src_width = mode->crtc_hdisplay;
 759		wm.active_time = mode->crtc_hdisplay * pixel_period;
 760		wm.blank_time = line_time - wm.active_time;
 761		wm.interlaced = false;
 762		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
 763			wm.interlaced = true;
 764		wm.vsc = radeon_crtc->vsc;
 765		wm.vtaps = 1;
 766		if (radeon_crtc->rmx_type != RMX_OFF)
 767			wm.vtaps = 2;
 768		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
 769		wm.lb_size = lb_size;
 770		if (rdev->family == CHIP_ARUBA)
 771			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
 772		else
 773			wm.dram_channels = si_get_number_of_dram_channels(rdev);
 774		wm.num_heads = num_heads;
 775
 776		/* set for high clocks */
 777		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
 778		/* set for low clocks */
 779		/* wm.yclk = low clk; wm.sclk = low clk */
 780		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
 781
 782		/* possibly force display priority to high */
 783		/* should really do this at mode validation time... */
 784		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
 785		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
 786		    !dce6_check_latency_hiding(&wm) ||
 787		    (rdev->disp_priority == 2)) {
 788			DRM_DEBUG_KMS("force priority to high\n");
 789			priority_a_cnt |= PRIORITY_ALWAYS_ON;
 790			priority_b_cnt |= PRIORITY_ALWAYS_ON;
 791		}
 792
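		/* Convert the latency watermarks into priority marks: scale each one
		 * by the pixel clock and horizontal scale factor, then divide down by
		 * 16 to match what PRIORITY_A/B_CNT expects (sketch, not a spec). */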
 793		a.full = dfixed_const(1000);
 794		b.full = dfixed_const(mode->clock);
 795		b.full = dfixed_div(b, a);
 796		c.full = dfixed_const(latency_watermark_a);
 797		c.full = dfixed_mul(c, b);
 798		c.full = dfixed_mul(c, radeon_crtc->hsc);
 799		c.full = dfixed_div(c, a);
 800		a.full = dfixed_const(16);
 801		c.full = dfixed_div(c, a);
 802		priority_a_mark = dfixed_trunc(c);
 803		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
 804
 805		a.full = dfixed_const(1000);
 806		b.full = dfixed_const(mode->clock);
 807		b.full = dfixed_div(b, a);
 808		c.full = dfixed_const(latency_watermark_b);
 809		c.full = dfixed_mul(c, b);
 810		c.full = dfixed_mul(c, radeon_crtc->hsc);
 811		c.full = dfixed_div(c, a);
 812		a.full = dfixed_const(16);
 813		c.full = dfixed_div(c, a);
 814		priority_b_mark = dfixed_trunc(c);
 815		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
 816	}
 817
 818	/* select wm A */
 819	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
 820	tmp = arb_control3;
 821	tmp &= ~LATENCY_WATERMARK_MASK(3);
 822	tmp |= LATENCY_WATERMARK_MASK(1);
 823	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
 824	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
 825	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
 826		LATENCY_HIGH_WATERMARK(line_time)));
 827	/* select wm B */
 828	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
 829	tmp &= ~LATENCY_WATERMARK_MASK(3);
 830	tmp |= LATENCY_WATERMARK_MASK(2);
 831	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
 832	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
 833	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
 834		LATENCY_HIGH_WATERMARK(line_time)));
 835	/* restore original selection */
 836	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
 837
 838	/* write the priority marks */
 839	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
 840	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
 841
 842}
 843
 844void dce6_bandwidth_update(struct radeon_device *rdev)
 845{
 846	struct drm_display_mode *mode0 = NULL;
 847	struct drm_display_mode *mode1 = NULL;
 848	u32 num_heads = 0, lb_size;
 849	int i;
 850
 851	radeon_update_display_priority(rdev);
 852
 853	for (i = 0; i < rdev->num_crtc; i++) {
 854		if (rdev->mode_info.crtcs[i]->base.enabled)
 855			num_heads++;
 856	}
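	/* crtcs are handled in pairs because each pair shares one line buffer;
	 * dce6_line_buffer_adjust decides how to split it based on which of the
	 * two modes is active. */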
 857	for (i = 0; i < rdev->num_crtc; i += 2) {
 858		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
 859		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
 860		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
 861		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
 862		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
 863		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
 864	}
 865}
 866
 867/*
 868 * Core functions
 869 */
 870static void si_tiling_mode_table_init(struct radeon_device *rdev)
 871{
 872	const u32 num_tile_mode_states = 32;
 873	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
 874
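	/* Each GB_TILE_MODE entry packs array mode, micro-tile mode, pipe config,
	 * tile split, bank geometry and macro-tile aspect for one of 32 tiling
	 * modes; Tahiti/Pitcairn use an 8-pipe config throughout, Verde mostly a
	 * 4-pipe one (its PRT modes still use the 8-pipe config). */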
 875	switch (rdev->config.si.mem_row_size_in_kb) {
 876	case 1:
 877		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
 878		break;
 879	case 2:
 880	default:
 881		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
 882		break;
 883	case 4:
 884		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
 885		break;
 886	}
 887
 888	if ((rdev->family == CHIP_TAHITI) ||
 889	    (rdev->family == CHIP_PITCAIRN)) {
 890		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
 891			switch (reg_offset) {
 892			case 0:  /* non-AA compressed depth or any compressed stencil */
 893				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 894						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 895						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 896						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 897						 NUM_BANKS(ADDR_SURF_16_BANK) |
 898						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 899						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
 900						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 901				break;
 902			case 1:  /* 2xAA/4xAA compressed depth only */
 903				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 904						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 905						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 906						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
 907						 NUM_BANKS(ADDR_SURF_16_BANK) |
 908						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 909						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
 910						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 911				break;
 912			case 2:  /* 8xAA compressed depth only */
 913				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 914						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 915						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 916						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
 917						 NUM_BANKS(ADDR_SURF_16_BANK) |
 918						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 919						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
 920						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 921				break;
 922			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
 923				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 924						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 925						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 926						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
 927						 NUM_BANKS(ADDR_SURF_16_BANK) |
 928						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 929						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
 930						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 931				break;
 932			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
 933				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
 934						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 935						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 936						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 937						 NUM_BANKS(ADDR_SURF_16_BANK) |
 938						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 939						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
 940						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 941				break;
 942			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
 943				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 944						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 945						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 946						 TILE_SPLIT(split_equal_to_row_size) |
 947						 NUM_BANKS(ADDR_SURF_16_BANK) |
 948						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 949						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
 950						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 951				break;
 952			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
 953				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 954						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 955						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 956						 TILE_SPLIT(split_equal_to_row_size) |
 957						 NUM_BANKS(ADDR_SURF_16_BANK) |
 958						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 959						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
 960						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
 961				break;
 962			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
 963				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 964						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
 965						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 966						 TILE_SPLIT(split_equal_to_row_size) |
 967						 NUM_BANKS(ADDR_SURF_16_BANK) |
 968						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 969						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
 970						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 971				break;
 972			case 8:  /* 1D and 1D Array Surfaces */
 973				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
 974						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
 975						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 976						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 977						 NUM_BANKS(ADDR_SURF_16_BANK) |
 978						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 979						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
 980						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 981				break;
 982			case 9:  /* Displayable maps. */
 983				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
 984						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
 985						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 986						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 987						 NUM_BANKS(ADDR_SURF_16_BANK) |
 988						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 989						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
 990						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
 991				break;
 992			case 10:  /* Display 8bpp. */
 993				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 994						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
 995						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
 996						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
 997						 NUM_BANKS(ADDR_SURF_16_BANK) |
 998						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
 999						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1000						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1001				break;
1002			case 11:  /* Display 16bpp. */
1003				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1004						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1006						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1007						 NUM_BANKS(ADDR_SURF_16_BANK) |
1008						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1009						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1010						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1011				break;
1012			case 12:  /* Display 32bpp. */
1013				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1014						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1015						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1016						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1017						 NUM_BANKS(ADDR_SURF_16_BANK) |
1018						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1019						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1020						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1021				break;
1022			case 13:  /* Thin. */
1023				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1024						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1025						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1026						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1027						 NUM_BANKS(ADDR_SURF_16_BANK) |
1028						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1029						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1030						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1031				break;
1032			case 14:  /* Thin 8 bpp. */
1033				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1034						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1035						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1036						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1037						 NUM_BANKS(ADDR_SURF_16_BANK) |
1038						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1039						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1040						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1041				break;
1042			case 15:  /* Thin 16 bpp. */
1043				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1044						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1045						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1046						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1047						 NUM_BANKS(ADDR_SURF_16_BANK) |
1048						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1049						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1050						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1051				break;
1052			case 16:  /* Thin 32 bpp. */
1053				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1054						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1055						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1056						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1057						 NUM_BANKS(ADDR_SURF_16_BANK) |
1058						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1059						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1060						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1061				break;
1062			case 17:  /* Thin 64 bpp. */
1063				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1064						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1065						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1066						 TILE_SPLIT(split_equal_to_row_size) |
1067						 NUM_BANKS(ADDR_SURF_16_BANK) |
1068						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1069						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1070						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1071				break;
1072			case 21:  /* 8 bpp PRT. */
1073				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1074						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1075						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1076						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1077						 NUM_BANKS(ADDR_SURF_16_BANK) |
1078						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1079						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1080						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1081				break;
1082			case 22:  /* 16 bpp PRT */
1083				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1084						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1085						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1086						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1087						 NUM_BANKS(ADDR_SURF_16_BANK) |
1088						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1089						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1090						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1091				break;
1092			case 23:  /* 32 bpp PRT */
1093				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1094						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1095						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1096						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1097						 NUM_BANKS(ADDR_SURF_16_BANK) |
1098						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1099						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1100						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1101				break;
1102			case 24:  /* 64 bpp PRT */
1103				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1104						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1105						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1106						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1107						 NUM_BANKS(ADDR_SURF_16_BANK) |
1108						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1109						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1110						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1111				break;
1112			case 25:  /* 128 bpp PRT */
1113				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1114						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1115						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1116						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1117						 NUM_BANKS(ADDR_SURF_8_BANK) |
1118						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1119						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1120						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1121				break;
1122			default:
1123				gb_tile_moden = 0;
1124				break;
1125			}
1126			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1127		}
1128	} else if (rdev->family == CHIP_VERDE) {
1129		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1130			switch (reg_offset) {
1131			case 0:  /* non-AA compressed depth or any compressed stencil */
1132				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1133						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1134						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1135						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1136						 NUM_BANKS(ADDR_SURF_16_BANK) |
1137						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1138						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1139						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1140				break;
1141			case 1:  /* 2xAA/4xAA compressed depth only */
1142				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1143						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1144						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1145						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1146						 NUM_BANKS(ADDR_SURF_16_BANK) |
1147						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1148						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1149						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1150				break;
1151			case 2:  /* 8xAA compressed depth only */
1152				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1153						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1154						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1155						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1156						 NUM_BANKS(ADDR_SURF_16_BANK) |
1157						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1158						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1159						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1160				break;
1161			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1162				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1163						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1164						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1165						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1166						 NUM_BANKS(ADDR_SURF_16_BANK) |
1167						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1168						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1169						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1170				break;
1171			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1172				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1173						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1174						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1175						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1176						 NUM_BANKS(ADDR_SURF_16_BANK) |
1177						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1178						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1179						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1180				break;
1181			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1182				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1183						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1184						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1185						 TILE_SPLIT(split_equal_to_row_size) |
1186						 NUM_BANKS(ADDR_SURF_16_BANK) |
1187						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1188						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1189						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1190				break;
1191			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1192				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1193						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1194						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1195						 TILE_SPLIT(split_equal_to_row_size) |
1196						 NUM_BANKS(ADDR_SURF_16_BANK) |
1197						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1198						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1199						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1200				break;
1201			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1202				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1203						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1204						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1205						 TILE_SPLIT(split_equal_to_row_size) |
1206						 NUM_BANKS(ADDR_SURF_16_BANK) |
1207						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1208						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1209						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1210				break;
1211			case 8:  /* 1D and 1D Array Surfaces */
1212				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1213						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1214						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1215						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1216						 NUM_BANKS(ADDR_SURF_16_BANK) |
1217						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1218						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1219						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1220				break;
1221			case 9:  /* Displayable maps. */
1222				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1223						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1224						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1225						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1226						 NUM_BANKS(ADDR_SURF_16_BANK) |
1227						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1228						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1229						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1230				break;
1231			case 10:  /* Display 8bpp. */
1232				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1233						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1234						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1235						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1236						 NUM_BANKS(ADDR_SURF_16_BANK) |
1237						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1238						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1239						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1240				break;
1241			case 11:  /* Display 16bpp. */
1242				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1243						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1244						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1245						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1246						 NUM_BANKS(ADDR_SURF_16_BANK) |
1247						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1248						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1249						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1250				break;
1251			case 12:  /* Display 32bpp. */
1252				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1253						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1254						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1255						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1256						 NUM_BANKS(ADDR_SURF_16_BANK) |
1257						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1258						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1259						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1260				break;
1261			case 13:  /* Thin. */
1262				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1263						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1264						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1265						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1266						 NUM_BANKS(ADDR_SURF_16_BANK) |
1267						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1268						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1269						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1270				break;
1271			case 14:  /* Thin 8 bpp. */
1272				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1274						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1276						 NUM_BANKS(ADDR_SURF_16_BANK) |
1277						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1278						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1279						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1280				break;
1281			case 15:  /* Thin 16 bpp. */
1282				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1283						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1284						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1285						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1286						 NUM_BANKS(ADDR_SURF_16_BANK) |
1287						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1288						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1289						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1290				break;
1291			case 16:  /* Thin 32 bpp. */
1292				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1293						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1294						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1295						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1296						 NUM_BANKS(ADDR_SURF_16_BANK) |
1297						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1298						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1299						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1300				break;
1301			case 17:  /* Thin 64 bpp. */
1302				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1303						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1304						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1305						 TILE_SPLIT(split_equal_to_row_size) |
1306						 NUM_BANKS(ADDR_SURF_16_BANK) |
1307						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1308						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1309						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1310				break;
1311			case 21:  /* 8 bpp PRT. */
1312				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1314						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1315						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1316						 NUM_BANKS(ADDR_SURF_16_BANK) |
1317						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1318						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1319						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1320				break;
1321			case 22:  /* 16 bpp PRT */
1322				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1323						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1324						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1325						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1326						 NUM_BANKS(ADDR_SURF_16_BANK) |
1327						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1328						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1329						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1330				break;
1331			case 23:  /* 32 bpp PRT */
1332				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1333						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1334						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1335						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1336						 NUM_BANKS(ADDR_SURF_16_BANK) |
1337						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1338						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1339						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1340				break;
1341			case 24:  /* 64 bpp PRT */
1342				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1343						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1344						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1345						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1346						 NUM_BANKS(ADDR_SURF_16_BANK) |
1347						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1348						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1349						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1350				break;
1351			case 25:  /* 128 bpp PRT */
1352				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1353						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1354						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1355						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1356						 NUM_BANKS(ADDR_SURF_8_BANK) |
1357						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1358						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1359						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1360				break;
1361			default:
1362				gb_tile_moden = 0;
1363				break;
1364			}
1365			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1366		}
1367	} else
1368		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
1369}
1370
1371static void si_select_se_sh(struct radeon_device *rdev,
1372			    u32 se_num, u32 sh_num)
1373{
1374	u32 data = INSTANCE_BROADCAST_WRITES;
1375
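	/* 0xffffffff for se_num/sh_num means "broadcast to all"; otherwise the
	 * given shader engine / shader array is selected in GRBM_GFX_INDEX. */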
1376	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1377		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1378	else if (se_num == 0xffffffff)
1379		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1380	else if (sh_num == 0xffffffff)
1381		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1382	else
1383		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1384	WREG32(GRBM_GFX_INDEX, data);
1385}
1386
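/* Builds a mask with the low 'bit_width' bits set (e.g. 4 -> 0xf); for widths
 * below 32 this is equivalent to (1U << bit_width) - 1. */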
1387static u32 si_create_bitmask(u32 bit_width)
1388{
1389	u32 i, mask = 0;
1390
1391	for (i = 0; i < bit_width; i++) {
1392		mask <<= 1;
1393		mask |= 1;
1394	}
1395	return mask;
1396}
1397
1398static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
1399{
1400	u32 data, mask;
1401
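	/* Combine the fused-off CUs (honored only when bit 0 of the CC register
	 * is set) with any user-disabled CUs, then invert against a cu_per_sh-wide
	 * mask to get the bitmap of CUs actually usable in this shader array. */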
1402	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
1403	if (data & 1)
1404		data &= INACTIVE_CUS_MASK;
1405	else
1406		data = 0;
1407	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
1408
1409	data >>= INACTIVE_CUS_SHIFT;
1410
1411	mask = si_create_bitmask(cu_per_sh);
1412
1413	return ~data & mask;
1414}
1415
1416static void si_setup_spi(struct radeon_device *rdev,
1417			 u32 se_num, u32 sh_per_se,
1418			 u32 cu_per_sh)
1419{
1420	int i, j, k;
1421	u32 data, mask, active_cu;
1422
1423	for (i = 0; i < se_num; i++) {
1424		for (j = 0; j < sh_per_se; j++) {
1425			si_select_se_sh(rdev, i, j);
1426			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
1427			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
1428
1429			mask = 1;
1430			for (k = 0; k < 16; k++) {
1431				mask <<= k;
1432				if (active_cu & mask) {
1433					data &= ~mask;
1434					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
1435					break;
1436				}
1437			}
1438		}
1439	}
1440	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1441}
1442
1443static u32 si_get_rb_disabled(struct radeon_device *rdev,
1444			      u32 max_rb_num, u32 se_num,
1445			      u32 sh_per_se)
1446{
1447	u32 data, mask;
1448
1449	data = RREG32(CC_RB_BACKEND_DISABLE);
1450	if (data & 1)
1451		data &= BACKEND_DISABLE_MASK;
1452	else
1453		data = 0;
1454	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1455
1456	data >>= BACKEND_DISABLE_SHIFT;
1457
1458	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
1459
1460	return data & mask;
1461}
1462
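/* si_setup_rb() builds a bitmap of usable render backends from the disable
 * fuses read above and programs PA_SC_RASTER_CONFIG so rasterization only
 * targets enabled RBs, consuming two bits of the map per shader array. */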
1463static void si_setup_rb(struct radeon_device *rdev,
1464			u32 se_num, u32 sh_per_se,
1465			u32 max_rb_num)
1466{
1467	int i, j;
1468	u32 data, mask;
1469	u32 disabled_rbs = 0;
1470	u32 enabled_rbs = 0;
1471
1472	for (i = 0; i < se_num; i++) {
1473		for (j = 0; j < sh_per_se; j++) {
1474			si_select_se_sh(rdev, i, j);
1475			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1476			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
1477		}
1478	}
1479	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1480
1481	mask = 1;
1482	for (i = 0; i < max_rb_num; i++) {
1483		if (!(disabled_rbs & mask))
1484			enabled_rbs |= mask;
1485		mask <<= 1;
1486	}
1487
1488	for (i = 0; i < se_num; i++) {
1489		si_select_se_sh(rdev, i, 0xffffffff);
1490		data = 0;
1491		for (j = 0; j < sh_per_se; j++) {
1492			switch (enabled_rbs & 3) {
1493			case 1:
1494				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1495				break;
1496			case 2:
1497				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1498				break;
1499			case 3:
1500			default:
1501				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1502				break;
1503			}
1504			enabled_rbs >>= 2;
1505		}
1506		WREG32(PA_SC_RASTER_CONFIG, data);
1507	}
1508	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1509}
1510
1511static void si_gpu_init(struct radeon_device *rdev)
1512{
1513	u32 gb_addr_config = 0;
1514	u32 mc_shared_chmap, mc_arb_ramcfg;
1515	u32 sx_debug_1;
1516	u32 hdp_host_path_cntl;
1517	u32 tmp;
1518	int i, j;
1519
1520	switch (rdev->family) {
1521	case CHIP_TAHITI:
1522		rdev->config.si.max_shader_engines = 2;
1523		rdev->config.si.max_tile_pipes = 12;
1524		rdev->config.si.max_cu_per_sh = 8;
1525		rdev->config.si.max_sh_per_se = 2;
1526		rdev->config.si.max_backends_per_se = 4;
1527		rdev->config.si.max_texture_channel_caches = 12;
1528		rdev->config.si.max_gprs = 256;
1529		rdev->config.si.max_gs_threads = 32;
1530		rdev->config.si.max_hw_contexts = 8;
1531
1532		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1533		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1534		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1535		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1536		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1537		break;
1538	case CHIP_PITCAIRN:
1539		rdev->config.si.max_shader_engines = 2;
1540		rdev->config.si.max_tile_pipes = 8;
1541		rdev->config.si.max_cu_per_sh = 5;
1542		rdev->config.si.max_sh_per_se = 2;
1543		rdev->config.si.max_backends_per_se = 4;
1544		rdev->config.si.max_texture_channel_caches = 8;
1545		rdev->config.si.max_gprs = 256;
1546		rdev->config.si.max_gs_threads = 32;
1547		rdev->config.si.max_hw_contexts = 8;
1548
1549		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1550		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
1551		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1552		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1553		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
1554		break;
1555	case CHIP_VERDE:
1556	default:
1557		rdev->config.si.max_shader_engines = 1;
1558		rdev->config.si.max_tile_pipes = 4;
1559		rdev->config.si.max_cu_per_sh = 2;
1560		rdev->config.si.max_sh_per_se = 2;
1561		rdev->config.si.max_backends_per_se = 4;
1562		rdev->config.si.max_texture_channel_caches = 4;
1563		rdev->config.si.max_gprs = 256;
1564		rdev->config.si.max_gs_threads = 32;
1565		rdev->config.si.max_hw_contexts = 8;
1566
1567		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
1568		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
1569		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
1570		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
1571		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
1572		break;
1573	}
1574
1575	/* Initialize HDP */
1576	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1577		WREG32((0x2c14 + j), 0x00000000);
1578		WREG32((0x2c18 + j), 0x00000000);
1579		WREG32((0x2c1c + j), 0x00000000);
1580		WREG32((0x2c20 + j), 0x00000000);
1581		WREG32((0x2c24 + j), 0x00000000);
1582	}
1583
1584	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1585
1586	evergreen_fix_pci_max_read_req_size(rdev);
1587
1588	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1589
1590	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1591	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1592
1593	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
1594	rdev->config.si.mem_max_burst_length_bytes = 256;
1595	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1596	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1597	if (rdev->config.si.mem_row_size_in_kb > 4)
1598		rdev->config.si.mem_row_size_in_kb = 4;
1599	/* XXX use MC settings? */
1600	rdev->config.si.shader_engine_tile_size = 32;
1601	rdev->config.si.num_gpus = 1;
1602	rdev->config.si.multi_gpu_tile_size = 64;
1603
1604	/* fix up row size */
1605	gb_addr_config &= ~ROW_SIZE_MASK;
1606	switch (rdev->config.si.mem_row_size_in_kb) {
1607	case 1:
1608	default:
1609		gb_addr_config |= ROW_SIZE(0);
1610		break;
1611	case 2:
1612		gb_addr_config |= ROW_SIZE(1);
1613		break;
1614	case 4:
1615		gb_addr_config |= ROW_SIZE(2);
1616		break;
1617	}
1618
1619	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1620	 * not have bank info, so create a custom tiling dword.
1621	 * bits 3:0   num_pipes
1622	 * bits 7:4   num_banks
1623	 * bits 11:8  group_size
1624	 * bits 15:12 row_size
1625	 */
1626	rdev->config.si.tile_config = 0;
1627	switch (rdev->config.si.num_tile_pipes) {
1628	case 1:
1629		rdev->config.si.tile_config |= (0 << 0);
1630		break;
1631	case 2:
1632		rdev->config.si.tile_config |= (1 << 0);
1633		break;
1634	case 4:
1635		rdev->config.si.tile_config |= (2 << 0);
1636		break;
1637	case 8:
1638	default:
1639		/* XXX what about 12? */
1640		rdev->config.si.tile_config |= (3 << 0);
1641		break;
1642	}
1643	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1644	case 0: /* four banks */
1645		rdev->config.si.tile_config |= 0 << 4;
1646		break;
1647	case 1: /* eight banks */
1648		rdev->config.si.tile_config |= 1 << 4;
1649		break;
1650	case 2: /* sixteen banks */
1651	default:
1652		rdev->config.si.tile_config |= 2 << 4;
1653		break;
1654	}
1655	rdev->config.si.tile_config |=
1656		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1657	rdev->config.si.tile_config |=
1658		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1659
1660	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1661	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1662	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1663
1664	si_tiling_mode_table_init(rdev);
1665
1666	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
1667		    rdev->config.si.max_sh_per_se,
1668		    rdev->config.si.max_backends_per_se);
1669
1670	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
1671		     rdev->config.si.max_sh_per_se,
1672		     rdev->config.si.max_cu_per_sh);
1673
1674
1675	/* set HW defaults for 3D engine */
1676	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
1677				     ROQ_IB2_START(0x2b)));
1678	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1679
1680	sx_debug_1 = RREG32(SX_DEBUG_1);
1681	WREG32(SX_DEBUG_1, sx_debug_1);
1682
1683	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
1684
1685	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
1686				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
1687				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
1688				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
1689
1690	WREG32(VGT_NUM_INSTANCES, 1);
1691
1692	WREG32(CP_PERFMON_CNTL, 0);
1693
1694	WREG32(SQ_CONFIG, 0);
1695
1696	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1697					  FORCE_EOV_MAX_REZ_CNT(255)));
1698
1699	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1700	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1701
1702	WREG32(VGT_GS_VERTEX_REUSE, 16);
1703	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1704
1705	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
1706	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
1707	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
1708	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
1709	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
1710	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
1711	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
1712	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
1713
1714	tmp = RREG32(HDP_MISC_CNTL);
1715	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1716	WREG32(HDP_MISC_CNTL, tmp);
1717
1718	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1719	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1720
1721	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1722
1723	udelay(50);
1724}
1725
1726/*
1727 * GPU scratch registers helper functions.
1728 */
1729static void si_scratch_init(struct radeon_device *rdev)
1730{
1731	int i;
1732
1733	rdev->scratch.num_reg = 7;
1734	rdev->scratch.reg_base = SCRATCH_REG0;
1735	for (i = 0; i < rdev->scratch.num_reg; i++) {
1736		rdev->scratch.free[i] = true;
1737		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
1738	}
1739}
1740
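/*
 * Emit a fence on the given ring: flush the shader and texture caches
 * over the GART with SURFACE_SYNC, then use EVENT_WRITE_EOP to write
 * fence->seq to the fence GPU address and raise an interrupt
 * (DATA_SEL(1), INT_SEL(2)).
 */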
1741void si_fence_ring_emit(struct radeon_device *rdev,
1742			struct radeon_fence *fence)
1743{
1744	struct radeon_ring *ring = &rdev->ring[fence->ring];
1745	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1746
1747	/* flush read cache over gart */
1748	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1749	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1750	radeon_ring_write(ring, 0);
1751	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1752	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1753			  PACKET3_TC_ACTION_ENA |
1754			  PACKET3_SH_KCACHE_ACTION_ENA |
1755			  PACKET3_SH_ICACHE_ACTION_ENA);
1756	radeon_ring_write(ring, 0xFFFFFFFF);
1757	radeon_ring_write(ring, 0);
1758	radeon_ring_write(ring, 10); /* poll interval */
1759	/* EVENT_WRITE_EOP - flush caches, send int */
1760	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
1761	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
1762	radeon_ring_write(ring, addr & 0xffffffff);
1763	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
1764	radeon_ring_write(ring, fence->seq);
1765	radeon_ring_write(ring, 0);
1766}
1767
1768/*
1769 * IB stuff
1770 */
1771void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1772{
1773	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
1774	u32 header;
1775
1776	if (ib->is_const_ib)
1777		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
1778	else
1779		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
1780
1781	radeon_ring_write(ring, header);
1782	radeon_ring_write(ring,
1783#ifdef __BIG_ENDIAN
1784			  (2 << 0) |
1785#endif
1786			  (ib->gpu_addr & 0xFFFFFFFC));
1787	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
1788	radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24));
1789
1790	/* flush read cache over gart for this vmid */
1791	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1792	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1793	radeon_ring_write(ring, ib->vm_id);
1794	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1795	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
1796			  PACKET3_TC_ACTION_ENA |
1797			  PACKET3_SH_KCACHE_ACTION_ENA |
1798			  PACKET3_SH_ICACHE_ACTION_ENA);
1799	radeon_ring_write(ring, 0xFFFFFFFF);
1800	radeon_ring_write(ring, 0);
1801	radeon_ring_write(ring, 10); /* poll interval */
1802}
1803
1804/*
1805 * CP.
1806 */
1807static void si_cp_enable(struct radeon_device *rdev, bool enable)
1808{
1809	if (enable)
1810		WREG32(CP_ME_CNTL, 0);
1811	else {
1812		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1813		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1814		WREG32(SCRATCH_UMSK, 0);
1815	}
1816	udelay(50);
1817}
1818
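/*
 * Load the PFP, CE and ME microcode: each big-endian firmware image is
 * written one dword at a time through its UCODE_ADDR/UCODE_DATA (or
 * RAM_WADDR/RAM_DATA) register pair while the CP is halted.
 */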
1819static int si_cp_load_microcode(struct radeon_device *rdev)
1820{
1821	const __be32 *fw_data;
1822	int i;
1823
1824	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
1825		return -EINVAL;
1826
1827	si_cp_enable(rdev, false);
1828
1829	/* PFP */
1830	fw_data = (const __be32 *)rdev->pfp_fw->data;
1831	WREG32(CP_PFP_UCODE_ADDR, 0);
1832	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
1833		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1834	WREG32(CP_PFP_UCODE_ADDR, 0);
1835
1836	/* CE */
1837	fw_data = (const __be32 *)rdev->ce_fw->data;
1838	WREG32(CP_CE_UCODE_ADDR, 0);
1839	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
1840		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
1841	WREG32(CP_CE_UCODE_ADDR, 0);
1842
1843	/* ME */
1844	fw_data = (const __be32 *)rdev->me_fw->data;
1845	WREG32(CP_ME_RAM_WADDR, 0);
1846	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
1847		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1848	WREG32(CP_ME_RAM_WADDR, 0);
1849
1850	WREG32(CP_PFP_UCODE_ADDR, 0);
1851	WREG32(CP_CE_UCODE_ADDR, 0);
1852	WREG32(CP_ME_RAM_WADDR, 0);
1853	WREG32(CP_ME_RAM_RADDR, 0);
1854	return 0;
1855}
1856
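/*
 * Bring up the command processor: emit ME_INITIALIZE and the CE partition
 * bases on the gfx ring, enable the CP, replay the golden clear state
 * (si_default_state), then emit a CLEAR_STATE on each compute ring.
 */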
1857static int si_cp_start(struct radeon_device *rdev)
1858{
1859	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1860	int r, i;
1861
1862	r = radeon_ring_lock(rdev, ring, 7 + 4);
1863	if (r) {
1864		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1865		return r;
1866	}
1867	/* init the CP */
1868	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
1869	radeon_ring_write(ring, 0x1);
1870	radeon_ring_write(ring, 0x0);
1871	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
1872	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
1873	radeon_ring_write(ring, 0);
1874	radeon_ring_write(ring, 0);
1875
1876	/* init the CE partitions */
1877	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1878	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1879	radeon_ring_write(ring, 0xc000);
1880	radeon_ring_write(ring, 0xe000);
1881	radeon_ring_unlock_commit(rdev, ring);
1882
1883	si_cp_enable(rdev, true);
1884
1885	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
1886	if (r) {
1887		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
1888		return r;
1889	}
1890
1891	/* setup clear context state */
1892	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1893	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1894
1895	for (i = 0; i < si_default_size; i++)
1896		radeon_ring_write(ring, si_default_state[i]);
1897
1898	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1899	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1900
1901	/* set clear context state */
1902	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1903	radeon_ring_write(ring, 0);
1904
1905	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1906	radeon_ring_write(ring, 0x00000316);
1907	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
1908	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
1909
1910	radeon_ring_unlock_commit(rdev, ring);
1911
1912	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
1913		ring = &rdev->ring[i];
1914		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
1915
1916		/* clear the compute context state */
1917		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
1918		radeon_ring_write(ring, 0);
1919
1920		radeon_ring_unlock_commit(rdev, ring);
1921	}
1922
1923	return 0;
1924}
1925
1926static void si_cp_fini(struct radeon_device *rdev)
1927{
1928	si_cp_enable(rdev, false);
1929	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
1930	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
1931	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
1932}
1933
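/*
 * Restart the CP rings: soft-reset the gfx blocks that share state with
 * the CP, program the ring size, write-back rptr address and base for the
 * gfx ring and both compute rings, then start them via si_cp_start() and
 * run a ring test on each.
 */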
1934static int si_cp_resume(struct radeon_device *rdev)
1935{
1936	struct radeon_ring *ring;
1937	u32 tmp;
1938	u32 rb_bufsz;
1939	int r;
1940
1941	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
1942	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
1943				 SOFT_RESET_PA |
1944				 SOFT_RESET_VGT |
1945				 SOFT_RESET_SPI |
1946				 SOFT_RESET_SX));
1947	RREG32(GRBM_SOFT_RESET);
1948	mdelay(15);
1949	WREG32(GRBM_SOFT_RESET, 0);
1950	RREG32(GRBM_SOFT_RESET);
1951
1952	WREG32(CP_SEM_WAIT_TIMER, 0x0);
1953	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
1954
1955	/* Set the write pointer delay */
1956	WREG32(CP_RB_WPTR_DELAY, 0);
1957
1958	WREG32(CP_DEBUG, 0);
1959	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
1960
1961	/* ring 0 - compute and gfx */
1962	/* Set ring buffer size */
1963	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1964	rb_bufsz = drm_order(ring->ring_size / 8);
1965	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1966#ifdef __BIG_ENDIAN
1967	tmp |= BUF_SWAP_32BIT;
1968#endif
1969	WREG32(CP_RB0_CNTL, tmp);
1970
1971	/* Initialize the ring buffer's read and write pointers */
1972	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
1973	ring->wptr = 0;
1974	WREG32(CP_RB0_WPTR, ring->wptr);
1975
1976	/* set the wb address whether it's enabled or not */
1977	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
1978	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
1979
1980	if (rdev->wb.enabled)
1981		WREG32(SCRATCH_UMSK, 0xff);
1982	else {
1983		tmp |= RB_NO_UPDATE;
1984		WREG32(SCRATCH_UMSK, 0);
1985	}
1986
1987	mdelay(1);
1988	WREG32(CP_RB0_CNTL, tmp);
1989
1990	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
1991
1992	ring->rptr = RREG32(CP_RB0_RPTR);
1993
1994	/* ring1  - compute only */
1995	/* Set ring buffer size */
1996	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
1997	rb_bufsz = drm_order(ring->ring_size / 8);
1998	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
1999#ifdef __BIG_ENDIAN
2000	tmp |= BUF_SWAP_32BIT;
2001#endif
2002	WREG32(CP_RB1_CNTL, tmp);
2003
2004	/* Initialize the ring buffer's read and write pointers */
2005	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2006	ring->wptr = 0;
2007	WREG32(CP_RB1_WPTR, ring->wptr);
2008
2009	/* set the wb address whether it's enabled or not */
2010	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2011	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2012
2013	mdelay(1);
2014	WREG32(CP_RB1_CNTL, tmp);
2015
2016	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2017
2018	ring->rptr = RREG32(CP_RB1_RPTR);
2019
2020	/* ring2 - compute only */
2021	/* Set ring buffer size */
2022	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2023	rb_bufsz = drm_order(ring->ring_size / 8);
2024	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2025#ifdef __BIG_ENDIAN
2026	tmp |= BUF_SWAP_32BIT;
2027#endif
2028	WREG32(CP_RB2_CNTL, tmp);
2029
2030	/* Initialize the ring buffer's read and write pointers */
2031	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2032	ring->wptr = 0;
2033	WREG32(CP_RB2_WPTR, ring->wptr);
2034
2035	/* set the wb address whether it's enabled or not */
2036	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2037	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2038
2039	mdelay(1);
2040	WREG32(CP_RB2_CNTL, tmp);
2041
2042	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2043
2044	ring->rptr = RREG32(CP_RB2_RPTR);
2045
2046	/* start the rings */
2047	si_cp_start(rdev);
2048	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2049	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2050	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2051	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2052	if (r) {
2053		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2054		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2055		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2056		return r;
2057	}
2058	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2059	if (r) {
2060		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2061	}
2062	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
2063	if (r) {
2064		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2065	}
2066
2067	return 0;
2068}
2069
2070bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2071{
2072	u32 srbm_status;
2073	u32 grbm_status, grbm_status2;
2074	u32 grbm_status_se0, grbm_status_se1;
2075
2076	srbm_status = RREG32(SRBM_STATUS);
2077	grbm_status = RREG32(GRBM_STATUS);
2078	grbm_status2 = RREG32(GRBM_STATUS2);
2079	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
2080	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
2081	if (!(grbm_status & GUI_ACTIVE)) {
2082		radeon_ring_lockup_update(ring);
2083		return false;
2084	}
2085	/* force CP activities */
2086	radeon_ring_force_activity(rdev, ring);
2087	return radeon_ring_test_lockup(rdev, ring);
2088}
2089
2090static int si_gpu_soft_reset(struct radeon_device *rdev)
2091{
2092	struct evergreen_mc_save save;
2093	u32 grbm_reset = 0;
2094
2095	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
2096		return 0;
2097
2098	dev_info(rdev->dev, "GPU softreset\n");
2099	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2100		RREG32(GRBM_STATUS));
2101	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2102		RREG32(GRBM_STATUS2));
2103	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2104		RREG32(GRBM_STATUS_SE0));
2105	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2106		RREG32(GRBM_STATUS_SE1));
2107	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2108		RREG32(SRBM_STATUS));
2109	evergreen_mc_stop(rdev, &save);
2110	if (radeon_mc_wait_for_idle(rdev)) {
2111		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2112	}
2113	/* Disable CP parsing/prefetching */
2114	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
2115
2116	/* reset all the gfx blocks */
2117	grbm_reset = (SOFT_RESET_CP |
2118		      SOFT_RESET_CB |
2119		      SOFT_RESET_DB |
2120		      SOFT_RESET_GDS |
2121		      SOFT_RESET_PA |
2122		      SOFT_RESET_SC |
2123		      SOFT_RESET_BCI |
2124		      SOFT_RESET_SPI |
2125		      SOFT_RESET_SX |
2126		      SOFT_RESET_TC |
2127		      SOFT_RESET_TA |
2128		      SOFT_RESET_VGT |
2129		      SOFT_RESET_IA);
2130
2131	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
2132	WREG32(GRBM_SOFT_RESET, grbm_reset);
2133	(void)RREG32(GRBM_SOFT_RESET);
2134	udelay(50);
2135	WREG32(GRBM_SOFT_RESET, 0);
2136	(void)RREG32(GRBM_SOFT_RESET);
2137	/* Wait a little for things to settle down */
2138	udelay(50);
2139	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
2140		RREG32(GRBM_STATUS));
2141	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
2142		RREG32(GRBM_STATUS2));
2143	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2144		RREG32(GRBM_STATUS_SE0));
2145	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2146		RREG32(GRBM_STATUS_SE1));
2147	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
2148		RREG32(SRBM_STATUS));
2149	evergreen_mc_resume(rdev, &save);
2150	return 0;
2151}
2152
2153int si_asic_reset(struct radeon_device *rdev)
2154{
2155	return si_gpu_soft_reset(rdev);
2156}
2157
2158/* MC */
2159static void si_mc_program(struct radeon_device *rdev)
2160{
2161	struct evergreen_mc_save save;
2162	u32 tmp;
2163	int i, j;
2164
2165	/* Initialize HDP */
2166	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2167		WREG32((0x2c14 + j), 0x00000000);
2168		WREG32((0x2c18 + j), 0x00000000);
2169		WREG32((0x2c1c + j), 0x00000000);
2170		WREG32((0x2c20 + j), 0x00000000);
2171		WREG32((0x2c24 + j), 0x00000000);
2172	}
2173	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
2174
2175	evergreen_mc_stop(rdev, &save);
2176	if (radeon_mc_wait_for_idle(rdev)) {
2177		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2178	}
2179	/* Lockout access through VGA aperture*/
2180	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
2181	/* Update configuration */
2182	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
2183	       rdev->mc.vram_start >> 12);
2184	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
2185	       rdev->mc.vram_end >> 12);
2186	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
2187	       rdev->vram_scratch.gpu_addr >> 12);
2188	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
2189	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
2190	WREG32(MC_VM_FB_LOCATION, tmp);
2191	/* XXX double check these! */
2192	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
2193	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
2194	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
2195	WREG32(MC_VM_AGP_BASE, 0);
2196	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
2197	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
2198	if (radeon_mc_wait_for_idle(rdev)) {
2199		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
2200	}
2201	evergreen_mc_resume(rdev, &save);
2202	/* we need to own VRAM, so turn off the VGA renderer here
2203	 * to stop it overwriting our objects */
2204	rv515_vga_render_disable(rdev);
2205}
2206
2207/* SI MC address space is 40 bits */
2208static void si_vram_location(struct radeon_device *rdev,
2209			     struct radeon_mc *mc, u64 base)
2210{
2211	mc->vram_start = base;
2212	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
2213		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
2214		mc->real_vram_size = mc->aper_size;
2215		mc->mc_vram_size = mc->aper_size;
2216	}
2217	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2218	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
2219			mc->mc_vram_size >> 20, mc->vram_start,
2220			mc->vram_end, mc->real_vram_size >> 20);
2221}
2222
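/*
 * Place the GTT aperture in the larger of the two gaps left in the 40-bit
 * MC address space, either below vram_start or above vram_end, aligned to
 * gtt_base_align and shrunk if it does not fit.
 */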
2223static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
2224{
2225	u64 size_af, size_bf;
2226
2227	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
2228	size_bf = mc->vram_start & ~mc->gtt_base_align;
2229	if (size_bf > size_af) {
2230		if (mc->gtt_size > size_bf) {
2231			dev_warn(rdev->dev, "limiting GTT\n");
2232			mc->gtt_size = size_bf;
2233		}
2234		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
2235	} else {
2236		if (mc->gtt_size > size_af) {
2237			dev_warn(rdev->dev, "limiting GTT\n");
2238			mc->gtt_size = size_af;
2239		}
2240		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
2241	}
2242	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
2243	dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
2244			mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
2245}
2246
2247static void si_vram_gtt_location(struct radeon_device *rdev,
2248				 struct radeon_mc *mc)
2249{
2250	if (mc->mc_vram_size > 0xFFC0000000ULL) {
2251		/* leave room for at least 1024M GTT */
2252		dev_warn(rdev->dev, "limiting VRAM\n");
2253		mc->real_vram_size = 0xFFC0000000ULL;
2254		mc->mc_vram_size = 0xFFC0000000ULL;
2255	}
2256	si_vram_location(rdev, &rdev->mc, 0);
2257	rdev->mc.gtt_base_align = 0;
2258	si_gtt_location(rdev, mc);
2259}
2260
2261static int si_mc_init(struct radeon_device *rdev)
2262{
2263	u32 tmp;
2264	int chansize, numchan;
2265
2266	/* Get VRAM information */
2267	rdev->mc.vram_is_ddr = true;
2268	tmp = RREG32(MC_ARB_RAMCFG);
2269	if (tmp & CHANSIZE_OVERRIDE) {
2270		chansize = 16;
2271	} else if (tmp & CHANSIZE_MASK) {
2272		chansize = 64;
2273	} else {
2274		chansize = 32;
2275	}
2276	tmp = RREG32(MC_SHARED_CHMAP);
2277	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2278	case 0:
2279	default:
2280		numchan = 1;
2281		break;
2282	case 1:
2283		numchan = 2;
2284		break;
2285	case 2:
2286		numchan = 4;
2287		break;
2288	case 3:
2289		numchan = 8;
2290		break;
2291	case 4:
2292		numchan = 3;
2293		break;
2294	case 5:
2295		numchan = 6;
2296		break;
2297	case 6:
2298		numchan = 10;
2299		break;
2300	case 7:
2301		numchan = 12;
2302		break;
2303	case 8:
2304		numchan = 16;
2305		break;
2306	}
2307	rdev->mc.vram_width = numchan * chansize;
2308	/* Could aperture size report 0? */
2309	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
2310	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
2311	/* size in MB on si */
2312	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2313	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
2314	rdev->mc.visible_vram_size = rdev->mc.aper_size;
2315	si_vram_gtt_location(rdev, &rdev->mc);
2316	radeon_update_bandwidth_info(rdev);
2317
2318	return 0;
2319}
2320
2321/*
2322 * GART
2323 */
2324void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
2325{
2326	/* flush hdp cache */
2327	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2328
2329	/* bits 0-15 are the VM contexts 0-15 */
2330	WREG32(VM_INVALIDATE_REQUEST, 1);
2331}
2332
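/*
 * Enable the PCIE GART: pin the page table in VRAM, program the L1 TLB
 * and L2 cache, point VM context 0 at the system page table covering the
 * GTT range, alias contexts 1-15 to the same single-level table for now
 * (see the FIXME below), then flush the TLBs.
 */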
2333int si_pcie_gart_enable(struct radeon_device *rdev)
2334{
2335	int r, i;
2336
2337	if (rdev->gart.robj == NULL) {
2338		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
2339		return -EINVAL;
2340	}
2341	r = radeon_gart_table_vram_pin(rdev);
2342	if (r)
2343		return r;
2344	radeon_gart_restore(rdev);
2345	/* Setup TLB control */
2346	WREG32(MC_VM_MX_L1_TLB_CNTL,
2347	       (0xA << 7) |
2348	       ENABLE_L1_TLB |
2349	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2350	       ENABLE_ADVANCED_DRIVER_MODEL |
2351	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2352	/* Setup L2 cache */
2353	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
2354	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2355	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2356	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2357	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2358	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
2359	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2360	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2361	/* setup context0 */
2362	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
2363	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
2364	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
2365	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
2366			(u32)(rdev->dummy_page.addr >> 12));
2367	WREG32(VM_CONTEXT0_CNTL2, 0);
2368	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2369				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
2370
2371	WREG32(0x15D4, 0);
2372	WREG32(0x15D8, 0);
2373	WREG32(0x15DC, 0);
2374
2375	/* empty context1-15 */
2376	/* FIXME start with 4G, once using 2 level pt switch to full
2377	 * vm size space
2378	 */
2379	/* set vm size, must be a multiple of 4 */
2380	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
2381	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
2382	for (i = 1; i < 16; i++) {
2383		if (i < 8)
2384			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
2385			       rdev->gart.table_addr >> 12);
2386		else
2387			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
2388			       rdev->gart.table_addr >> 12);
2389	}
2390
2391	/* enable context1-15 */
2392	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2393	       (u32)(rdev->dummy_page.addr >> 12));
2394	WREG32(VM_CONTEXT1_CNTL2, 0);
2395	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
2396				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
2397
2398	si_pcie_gart_tlb_flush(rdev);
2399	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
2400		 (unsigned)(rdev->mc.gtt_size >> 20),
2401		 (unsigned long long)rdev->gart.table_addr);
2402	rdev->gart.ready = true;
2403	return 0;
2404}
2405
2406void si_pcie_gart_disable(struct radeon_device *rdev)
2407{
2408	/* Disable all tables */
2409	WREG32(VM_CONTEXT0_CNTL, 0);
2410	WREG32(VM_CONTEXT1_CNTL, 0);
2411	/* Setup TLB control */
2412	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
2413	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
2414	/* Setup L2 cache */
2415	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
2416	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
2417	       EFFECTIVE_L2_QUEUE_SIZE(7) |
2418	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
2419	WREG32(VM_L2_CNTL2, 0);
2420	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
2421	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
2422	radeon_gart_table_vram_unpin(rdev);
2423}
2424
2425void si_pcie_gart_fini(struct radeon_device *rdev)
2426{
2427	si_pcie_gart_disable(rdev);
2428	radeon_gart_table_vram_free(rdev);
2429	radeon_gart_fini(rdev);
2430}
2431
2432/* vm parser */
2433static bool si_vm_reg_valid(u32 reg)
2434{
2435	/* context regs are fine */
2436	if (reg >= 0x28000)
2437		return true;
2438
2439	/* check config regs */
2440	switch (reg) {
2441	case GRBM_GFX_INDEX:
2442	case VGT_VTX_VECT_EJECT_REG:
2443	case VGT_CACHE_INVALIDATION:
2444	case VGT_ESGS_RING_SIZE:
2445	case VGT_GSVS_RING_SIZE:
2446	case VGT_GS_VERTEX_REUSE:
2447	case VGT_PRIMITIVE_TYPE:
2448	case VGT_INDEX_TYPE:
2449	case VGT_NUM_INDICES:
2450	case VGT_NUM_INSTANCES:
2451	case VGT_TF_RING_SIZE:
2452	case VGT_HS_OFFCHIP_PARAM:
2453	case VGT_TF_MEMORY_BASE:
2454	case PA_CL_ENHANCE:
2455	case PA_SU_LINE_STIPPLE_VALUE:
2456	case PA_SC_LINE_STIPPLE_STATE:
2457	case PA_SC_ENHANCE:
2458	case SQC_CACHES:
2459	case SPI_STATIC_THREAD_MGMT_1:
2460	case SPI_STATIC_THREAD_MGMT_2:
2461	case SPI_STATIC_THREAD_MGMT_3:
2462	case SPI_PS_MAX_WAVE_ID:
2463	case SPI_CONFIG_CNTL:
2464	case SPI_CONFIG_CNTL_1:
2465	case TA_CNTL_AUX:
2466		return true;
2467	default:
2468		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
2469		return false;
2470	}
2471}
2472
2473static int si_vm_packet3_ce_check(struct radeon_device *rdev,
2474				  u32 *ib, struct radeon_cs_packet *pkt)
2475{
2476	switch (pkt->opcode) {
2477	case PACKET3_NOP:
2478	case PACKET3_SET_BASE:
2479	case PACKET3_SET_CE_DE_COUNTERS:
2480	case PACKET3_LOAD_CONST_RAM:
2481	case PACKET3_WRITE_CONST_RAM:
2482	case PACKET3_WRITE_CONST_RAM_OFFSET:
2483	case PACKET3_DUMP_CONST_RAM:
2484	case PACKET3_INCREMENT_CE_COUNTER:
2485	case PACKET3_WAIT_ON_DE_COUNTER:
2486	case PACKET3_CE_WRITE:
2487		break;
2488	default:
2489		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
2490		return -EINVAL;
2491	}
2492	return 0;
2493}
2494
2495static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2496				   u32 *ib, struct radeon_cs_packet *pkt)
2497{
2498	u32 idx = pkt->idx + 1;
2499	u32 idx_value = ib[idx];
2500	u32 start_reg, end_reg, reg, i;
2501
2502	switch (pkt->opcode) {
2503	case PACKET3_NOP:
2504	case PACKET3_SET_BASE:
2505	case PACKET3_CLEAR_STATE:
2506	case PACKET3_INDEX_BUFFER_SIZE:
2507	case PACKET3_DISPATCH_DIRECT:
2508	case PACKET3_DISPATCH_INDIRECT:
2509	case PACKET3_ALLOC_GDS:
2510	case PACKET3_WRITE_GDS_RAM:
2511	case PACKET3_ATOMIC_GDS:
2512	case PACKET3_ATOMIC:
2513	case PACKET3_OCCLUSION_QUERY:
2514	case PACKET3_SET_PREDICATION:
2515	case PACKET3_COND_EXEC:
2516	case PACKET3_PRED_EXEC:
2517	case PACKET3_DRAW_INDIRECT:
2518	case PACKET3_DRAW_INDEX_INDIRECT:
2519	case PACKET3_INDEX_BASE:
2520	case PACKET3_DRAW_INDEX_2:
2521	case PACKET3_CONTEXT_CONTROL:
2522	case PACKET3_INDEX_TYPE:
2523	case PACKET3_DRAW_INDIRECT_MULTI:
2524	case PACKET3_DRAW_INDEX_AUTO:
2525	case PACKET3_DRAW_INDEX_IMMD:
2526	case PACKET3_NUM_INSTANCES:
2527	case PACKET3_DRAW_INDEX_MULTI_AUTO:
2528	case PACKET3_STRMOUT_BUFFER_UPDATE:
2529	case PACKET3_DRAW_INDEX_OFFSET_2:
2530	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
2531	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
2532	case PACKET3_MPEG_INDEX:
2533	case PACKET3_WAIT_REG_MEM:
2534	case PACKET3_MEM_WRITE:
2535	case PACKET3_PFP_SYNC_ME:
2536	case PACKET3_SURFACE_SYNC:
2537	case PACKET3_EVENT_WRITE:
2538	case PACKET3_EVENT_WRITE_EOP:
2539	case PACKET3_EVENT_WRITE_EOS:
2540	case PACKET3_SET_CONTEXT_REG:
2541	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2542	case PACKET3_SET_SH_REG:
2543	case PACKET3_SET_SH_REG_OFFSET:
2544	case PACKET3_INCREMENT_DE_COUNTER:
2545	case PACKET3_WAIT_ON_CE_COUNTER:
2546	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2547	case PACKET3_ME_WRITE:
2548		break;
2549	case PACKET3_COPY_DATA:
2550		if ((idx_value & 0xf00) == 0) {
2551			reg = ib[idx + 3] * 4;
2552			if (!si_vm_reg_valid(reg))
2553				return -EINVAL;
2554		}
2555		break;
2556	case PACKET3_WRITE_DATA:
2557		if ((idx_value & 0xf00) == 0) {
2558			start_reg = ib[idx + 1] * 4;
2559			if (idx_value & 0x10000) {
2560				if (!si_vm_reg_valid(start_reg))
2561					return -EINVAL;
2562			} else {
2563				for (i = 0; i < (pkt->count - 2); i++) {
2564					reg = start_reg + (4 * i);
2565					if (!si_vm_reg_valid(reg))
2566						return -EINVAL;
2567				}
2568			}
2569		}
2570		break;
2571	case PACKET3_COND_WRITE:
2572		if (idx_value & 0x100) {
2573			reg = ib[idx + 5] * 4;
2574			if (!si_vm_reg_valid(reg))
2575				return -EINVAL;
2576		}
2577		break;
2578	case PACKET3_COPY_DW:
2579		if (idx_value & 0x2) {
2580			reg = ib[idx + 3] * 4;
2581			if (!si_vm_reg_valid(reg))
2582				return -EINVAL;
2583		}
2584		break;
2585	case PACKET3_SET_CONFIG_REG:
2586		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2587		end_reg = 4 * pkt->count + start_reg - 4;
2588		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2589		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2590		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2591			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2592			return -EINVAL;
2593		}
2594		for (i = 0; i < pkt->count; i++) {
2595			reg = start_reg + (4 * i);
2596			if (!si_vm_reg_valid(reg))
2597				return -EINVAL;
2598		}
2599		break;
2600	default:
2601		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2602		return -EINVAL;
2603	}
2604	return 0;
2605}
2606
2607static int si_vm_packet3_compute_check(struct radeon_device *rdev,
2608				       u32 *ib, struct radeon_cs_packet *pkt)
2609{
2610	u32 idx = pkt->idx + 1;
2611	u32 idx_value = ib[idx];
2612	u32 start_reg, reg, i;
2613
2614	switch (pkt->opcode) {
2615	case PACKET3_NOP:
2616	case PACKET3_SET_BASE:
2617	case PACKET3_CLEAR_STATE:
2618	case PACKET3_DISPATCH_DIRECT:
2619	case PACKET3_DISPATCH_INDIRECT:
2620	case PACKET3_ALLOC_GDS:
2621	case PACKET3_WRITE_GDS_RAM:
2622	case PACKET3_ATOMIC_GDS:
2623	case PACKET3_ATOMIC:
2624	case PACKET3_OCCLUSION_QUERY:
2625	case PACKET3_SET_PREDICATION:
2626	case PACKET3_COND_EXEC:
2627	case PACKET3_PRED_EXEC:
2628	case PACKET3_CONTEXT_CONTROL:
2629	case PACKET3_STRMOUT_BUFFER_UPDATE:
2630	case PACKET3_WAIT_REG_MEM:
2631	case PACKET3_MEM_WRITE:
2632	case PACKET3_PFP_SYNC_ME:
2633	case PACKET3_SURFACE_SYNC:
2634	case PACKET3_EVENT_WRITE:
2635	case PACKET3_EVENT_WRITE_EOP:
2636	case PACKET3_EVENT_WRITE_EOS:
2637	case PACKET3_SET_CONTEXT_REG:
2638	case PACKET3_SET_CONTEXT_REG_INDIRECT:
2639	case PACKET3_SET_SH_REG:
2640	case PACKET3_SET_SH_REG_OFFSET:
2641	case PACKET3_INCREMENT_DE_COUNTER:
2642	case PACKET3_WAIT_ON_CE_COUNTER:
2643	case PACKET3_WAIT_ON_AVAIL_BUFFER:
2644	case PACKET3_ME_WRITE:
2645		break;
2646	case PACKET3_COPY_DATA:
2647		if ((idx_value & 0xf00) == 0) {
2648			reg = ib[idx + 3] * 4;
2649			if (!si_vm_reg_valid(reg))
2650				return -EINVAL;
2651		}
2652		break;
2653	case PACKET3_WRITE_DATA:
2654		if ((idx_value & 0xf00) == 0) {
2655			start_reg = ib[idx + 1] * 4;
2656			if (idx_value & 0x10000) {
2657				if (!si_vm_reg_valid(start_reg))
2658					return -EINVAL;
2659			} else {
2660				for (i = 0; i < (pkt->count - 2); i++) {
2661					reg = start_reg + (4 * i);
2662					if (!si_vm_reg_valid(reg))
2663						return -EINVAL;
2664				}
2665			}
2666		}
2667		break;
2668	case PACKET3_COND_WRITE:
2669		if (idx_value & 0x100) {
2670			reg = ib[idx + 5] * 4;
2671			if (!si_vm_reg_valid(reg))
2672				return -EINVAL;
2673		}
2674		break;
2675	case PACKET3_COPY_DW:
2676		if (idx_value & 0x2) {
2677			reg = ib[idx + 3] * 4;
2678			if (!si_vm_reg_valid(reg))
2679				return -EINVAL;
2680		}
2681		break;
2682	default:
2683		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
2684		return -EINVAL;
2685	}
2686	return 0;
2687}
2688
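/*
 * Walk an IB dword by dword: type-0 packets are rejected, type-2 packets
 * are single-dword padding, and type-3 packets (count + 2 dwords) are
 * checked against the CE, gfx or compute whitelist depending on which
 * ring the IB targets.
 */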
2689int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2690{
2691	int ret = 0;
2692	u32 idx = 0;
2693	struct radeon_cs_packet pkt;
2694
2695	do {
2696		pkt.idx = idx;
2697		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
2698		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
2699		pkt.one_reg_wr = 0;
2700		switch (pkt.type) {
2701		case PACKET_TYPE0:
2702			dev_err(rdev->dev, "Packet0 not allowed!\n");
2703			ret = -EINVAL;
2704			break;
2705		case PACKET_TYPE2:
2706			idx += 1;
2707			break;
2708		case PACKET_TYPE3:
2709			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
2710			if (ib->is_const_ib)
2711				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
2712			else {
2713				switch (ib->fence->ring) {
2714				case RADEON_RING_TYPE_GFX_INDEX:
2715					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
2716					break;
2717				case CAYMAN_RING_TYPE_CP1_INDEX:
2718				case CAYMAN_RING_TYPE_CP2_INDEX:
2719					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
2720					break;
2721				default:
2722					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
2723					ret = -EINVAL;
2724					break;
2725				}
2726			}
2727			idx += pkt.count + 2;
2728			break;
2729		default:
2730			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
2731			ret = -EINVAL;
2732			break;
2733		}
2734		if (ret)
2735			break;
2736	} while (idx < ib->length_dw);
2737
2738	return ret;
2739}
2740
2741/*
2742 * vm
2743 */
2744int si_vm_init(struct radeon_device *rdev)
2745{
2746	/* number of VMs */
2747	rdev->vm_manager.nvm = 16;
2748	/* base offset of vram pages */
2749	rdev->vm_manager.vram_base_offset = 0;
2750
2751	return 0;
2752}
2753
2754void si_vm_fini(struct radeon_device *rdev)
2755{
2756}
2757
2758int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
2759{
2760	if (id < 8)
2761		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
2762	else
2763		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
2764		       vm->pt_gpu_addr >> 12);
2765	/* flush hdp cache */
2766	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2767	/* bits 0-15 are the VM contexts 0-15 */
2768	WREG32(VM_INVALIDATE_REQUEST, 1 << id);
2769	return 0;
2770}
2771
2772void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
2773{
2774	if (vm->id < 8)
2775		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
2776	else
2777		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0);
2778	/* flush hdp cache */
2779	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2780	/* bits 0-15 are the VM contexts 0-15 */
2781	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2782}
2783
2784void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
2785{
2786	if (vm->id == -1)
2787		return;
2788
2789	/* flush hdp cache */
2790	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
2791	/* bits 0-15 are the VM contexts 0-15 */
2792	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
2793}
2794
2795/*
2796 * RLC
2797 */
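/*
 * The RLC needs two single-page VRAM buffers: a save/restore block and a
 * clear state block.  si_rlc_init() allocates and pins them; their GPU
 * addresses are programmed into RLC_SAVE_AND_RESTORE_BASE and
 * RLC_CLEAR_STATE_RESTORE_BASE in si_rlc_resume() before the RLC ucode is
 * loaded and the RLC is started.
 */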
2798void si_rlc_fini(struct radeon_device *rdev)
2799{
2800	int r;
2801
2802	/* save restore block */
2803	if (rdev->rlc.save_restore_obj) {
2804		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2805		if (unlikely(r != 0))
2806			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
2807		radeon_bo_unpin(rdev->rlc.save_restore_obj);
2808		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
2809
2810		radeon_bo_unref(&rdev->rlc.save_restore_obj);
2811		rdev->rlc.save_restore_obj = NULL;
2812	}
2813
2814	/* clear state block */
2815	if (rdev->rlc.clear_state_obj) {
2816		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
2817		if (unlikely(r != 0))
2818			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
2819		radeon_bo_unpin(rdev->rlc.clear_state_obj);
2820		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
2821
2822		radeon_bo_unref(&rdev->rlc.clear_state_obj);
2823		rdev->rlc.clear_state_obj = NULL;
2824	}
2825}
2826
2827int si_rlc_init(struct radeon_device *rdev)
2828{
2829	int r;
2830
2831	/* save restore block */
2832	if (rdev->rlc.save_restore_obj == NULL) {
2833		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
2834				     RADEON_GEM_DOMAIN_VRAM, NULL,
2835				     &rdev->rlc.save_restore_obj);
2836		if (r) {
2837			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
2838			return r;
2839		}
2840	}
2841
2842	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
2843	if (unlikely(r != 0)) {
2844		si_rlc_fini(rdev);
2845		return r;
2846	}
2847	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
2848			  &rdev->rlc.save_restore_gpu_addr);
2849	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
2850	if (r) {
2851		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
2852		si_rlc_fini(rdev);
2853		return r;
2854	}
2855
2856	/* clear state block */
2857	if (rdev->rlc.clear_state_obj == NULL) {
2858		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
2859				     RADEON_GEM_DOMAIN_VRAM, NULL,
2860				     &rdev->rlc.clear_state_obj);
2861		if (r) {
2862			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
2863			si_rlc_fini(rdev);
2864			return r;
2865		}
2866	}
2867	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
2868	if (unlikely(r != 0)) {
2869		si_rlc_fini(rdev);
2870		return r;
2871	}
2872	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
2873			  &rdev->rlc.clear_state_gpu_addr);
2874	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
2875	if (r) {
2876		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
2877		si_rlc_fini(rdev);
2878		return r;
2879	}
2880
2881	return 0;
2882}
2883
2884static void si_rlc_stop(struct radeon_device *rdev)
2885{
2886	WREG32(RLC_CNTL, 0);
2887}
2888
2889static void si_rlc_start(struct radeon_device *rdev)
2890{
2891	WREG32(RLC_CNTL, RLC_ENABLE);
2892}
2893
2894static int si_rlc_resume(struct radeon_device *rdev)
2895{
2896	u32 i;
2897	const __be32 *fw_data;
2898
2899	if (!rdev->rlc_fw)
2900		return -EINVAL;
2901
2902	si_rlc_stop(rdev);
2903
2904	WREG32(RLC_RL_BASE, 0);
2905	WREG32(RLC_RL_SIZE, 0);
2906	WREG32(RLC_LB_CNTL, 0);
2907	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
2908	WREG32(RLC_LB_CNTR_INIT, 0);
2909
2910	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
2911	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
2912
2913	WREG32(RLC_MC_CNTL, 0);
2914	WREG32(RLC_UCODE_CNTL, 0);
2915
2916	fw_data = (const __be32 *)rdev->rlc_fw->data;
2917	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
2918		WREG32(RLC_UCODE_ADDR, i);
2919		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
2920	}
2921	WREG32(RLC_UCODE_ADDR, 0);
2922
2923	si_rlc_start(rdev);
2924
2925	return 0;
2926}
2927
2928static void si_enable_interrupts(struct radeon_device *rdev)
2929{
2930	u32 ih_cntl = RREG32(IH_CNTL);
2931	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2932
2933	ih_cntl |= ENABLE_INTR;
2934	ih_rb_cntl |= IH_RB_ENABLE;
2935	WREG32(IH_CNTL, ih_cntl);
2936	WREG32(IH_RB_CNTL, ih_rb_cntl);
2937	rdev->ih.enabled = true;
2938}
2939
2940static void si_disable_interrupts(struct radeon_device *rdev)
2941{
2942	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
2943	u32 ih_cntl = RREG32(IH_CNTL);
2944
2945	ih_rb_cntl &= ~IH_RB_ENABLE;
2946	ih_cntl &= ~ENABLE_INTR;
2947	WREG32(IH_RB_CNTL, ih_rb_cntl);
2948	WREG32(IH_CNTL, ih_cntl);
2949	/* set rptr, wptr to 0 */
2950	WREG32(IH_RB_RPTR, 0);
2951	WREG32(IH_RB_WPTR, 0);
2952	rdev->ih.enabled = false;
2953	rdev->ih.wptr = 0;
2954	rdev->ih.rptr = 0;
2955}
2956
2957static void si_disable_interrupt_state(struct radeon_device *rdev)
2958{
2959	u32 tmp;
2960
2961	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2962	WREG32(CP_INT_CNTL_RING1, 0);
2963	WREG32(CP_INT_CNTL_RING2, 0);
2964	WREG32(GRBM_INT_CNTL, 0);
2965	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
2966	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
2967	if (rdev->num_crtc >= 4) {
2968		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
2969		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
2970	}
2971	if (rdev->num_crtc >= 6) {
2972		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
2973		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
2974	}
2975
2976	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
2977	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
2978	if (rdev->num_crtc >= 4) {
2979		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
2980		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
2981	}
2982	if (rdev->num_crtc >= 6) {
2983		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
2984		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
2985	}
2986
2987	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
2988
2989	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
2990	WREG32(DC_HPD1_INT_CONTROL, tmp);
2991	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
2992	WREG32(DC_HPD2_INT_CONTROL, tmp);
2993	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
2994	WREG32(DC_HPD3_INT_CONTROL, tmp);
2995	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
2996	WREG32(DC_HPD4_INT_CONTROL, tmp);
2997	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
2998	WREG32(DC_HPD5_INT_CONTROL, tmp);
2999	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
3000	WREG32(DC_HPD6_INT_CONTROL, tmp);
3001
3002}
3003
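/*
 * Set up the interrupt handler ring: allocate the IH ring, start the RLC,
 * program the ring base/size and optional write-back of the wptr, enable
 * RPTR_REARM when MSIs are in use, and leave all interrupt sources masked
 * until si_irq_set() enables them.
 */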
3004static int si_irq_init(struct radeon_device *rdev)
3005{
3006	int ret = 0;
3007	int rb_bufsz;
3008	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
3009
3010	/* allocate ring */
3011	ret = r600_ih_ring_alloc(rdev);
3012	if (ret)
3013		return ret;
3014
3015	/* disable irqs */
3016	si_disable_interrupts(rdev);
3017
3018	/* init rlc */
3019	ret = si_rlc_resume(rdev);
3020	if (ret) {
3021		r600_ih_ring_fini(rdev);
3022		return ret;
3023	}
3024
3025	/* setup interrupt control */
3026	/* set dummy read address to ring address */
3027	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
3028	interrupt_cntl = RREG32(INTERRUPT_CNTL);
3029	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
3030	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
3031	 */
3032	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
3033	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
3034	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
3035	WREG32(INTERRUPT_CNTL, interrupt_cntl);
3036
3037	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
3038	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
3039
3040	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
3041		      IH_WPTR_OVERFLOW_CLEAR |
3042		      (rb_bufsz << 1));
3043
3044	if (rdev->wb.enabled)
3045		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
3046
3047	/* set the writeback address whether it's enabled or not */
3048	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
3049	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
3050
3051	WREG32(IH_RB_CNTL, ih_rb_cntl);
3052
3053	/* set rptr, wptr to 0 */
3054	WREG32(IH_RB_RPTR, 0);
3055	WREG32(IH_RB_WPTR, 0);
3056
3057	/* Default settings for IH_CNTL (disabled at first) */
3058	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
3059	/* RPTR_REARM only works if msi's are enabled */
3060	if (rdev->msi_enabled)
3061		ih_cntl |= RPTR_REARM;
3062	WREG32(IH_CNTL, ih_cntl);
3063
3064	/* force the active interrupt state to all disabled */
3065	si_disable_interrupt_state(rdev);
3066
3067	pci_set_master(rdev->pdev);
3068
3069	/* enable irqs */
3070	si_enable_interrupts(rdev);
3071
3072	return ret;
3073}
3074
3075int si_irq_set(struct radeon_device *rdev)
3076{
3077	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
3078	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
3079	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
3080	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3081	u32 grbm_int_cntl = 0;
3082	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3083
3084	if (!rdev->irq.installed) {
3085		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
3086		return -EINVAL;
3087	}
3088	/* don't enable anything if the ih is disabled */
3089	if (!rdev->ih.enabled) {
3090		si_disable_interrupts(rdev);
3091		/* force the active interrupt state to all disabled */
3092		si_disable_interrupt_state(rdev);
3093		return 0;
3094	}
3095
3096	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
3097	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
3098	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
3099	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
3100	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3101	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3102
3103	/* enable CP interrupts on all rings */
3104	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
3105		DRM_DEBUG("si_irq_set: sw int gfx\n");
3106		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
3107	}
3108	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) {
3109		DRM_DEBUG("si_irq_set: sw int cp1\n");
3110		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
3111	}
3112	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) {
3113		DRM_DEBUG("si_irq_set: sw int cp2\n");
3114		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3115	}
3116	if (rdev->irq.crtc_vblank_int[0] ||
3117	    rdev->irq.pflip[0]) {
3118		DRM_DEBUG("si_irq_set: vblank 0\n");
3119		crtc1 |= VBLANK_INT_MASK;
3120	}
3121	if (rdev->irq.crtc_vblank_int[1] ||
3122	    rdev->irq.pflip[1]) {
3123		DRM_DEBUG("si_irq_set: vblank 1\n");
3124		crtc2 |= VBLANK_INT_MASK;
3125	}
3126	if (rdev->irq.crtc_vblank_int[2] ||
3127	    rdev->irq.pflip[2]) {
3128		DRM_DEBUG("si_irq_set: vblank 2\n");
3129		crtc3 |= VBLANK_INT_MASK;
3130	}
3131	if (rdev->irq.crtc_vblank_int[3] ||
3132	    rdev->irq.pflip[3]) {
3133		DRM_DEBUG("si_irq_set: vblank 3\n");
3134		crtc4 |= VBLANK_INT_MASK;
3135	}
3136	if (rdev->irq.crtc_vblank_int[4] ||
3137	    rdev->irq.pflip[4]) {
3138		DRM_DEBUG("si_irq_set: vblank 4\n");
3139		crtc5 |= VBLANK_INT_MASK;
3140	}
3141	if (rdev->irq.crtc_vblank_int[5] ||
3142	    rdev->irq.pflip[5]) {
3143		DRM_DEBUG("si_irq_set: vblank 5\n");
3144		crtc6 |= VBLANK_INT_MASK;
3145	}
3146	if (rdev->irq.hpd[0]) {
3147		DRM_DEBUG("si_irq_set: hpd 1\n");
3148		hpd1 |= DC_HPDx_INT_EN;
3149	}
3150	if (rdev->irq.hpd[1]) {
3151		DRM_DEBUG("si_irq_set: hpd 2\n");
3152		hpd2 |= DC_HPDx_INT_EN;
3153	}
3154	if (rdev->irq.hpd[2]) {
3155		DRM_DEBUG("si_irq_set: hpd 3\n");
3156		hpd3 |= DC_HPDx_INT_EN;
3157	}
3158	if (rdev->irq.hpd[3]) {
3159		DRM_DEBUG("si_irq_set: hpd 4\n");
3160		hpd4 |= DC_HPDx_INT_EN;
3161	}
3162	if (rdev->irq.hpd[4]) {
3163		DRM_DEBUG("si_irq_set: hpd 5\n");
3164		hpd5 |= DC_HPDx_INT_EN;
3165	}
3166	if (rdev->irq.hpd[5]) {
3167		DRM_DEBUG("si_irq_set: hpd 6\n");
3168		hpd6 |= DC_HPDx_INT_EN;
3169	}
3170	if (rdev->irq.gui_idle) {
3171		DRM_DEBUG("gui idle\n");
3172		grbm_int_cntl |= GUI_IDLE_INT_ENABLE;
3173	}
3174
3175	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
3176	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3177	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3178
3179	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3180
3181	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
3182	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
3183	if (rdev->num_crtc >= 4) {
3184		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
3185		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
3186	}
3187	if (rdev->num_crtc >= 6) {
3188		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
3189		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
3190	}
3191
3192	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
3193	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
3194	if (rdev->num_crtc >= 4) {
3195		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
3196		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
3197	}
3198	if (rdev->num_crtc >= 6) {
3199		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
3200		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
3201	}
3202
3203	WREG32(DC_HPD1_INT_CONTROL, hpd1);
3204	WREG32(DC_HPD2_INT_CONTROL, hpd2);
3205	WREG32(DC_HPD3_INT_CONTROL, hpd3);
3206	WREG32(DC_HPD4_INT_CONTROL, hpd4);
3207	WREG32(DC_HPD5_INT_CONTROL, hpd5);
3208	WREG32(DC_HPD6_INT_CONTROL, hpd6);
3209
3210	return 0;
3211}
3212
3213static inline void si_irq_ack(struct radeon_device *rdev)
3214{
3215	u32 tmp;
3216
3217	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
3218	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
3219	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
3220	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
3221	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
3222	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
3223	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
3224	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
3225	if (rdev->num_crtc >= 4) {
3226		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
3227		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
3228	}
3229	if (rdev->num_crtc >= 6) {
3230		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
3231		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
3232	}
3233
3234	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
3235		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3236	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
3237		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3238	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
3239		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
3240	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
3241		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
3242	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
3243		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
3244	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
3245		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
3246
3247	if (rdev->num_crtc >= 4) {
3248		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
3249			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3250		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
3251			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3252		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
3253			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
3254		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
3255			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
3256		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
3257			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
3258		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
3259			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
3260	}
3261
3262	if (rdev->num_crtc >= 6) {
3263		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
3264			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3265		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
3266			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
3267		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
3268			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
3269		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
3270			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
3271		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
3272			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
3273		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
3274			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
3275	}
3276
3277	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3278		tmp = RREG32(DC_HPD1_INT_CONTROL);
3279		tmp |= DC_HPDx_INT_ACK;
3280		WREG32(DC_HPD1_INT_CONTROL, tmp);
3281	}
3282	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3283		tmp = RREG32(DC_HPD2_INT_CONTROL);
3284		tmp |= DC_HPDx_INT_ACK;
3285		WREG32(DC_HPD2_INT_CONTROL, tmp);
3286	}
3287	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3288		tmp = RREG32(DC_HPD3_INT_CONTROL);
3289		tmp |= DC_HPDx_INT_ACK;
3290		WREG32(DC_HPD3_INT_CONTROL, tmp);
3291	}
3292	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3293		tmp = RREG32(DC_HPD4_INT_CONTROL);
3294		tmp |= DC_HPDx_INT_ACK;
3295		WREG32(DC_HPD4_INT_CONTROL, tmp);
3296	}
3297	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3298		tmp = RREG32(DC_HPD5_INT_CONTROL);
3299		tmp |= DC_HPDx_INT_ACK;
3300		WREG32(DC_HPD5_INT_CONTROL, tmp);
3301	}
3302	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3303		tmp = RREG32(DC_HPD6_INT_CONTROL);
3304		tmp |= DC_HPDx_INT_ACK;
3305		WREG32(DC_HPD6_INT_CONTROL, tmp);
3306	}
3307}
3308
3309static void si_irq_disable(struct radeon_device *rdev)
3310{
3311	si_disable_interrupts(rdev);
3312	/* Wait and acknowledge irq */
3313	mdelay(1);
3314	si_irq_ack(rdev);
3315	si_disable_interrupt_state(rdev);
3316}
3317
3318static void si_irq_suspend(struct radeon_device *rdev)
3319{
3320	si_irq_disable(rdev);
3321	si_rlc_stop(rdev);
3322}
3323
3324static void si_irq_fini(struct radeon_device *rdev)
3325{
3326	si_irq_suspend(rdev);
3327	r600_ih_ring_fini(rdev);
3328}
3329
3330static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
3331{
3332	u32 wptr, tmp;
3333
3334	if (rdev->wb.enabled)
3335		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
3336	else
3337		wptr = RREG32(IH_RB_WPTR);
3338
3339	if (wptr & RB_OVERFLOW) {
3340		/* When a ring buffer overflow happens, start parsing interrupts
3341		 * from the last vector that was not overwritten (wptr + 16).
3342		 * Hopefully this allows us to catch up.
3343		 */
3344		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
3345			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
3346		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
3347		tmp = RREG32(IH_RB_CNTL);
3348		tmp |= IH_WPTR_OVERFLOW_CLEAR;
3349		WREG32(IH_RB_CNTL, tmp);
3350	}
3351	return (wptr & rdev->ih.ptr_mask);
3352}
3353
3354/*        SI IV Ring
3355 * Each IV ring entry is 128 bits:
3356 * [7:0]    - interrupt source id
3357 * [31:8]   - reserved
3358 * [59:32]  - interrupt source data
3359 * [63:60]  - reserved
3360 * [71:64]  - RINGID
3361 * [79:72]  - VMID
3362 * [127:80] - reserved
3363 */
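
/* Illustrative sketch only (not part of the driver): one way to pull the
 * fields described above out of a single 128-bit IV entry.  The struct and
 * helper names here are hypothetical; si_irq_process() below performs the
 * same extraction inline on rdev->ih.ring, which holds little-endian dwords.
 */
struct si_iv_entry {
	u32 src_id;   /* [7:0]   - interrupt source id   */
	u32 src_data; /* [59:32] - interrupt source data */
	u32 ring_id;  /* [71:64] - RINGID                */
	u32 vm_id;    /* [79:72] - VMID                  */
};

static inline void si_decode_iv_entry(const volatile u32 *ring, u32 ring_index,
				       struct si_iv_entry *iv)
{
	/* ring_index counts dwords; each IV entry spans four dwords */
	iv->src_id   = le32_to_cpu(ring[ring_index + 0]) & 0xff;
	iv->src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff;
	iv->ring_id  = le32_to_cpu(ring[ring_index + 2]) & 0xff;
	iv->vm_id    = (le32_to_cpu(ring[ring_index + 2]) >> 8) & 0xff;
}
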
3364int si_irq_process(struct radeon_device *rdev)
3365{
3366	u32 wptr;
3367	u32 rptr;
3368	u32 src_id, src_data, ring_id;
3369	u32 ring_index;
3370	unsigned long flags;
3371	bool queue_hotplug = false;
3372
3373	if (!rdev->ih.enabled || rdev->shutdown)
3374		return IRQ_NONE;
3375
3376	wptr = si_get_ih_wptr(rdev);
3377	rptr = rdev->ih.rptr;
3378	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
3379
3380	spin_lock_irqsave(&rdev->ih.lock, flags);
3381	if (rptr == wptr) {
3382		spin_unlock_irqrestore(&rdev->ih.lock, flags);
3383		return IRQ_NONE;
3384	}
3385restart_ih:
3386	/* Order reading of wptr vs. reading of IH ring data */
3387	rmb();
3388
3389	/* display interrupts */
3390	si_irq_ack(rdev);
3391
3392	rdev->ih.wptr = wptr;
3393	while (rptr != wptr) {
3394		/* wptr/rptr are in bytes! */
3395		ring_index = rptr / 4;
3396		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
3397		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
3398		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
3399
3400		switch (src_id) {
3401		case 1: /* D1 vblank/vline */
3402			switch (src_data) {
3403			case 0: /* D1 vblank */
3404				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
3405					if (rdev->irq.crtc_vblank_int[0]) {
3406						drm_handle_vblank(rdev->ddev, 0);
3407						rdev->pm.vblank_sync = true;
3408						wake_up(&rdev->irq.vblank_queue);
3409					}
3410					if (rdev->irq.pflip[0])
3411						radeon_crtc_handle_flip(rdev, 0);
3412					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
3413					DRM_DEBUG("IH: D1 vblank\n");
3414				}
3415				break;
3416			case 1: /* D1 vline */
3417				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
3418					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
3419					DRM_DEBUG("IH: D1 vline\n");
3420				}
3421				break;
3422			default:
3423				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3424				break;
3425			}
3426			break;
3427		case 2: /* D2 vblank/vline */
3428			switch (src_data) {
3429			case 0: /* D2 vblank */
3430				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
3431					if (rdev->irq.crtc_vblank_int[1]) {
3432						drm_handle_vblank(rdev->ddev, 1);
3433						rdev->pm.vblank_sync = true;
3434						wake_up(&rdev->irq.vblank_queue);
3435					}
3436					if (rdev->irq.pflip[1])
3437						radeon_crtc_handle_flip(rdev, 1);
3438					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
3439					DRM_DEBUG("IH: D2 vblank\n");
3440				}
3441				break;
3442			case 1: /* D2 vline */
3443				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
3444					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
3445					DRM_DEBUG("IH: D2 vline\n");
3446				}
3447				break;
3448			default:
3449				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3450				break;
3451			}
3452			break;
3453		case 3: /* D3 vblank/vline */
3454			switch (src_data) {
3455			case 0: /* D3 vblank */
3456				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
3457					if (rdev->irq.crtc_vblank_int[2]) {
3458						drm_handle_vblank(rdev->ddev, 2);
3459						rdev->pm.vblank_sync = true;
3460						wake_up(&rdev->irq.vblank_queue);
3461					}
3462					if (rdev->irq.pflip[2])
3463						radeon_crtc_handle_flip(rdev, 2);
3464					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
3465					DRM_DEBUG("IH: D3 vblank\n");
3466				}
3467				break;
3468			case 1: /* D3 vline */
3469				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
3470					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
3471					DRM_DEBUG("IH: D3 vline\n");
3472				}
3473				break;
3474			default:
3475				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3476				break;
3477			}
3478			break;
3479		case 4: /* D4 vblank/vline */
3480			switch (src_data) {
3481			case 0: /* D4 vblank */
3482				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
3483					if (rdev->irq.crtc_vblank_int[3]) {
3484						drm_handle_vblank(rdev->ddev, 3);
3485						rdev->pm.vblank_sync = true;
3486						wake_up(&rdev->irq.vblank_queue);
3487					}
3488					if (rdev->irq.pflip[3])
3489						radeon_crtc_handle_flip(rdev, 3);
3490					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
3491					DRM_DEBUG("IH: D4 vblank\n");
3492				}
3493				break;
3494			case 1: /* D4 vline */
3495				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
3496					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
3497					DRM_DEBUG("IH: D4 vline\n");
3498				}
3499				break;
3500			default:
3501				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3502				break;
3503			}
3504			break;
3505		case 5: /* D5 vblank/vline */
3506			switch (src_data) {
3507			case 0: /* D5 vblank */
3508				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
3509					if (rdev->irq.crtc_vblank_int[4]) {
3510						drm_handle_vblank(rdev->ddev, 4);
3511						rdev->pm.vblank_sync = true;
3512						wake_up(&rdev->irq.vblank_queue);
3513					}
3514					if (rdev->irq.pflip[4])
3515						radeon_crtc_handle_flip(rdev, 4);
3516					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
3517					DRM_DEBUG("IH: D5 vblank\n");
3518				}
3519				break;
3520			case 1: /* D5 vline */
3521				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
3522					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
3523					DRM_DEBUG("IH: D5 vline\n");
3524				}
3525				break;
3526			default:
3527				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3528				break;
3529			}
3530			break;
3531		case 6: /* D6 vblank/vline */
3532			switch (src_data) {
3533			case 0: /* D6 vblank */
3534				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
3535					if (rdev->irq.crtc_vblank_int[5]) {
3536						drm_handle_vblank(rdev->ddev, 5);
3537						rdev->pm.vblank_sync = true;
3538						wake_up(&rdev->irq.vblank_queue);
3539					}
3540					if (rdev->irq.pflip[5])
3541						radeon_crtc_handle_flip(rdev, 5);
3542					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
3543					DRM_DEBUG("IH: D6 vblank\n");
3544				}
3545				break;
3546			case 1: /* D6 vline */
3547				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
3548					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
3549					DRM_DEBUG("IH: D6 vline\n");
3550				}
3551				break;
3552			default:
3553				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3554				break;
3555			}
3556			break;
3557		case 42: /* HPD hotplug */
3558			switch (src_data) {
3559			case 0:
3560				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
3561					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
3562					queue_hotplug = true;
3563					DRM_DEBUG("IH: HPD1\n");
3564				}
3565				break;
3566			case 1:
3567				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
3568					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
3569					queue_hotplug = true;
3570					DRM_DEBUG("IH: HPD2\n");
3571				}
3572				break;
3573			case 2:
3574				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
3575					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
3576					queue_hotplug = true;
3577					DRM_DEBUG("IH: HPD3\n");
3578				}
3579				break;
3580			case 3:
3581				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
3582					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
3583					queue_hotplug = true;
3584					DRM_DEBUG("IH: HPD4\n");
3585				}
3586				break;
3587			case 4:
3588				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
3589					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
3590					queue_hotplug = true;
3591					DRM_DEBUG("IH: HPD5\n");
3592				}
3593				break;
3594			case 5:
3595				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
3596					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
3597					queue_hotplug = true;
3598					DRM_DEBUG("IH: HPD6\n");
3599				}
3600				break;
3601			default:
3602				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3603				break;
3604			}
3605			break;
3606		case 176: /* RINGID0 CP_INT */
3607			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3608			break;
3609		case 177: /* RINGID1 CP_INT */
3610			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3611			break;
3612		case 178: /* RINGID2 CP_INT */
3613			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3614			break;
3615		case 181: /* CP EOP event */
3616			DRM_DEBUG("IH: CP EOP\n");
3617			switch (ring_id) {
3618			case 0:
3619				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3620				break;
3621			case 1:
3622				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3623				break;
3624			case 2:
3625				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3626				break;
3627			}
3628			break;
3629		case 233: /* GUI IDLE */
3630			DRM_DEBUG("IH: GUI idle\n");
3631			rdev->pm.gui_idle = true;
3632			wake_up(&rdev->irq.idle_queue);
3633			break;
3634		default:
3635			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3636			break;
3637		}
3638
3639		/* wptr/rptr are in bytes! */
3640		rptr += 16;
3641		rptr &= rdev->ih.ptr_mask;
3642	}
3643	/* make sure wptr hasn't changed while processing */
3644	wptr = si_get_ih_wptr(rdev);
3645	if (wptr != rdev->ih.wptr)
3646		goto restart_ih;
3647	if (queue_hotplug)
3648		schedule_work(&rdev->hotplug_work);
3649	rdev->ih.rptr = rptr;
3650	WREG32(IH_RB_RPTR, rdev->ih.rptr);
3651	spin_unlock_irqrestore(&rdev->ih.lock, flags);
3652	return IRQ_HANDLED;
3653}
3654
3655/*
3656 * startup/shutdown callbacks
3657 */
3658static int si_startup(struct radeon_device *rdev)
3659{
3660	struct radeon_ring *ring;
3661	int r;
3662
3663	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
3664	    !rdev->rlc_fw || !rdev->mc_fw) {
3665		r = si_init_microcode(rdev);
3666		if (r) {
3667			DRM_ERROR("Failed to load firmware!\n");
3668			return r;
3669		}
3670	}
3671
3672	r = si_mc_load_microcode(rdev);
3673	if (r) {
3674		DRM_ERROR("Failed to load MC firmware!\n");
3675		return r;
3676	}
3677
3678	r = r600_vram_scratch_init(rdev);
3679	if (r)
3680		return r;
3681
3682	si_mc_program(rdev);
3683	r = si_pcie_gart_enable(rdev);
3684	if (r)
3685		return r;
3686	si_gpu_init(rdev);
3687
3688#if 0
3689	r = evergreen_blit_init(rdev);
3690	if (r) {
3691		r600_blit_fini(rdev);
3692		rdev->asic->copy = NULL;
3693		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
3694	}
3695#endif
3696	/* allocate rlc buffers */
3697	r = si_rlc_init(rdev);
3698	if (r) {
3699		DRM_ERROR("Failed to init rlc BOs!\n");
3700		return r;
3701	}
3702
3703	/* allocate wb buffer */
3704	r = radeon_wb_init(rdev);
3705	if (r)
3706		return r;
3707
3708	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
3709	if (r) {
3710		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3711		return r;
3712	}
3713
3714	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
3715	if (r) {
3716		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3717		return r;
3718	}
3719
3720	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
3721	if (r) {
3722		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
3723		return r;
3724	}
3725
3726	/* Enable IRQ */
3727	r = si_irq_init(rdev);
3728	if (r) {
3729		DRM_ERROR("radeon: IH init failed (%d).\n", r);
3730		radeon_irq_kms_fini(rdev);
3731		return r;
3732	}
3733	si_irq_set(rdev);
3734
3735	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3736	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
3737			     CP_RB0_RPTR, CP_RB0_WPTR,
3738			     0, 0xfffff, RADEON_CP_PACKET2);
3739	if (r)
3740		return r;
3741
3742	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3743	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
3744			     CP_RB1_RPTR, CP_RB1_WPTR,
3745			     0, 0xfffff, RADEON_CP_PACKET2);
3746	if (r)
3747		return r;
3748
3749	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3750	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
3751			     CP_RB2_RPTR, CP_RB2_WPTR,
3752			     0, 0xfffff, RADEON_CP_PACKET2);
3753	if (r)
3754		return r;
3755
3756	r = si_cp_load_microcode(rdev);
3757	if (r)
3758		return r;
3759	r = si_cp_resume(rdev);
3760	if (r)
3761		return r;
3762
3763	r = radeon_ib_pool_start(rdev);
3764	if (r)
3765		return r;
3766
3767	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3768	if (r) {
3769		DRM_ERROR("radeon: failed testing IB (%d) on CP ring 0\n", r);
3770		rdev->accel_working = false;
3771		return r;
3772	}
3773
3774	r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3775	if (r) {
3776		DRM_ERROR("radeon: failed testing IB (%d) on CP ring 1\n", r);
3777		rdev->accel_working = false;
3778		return r;
3779	}
3780
3781	r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3782	if (r) {
3783		DRM_ERROR("radeon: failed testing IB (%d) on CP ring 2\n", r);
3784		rdev->accel_working = false;
3785		return r;
3786	}
3787
3788	r = radeon_vm_manager_start(rdev);
3789	if (r)
3790		return r;
3791
3792	return 0;
3793}
3794
3795int si_resume(struct radeon_device *rdev)
3796{
3797	int r;
3798
3799	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500 hw,
3800	 * posting performs the tasks necessary to bring the GPU back into
3801	 * good shape.
3802	 */
3803	/* post card */
3804	atom_asic_init(rdev->mode_info.atom_context);
3805
3806	rdev->accel_working = true;
3807	r = si_startup(rdev);
3808	if (r) {
3809		DRM_ERROR("si startup failed on resume\n");
3810		rdev->accel_working = false;
3811		return r;
3812	}
3813
3814	return r;
3815
3816}
3817
3818int si_suspend(struct radeon_device *rdev)
3819{
3820	/* FIXME: we should wait for ring to be empty */
3821	radeon_ib_pool_suspend(rdev);
3822	radeon_vm_manager_suspend(rdev);
3823#if 0
3824	r600_blit_suspend(rdev);
3825#endif
3826	si_cp_enable(rdev, false);
3827	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3828	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3829	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3830	si_irq_suspend(rdev);
3831	radeon_wb_disable(rdev);
3832	si_pcie_gart_disable(rdev);
3833	return 0;
3834}
3835
3836/* The plan is to move initialization into this function and use
3837 * helper functions so that radeon_device_init does little more
3838 * than call the ASIC-specific functions. This should also allow
3839 * a number of callback functions, such as vram_info, to be
3840 * removed.
3841 */
3842int si_init(struct radeon_device *rdev)
3843{
3844	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3845	int r;
3846
3847	/* Read BIOS */
3848	if (!radeon_get_bios(rdev)) {
3849		if (ASIC_IS_AVIVO(rdev))
3850			return -EINVAL;
3851	}
3852	/* Must be an ATOMBIOS */
3853	if (!rdev->is_atom_bios) {
3854		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
3855		return -EINVAL;
3856	}
3857	r = radeon_atombios_init(rdev);
3858	if (r)
3859		return r;
3860
3861	/* Post card if necessary */
3862	if (!radeon_card_posted(rdev)) {
3863		if (!rdev->bios) {
3864			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
3865			return -EINVAL;
3866		}
3867		DRM_INFO("GPU not posted. posting now...\n");
3868		atom_asic_init(rdev->mode_info.atom_context);
3869	}
3870	/* Initialize scratch registers */
3871	si_scratch_init(rdev);
3872	/* Initialize surface registers */
3873	radeon_surface_init(rdev);
3874	/* Initialize clocks */
3875	radeon_get_clock_info(rdev->ddev);
3876
3877	/* Fence driver */
3878	r = radeon_fence_driver_init(rdev);
3879	if (r)
3880		return r;
3881
3882	/* initialize memory controller */
3883	r = si_mc_init(rdev);
3884	if (r)
3885		return r;
3886	/* Memory manager */
3887	r = radeon_bo_init(rdev);
3888	if (r)
3889		return r;
3890
3891	r = radeon_irq_kms_init(rdev);
3892	if (r)
3893		return r;
3894
3895	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3896	ring->ring_obj = NULL;
3897	r600_ring_init(rdev, ring, 1024 * 1024);
3898
3899	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3900	ring->ring_obj = NULL;
3901	r600_ring_init(rdev, ring, 1024 * 1024);
3902
3903	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3904	ring->ring_obj = NULL;
3905	r600_ring_init(rdev, ring, 1024 * 1024);
3906
3907	rdev->ih.ring_obj = NULL;
3908	r600_ih_ring_init(rdev, 64 * 1024);
3909
3910	r = r600_pcie_gart_init(rdev);
3911	if (r)
3912		return r;
3913
3914	r = radeon_ib_pool_init(rdev);
3915	rdev->accel_working = true;
3916	if (r) {
3917		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
3918		rdev->accel_working = false;
3919	}
3920	r = radeon_vm_manager_init(rdev);
3921	if (r) {
3922		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
3923	}
3924
3925	r = si_startup(rdev);
3926	if (r) {
3927		dev_err(rdev->dev, "disabling GPU acceleration\n");
3928		si_cp_fini(rdev);
3929		si_irq_fini(rdev);
3930		si_rlc_fini(rdev);
3931		radeon_wb_fini(rdev);
3932		r100_ib_fini(rdev);
3933		radeon_vm_manager_fini(rdev);
3934		radeon_irq_kms_fini(rdev);
3935		si_pcie_gart_fini(rdev);
3936		rdev->accel_working = false;
3937	}
3938
3939	/* Don't start up if the MC ucode is missing.
3940	 * The default clocks and voltages before the MC ucode
3941	 * is loaded are not sufficient for advanced operations.
3942	 */
3943	if (!rdev->mc_fw) {
3944		DRM_ERROR("radeon: MC ucode required for NI+.\n");
3945		return -EINVAL;
3946	}
3947
3948	return 0;
3949}
3950
3951void si_fini(struct radeon_device *rdev)
3952{
3953#if 0
3954	r600_blit_fini(rdev);
3955#endif
3956	si_cp_fini(rdev);
3957	si_irq_fini(rdev);
3958	si_rlc_fini(rdev);
3959	radeon_wb_fini(rdev);
3960	radeon_vm_manager_fini(rdev);
3961	r100_ib_fini(rdev);
3962	radeon_irq_kms_fini(rdev);
3963	si_pcie_gart_fini(rdev);
3964	r600_vram_scratch_fini(rdev);
3965	radeon_gem_fini(rdev);
3966	radeon_fence_driver_fini(rdev);
3967	radeon_bo_fini(rdev);
3968	radeon_atombios_fini(rdev);
3969	kfree(rdev->bios);
3970	rdev->bios = NULL;
3971}
3972