   1/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   2 *
   3 * This program is free software; you can redistribute it and/or modify
   4 * it under the terms of the GNU General Public License version 2 and
   5 * only version 2 as published by the Free Software Foundation.
   6 *
   7 * This program is distributed in the hope that it will be useful,
   8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
   9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10 * GNU General Public License for more details.
  11 *
  12 */
  13
  14#include <linux/types.h>
  15#include <linux/cpumask.h>
  16#include <linux/qcom_scm.h>
  17#include <linux/dma-mapping.h>
  18#include <linux/of_address.h>
  19#include <linux/soc/qcom/mdt_loader.h>
  20#include <linux/pm_opp.h>
  21#include <linux/nvmem-consumer.h>
  22#include "msm_gem.h"
  23#include "msm_mmu.h"
  24#include "a5xx_gpu.h"
  25
  26extern bool hang_debug;
  27static void a5xx_dump(struct msm_gpu *gpu);
  28
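     /*
      * Peripheral Authentication Service (PAS) id used by the SCM calls in
      * this file to identify the GPU zap-shader image to the secure world.
      */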
  29#define GPU_PAS_ID 13
  30
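     /*
      * Load the zap shader MDT image into the reserved memory region named by
      * the GPU's "zap-shader" child node and hand it to the secure world for
      * authentication.  An illustrative (not authoritative) device tree layout
      * matching the lookups below, with a hypothetical reserved-memory label:
      *
      *	zap-shader {
      *		memory-region = <&zap_shader_region>;
      *	};
      */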
  31static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
  32{
  33	struct device *dev = &gpu->pdev->dev;
  34	const struct firmware *fw;
  35	struct device_node *np;
  36	struct resource r;
  37	phys_addr_t mem_phys;
  38	ssize_t mem_size;
  39	void *mem_region = NULL;
  40	int ret;
  41
  42	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
  43		return -EINVAL;
  44
  45	np = of_get_child_by_name(dev->of_node, "zap-shader");
  46	if (!np)
  47		return -ENODEV;
  48
  49	np = of_parse_phandle(np, "memory-region", 0);
  50	if (!np)
  51		return -EINVAL;
  52
  53	ret = of_address_to_resource(np, 0, &r);
  54	if (ret)
  55		return ret;
  56
  57	mem_phys = r.start;
  58	mem_size = resource_size(&r);
  59
  60	/* Request the MDT file for the firmware */
  61	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
  62	if (IS_ERR(fw)) {
  63		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
  64		return PTR_ERR(fw);
  65	}
  66
  67	/* Figure out how much memory we need */
  68	mem_size = qcom_mdt_get_size(fw);
  69	if (mem_size < 0) {
  70		ret = mem_size;
  71		goto out;
  72	}
  73
  74	/* Allocate memory for the firmware image */
   75	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
  76	if (!mem_region) {
  77		ret = -ENOMEM;
  78		goto out;
  79	}
  80
  81	/*
  82	 * Load the rest of the MDT
  83	 *
  84	 * Note that we could be dealing with two different paths, since
   85	 * with upstream linux-firmware it would be in a qcom/ subdir.
   86	 * adreno_request_fw() handles this, but qcom_mdt_load() does
   87	 * not.  But since we have already gone through adreno_request_fw()
  88	 * we know which of the two cases it is:
  89	 */
  90	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
  91		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
  92				mem_region, mem_phys, mem_size, NULL);
  93	} else {
  94		char newname[strlen("qcom/") + strlen(fwname) + 1];
  95
  96		sprintf(newname, "qcom/%s", fwname);
  97
  98		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
  99				mem_region, mem_phys, mem_size, NULL);
 100	}
 101	if (ret)
 102		goto out;
 103
 104	/* Send the image to the secure world */
 105	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
 106	if (ret)
 107		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
 108
 109out:
 110	if (mem_region)
 111		memunmap(mem_region);
 112
 113	release_firmware(fw);
 114
 115	return ret;
 116}
 117
 118static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 119{
 120	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 121	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 122	uint32_t wptr;
 123	unsigned long flags;
 124
 125	spin_lock_irqsave(&ring->lock, flags);
 126
 127	/* Copy the shadow to the actual register */
 128	ring->cur = ring->next;
 129
 130	/* Make sure to wrap wptr if we need to */
 131	wptr = get_wptr(ring);
 132
 133	spin_unlock_irqrestore(&ring->lock, flags);
 134
 135	/* Make sure everything is posted before making a decision */
 136	mb();
 137
 138	/* Update HW if this is the current ring and we are not in preempt */
 139	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
 140		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 141}
 142
 143static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 144	struct msm_file_private *ctx)
 145{
 146	struct msm_drm_private *priv = gpu->dev->dev_private;
 147	struct msm_ringbuffer *ring = submit->ring;
 148	struct msm_gem_object *obj;
 149	uint32_t *ptr, dwords;
 150	unsigned int i;
 151
 152	for (i = 0; i < submit->nr_cmds; i++) {
 153		switch (submit->cmd[i].type) {
 154		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 155			break;
 156		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 157			if (priv->lastctx == ctx)
 158				break;
 159		case MSM_SUBMIT_CMD_BUF:
 160			/* copy commands into RB: */
 161			obj = submit->bos[submit->cmd[i].idx].obj;
 162			dwords = submit->cmd[i].size;
 163
 164			ptr = msm_gem_get_vaddr(&obj->base);
 165
 166			/* _get_vaddr() shouldn't fail at this point,
 167			 * since we've already mapped it once in
 168			 * submit_reloc()
 169			 */
 170			if (WARN_ON(!ptr))
 171				return;
 172
 173			for (i = 0; i < dwords; i++) {
 174				/* normally the OUT_PKTn() would wait
 175				 * for space for the packet.  But since
 176				 * we just OUT_RING() the whole thing,
 177				 * need to call adreno_wait_ring()
 178				 * ourself:
 179				 */
 180				adreno_wait_ring(ring, 1);
 181				OUT_RING(ring, ptr[i]);
 182			}
 183
 184			msm_gem_put_vaddr(&obj->base);
 185
 186			break;
 187		}
 188	}
 189
 190	a5xx_flush(gpu, ring);
 191	a5xx_preempt_trigger(gpu);
 192
  193	/* We might not have a cmd from userspace that triggers an event
  194	 * to tell us the submit has completed, so update the fence and
  195	 * retire manually:
  196	 */
 197	a5xx_idle(gpu, ring);
 198	ring->memptrs->fence = submit->seqno;
 199	msm_gpu_retire(gpu);
 200}
 201
 202static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 203	struct msm_file_private *ctx)
 204{
 205	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 206	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 207	struct msm_drm_private *priv = gpu->dev->dev_private;
 208	struct msm_ringbuffer *ring = submit->ring;
 209	unsigned int i, ibs = 0;
 210
 211	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 212		priv->lastctx = NULL;
 213		a5xx_submit_in_rb(gpu, submit, ctx);
 214		return;
 215	}
 216
 217	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 218	OUT_RING(ring, 0x02);
 219
 220	/* Turn off protected mode to write to special registers */
 221	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 222	OUT_RING(ring, 0);
 223
 224	/* Set the save preemption record for the ring/command */
 225	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 226	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 227	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 228
 229	/* Turn back on protected mode */
 230	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 231	OUT_RING(ring, 1);
 232
  233	/* Enable local preemption for fine-grained preemption */
 234	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 235	OUT_RING(ring, 0x02);
 236
 237	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 238	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 239	OUT_RING(ring, 0x02);
 240
 241	/* Submit the commands */
 242	for (i = 0; i < submit->nr_cmds; i++) {
 243		switch (submit->cmd[i].type) {
 244		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 245			break;
 246		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 247			if (priv->lastctx == ctx)
 248				break;
 249		case MSM_SUBMIT_CMD_BUF:
 250			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 251			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 252			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 253			OUT_RING(ring, submit->cmd[i].size);
 254			ibs++;
 255			break;
 256		}
 257	}
 258
 259	/*
 260	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
 261	 * are done rendering - otherwise a lucky preemption would start
 262	 * replaying from the last checkpoint
 263	 */
 264	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
 265	OUT_RING(ring, 0);
 266	OUT_RING(ring, 0);
 267	OUT_RING(ring, 0);
 268	OUT_RING(ring, 0);
 269	OUT_RING(ring, 0);
 270
 271	/* Turn off IB level preemptions */
 272	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 273	OUT_RING(ring, 0x01);
 274
 275	/* Write the fence to the scratch register */
 276	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
 277	OUT_RING(ring, submit->seqno);
 278
 279	/*
  280	 * Execute a CACHE_FLUSH_TS event. This ensures that the timestamp
  281	 * is written to memory and then triggers the interrupt
 282	 */
 283	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 284	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
 285	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 286	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 287	OUT_RING(ring, submit->seqno);
 288
 289	/* Yield the floor on command completion */
 290	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 291	/*
  292	 * If dword[2:1] are non-zero, they specify an address for the CP to
 293	 * write the value of dword[3] to on preemption complete. Write 0 to
 294	 * skip the write
 295	 */
 296	OUT_RING(ring, 0x00);
 297	OUT_RING(ring, 0x00);
 298	/* Data value - not used if the address above is 0 */
 299	OUT_RING(ring, 0x01);
 300	/* Set bit 0 to trigger an interrupt on preempt complete */
 301	OUT_RING(ring, 0x01);
 302
 303	a5xx_flush(gpu, ring);
 304
 305	/* Check to see if we need to start preemption */
 306	a5xx_preempt_trigger(gpu);
 307}
 308
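     /*
      * Hardware clock gating register/value pairs, written in one pass by
      * a5xx_set_hwcg() below (or zeroed when clock gating is disabled).
      */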
 309static const struct {
 310	u32 offset;
 311	u32 value;
 312} a5xx_hwcg[] = {
 313	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 314	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 315	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
 316	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
 317	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 318	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 319	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
 320	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
 321	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 322	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 323	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
 324	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
 325	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 326	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 327	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 328	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 329	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 330	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 331	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
 332	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
 333	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 334	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 335	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 336	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 337	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 338	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 339	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
 340	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
 341	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 342	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 343	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 344	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 345	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 346	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 347	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 348	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 349	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 350	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 351	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
 352	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
 353	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 354	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 355	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 356	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 357	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 358	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 359	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 360	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 361	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 362	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 363	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
 364	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
 365	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 366	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 367	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 368	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 369	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 370	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 371	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 372	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 373	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 374	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 375	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 376	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 377	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
 378	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
 379	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 380	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 381	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
 382	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
 383	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 384	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 385	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 386	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 387	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
 388	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
 389	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 390	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 391	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 392	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
 393	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
 394	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 395	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 396	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 397	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 398	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 399	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 400	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 401	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 402	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 403	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 404	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 405};
 406
 407void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 408{
 409	unsigned int i;
 410
 411	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 412		gpu_write(gpu, a5xx_hwcg[i].offset,
 413			state ? a5xx_hwcg[i].value : 0);
 414
 415	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 416	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 417}
 418
 419static int a5xx_me_init(struct msm_gpu *gpu)
 420{
 421	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 422	struct msm_ringbuffer *ring = gpu->rb[0];
 423
 424	OUT_PKT7(ring, CP_ME_INIT, 8);
 425
 426	OUT_RING(ring, 0x0000002F);
 427
 428	/* Enable multiple hardware contexts */
 429	OUT_RING(ring, 0x00000003);
 430
 431	/* Enable error detection */
 432	OUT_RING(ring, 0x20000000);
 433
 434	/* Don't enable header dump */
 435	OUT_RING(ring, 0x00000000);
 436	OUT_RING(ring, 0x00000000);
 437
 438	/* Specify workarounds for various microcode issues */
 439	if (adreno_is_a530(adreno_gpu)) {
 440		/* Workaround for token end syncs
 441		 * Force a WFI after every direct-render 3D mode draw and every
 442		 * 2D mode 3 draw
 443		 */
 444		OUT_RING(ring, 0x0000000B);
 445	} else {
 446		/* No workarounds enabled */
 447		OUT_RING(ring, 0x00000000);
 448	}
 449
 450	OUT_RING(ring, 0x00000000);
 451	OUT_RING(ring, 0x00000000);
 452
 453	gpu->funcs->flush(gpu, ring);
 454	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 455}
 456
 457static int a5xx_preempt_start(struct msm_gpu *gpu)
 458{
 459	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 460	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 461	struct msm_ringbuffer *ring = gpu->rb[0];
 462
 463	if (gpu->nr_rings == 1)
 464		return 0;
 465
 466	/* Turn off protected mode to write to special registers */
 467	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 468	OUT_RING(ring, 0);
 469
 470	/* Set the save preemption record for the ring/command */
 471	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 472	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 473	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 474
 475	/* Turn back on protected mode */
 476	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 477	OUT_RING(ring, 1);
 478
 479	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 480	OUT_RING(ring, 0x00);
 481
 482	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 483	OUT_RING(ring, 0x01);
 484
 485	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 486	OUT_RING(ring, 0x01);
 487
 488	/* Yield the floor on command completion */
 489	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 490	OUT_RING(ring, 0x00);
 491	OUT_RING(ring, 0x00);
 492	OUT_RING(ring, 0x01);
 493	OUT_RING(ring, 0x01);
 494
 495	gpu->funcs->flush(gpu, ring);
 496
 497	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 498}
 499
 500static int a5xx_ucode_init(struct msm_gpu *gpu)
 501{
 502	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 503	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 504	int ret;
 505
 506	if (!a5xx_gpu->pm4_bo) {
 507		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 508			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
 509
 510		if (IS_ERR(a5xx_gpu->pm4_bo)) {
 511			ret = PTR_ERR(a5xx_gpu->pm4_bo);
 512			a5xx_gpu->pm4_bo = NULL;
 513			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
 514				ret);
 515			return ret;
 516		}
 517	}
 518
 519	if (!a5xx_gpu->pfp_bo) {
 520		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
 521			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
 522
 523		if (IS_ERR(a5xx_gpu->pfp_bo)) {
 524			ret = PTR_ERR(a5xx_gpu->pfp_bo);
 525			a5xx_gpu->pfp_bo = NULL;
 526			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
 527				ret);
 528			return ret;
 529		}
 530	}
 531
 532	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
 533		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
 534
 535	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
 536		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
 537
 538	return 0;
 539}
 540
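     /* Remote state argument telling SCM to re-start an already-loaded zap shader */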
 541#define SCM_GPU_ZAP_SHADER_RESUME 0
 542
 543static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 544{
 545	int ret;
 546
 547	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
 548	if (ret)
 549		DRM_ERROR("%s: zap-shader resume failed: %d\n",
 550			gpu->name, ret);
 551
 552	return ret;
 553}
 554
 555static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 556{
 557	static bool loaded;
 558	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 559	struct platform_device *pdev = gpu->pdev;
 560	int ret;
 561
 562	/*
 563	 * If the zap shader is already loaded into memory we just need to kick
 564	 * the remote processor to reinitialize it
 565	 */
 566	if (loaded)
 567		return a5xx_zap_shader_resume(gpu);
 568
 569	/* We need SCM to be able to load the firmware */
 570	if (!qcom_scm_is_available()) {
 571		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
 572		return -EPROBE_DEFER;
 573	}
 574
  575	/* Each GPU has a target-specific zap shader firmware name to use */
 576	if (!adreno_gpu->info->zapfw) {
 577		DRM_DEV_ERROR(&pdev->dev,
 578			"Zap shader firmware file not specified for this target\n");
 579		return -ENODEV;
 580	}
 581
 582	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
 583
 584	loaded = !ret;
 585
 586	return ret;
 587}
 588
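     /* RBBM interrupts that the driver unmasks and handles in a5xx_irq() */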
 589#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 590	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 591	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 592	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 593	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 594	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
 595	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 596	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
 597	  A5XX_RBBM_INT_0_MASK_CP_SW | \
 598	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 599	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 600	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
 601
 602static int a5xx_hw_init(struct msm_gpu *gpu)
 603{
 604	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 605	int ret;
 606
 607	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 608
 609	/* Make all blocks contribute to the GPU BUSY perf counter */
 610	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
 611
 612	/* Enable RBBM error reporting bits */
 613	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 614
 615	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 616		/*
 617		 * Mask out the activity signals from RB1-3 to avoid false
 618		 * positives
 619		 */
 620
 621		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
 622			0xF0000000);
 623		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
 624			0xFFFFFFFF);
 625		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
 626			0xFFFFFFFF);
 627		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
 628			0xFFFFFFFF);
 629		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
 630			0xFFFFFFFF);
 631		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
 632			0xFFFFFFFF);
 633		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
 634			0xFFFFFFFF);
 635		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
 636			0xFFFFFFFF);
 637	}
 638
 639	/* Enable fault detection */
 640	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
 641		(1 << 30) | 0xFFFF);
 642
 643	/* Turn on performance counters */
 644	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 645
 646	/* Select CP0 to always count cycles */
 647	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 648
  649	/* Select countable 6 for RBBM perf counter 0 to get the busy status for devfreq */
 650	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
 651
 652	/* Increase VFD cache access so LRZ and other data gets evicted less */
 653	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 654
 655	/* Disable L2 bypass in the UCHE */
 656	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
 657	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
 658	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
 659	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
 660
  661	/* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem - 1) */
 662	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 663	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
 664	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
 665		0x00100000 + adreno_gpu->gmem - 1);
 666	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 667
 668	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 669	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
 670	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 671	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
 672
 673	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
 674
 675	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 676		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 677
 678	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
 679
 680	/* Enable USE_RETENTION_FLOPS */
 681	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
 682
 683	/* Enable ME/PFP split notification */
 684	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 685
 686	/* Enable HWCG */
 687	a5xx_set_hwcg(gpu, true);
 688
 689	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 690
 691	/* Set the highest bank bit */
 692	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 693	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
 694
 695	/* Protect registers from the CP */
 696	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
 697
 698	/* RBBM */
 699	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
 700	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
 701	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
 702	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
 703	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
 704	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
 705
 706	/* Content protect */
 707	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
 708		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 709			16));
 710	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
 711		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
 712
 713	/* CP */
 714	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
 715	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
 716	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
 717	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
 718
 719	/* RB */
 720	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
 721	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
 722
 723	/* VPC */
 724	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
 725	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
 726
 727	/* UCHE */
 728	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 729
 730	if (adreno_is_a530(adreno_gpu))
 731		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 732			ADRENO_PROTECT_RW(0x10000, 0x8000));
 733
 734	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
 735	/*
  736	 * Disable the trusted memory range - we don't actually support secure
 737	 * memory rendering at this point in time and we don't want to block off
 738	 * part of the virtual memory space.
 739	 */
 740	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 741		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 742	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 743
 744	ret = adreno_hw_init(gpu);
 745	if (ret)
 746		return ret;
 747
 748	a5xx_preempt_hw_init(gpu);
 749
 750	a5xx_gpmu_ucode_init(gpu);
 751
 752	ret = a5xx_ucode_init(gpu);
 753	if (ret)
 754		return ret;
 755
  756	/* Set the RBBM interrupt 0 mask to the interrupts handled in a5xx_irq() */
 757	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
 758
 759	/* Clear ME_HALT to start the micro engine */
 760	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
 761	ret = a5xx_me_init(gpu);
 762	if (ret)
 763		return ret;
 764
 765	ret = a5xx_power_init(gpu);
 766	if (ret)
 767		return ret;
 768
 769	/*
 770	 * Send a pipeline event stat to get misbehaving counters to start
 771	 * ticking correctly
 772	 */
 773	if (adreno_is_a530(adreno_gpu)) {
 774		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
 775		OUT_RING(gpu->rb[0], 0x0F);
 776
 777		gpu->funcs->flush(gpu, gpu->rb[0]);
 778		if (!a5xx_idle(gpu, gpu->rb[0]))
 779			return -EINVAL;
 780	}
 781
 782	/*
 783	 * Try to load a zap shader into the secure world. If successful
 784	 * we can use the CP to switch out of secure mode. If not then we
  785	 * have no recourse but to try to switch ourselves out manually. If we
 786	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
 787	 * be blocked and a permissions violation will soon follow.
 788	 */
 789	ret = a5xx_zap_shader_init(gpu);
 790	if (!ret) {
 791		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 792		OUT_RING(gpu->rb[0], 0x00000000);
 793
 794		gpu->funcs->flush(gpu, gpu->rb[0]);
 795		if (!a5xx_idle(gpu, gpu->rb[0]))
 796			return -EINVAL;
 797	} else {
 798		/* Print a warning so if we die, we know why */
 799		dev_warn_once(gpu->dev->dev,
 800			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 801		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 802	}
 803
 804	/* Last step - yield the ringbuffer */
 805	a5xx_preempt_start(gpu);
 806
 807	return 0;
 808}
 809
 810static void a5xx_recover(struct msm_gpu *gpu)
 811{
 812	int i;
 813
 814	adreno_dump_info(gpu);
 815
 816	for (i = 0; i < 8; i++) {
 817		printk("CP_SCRATCH_REG%d: %u\n", i,
 818			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
 819	}
 820
 821	if (hang_debug)
 822		a5xx_dump(gpu);
 823
 824	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
 825	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
 826	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
 827	adreno_recover(gpu);
 828}
 829
 830static void a5xx_destroy(struct msm_gpu *gpu)
 831{
 832	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 833	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 834
 835	DBG("%s", gpu->name);
 836
 837	a5xx_preempt_fini(gpu);
 838
 839	if (a5xx_gpu->pm4_bo) {
 840		if (a5xx_gpu->pm4_iova)
 841			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
 842		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
 843	}
 844
 845	if (a5xx_gpu->pfp_bo) {
 846		if (a5xx_gpu->pfp_iova)
 847			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
 848		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
 849	}
 850
 851	if (a5xx_gpu->gpmu_bo) {
 852		if (a5xx_gpu->gpmu_iova)
 853			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
 854		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
 855	}
 856
 857	adreno_gpu_cleanup(adreno_gpu);
 858	kfree(a5xx_gpu);
 859}
 860
 861static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
 862{
 863	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
 864		return false;
 865
 866	/*
 867	 * Nearly every abnormality ends up pausing the GPU and triggering a
 868	 * fault so we can safely just watch for this one interrupt to fire
 869	 */
 870	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
 871		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
 872}
 873
 874bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 875{
 876	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 877	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 878
 879	if (ring != a5xx_gpu->cur_ring) {
 880		WARN(1, "Tried to idle a non-current ringbuffer\n");
 881		return false;
 882	}
 883
 884	/* wait for CP to drain ringbuffer: */
 885	if (!adreno_idle(gpu, ring))
 886		return false;
 887
 888	if (spin_until(_a5xx_check_idle(gpu))) {
 889		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
 890			gpu->name, __builtin_return_address(0),
 891			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 892			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
 893			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 894			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
 895		return false;
 896	}
 897
 898	return true;
 899}
 900
 901static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
 902{
 903	struct msm_gpu *gpu = arg;
 904	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
 905			iova, flags,
 906			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
 907			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
 908			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
 909			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 910
 911	return -EFAULT;
 912}
 913
 914static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 915{
 916	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
 917
 918	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
 919		u32 val;
 920
 921		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
 922
 923		/*
 924		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
 925		 * read it twice
 926		 */
 927
 928		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 929		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 930
 931		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
 932			val);
 933	}
 934
 935	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
 936		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
 937			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
 938
 939	if (status & A5XX_CP_INT_CP_DMA_ERROR)
 940		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
 941
 942	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
 943		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
 944
 945		dev_err_ratelimited(gpu->dev->dev,
 946			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
 947			val & (1 << 24) ? "WRITE" : "READ",
 948			(val & 0xFFFFF) >> 2, val);
 949	}
 950
 951	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
 952		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
 953		const char *access[16] = { "reserved", "reserved",
 954			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
 955			"", "", "me read", "me write", "", "", "crashdump read",
 956			"crashdump write" };
 957
 958		dev_err_ratelimited(gpu->dev->dev,
 959			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
 960			status & 0xFFFFF, access[(status >> 24) & 0xF],
 961			(status & (1 << 31)), status);
 962	}
 963}
 964
 965static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
 966{
 967	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
 968		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
 969
 970		dev_err_ratelimited(gpu->dev->dev,
 971			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
 972			val & (1 << 28) ? "WRITE" : "READ",
 973			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
 974			(val >> 24) & 0xF);
 975
 976		/* Clear the error */
 977		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
 978
 979		/* Clear the interrupt */
 980		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 981			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 982	}
 983
 984	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
 985		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
 986
 987	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
 988		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
 989			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
 990
 991	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
 992		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
 993			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
 994
 995	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
 996		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
 997			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
 998
 999	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1000		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1001
1002	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1003		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1004}
1005
1006static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1007{
1008	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1009
1010	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1011
1012	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1013		addr);
1014}
1015
1016static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1017{
1018	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1019}
1020
1021static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1022{
1023	struct drm_device *dev = gpu->dev;
1024	struct msm_drm_private *priv = dev->dev_private;
1025	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1026
1027	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1028		ring ? ring->id : -1, ring ? ring->seqno : 0,
1029		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1030		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1031		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1032		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1033		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1034		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1035		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1036
1037	/* Turn off the hangcheck timer to keep it from bothering us */
1038	del_timer(&gpu->hangcheck_timer);
1039
1040	queue_work(priv->wq, &gpu->recover_work);
1041}
1042
1043#define RBBM_ERROR_MASK \
1044	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1045	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1046	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1047	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1048	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1049	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1050
1051static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1052{
1053	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1054
1055	/*
1056	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1057	 * before the source is cleared the interrupt will storm.
1058	 */
1059	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1060		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1061
1062	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1063	if (status & RBBM_ERROR_MASK)
1064		a5xx_rbbm_err_irq(gpu, status);
1065
1066	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1067		a5xx_cp_err_irq(gpu);
1068
1069	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1070		a5xx_fault_detect_irq(gpu);
1071
1072	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1073		a5xx_uche_err_irq(gpu);
1074
1075	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1076		a5xx_gpmu_err_irq(gpu);
1077
1078	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1079		a5xx_preempt_trigger(gpu);
1080		msm_gpu_retire(gpu);
1081	}
1082
1083	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1084		a5xx_preempt_irq(gpu);
1085
1086	return IRQ_HANDLED;
1087}
1088
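     /* Map the generic adreno register names used by the core code to A5XX offsets */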
1089static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1090	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1091	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1092	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1093	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1094		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1095	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1096	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1097	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1098};
1099
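     /*
      * Inclusive (start, end) pairs of register offsets captured for debug
      * dumps and crash state; the table is terminated with ~0.
      */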
1100static const u32 a5xx_registers[] = {
1101	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1102	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1103	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1104	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1105	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1106	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1107	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1108	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1109	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1110	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1111	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1112	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1113	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1114	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1115	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1116	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1117	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1118	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1119	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1120	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1121	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1122	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1123	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1124	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1125	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1126	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
1127	0xB9A0, 0xB9BF, ~0
1128};
1129
1130static void a5xx_dump(struct msm_gpu *gpu)
1131{
1132	dev_info(gpu->dev->dev, "status:   %08x\n",
1133		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1134	adreno_dump(gpu);
1135}
1136
1137static int a5xx_pm_resume(struct msm_gpu *gpu)
1138{
1139	int ret;
1140
1141	/* Turn on the core power */
1142	ret = msm_gpu_pm_resume(gpu);
1143	if (ret)
1144		return ret;
1145
 1146	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1147	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1148
1149	/* Wait 3 usecs before polling */
1150	udelay(3);
1151
1152	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1153		(1 << 20), (1 << 20));
1154	if (ret) {
1155		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1156			gpu->name,
1157			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1158		return ret;
1159	}
1160
1161	/* Turn on the SP domain */
1162	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1163	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1164		(1 << 20), (1 << 20));
1165	if (ret)
1166		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1167			gpu->name);
1168
1169	return ret;
1170}
1171
1172static int a5xx_pm_suspend(struct msm_gpu *gpu)
1173{
1174	/* Clear the VBIF pipe before shutting down */
1175	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1176	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1177
1178	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1179
1180	/*
1181	 * Reset the VBIF before power collapse to avoid issue with FIFO
1182	 * entries
1183	 */
1184	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1185	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1186
1187	return msm_gpu_pm_suspend(gpu);
1188}
1189
1190static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1191{
1192	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1193		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1194
1195	return 0;
1196}
1197
1198#ifdef CONFIG_DEBUG_FS
1199static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
1200{
1201	seq_printf(m, "status:   %08x\n",
1202			gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1203
1204	/*
1205	 * Temporarily disable hardware clock gating before going into
1206	 * adreno_show to avoid issues while reading the registers
1207	 */
1208	a5xx_set_hwcg(gpu, false);
1209	adreno_show(gpu, m);
1210	a5xx_set_hwcg(gpu, true);
1211}
1212#endif
1213
1214static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1215{
1216	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1217	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1218
1219	return a5xx_gpu->cur_ring;
1220}
1221
1222static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
1223{
1224	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1225		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1226
1227	return 0;
1228}
1229
1230static const struct adreno_gpu_funcs funcs = {
1231	.base = {
1232		.get_param = adreno_get_param,
1233		.hw_init = a5xx_hw_init,
1234		.pm_suspend = a5xx_pm_suspend,
1235		.pm_resume = a5xx_pm_resume,
1236		.recover = a5xx_recover,
1237		.submit = a5xx_submit,
1238		.flush = a5xx_flush,
1239		.active_ring = a5xx_active_ring,
1240		.irq = a5xx_irq,
1241		.destroy = a5xx_destroy,
1242#ifdef CONFIG_DEBUG_FS
1243		.show = a5xx_show,
1244		.debugfs_init = a5xx_debugfs_init,
1245#endif
1246		.gpu_busy = a5xx_gpu_busy,
1247	},
1248	.get_timestamp = a5xx_get_timestamp,
1249};
1250
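     /*
      * Read the GPU speed bin from the "speed_bin" nvmem cell (an eFuse) and
      * restrict the OPP table to the frequencies supported by that bin.
      */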
1251static void check_speed_bin(struct device *dev)
1252{
1253	struct nvmem_cell *cell;
1254	u32 bin, val;
1255
1256	cell = nvmem_cell_get(dev, "speed_bin");
1257
 1258	/* If an nvmem cell isn't defined, there is nothing to do */
1259	if (IS_ERR(cell))
1260		return;
1261
1262	bin = *((u32 *) nvmem_cell_read(cell, NULL));
1263	nvmem_cell_put(cell);
1264
1265	val = (1 << bin);
1266
1267	dev_pm_opp_set_supported_hw(dev, &val, 1);
1268}
1269
1270struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1271{
1272	struct msm_drm_private *priv = dev->dev_private;
1273	struct platform_device *pdev = priv->gpu_pdev;
1274	struct a5xx_gpu *a5xx_gpu = NULL;
1275	struct adreno_gpu *adreno_gpu;
1276	struct msm_gpu *gpu;
1277	int ret;
1278
1279	if (!pdev) {
1280		dev_err(dev->dev, "No A5XX device is defined\n");
1281		return ERR_PTR(-ENXIO);
1282	}
1283
1284	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1285	if (!a5xx_gpu)
1286		return ERR_PTR(-ENOMEM);
1287
1288	adreno_gpu = &a5xx_gpu->base;
1289	gpu = &adreno_gpu->base;
1290
1291	adreno_gpu->registers = a5xx_registers;
1292	adreno_gpu->reg_offsets = a5xx_register_offsets;
1293
1294	a5xx_gpu->lm_leakage = 0x4E001A;
1295
1296	check_speed_bin(&pdev->dev);
1297
1298	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1299	if (ret) {
1300		a5xx_destroy(&(a5xx_gpu->base.base));
1301		return ERR_PTR(ret);
1302	}
1303
1304	if (gpu->aspace)
1305		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1306
 1307	/* Set up the preemption-specific bits and pieces for each ringbuffer */
1308	a5xx_preempt_init(gpu);
1309
1310	return gpu;
1311}