   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
   3 */
   4
   5#include <linux/kernel.h>
   6#include <linux/types.h>
   7#include <linux/cpumask.h>
   8#include <linux/qcom_scm.h>
   9#include <linux/pm_opp.h>
  10#include <linux/nvmem-consumer.h>
  11#include <linux/slab.h>
  12#include "msm_gem.h"
  13#include "msm_mmu.h"
  14#include "a5xx_gpu.h"
  15
  16extern bool hang_debug;
  17static void a5xx_dump(struct msm_gpu *gpu);
  18
  19#define GPU_PAS_ID 13
  20
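     /*
      * Copy the shadow write pointer (ring->next) into ring->cur and, if
      * this ring is the one currently executing and no preemption is in
      * flight, post the new wptr to the hardware so the CP starts fetching
      * the newly written commands.
      */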
  21static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  22{
  23	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  24	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
  25	uint32_t wptr;
  26	unsigned long flags;
  27
  28	spin_lock_irqsave(&ring->lock, flags);
  29
  30	/* Copy the shadow to the actual register */
  31	ring->cur = ring->next;
  32
  33	/* Make sure to wrap wptr if we need to */
  34	wptr = get_wptr(ring);
  35
  36	spin_unlock_irqrestore(&ring->lock, flags);
  37
  38	/* Make sure everything is posted before making a decision */
  39	mb();
  40
  41	/* Update HW if this is the current ring and we are not in preempt */
  42	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
  43		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
  44}
  45
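     /*
      * Debug-only submit path (CONFIG_DRM_MSM_GPU_SUDO): instead of pointing
      * the CP at the userspace buffers with indirect-buffer packets, the
      * command stream is copied dword by dword straight into the ringbuffer.
      * No CACHE_FLUSH_TS event is emitted, so the GPU is idled and the fence
      * retired manually after flushing.
      */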
  46static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
  47	struct msm_file_private *ctx)
  48{
  49	struct msm_drm_private *priv = gpu->dev->dev_private;
  50	struct msm_ringbuffer *ring = submit->ring;
  51	struct msm_gem_object *obj;
  52	uint32_t *ptr, dwords;
   53	unsigned int i, j;
  54
  55	for (i = 0; i < submit->nr_cmds; i++) {
  56		switch (submit->cmd[i].type) {
  57		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
  58			break;
  59		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
  60			if (priv->lastctx == ctx)
  61				break;
  62			fallthrough;
  63		case MSM_SUBMIT_CMD_BUF:
  64			/* copy commands into RB: */
  65			obj = submit->bos[submit->cmd[i].idx].obj;
  66			dwords = submit->cmd[i].size;
  67
  68			ptr = msm_gem_get_vaddr(&obj->base);
  69
  70			/* _get_vaddr() shouldn't fail at this point,
  71			 * since we've already mapped it once in
  72			 * submit_reloc()
  73			 */
  74			if (WARN_ON(!ptr))
  75				return;
  76
   77			for (j = 0; j < dwords; j++) {
   78				/* normally the OUT_PKTn() would wait
   79				 * for space for the packet.  But since
   80				 * we just OUT_RING() the whole thing,
   81				 * we need to call adreno_wait_ring()
   82				 * ourselves:
   83				 */
   84				adreno_wait_ring(ring, 1);
   85				OUT_RING(ring, ptr[j]);
   86			}
  87
  88			msm_gem_put_vaddr(&obj->base);
  89
  90			break;
  91		}
  92	}
  93
  94	a5xx_flush(gpu, ring);
  95	a5xx_preempt_trigger(gpu);
  96
  97	/* we might not necessarily have a cmd from userspace to
  98	 * trigger an event to know that submit has completed, so
  99	 * do this manually:
 100	 */
 101	a5xx_idle(gpu, ring);
 102	ring->memptrs->fence = submit->seqno;
 103	msm_gpu_retire(gpu);
 104}
 105
 106static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 107	struct msm_file_private *ctx)
 108{
 109	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 110	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 111	struct msm_drm_private *priv = gpu->dev->dev_private;
 112	struct msm_ringbuffer *ring = submit->ring;
 113	unsigned int i, ibs = 0;
 114
 115	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
 116		priv->lastctx = NULL;
 117		a5xx_submit_in_rb(gpu, submit, ctx);
 118		return;
 119	}
 120
 121	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 122	OUT_RING(ring, 0x02);
 123
 124	/* Turn off protected mode to write to special registers */
 125	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 126	OUT_RING(ring, 0);
 127
 128	/* Set the save preemption record for the ring/command */
 129	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 130	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 131	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
 132
 133	/* Turn back on protected mode */
 134	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 135	OUT_RING(ring, 1);
 136
 137	/* Enable local preemption for finegrain preemption */
 138	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 139	OUT_RING(ring, 0x02);
 140
 141	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
 142	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 143	OUT_RING(ring, 0x02);
 144
 145	/* Submit the commands */
 146	for (i = 0; i < submit->nr_cmds; i++) {
 147		switch (submit->cmd[i].type) {
 148		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 149			break;
 150		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 151			if (priv->lastctx == ctx)
 152				break;
 153			fallthrough;
 154		case MSM_SUBMIT_CMD_BUF:
 155			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 156			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 157			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 158			OUT_RING(ring, submit->cmd[i].size);
 159			ibs++;
 160			break;
 161		}
 162	}
 163
 164	/*
 165	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
 166	 * are done rendering - otherwise a lucky preemption would start
 167	 * replaying from the last checkpoint
 168	 */
 169	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
 170	OUT_RING(ring, 0);
 171	OUT_RING(ring, 0);
 172	OUT_RING(ring, 0);
 173	OUT_RING(ring, 0);
 174	OUT_RING(ring, 0);
 175
 176	/* Turn off IB level preemptions */
 177	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 178	OUT_RING(ring, 0x01);
 179
 180	/* Write the fence to the scratch register */
 181	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
 182	OUT_RING(ring, submit->seqno);
 183
 184	/*
 185	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
 186	 * timestamp is written to the memory and then triggers the interrupt
 187	 */
 188	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 189	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
 190		CP_EVENT_WRITE_0_IRQ);
 191	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 192	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 193	OUT_RING(ring, submit->seqno);
 194
 195	/* Yield the floor on command completion */
 196	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 197	/*
  198	 * If dword[2:1] are non-zero, they specify an address for the CP to
  199	 * write the value of dword[3] to on preemption complete. Write 0 to
  200	 * skip the write.
 201	 */
 202	OUT_RING(ring, 0x00);
 203	OUT_RING(ring, 0x00);
 204	/* Data value - not used if the address above is 0 */
 205	OUT_RING(ring, 0x01);
 206	/* Set bit 0 to trigger an interrupt on preempt complete */
 207	OUT_RING(ring, 0x01);
 208
 209	a5xx_flush(gpu, ring);
 210
 211	/* Check to see if we need to start preemption */
 212	a5xx_preempt_trigger(gpu);
 213}
 214
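     /*
      * Per-block hardware clock gating settings; a5xx_set_hwcg() below writes
      * each value when clock gating is enabled and zero when it is disabled.
      */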
 215static const struct {
 216	u32 offset;
 217	u32 value;
 218} a5xx_hwcg[] = {
 219	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 220	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
 221	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
 222	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
 223	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 224	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
 225	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
 226	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
 227	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 228	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
 229	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
 230	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
 231	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 232	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 233	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 234	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 235	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 236	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
 237	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
 238	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
 239	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 240	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 241	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 242	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 243	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
 244	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
 245	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
 246	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
 247	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 248	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 249	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 250	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 251	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 252	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 253	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 254	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 255	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
 256	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
 257	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
 258	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
 259	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 260	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 261	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 262	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 263	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 264	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 265	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 266	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 267	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
 268	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
 269	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
 270	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
 271	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 272	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 273	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 274	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 275	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
 276	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 277	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 278	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 279	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 280	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 281	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
 282	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
 283	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
 284	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
 285	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
 286	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
 287	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
 288	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
 289	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
 290	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
 291	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
 292	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
 293	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
 294	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
 295	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
 296	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
 297	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
 298	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
 299	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
 300	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
 301	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 302	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
 303	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 304	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 305	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 306	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 307	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 308	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 309	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 310	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
 311};
 312
 313void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 314{
 315	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 316	unsigned int i;
 317
 318	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
 319		gpu_write(gpu, a5xx_hwcg[i].offset,
 320			state ? a5xx_hwcg[i].value : 0);
 321
 322	if (adreno_is_a540(adreno_gpu)) {
 323		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
 324		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
 325	}
 326
 327	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
 328	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
 329}
 330
 331static int a5xx_me_init(struct msm_gpu *gpu)
 332{
 333	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 334	struct msm_ringbuffer *ring = gpu->rb[0];
 335
 336	OUT_PKT7(ring, CP_ME_INIT, 8);
 337
 338	OUT_RING(ring, 0x0000002F);
 339
 340	/* Enable multiple hardware contexts */
 341	OUT_RING(ring, 0x00000003);
 342
 343	/* Enable error detection */
 344	OUT_RING(ring, 0x20000000);
 345
 346	/* Don't enable header dump */
 347	OUT_RING(ring, 0x00000000);
 348	OUT_RING(ring, 0x00000000);
 349
 350	/* Specify workarounds for various microcode issues */
 351	if (adreno_is_a530(adreno_gpu)) {
 352		/* Workaround for token end syncs
 353		 * Force a WFI after every direct-render 3D mode draw and every
 354		 * 2D mode 3 draw
 355		 */
 356		OUT_RING(ring, 0x0000000B);
 357	} else if (adreno_is_a510(adreno_gpu)) {
 358		/* Workaround for token and syncs */
 359		OUT_RING(ring, 0x00000001);
 360	} else {
 361		/* No workarounds enabled */
 362		OUT_RING(ring, 0x00000000);
 363	}
 364
 365	OUT_RING(ring, 0x00000000);
 366	OUT_RING(ring, 0x00000000);
 367
 368	gpu->funcs->flush(gpu, ring);
 369	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 370}
 371
 372static int a5xx_preempt_start(struct msm_gpu *gpu)
 373{
 374	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 375	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 376	struct msm_ringbuffer *ring = gpu->rb[0];
 377
 378	if (gpu->nr_rings == 1)
 379		return 0;
 380
 381	/* Turn off protected mode to write to special registers */
 382	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 383	OUT_RING(ring, 0);
 384
 385	/* Set the save preemption record for the ring/command */
 386	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
 387	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 388	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
 389
 390	/* Turn back on protected mode */
 391	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 392	OUT_RING(ring, 1);
 393
 394	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 395	OUT_RING(ring, 0x00);
 396
 397	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
 398	OUT_RING(ring, 0x01);
 399
 400	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
 401	OUT_RING(ring, 0x01);
 402
 403	/* Yield the floor on command completion */
 404	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 405	OUT_RING(ring, 0x00);
 406	OUT_RING(ring, 0x00);
 407	OUT_RING(ring, 0x01);
 408	OUT_RING(ring, 0x01);
 409
 410	gpu->funcs->flush(gpu, ring);
 411
 412	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
 413}
 414
 415static int a5xx_ucode_init(struct msm_gpu *gpu)
 416{
 417	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 418	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 419	int ret;
 420
 421	if (!a5xx_gpu->pm4_bo) {
 422		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
 423			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
 424
 425
 426		if (IS_ERR(a5xx_gpu->pm4_bo)) {
 427			ret = PTR_ERR(a5xx_gpu->pm4_bo);
 428			a5xx_gpu->pm4_bo = NULL;
 429			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
 430				ret);
 431			return ret;
 432		}
 433
 434		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
 435	}
 436
 437	if (!a5xx_gpu->pfp_bo) {
 438		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
 439			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
 440
 441		if (IS_ERR(a5xx_gpu->pfp_bo)) {
 442			ret = PTR_ERR(a5xx_gpu->pfp_bo);
 443			a5xx_gpu->pfp_bo = NULL;
 444			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
 445				ret);
 446			return ret;
 447		}
 448
 449		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
 450	}
 451
 452	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
 453		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
 454
 455	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
 456		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
 457
 458	return 0;
 459}
 460
 461#define SCM_GPU_ZAP_SHADER_RESUME 0
 462
 463static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
 464{
 465	int ret;
 466
 467	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
 468	if (ret)
 469		DRM_ERROR("%s: zap-shader resume failed: %d\n",
 470			gpu->name, ret);
 471
 472	return ret;
 473}
 474
 475static int a5xx_zap_shader_init(struct msm_gpu *gpu)
 476{
 477	static bool loaded;
 478	int ret;
 479
 480	/*
 481	 * If the zap shader is already loaded into memory we just need to kick
 482	 * the remote processor to reinitialize it
 483	 */
 484	if (loaded)
 485		return a5xx_zap_shader_resume(gpu);
 486
 487	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
 488
 489	loaded = !ret;
 490	return ret;
 491}
 492
 493#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
 494	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
 495	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
 496	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
 497	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
 498	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
 499	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 500	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
 501	  A5XX_RBBM_INT_0_MASK_CP_SW | \
 502	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 503	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 504	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
 505
 506static int a5xx_hw_init(struct msm_gpu *gpu)
 507{
 508	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 509	int ret;
 510
 511	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 512
 513	if (adreno_is_a540(adreno_gpu))
 514		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
 515
 516	/* Make all blocks contribute to the GPU BUSY perf counter */
 517	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
 518
 519	/* Enable RBBM error reporting bits */
 520	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 521
 522	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 523		/*
 524		 * Mask out the activity signals from RB1-3 to avoid false
 525		 * positives
 526		 */
 527
 528		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
 529			0xF0000000);
 530		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
 531			0xFFFFFFFF);
 532		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
 533			0xFFFFFFFF);
 534		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
 535			0xFFFFFFFF);
 536		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
 537			0xFFFFFFFF);
 538		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
 539			0xFFFFFFFF);
 540		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
 541			0xFFFFFFFF);
 542		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
 543			0xFFFFFFFF);
 544	}
 545
 546	/* Enable fault detection */
 547	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
 548		(1 << 30) | 0xFFFF);
 549
 550	/* Turn on performance counters */
 551	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
 552
 553	/* Select CP0 to always count cycles */
 554	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
 555
 556	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
 557	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
 558
 559	/* Increase VFD cache access so LRZ and other data gets evicted less */
 560	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
 561
 562	/* Disable L2 bypass in the UCHE */
 563	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
 564	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
 565	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
 566	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
 567
 568	/* Set the GMEM VA range (0 to gpu->gmem) */
 569	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 570	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
 571	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
 572		0x00100000 + adreno_gpu->gmem - 1);
 573	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 574
 575	if (adreno_is_a510(adreno_gpu)) {
 576		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
 577		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
 578		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
 579		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
 580		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
 581			  (0x200 << 11 | 0x200 << 22));
 582	} else {
 583		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
 584		if (adreno_is_a530(adreno_gpu))
 585			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
 586		if (adreno_is_a540(adreno_gpu))
 587			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
 588		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
 589		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
 590		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
 591			  (0x400 << 11 | 0x300 << 22));
 592	}
 593
 594	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 595		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 596
 597	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
 598
 599	/* Enable USE_RETENTION_FLOPS */
 600	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
 601
 602	/* Enable ME/PFP split notification */
 603	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
 604
 605	/*
  606	 *  In A5x, the CCU can send the context_done event of a particular
  607	 *  context to UCHE, which ultimately reaches the CP even while a valid
  608	 *  transaction of that context is still inside the CCU. This can let
  609	 *  the CP program config registers, which makes the "valid transaction"
  610	 *  inside the CCU be interpreted differently and can cause a GPU fault.
  611	 *  This bug is fixed in the latest A510 revision. To enable the fix,
  612	 *  bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the default is 1,
  613	 *  i.e. disabled). For older A510 revisions this bit is unused.
 614	 */
 615	if (adreno_is_a510(adreno_gpu))
 616		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
 617
 618	/* Enable HWCG */
 619	a5xx_set_hwcg(gpu, true);
 620
 621	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 622
 623	/* Set the highest bank bit */
 624	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
 625	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
 626	if (adreno_is_a540(adreno_gpu))
 627		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
 628
 629	/* Protect registers from the CP */
 630	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
 631
 632	/* RBBM */
 633	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
 634	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
 635	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
 636	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
 637	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
 638	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
 639
 640	/* Content protect */
 641	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
 642		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 643			16));
 644	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
 645		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
 646
 647	/* CP */
 648	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
 649	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
 650	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
 651	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
 652
 653	/* RB */
 654	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
 655	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
 656
 657	/* VPC */
 658	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
 659	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
 660
 661	/* UCHE */
 662	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 663
 664	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
 665		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
 666			ADRENO_PROTECT_RW(0x10000, 0x8000));
 667
 668	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
 669	/*
  670	 * Disable the trusted memory range - we don't actually support secure
 671	 * memory rendering at this point in time and we don't want to block off
 672	 * part of the virtual memory space.
 673	 */
 674	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
 675		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
 676	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 677
 678	/* Put the GPU into 64 bit by default */
 679	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
 680	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
 681	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
 682	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
 683	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
 684	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
 685	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
 686	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
 687	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
 688	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
 689	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
 690	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
 691
 692	/*
  693	 * A VPC corner case with local memory load kill leads to corrupt
  694	 * internal state. The normal disable does not work for all a5x chips,
  695	 * so apply the following settings to disable it.
 696	 */
 697	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
 698		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
 699		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
 700	}
 701
 702	ret = adreno_hw_init(gpu);
 703	if (ret)
 704		return ret;
 705
 706	if (!adreno_is_a510(adreno_gpu))
 707		a5xx_gpmu_ucode_init(gpu);
 708
 709	ret = a5xx_ucode_init(gpu);
 710	if (ret)
 711		return ret;
 712
 713	/* Set the ringbuffer address */
 714	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
 715		gpu->rb[0]->iova);
 716
 717	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
 718		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
 719
 720	a5xx_preempt_hw_init(gpu);
 721
  722	/* Set the RBBM interrupt mask for the interrupts we handle */
 723	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
 724
 725	/* Clear ME_HALT to start the micro engine */
 726	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
 727	ret = a5xx_me_init(gpu);
 728	if (ret)
 729		return ret;
 730
 731	ret = a5xx_power_init(gpu);
 732	if (ret)
 733		return ret;
 734
 735	/*
 736	 * Send a pipeline event stat to get misbehaving counters to start
 737	 * ticking correctly
 738	 */
 739	if (adreno_is_a530(adreno_gpu)) {
 740		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
 741		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
 742
 743		gpu->funcs->flush(gpu, gpu->rb[0]);
 744		if (!a5xx_idle(gpu, gpu->rb[0]))
 745			return -EINVAL;
 746	}
 747
 748	/*
  749	 * If the chip that we are using supports loading a zap shader, then
  750	 * try to load it into the secure world. If successful we can use the
  751	 * CP to switch out of secure mode. If not then we have no recourse
  752	 * but to try to switch ourselves out manually. If we guessed wrong
  753	 * then access to the RBBM_SECVID_TRUST_CNTL register will be blocked
  754	 * and a permissions violation will soon follow.
 755	 */
 756	ret = a5xx_zap_shader_init(gpu);
 757	if (!ret) {
 758		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
 759		OUT_RING(gpu->rb[0], 0x00000000);
 760
 761		gpu->funcs->flush(gpu, gpu->rb[0]);
 762		if (!a5xx_idle(gpu, gpu->rb[0]))
 763			return -EINVAL;
 764	} else if (ret == -ENODEV) {
 765		/*
 766		 * This device does not use zap shader (but print a warning
 767		 * just in case someone got their dt wrong.. hopefully they
 768		 * have a debug UART to realize the error of their ways...
 769		 * if you mess this up you are about to crash horribly)
 770		 */
 771		dev_warn_once(gpu->dev->dev,
 772			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
 773		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
 774	} else {
 775		return ret;
 776	}
 777
 778	/* Last step - yield the ringbuffer */
 779	a5xx_preempt_start(gpu);
 780
 781	return 0;
 782}
 783
 784static void a5xx_recover(struct msm_gpu *gpu)
 785{
 786	int i;
 787
 788	adreno_dump_info(gpu);
 789
 790	for (i = 0; i < 8; i++) {
 791		printk("CP_SCRATCH_REG%d: %u\n", i,
 792			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
 793	}
 794
 795	if (hang_debug)
 796		a5xx_dump(gpu);
 797
 798	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
 799	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
 800	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
 801	adreno_recover(gpu);
 802}
 803
 804static void a5xx_destroy(struct msm_gpu *gpu)
 805{
 806	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 807	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 808
 809	DBG("%s", gpu->name);
 810
 811	a5xx_preempt_fini(gpu);
 812
 813	if (a5xx_gpu->pm4_bo) {
 814		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
 815		drm_gem_object_put(a5xx_gpu->pm4_bo);
 816	}
 817
 818	if (a5xx_gpu->pfp_bo) {
 819		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
 820		drm_gem_object_put(a5xx_gpu->pfp_bo);
 821	}
 822
 823	if (a5xx_gpu->gpmu_bo) {
 824		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
 825		drm_gem_object_put(a5xx_gpu->gpmu_bo);
 826	}
 827
 828	adreno_gpu_cleanup(adreno_gpu);
 829	kfree(a5xx_gpu);
 830}
 831
 832static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
 833{
 834	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
 835		return false;
 836
 837	/*
 838	 * Nearly every abnormality ends up pausing the GPU and triggering a
 839	 * fault so we can safely just watch for this one interrupt to fire
 840	 */
 841	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
 842		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
 843}
 844
 845bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 846{
 847	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 848	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 849
 850	if (ring != a5xx_gpu->cur_ring) {
 851		WARN(1, "Tried to idle a non-current ringbuffer\n");
 852		return false;
 853	}
 854
 855	/* wait for CP to drain ringbuffer: */
 856	if (!adreno_idle(gpu, ring))
 857		return false;
 858
 859	if (spin_until(_a5xx_check_idle(gpu))) {
 860		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
 861			gpu->name, __builtin_return_address(0),
 862			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 863			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
 864			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 865			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
 866		return false;
 867	}
 868
 869	return true;
 870}
 871
 872static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
 873{
 874	struct msm_gpu *gpu = arg;
 875	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
 876			iova, flags,
 877			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
 878			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
 879			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
 880			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
 881
 882	return -EFAULT;
 883}
 884
 885static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 886{
 887	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
 888
 889	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
 890		u32 val;
 891
 892		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
 893
 894		/*
 895		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
 896		 * read it twice
 897		 */
 898
 899		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 900		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
 901
 902		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
 903			val);
 904	}
 905
 906	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
 907		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
 908			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
 909
 910	if (status & A5XX_CP_INT_CP_DMA_ERROR)
 911		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
 912
 913	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
 914		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
 915
 916		dev_err_ratelimited(gpu->dev->dev,
 917			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
 918			val & (1 << 24) ? "WRITE" : "READ",
 919			(val & 0xFFFFF) >> 2, val);
 920	}
 921
 922	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
 923		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
 924		const char *access[16] = { "reserved", "reserved",
 925			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
 926			"", "", "me read", "me write", "", "", "crashdump read",
 927			"crashdump write" };
 928
 929		dev_err_ratelimited(gpu->dev->dev,
 930			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
 931			status & 0xFFFFF, access[(status >> 24) & 0xF],
 932			(status & (1 << 31)), status);
 933	}
 934}
 935
 936static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
 937{
 938	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
 939		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
 940
 941		dev_err_ratelimited(gpu->dev->dev,
 942			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
 943			val & (1 << 28) ? "WRITE" : "READ",
 944			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
 945			(val >> 24) & 0xF);
 946
 947		/* Clear the error */
 948		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
 949
 950		/* Clear the interrupt */
 951		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 952			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 953	}
 954
 955	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
 956		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
 957
 958	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
 959		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
 960			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
 961
 962	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
 963		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
 964			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
 965
 966	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
 967		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
 968			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
 969
 970	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
 971		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
 972
 973	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
 974		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
 975}
 976
 977static void a5xx_uche_err_irq(struct msm_gpu *gpu)
 978{
 979	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
 980
 981	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
 982
 983	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
 984		addr);
 985}
 986
 987static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
 988{
 989	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
 990}
 991
 992static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 993{
 994	struct drm_device *dev = gpu->dev;
 995	struct msm_drm_private *priv = dev->dev_private;
 996	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
 997
 998	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
 999		ring ? ring->id : -1, ring ? ring->seqno : 0,
1000		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1001		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1002		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1003		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1004		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1005		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1006		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1007
1008	/* Turn off the hangcheck timer to keep it from bothering us */
1009	del_timer(&gpu->hangcheck_timer);
1010
1011	queue_work(priv->wq, &gpu->recover_work);
1012}
1013
1014#define RBBM_ERROR_MASK \
1015	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1016	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1017	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1018	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1019	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1020	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1021
1022static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1023{
1024	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1025
1026	/*
1027	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1028	 * before the source is cleared the interrupt will storm.
1029	 */
1030	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1031		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1032
1033	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1034	if (status & RBBM_ERROR_MASK)
1035		a5xx_rbbm_err_irq(gpu, status);
1036
1037	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1038		a5xx_cp_err_irq(gpu);
1039
1040	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1041		a5xx_fault_detect_irq(gpu);
1042
1043	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1044		a5xx_uche_err_irq(gpu);
1045
1046	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1047		a5xx_gpmu_err_irq(gpu);
1048
1049	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1050		a5xx_preempt_trigger(gpu);
1051		msm_gpu_retire(gpu);
1052	}
1053
1054	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1055		a5xx_preempt_irq(gpu);
1056
1057	return IRQ_HANDLED;
1058}
1059
1060static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1061	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1062	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1063	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1064	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1065		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1066	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1067	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1068	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1069};
1070
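     /*
      * Inclusive start/end pairs of register ranges, terminated by ~0, that
      * the generic adreno dump and GPU state snapshot code reads out.
      */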
1071static const u32 a5xx_registers[] = {
1072	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1073	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1074	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1075	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1076	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1077	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1078	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1079	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1080	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1081	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1082	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1083	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1084	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1085	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1086	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1087	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1088	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1089	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1090	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1091	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1092	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1093	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1094	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1095	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1096	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1097	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1098	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1099	0xAC60, 0xAC60, ~0,
1100};
1101
1102static void a5xx_dump(struct msm_gpu *gpu)
1103{
1104	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1105		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1106	adreno_dump(gpu);
1107}
1108
1109static int a5xx_pm_resume(struct msm_gpu *gpu)
1110{
1111	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1112	int ret;
1113
1114	/* Turn on the core power */
1115	ret = msm_gpu_pm_resume(gpu);
1116	if (ret)
1117		return ret;
1118
1119	if (adreno_is_a510(adreno_gpu)) {
1120		/* Halt the sp_input_clk at HM level */
1121		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1122		a5xx_set_hwcg(gpu, true);
1123		/* Turn on sp_input_clk at HM level */
1124		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1125		return 0;
1126	}
1127
 1128	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1129	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1130
1131	/* Wait 3 usecs before polling */
1132	udelay(3);
1133
1134	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1135		(1 << 20), (1 << 20));
1136	if (ret) {
1137		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1138			gpu->name,
1139			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1140		return ret;
1141	}
1142
1143	/* Turn on the SP domain */
1144	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1145	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1146		(1 << 20), (1 << 20));
1147	if (ret)
1148		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1149			gpu->name);
1150
1151	return ret;
1152}
1153
1154static int a5xx_pm_suspend(struct msm_gpu *gpu)
1155{
1156	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1157	u32 mask = 0xf;
1158
1159	/* A510 has 3 XIN ports in VBIF */
1160	if (adreno_is_a510(adreno_gpu))
1161		mask = 0x7;
1162
1163	/* Clear the VBIF pipe before shutting down */
1164	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1165	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1166				mask) == mask);
1167
1168	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1169
1170	/*
1171	 * Reset the VBIF before power collapse to avoid issue with FIFO
1172	 * entries
1173	 */
1174	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1175	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1176
1177	return msm_gpu_pm_suspend(gpu);
1178}
1179
1180static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1181{
1182	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1183		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1184
1185	return 0;
1186}
1187
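     /*
      * Scratch state for the CP crashdumper: a 1MB GEM buffer that holds the
      * dump script at offset 0 and the captured register data further in,
      * mapped at ptr for the kernel and at iova for the GPU.
      */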
1188struct a5xx_crashdumper {
1189	void *ptr;
1190	struct drm_gem_object *bo;
1191	u64 iova;
1192};
1193
1194struct a5xx_gpu_state {
1195	struct msm_gpu_state base;
1196	u32 *hlsqregs;
1197};
1198
1199static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1200		struct a5xx_crashdumper *dumper)
1201{
1202	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1203		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1204		&dumper->bo, &dumper->iova);
1205
1206	if (!IS_ERR(dumper->ptr))
1207		msm_gem_object_set_name(dumper->bo, "crashdump");
1208
1209	return PTR_ERR_OR_ZERO(dumper->ptr);
1210}
1211
1212static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1213		struct a5xx_crashdumper *dumper)
1214{
1215	u32 val;
1216
1217	if (IS_ERR_OR_NULL(dumper->ptr))
1218		return -EINVAL;
1219
1220	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1221		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1222
1223	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1224
1225	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1226		val & 0x04, 100, 10000);
1227}
1228
1229/*
 1230 * This is a list of the registers that need to be read through the HLSQ
 1231 * aperture via the crashdumper.  These are not nominally accessible from
1232 * the CPU on a secure platform.
1233 */
1234static const struct {
1235	u32 type;
1236	u32 regoffset;
1237	u32 count;
1238} a5xx_hlsq_aperture_regs[] = {
 1239	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1240	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1241	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1242	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1243	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1244	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1245	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1246	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1247	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1248	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1249	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1250	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1251	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1252	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1253	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1254};
1255
1256static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1257		struct a5xx_gpu_state *a5xx_state)
1258{
1259	struct a5xx_crashdumper dumper = { 0 };
1260	u32 offset, count = 0;
1261	u64 *ptr;
1262	int i;
1263
1264	if (a5xx_crashdumper_init(gpu, &dumper))
1265		return;
1266
1267	/* The script will be written at offset 0 */
1268	ptr = dumper.ptr;
1269
1270	/* Start writing the data at offset 256k */
1271	offset = dumper.iova + (256 * SZ_1K);
1272
1273	/* Count how many additional registers to get from the HLSQ aperture */
1274	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1275		count += a5xx_hlsq_aperture_regs[i].count;
1276
1277	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1278	if (!a5xx_state->hlsqregs)
1279		return;
1280
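     	/*
     	 * Each loop iteration below appends two 64-bit word pairs to the
     	 * script: one targeting REG_A5XX_HLSQ_DBG_READ_SEL to select the
     	 * register bank, and one targeting REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE
     	 * with the destination offset and dword count for that bank.
     	 */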
1281	/* Build the crashdump script */
1282	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1283		u32 type = a5xx_hlsq_aperture_regs[i].type;
1284		u32 c = a5xx_hlsq_aperture_regs[i].count;
1285
1286		/* Write the register to select the desired bank */
1287		*ptr++ = ((u64) type << 8);
1288		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1289			(1 << 21) | 1;
1290
1291		*ptr++ = offset;
1292		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1293			| c;
1294
1295		offset += c * sizeof(u32);
1296	}
1297
1298	/* Write two zeros to close off the script */
1299	*ptr++ = 0;
1300	*ptr++ = 0;
1301
1302	if (a5xx_crashdumper_run(gpu, &dumper)) {
1303		kfree(a5xx_state->hlsqregs);
1304		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1305		return;
1306	}
1307
1308	/* Copy the data from the crashdumper to the state */
1309	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1310		count * sizeof(u32));
1311
1312	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1313}
1314
1315static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1316{
1317	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1318			GFP_KERNEL);
1319
1320	if (!a5xx_state)
1321		return ERR_PTR(-ENOMEM);
1322
1323	/* Temporarily disable hardware clock gating before reading the hw */
1324	a5xx_set_hwcg(gpu, false);
1325
1326	/* First get the generic state from the adreno core */
1327	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1328
1329	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1330
1331	/* Get the HLSQ regs with the help of the crashdumper */
1332	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1333
1334	a5xx_set_hwcg(gpu, true);
1335
1336	return &a5xx_state->base;
1337}
1338
1339static void a5xx_gpu_state_destroy(struct kref *kref)
1340{
1341	struct msm_gpu_state *state = container_of(kref,
1342		struct msm_gpu_state, ref);
1343	struct a5xx_gpu_state *a5xx_state = container_of(state,
1344		struct a5xx_gpu_state, base);
1345
1346	kfree(a5xx_state->hlsqregs);
1347
1348	adreno_gpu_state_destroy(state);
1349	kfree(a5xx_state);
1350}
1351
1352static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1353{
1354	if (IS_ERR_OR_NULL(state))
1355		return 1;
1356
1357	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1358}
1359
1360
1361#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1362static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1363		      struct drm_printer *p)
1364{
1365	int i, j;
1366	u32 pos = 0;
1367	struct a5xx_gpu_state *a5xx_state = container_of(state,
1368		struct a5xx_gpu_state, base);
1369
1370	if (IS_ERR_OR_NULL(state))
1371		return;
1372
1373	adreno_show(gpu, state, p);
1374
1375	/* Dump the additional a5xx HLSQ registers */
1376	if (!a5xx_state->hlsqregs)
1377		return;
1378
1379	drm_printf(p, "registers-hlsq:\n");
1380
1381	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1382		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1383		u32 c = a5xx_hlsq_aperture_regs[i].count;
1384
1385		for (j = 0; j < c; j++, pos++, o++) {
1386			/*
1387			 * To keep the crashdump simple we pull the entire range
1388			 * for each register type but not all of the registers
1389			 * in the range are valid. Fortunately invalid registers
1390			 * stick out like a sore thumb with a value of
1391			 * 0xdeadbeef
1392			 */
1393			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1394				continue;
1395
1396			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1397				o << 2, a5xx_state->hlsqregs[pos]);
1398		}
1399	}
1400}
1401#endif
1402
1403static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1404{
1405	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1406	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1407
1408	return a5xx_gpu->cur_ring;
1409}
1410
1411static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1412{
1413	u64 busy_cycles, busy_time;
1414
1415	/* Only read the gpu busy if the hardware is already active */
1416	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1417		return 0;
1418
1419	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1420			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1421
1422	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1423	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1424
1425	gpu->devfreq.busy_cycles = busy_cycles;
1426
1427	pm_runtime_put(&gpu->pdev->dev);
1428
1429	if (WARN_ON(busy_time > ~0LU))
1430		return ~0LU;
1431
1432	return (unsigned long)busy_time;
1433}
1434
1435static const struct adreno_gpu_funcs funcs = {
1436	.base = {
1437		.get_param = adreno_get_param,
1438		.hw_init = a5xx_hw_init,
1439		.pm_suspend = a5xx_pm_suspend,
1440		.pm_resume = a5xx_pm_resume,
1441		.recover = a5xx_recover,
1442		.submit = a5xx_submit,
1443		.flush = a5xx_flush,
1444		.active_ring = a5xx_active_ring,
1445		.irq = a5xx_irq,
1446		.destroy = a5xx_destroy,
1447#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1448		.show = a5xx_show,
1449#endif
1450#if defined(CONFIG_DEBUG_FS)
1451		.debugfs_init = a5xx_debugfs_init,
1452#endif
1453		.gpu_busy = a5xx_gpu_busy,
1454		.gpu_state_get = a5xx_gpu_state_get,
1455		.gpu_state_put = a5xx_gpu_state_put,
1456		.create_address_space = adreno_iommu_create_address_space,
1457	},
1458	.get_timestamp = a5xx_get_timestamp,
1459};
1460
1461static void check_speed_bin(struct device *dev)
1462{
1463	struct nvmem_cell *cell;
1464	u32 val;
1465
1466	/*
 1467	 * If the OPP table specifies an opp-supported-hw property then we have
 1468	 * to set something with dev_pm_opp_set_supported_hw() or the table
 1469	 * doesn't get populated, so pick an arbitrary value that should
 1470	 * ensure the default frequencies are selected without conflicting with
 1471	 * any actual bins.
1472	 */
1473	val = 0x80;
1474
1475	cell = nvmem_cell_get(dev, "speed_bin");
1476
1477	if (!IS_ERR(cell)) {
1478		void *buf = nvmem_cell_read(cell, NULL);
1479
1480		if (!IS_ERR(buf)) {
1481			u8 bin = *((u8 *) buf);
1482
1483			val = (1 << bin);
1484			kfree(buf);
1485		}
1486
1487		nvmem_cell_put(cell);
1488	}
1489
1490	dev_pm_opp_set_supported_hw(dev, &val, 1);
1491}
1492
1493struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1494{
1495	struct msm_drm_private *priv = dev->dev_private;
1496	struct platform_device *pdev = priv->gpu_pdev;
1497	struct a5xx_gpu *a5xx_gpu = NULL;
1498	struct adreno_gpu *adreno_gpu;
1499	struct msm_gpu *gpu;
1500	int ret;
1501
1502	if (!pdev) {
1503		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1504		return ERR_PTR(-ENXIO);
1505	}
1506
1507	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1508	if (!a5xx_gpu)
1509		return ERR_PTR(-ENOMEM);
1510
1511	adreno_gpu = &a5xx_gpu->base;
1512	gpu = &adreno_gpu->base;
1513
1514	adreno_gpu->registers = a5xx_registers;
1515	adreno_gpu->reg_offsets = a5xx_register_offsets;
1516
1517	a5xx_gpu->lm_leakage = 0x4E001A;
1518
1519	check_speed_bin(&pdev->dev);
1520
1521	/* Restricting nr_rings to 1 to temporarily disable preemption */
1522	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
1523	if (ret) {
1524		a5xx_destroy(&(a5xx_gpu->base.base));
1525		return ERR_PTR(ret);
1526	}
1527
1528	if (gpu->aspace)
1529		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1530
1531	/* Set up the preemption specific bits and pieces for each ringbuffer */
1532	a5xx_preempt_init(gpu);
1533
1534	return gpu;
1535}