   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
   3
   4
   5#include "msm_gem.h"
   6#include "msm_mmu.h"
   7#include "msm_gpu_trace.h"
   8#include "a6xx_gpu.h"
   9#include "a6xx_gmu.xml.h"
  10
  11#include <linux/bitfield.h>
  12#include <linux/devfreq.h>
  13#include <linux/reset.h>
  14#include <linux/soc/qcom/llcc-qcom.h>
  15
  16#define GPU_PAS_ID 13
  17
  18static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
  19{
  20	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  21	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  22
  23	/* Check that the GMU is idle */
  24	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
  25		return false;
  26
  27	/* Check that the CX master is idle */
  28	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
  29			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
  30		return false;
  31
  32	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
  33		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
  34}
  35
  36static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  37{
  38	/* wait for CP to drain ringbuffer: */
  39	if (!adreno_idle(gpu, ring))
  40		return false;
  41
  42	if (spin_until(_a6xx_check_idle(gpu))) {
  43		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
  44			gpu->name, __builtin_return_address(0),
  45			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
  46			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
  47			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
  48			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
  49		return false;
  50	}
  51
  52	return true;
  53}
  54
  55static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  56{
  57	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  58	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  59
  60	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
  61	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
  62		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
  63		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
  64		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
  65	}
  66}
  67
  68static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  69{
  70	uint32_t wptr;
  71	unsigned long flags;
  72
  73	update_shadow_rptr(gpu, ring);
  74
  75	spin_lock_irqsave(&ring->preempt_lock, flags);
  76
  77	/* Copy the shadow to the actual register */
  78	ring->cur = ring->next;
  79
  80	/* Make sure to wrap wptr if we need to */
  81	wptr = get_wptr(ring);
  82
  83	spin_unlock_irqrestore(&ring->preempt_lock, flags);
  84
  85	/* Make sure everything is posted before making a decision */
  86	mb();
  87
  88	gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
  89}
  90
  91static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
  92		u64 iova)
  93{
  94	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
  95	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
  96		CP_REG_TO_MEM_0_CNT(2) |
  97		CP_REG_TO_MEM_0_64B);
  98	OUT_RING(ring, lower_32_bits(iova));
  99	OUT_RING(ring, upper_32_bits(iova));
 100}
 101
 102static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
 103		struct msm_ringbuffer *ring, struct msm_file_private *ctx)
 104{
 105	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
 106	phys_addr_t ttbr;
 107	u32 asid;
 108	u64 memptr = rbmemptr(ring, ttbr0);
 109
 110	if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
 111		return;
 112
 113	if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
 114		return;
 115
 116	if (!sysprof) {
 117		/* Turn off protected mode to write to special registers */
 118		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 119		OUT_RING(ring, 0);
 120
 121		OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
 122		OUT_RING(ring, 1);
 123	}
 124
 125	/* Execute the table update */
 126	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
 127	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
 128
 129	OUT_RING(ring,
 130		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
 131		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
 132	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
 133	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
 134
 135	/*
 136	 * Write the new TTBR0 to the memstore. This is good for debugging.
 137	 */
 138	OUT_PKT7(ring, CP_MEM_WRITE, 4);
 139	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
 140	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
 141	OUT_RING(ring, lower_32_bits(ttbr));
 142	OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
 143
 144	/*
 145	 * And finally, trigger a uche flush to be sure there isn't anything
 146	 * lingering in that part of the GPU
 147	 */
 148
 149	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 150	OUT_RING(ring, CACHE_INVALIDATE);
 151
 152	if (!sysprof) {
 153		/*
 154		 * Wait for SRAM clear after the pgtable update, so the
 155		 * two can happen in parallel:
 156		 */
 157		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
 158		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
 159		OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
 160				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
 161		OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
 162		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
 163		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
 164		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
 165
 166		/* Re-enable protected mode: */
 167		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 168		OUT_RING(ring, 1);
 169	}
 170}
 171
 172static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 173{
 174	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
 175	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 176	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 177	struct msm_ringbuffer *ring = submit->ring;
 178	unsigned int i, ibs = 0;
 179
 180	a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
 181
 182	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
 183		rbmemptr_stats(ring, index, cpcycles_start));
 184
 185	/*
 186	 * For PM4 the GMU register offsets are calculated from the base of the
 187	 * GPU registers so we need to add 0x1a800 to the register value on A630
 188	 * to get the right value from PM4.
 189	 */
 190	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
 191		rbmemptr_stats(ring, index, alwayson_start));
 192
 193	/* Invalidate CCU depth and color */
 194	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 195	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
 196
 197	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 198	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
 199
 200	/* Submit the commands */
 201	for (i = 0; i < submit->nr_cmds; i++) {
 202		switch (submit->cmd[i].type) {
 203		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 204			break;
 205		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 206			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
 207				break;
 208			fallthrough;
 209		case MSM_SUBMIT_CMD_BUF:
 210			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 211			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 212			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 213			OUT_RING(ring, submit->cmd[i].size);
 214			ibs++;
 215			break;
 216		}
 217
 218		/*
 219		 * Periodically update shadow-wptr if needed, so that we
 220		 * can see partial progress of submits with large # of
 221		 * cmds.. otherwise we could needlessly stall waiting for
 222		 * ringbuffer state, simply due to looking at a shadow
 223		 * rptr value that has not been updated
 224		 */
 225		if ((ibs % 32) == 0)
 226			update_shadow_rptr(gpu, ring);
 227	}
 228
 229	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
 230		rbmemptr_stats(ring, index, cpcycles_end));
 231	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
 232		rbmemptr_stats(ring, index, alwayson_end));
 233
 234	/* Write the fence to the scratch register */
 235	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
 236	OUT_RING(ring, submit->seqno);
 237
 238	/*
 239	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
 240	 * timestamp is written to the memory and then triggers the interrupt
 241	 */
 242	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 243	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
 244		CP_EVENT_WRITE_0_IRQ);
 245	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 246	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 247	OUT_RING(ring, submit->seqno);
 248
 249	trace_msm_gpu_submit_flush(submit,
 250		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
 251
 252	a6xx_flush(gpu, ring);
 253}
 254
 255/* For a615 family (a615, a616, a618 and a619) */
 256const struct adreno_reglist a615_hwcg[] = {
 257	{REG_A6XX_RBBM_CLOCK_CNTL_SP0,  0x02222222},
 258	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 259	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 260	{REG_A6XX_RBBM_CLOCK_HYST_SP0,  0x0000F3CF},
 261	{REG_A6XX_RBBM_CLOCK_CNTL_TP0,  0x02222222},
 262	{REG_A6XX_RBBM_CLOCK_CNTL_TP1,  0x02222222},
 263	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 264	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 265	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 266	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
 267	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 268	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
 269	{REG_A6XX_RBBM_CLOCK_HYST_TP0,  0x77777777},
 270	{REG_A6XX_RBBM_CLOCK_HYST_TP1,  0x77777777},
 271	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 272	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 273	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 274	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
 275	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 276	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
 277	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 278	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 279	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 280	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 281	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 282	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
 283	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 284	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
 285	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE,  0x22222222},
 286	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 287	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 288	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 289	{REG_A6XX_RBBM_CLOCK_HYST_UCHE,  0x00000004},
 290	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 291	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 292	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
 293	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002020},
 294	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
 295	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
 296	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
 297	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 298	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
 299	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
 300	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
 301	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
 302	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 303	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 304	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 305	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 306	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 307	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 308	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 309	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 310	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 311	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 312	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 313	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 314	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 315	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 316	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 317	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 318	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 319	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 320	{},
 321};
 322
 323const struct adreno_reglist a630_hwcg[] = {
 324	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
 325	{REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
 326	{REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
 327	{REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222},
 328	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
 329	{REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
 330	{REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
 331	{REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
 332	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 333	{REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 334	{REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 335	{REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 336	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
 337	{REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf},
 338	{REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf},
 339	{REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf},
 340	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
 341	{REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
 342	{REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
 343	{REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
 344	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 345	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 346	{REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 347	{REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 348	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 349	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
 350	{REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
 351	{REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
 352	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 353	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
 354	{REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
 355	{REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
 356	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 357	{REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 358	{REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 359	{REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 360	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 361	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 362	{REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 363	{REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 364	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 365	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
 366	{REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
 367	{REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
 368	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 369	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
 370	{REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
 371	{REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
 372	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 373	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 374	{REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 375	{REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 376	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 377	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 378	{REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 379	{REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 380	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 381	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
 382	{REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
 383	{REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
 384	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 385	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
 386	{REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
 387	{REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
 388	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 389	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 390	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 391	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 392	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 393	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 394	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 395	{REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 396	{REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 397	{REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 398	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
 399	{REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
 400	{REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
 401	{REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
 402	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 403	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
 404	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
 405	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
 406	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
 407	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00},
 408	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00},
 409	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00},
 410	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
 411	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 412	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 413	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 414	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 415	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 416	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 417	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 418	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 419	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 420	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 421	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 422	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 423	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 424	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 425	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 426	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 427	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 428	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 429	{},
 430};
 431
 432const struct adreno_reglist a640_hwcg[] = {
 433	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 434	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 435	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 436	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 437	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
 438	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 439	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 440	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 441	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 442	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 443	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 444	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 445	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 446	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 447	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 448	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 449	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 450	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
 451	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 452	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 453	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
 454	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 455	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 456	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 457	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 458	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 459	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 460	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 461	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 462	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 463	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 464	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 465	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 466	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 467	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 468	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 469	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
 470	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
 471	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
 472	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
 473	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 474	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 475	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 476	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
 477	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
 478	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
 479	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 480	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 481	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 482	{},
 483};
 484
 485const struct adreno_reglist a650_hwcg[] = {
 486	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 487	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 488	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 489	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 490	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
 491	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 492	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 493	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 494	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 495	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 496	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 497	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 498	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 499	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 500	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 501	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 502	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 503	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
 504	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 505	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 506	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
 507	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 508	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 509	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 510	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 511	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 512	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 513	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 514	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 515	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 516	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 517	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 518	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 519	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 520	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 521	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 522	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
 523	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
 524	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
 525	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
 526	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 527	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 528	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 529	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
 530	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
 531	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
 532	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 533	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 534	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 535	{},
 536};
 537
 538const struct adreno_reglist a660_hwcg[] = {
 539	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 540	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 541	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 542	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 543	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 544	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 545	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 546	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 547	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 548	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 549	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 550	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 551	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 552	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 553	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 554	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 555	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 556	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
 557	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 558	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 559	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
 560	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 561	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 562	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 563	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 564	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 565	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 566	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 567	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 568	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 569	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 570	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 571	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 572	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 573	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 574	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 575	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
 576	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
 577	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
 578	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
 579	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 580	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 581	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 582	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
 583	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
 584	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
 585	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 586	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 587	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 588	{},
 589};
 590
 591static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
 592{
 593	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 594	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 595	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 596	const struct adreno_reglist *reg;
 597	unsigned int i;
 598	u32 val, clock_cntl_on;
 599
 600	if (!adreno_gpu->info->hwcg)
 601		return;
 602
 603	if (adreno_is_a630(adreno_gpu))
 604		clock_cntl_on = 0x8aa8aa02;
 605	else
 606		clock_cntl_on = 0x8aa8aa82;
 607
 608	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
 609
 610	/* Don't re-program the registers if they are already correct */
 611	if ((!state && !val) || (state && (val == clock_cntl_on)))
 612		return;
 613
 614	/* Disable SP clock before programming HWCG registers */
 615	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
 616
 617	for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
 618		gpu_write(gpu, reg->offset, state ? reg->value : 0);
 619
 620	/* Enable SP clock */
 621	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
 622
 623	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
 624}
 625
 626/* For a615, a616, a618, a619, a630, a640 and a680 */
 627static const u32 a6xx_protect[] = {
 628	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
 629	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
 630	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
 631	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
 632	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
 633	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
 634	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
 635	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
 636	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
 637	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
 638	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
 639	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
 640	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
 641	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
 642	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
 643	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
 644	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
 645	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
 646	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
 647	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
 648	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
 649	A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
 650	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
 651	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
 652	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
 653	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
 654	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
 655	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
 656	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
 657	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
 658	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
 659	A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
 660};
 661
 662/* These are for a620 and a650 */
 663static const u32 a650_protect[] = {
 664	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
 665	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
 666	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
 667	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
 668	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
 669	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
 670	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
 671	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
 672	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
 673	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
 674	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
 675	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
 676	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
 677	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
 678	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
 679	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
 680	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
 681	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
 682	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
 683	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
 684	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
 685	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
 686	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
 687	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
 688	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
 689	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
 690	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
 691	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
 692	A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
 693	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
 694	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
 695	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
 696	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
 697	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
 698	A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
 699	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
 700	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
 701	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
 702	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
 703};
 704
 705/* These are for a635 and a660 */
 706static const u32 a660_protect[] = {
 707	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
 708	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
 709	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
 710	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
 711	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
 712	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
 713	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
 714	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
 715	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
 716	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
 717	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
 718	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
 719	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
 720	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
 721	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
 722	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
 723	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
 724	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
 725	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
 726	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
 727	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
 728	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
 729	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
 730	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
 731	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
 732	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
 733	A6XX_PROTECT_NORDWR(0x0ae50, 0x012f),
 734	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
 735	A6XX_PROTECT_NORDWR(0x0b608, 0x0006),
 736	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
 737	A6XX_PROTECT_NORDWR(0x0be20, 0x015f),
 738	A6XX_PROTECT_NORDWR(0x0d000, 0x05ff),
 739	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
 740	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
 741	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
 742	A6XX_PROTECT_NORDWR(0x1a400, 0x1fff),
 743	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
 744	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
 745	A6XX_PROTECT_NORDWR(0x1f860, 0x0000),
 746	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
 747	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
 748};
 749
 750static void a6xx_set_cp_protect(struct msm_gpu *gpu)
 751{
 752	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 753	const u32 *regs = a6xx_protect;
 754	unsigned i, count, count_max;
 755
 756	if (adreno_is_a650(adreno_gpu)) {
 757		regs = a650_protect;
 758		count = ARRAY_SIZE(a650_protect);
 759		count_max = 48;
 760		BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
 761	} else if (adreno_is_a660_family(adreno_gpu)) {
 762		regs = a660_protect;
 763		count = ARRAY_SIZE(a660_protect);
 764		count_max = 48;
 765		BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
 766	} else {
 767		regs = a6xx_protect;
 768		count = ARRAY_SIZE(a6xx_protect);
 769		count_max = 32;
 770		BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
 771	}
 772
 773	/*
 774	 * Enable access protection to privileged registers, fault on an access
 775	 * protect violation and select the last span to protect from the start
 776	 * address all the way to the end of the register address space
 777	 */
 778	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
 779
 780	for (i = 0; i < count - 1; i++)
 781		gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
 782	/* last CP_PROTECT to have "infinite" length on the last entry */
 783	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
 784}
 785
 786static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 787{
 788	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 789	u32 lower_bit = 2;
 790	u32 amsbc = 0;
 791	u32 rgb565_predicator = 0;
 792	u32 uavflagprd_inv = 0;
 793
 794	/* a618 is using the hw default values */
 795	if (adreno_is_a618(adreno_gpu))
 796		return;
 797
 798	if (adreno_is_a640_family(adreno_gpu))
 799		amsbc = 1;
 800
 801	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
 802		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
 803		lower_bit = 3;
 804		amsbc = 1;
 805		rgb565_predicator = 1;
 806		uavflagprd_inv = 2;
 807	}
 808
 809	if (adreno_is_7c3(adreno_gpu)) {
 810		lower_bit = 1;
 811		amsbc = 1;
 812		rgb565_predicator = 1;
 813		uavflagprd_inv = 2;
 814	}
 815
 816	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
 817		rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
 818	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
 819	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
 820		uavflagprd_inv << 4 | lower_bit << 1);
 821	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
 822}
 823
 824static int a6xx_cp_init(struct msm_gpu *gpu)
 825{
 826	struct msm_ringbuffer *ring = gpu->rb[0];
 827
 828	OUT_PKT7(ring, CP_ME_INIT, 8);
 829
 830	OUT_RING(ring, 0x0000002f);
 831
 832	/* Enable multiple hardware contexts */
 833	OUT_RING(ring, 0x00000003);
 834
 835	/* Enable error detection */
 836	OUT_RING(ring, 0x20000000);
 837
 838	/* Don't enable header dump */
 839	OUT_RING(ring, 0x00000000);
 840	OUT_RING(ring, 0x00000000);
 841
 842	/* No workarounds enabled */
 843	OUT_RING(ring, 0x00000000);
 844
 845	/* Pad rest of the cmds with 0's */
 846	OUT_RING(ring, 0x00000000);
 847	OUT_RING(ring, 0x00000000);
 848
 849	a6xx_flush(gpu, ring);
 850	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
 851}
 852
 853/*
 854 * Check that the microcode version is new enough to include several key
 855 * security fixes. Return true if the ucode is safe.
 856 */
 857static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
 858		struct drm_gem_object *obj)
 859{
 860	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 861	struct msm_gpu *gpu = &adreno_gpu->base;
 862	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
 863	u32 *buf = msm_gem_get_vaddr(obj);
 864	bool ret = false;
 865
 866	if (IS_ERR(buf))
 867		return false;
 868
 869	/*
 870	 * Targets up to a640 (a618, a630 and a640) need to check for a
 871	 * microcode version that is patched to support the whereami opcode or
 872	 * one that is new enough to include it by default.
 873	 *
 874	 * a650 tier targets don't need whereami but still need to be
 875	 * equal to or newer than 0.95 for other security fixes
 876	 *
 877	 * a660 targets have all the critical security fixes from the start
 878	 */
 879	if (!strcmp(sqe_name, "a630_sqe.fw")) {
 880		/*
 881		 * If the lowest nibble is 0xa that is an indication that this
 882		 * microcode has been patched. The actual version is in dword
 883		 * [3] but we only care about the patchlevel which is the lowest
 884		 * nibble of dword [3]
 885		 *
 886		 * Otherwise check that the firmware is greater than or equal
 887		 * to 1.90 which was the first version that had this fix built
 888		 * in
 889		 */
 890		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
 891			(buf[0] & 0xfff) >= 0x190) {
 892			a6xx_gpu->has_whereami = true;
 893			ret = true;
 894			goto out;
 895		}
 896
 897		DRM_DEV_ERROR(&gpu->pdev->dev,
 898			"a630 SQE ucode is too old. Have version %x need at least %x\n",
 899			buf[0] & 0xfff, 0x190);
 900	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
 901		if ((buf[0] & 0xfff) >= 0x095) {
 902			ret = true;
 903			goto out;
 904		}
 905
 906		DRM_DEV_ERROR(&gpu->pdev->dev,
 907			"a650 SQE ucode is too old. Have version %x need at least %x\n",
 908			buf[0] & 0xfff, 0x095);
 909	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
 910		ret = true;
 911	} else {
 912		DRM_DEV_ERROR(&gpu->pdev->dev,
 913			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
 914	}
 915out:
 916	msm_gem_put_vaddr(obj);
 917	return ret;
 918}
 919
 920static int a6xx_ucode_init(struct msm_gpu *gpu)
 921{
 922	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 923	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 924
 925	if (!a6xx_gpu->sqe_bo) {
 926		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
 927			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
 928
 929		if (IS_ERR(a6xx_gpu->sqe_bo)) {
 930			int ret = PTR_ERR(a6xx_gpu->sqe_bo);
 931
 932			a6xx_gpu->sqe_bo = NULL;
 933			DRM_DEV_ERROR(&gpu->pdev->dev,
 934				"Could not allocate SQE ucode: %d\n", ret);
 935
 936			return ret;
 937		}
 938
 939		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
 940		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
 941			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
 942			drm_gem_object_put(a6xx_gpu->sqe_bo);
 943
 944			a6xx_gpu->sqe_bo = NULL;
 945			return -EPERM;
 946		}
 947	}
 948
 949	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
 950
 951	return 0;
 952}
 953
 954static int a6xx_zap_shader_init(struct msm_gpu *gpu)
 955{
 956	static bool loaded;
 957	int ret;
 958
 959	if (loaded)
 960		return 0;
 961
 962	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
 963
 964	loaded = !ret;
 965	return ret;
 966}
 967
 968#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
 969	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
 970	  A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 971	  A6XX_RBBM_INT_0_MASK_CP_IB2 | \
 972	  A6XX_RBBM_INT_0_MASK_CP_IB1 | \
 973	  A6XX_RBBM_INT_0_MASK_CP_RB | \
 974	  A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 975	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
 976	  A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
 977	  A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 978	  A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
 979
 980static int hw_init(struct msm_gpu *gpu)
 981{
 982	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 983	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 984	int ret;
 985
 986	/* Make sure the GMU keeps the GPU on while we set it up */
 987	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
 988
 989	/* Clear GBIF halt in case GX domain was not collapsed */
 990	if (a6xx_has_gbif(adreno_gpu))
 991		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
 992
 993	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
 994
 995	/*
 996	 * Disable the trusted memory range - we don't actually support secure
 997	 * memory rendering at this point in time and we don't want to block off
 998	 * part of the virtual memory space.
 999	 */
1000	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
1001	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1002
1003	/* Turn on 64 bit addressing for all blocks */
1004	gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1005	gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1006	gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1007	gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1008	gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1009	gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1010	gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1011	gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1012	gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1013	gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1014	gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1015	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
1016
1017	/* enable hardware clockgating */
1018	a6xx_set_hwcg(gpu, true);
1019
1020	/* VBIF/GBIF start */
1021	if (adreno_is_a640_family(adreno_gpu) ||
1022	    adreno_is_a650_family(adreno_gpu)) {
1023		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1024		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1025		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1026		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1027		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1028		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
1029	} else {
1030		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1031	}
1032
1033	if (adreno_is_a630(adreno_gpu))
1034		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1035
1036	/* Make all blocks contribute to the GPU BUSY perf counter */
1037	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1038
1039	/* Disable L2 bypass in the UCHE */
1040	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0);
1041	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff);
1042	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_LO, 0xfffff000);
1043	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
1044	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
1045	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
1046
1047	if (!adreno_is_a650_family(adreno_gpu)) {
1048		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1049		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
1050
1051		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
1052			0x00100000 + adreno_gpu->gmem - 1);
1053	}
1054
1055	gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1056	gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
1057
1058	if (adreno_is_a640_family(adreno_gpu) ||
1059	    adreno_is_a650_family(adreno_gpu))
1060		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1061	else
1062		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1063	gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1064
1065	if (adreno_is_a660_family(adreno_gpu))
1066		gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1067
1068	/* Setting the mem pool size */
1069	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1070
1071	/* Set the primFifo threshold default values,
1072	 * and the vccCacheSkipDis=1 bit (0x200) for A640 and newer
1073	 */
1074	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
1075		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
1076	else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
1077		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
1078	else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
1079		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
1080	else
1081		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000);
1082
1083	/* Set the AHB default slave response to "ERROR" */
1084	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1085
1086	/* Turn on performance counters */
1087	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1088
1089	/* Select CP0 to always count cycles */
1090	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1091
1092	a6xx_set_ubwc_config(gpu);
1093
1094	/* Enable fault detection */
1095	gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
1096		(1 << 30) | 0x1fffff);
1097
1098	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
1099
1100	/* Set weights for bicubic filtering */
1101	if (adreno_is_a650_family(adreno_gpu)) {
1102		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1103		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1104			0x3fe05ff4);
1105		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1106			0x3fa0ebee);
1107		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1108			0x3f5193ed);
1109		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1110			0x3f0243f0);
1111	}
1112
1113	/* Protect registers from the CP */
1114	a6xx_set_cp_protect(gpu);
1115
1116	if (adreno_is_a660_family(adreno_gpu)) {
1117		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1118		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1119	}
1120
1121	/* Set dualQ + disable afull for A660 GPU */
1122	if (adreno_is_a660(adreno_gpu))
1123		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1124
1125	/* Enable expanded apriv for targets that support it */
1126	if (gpu->hw_apriv) {
1127		gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1128			(1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
1129	}
1130
1131	/* Enable interrupts */
1132	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);
1133
1134	ret = adreno_hw_init(gpu);
1135	if (ret)
1136		goto out;
1137
1138	ret = a6xx_ucode_init(gpu);
1139	if (ret)
1140		goto out;
1141
1142	/* Set the ringbuffer address */
1143	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1144
1145	/* Targets that support extended APRIV can use the RPTR shadow from
1146	 * hardware but all the other ones need to disable the feature. Targets
1147	 * that support the WHERE_AM_I opcode can use that instead
1148	 */
1149	if (adreno_gpu->base.hw_apriv)
1150		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1151	else
1152		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1153			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1154
1155	/*
1156	 * Expanded APRIV and targets that support WHERE_AM_I both need a
1157	 * privileged buffer to store the RPTR shadow
1158	 */
1159
1160	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) {
1161		if (!a6xx_gpu->shadow_bo) {
1162			a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
1163				sizeof(u32) * gpu->nr_rings,
1164				MSM_BO_WC | MSM_BO_MAP_PRIV,
1165				gpu->aspace, &a6xx_gpu->shadow_bo,
1166				&a6xx_gpu->shadow_iova);
1167
1168			if (IS_ERR(a6xx_gpu->shadow))
1169				return PTR_ERR(a6xx_gpu->shadow);
1170
1171			msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
1172		}
1173
1174		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
1175			shadowptr(a6xx_gpu, gpu->rb[0]));
1176	}
1177
1178	/* Always come up on rb 0 */
1179	a6xx_gpu->cur_ring = gpu->rb[0];
1180
1181	gpu->cur_ctx_seqno = 0;
1182
1183	/* Enable the SQE to start the CP engine */
1184	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1185
1186	ret = a6xx_cp_init(gpu);
1187	if (ret)
1188		goto out;
1189
1190	/*
1191	 * Try to load a zap shader into the secure world. If successful
1192	 * we can use the CP to switch out of secure mode. If not then we
1193	 * have no recourse but to try to switch ourselves out manually. If we
1194	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1195	 * be blocked and a permissions violation will soon follow.
1196	 */
1197	ret = a6xx_zap_shader_init(gpu);
1198	if (!ret) {
1199		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1200		OUT_RING(gpu->rb[0], 0x00000000);
1201
1202		a6xx_flush(gpu, gpu->rb[0]);
1203		if (!a6xx_idle(gpu, gpu->rb[0]))
1204			return -EINVAL;
1205	} else if (ret == -ENODEV) {
1206		/*
1207		 * This device does not use zap shader (but print a warning
1208		 * just in case someone got their dt wrong.. hopefully they
1209		 * have a debug UART to realize the error of their ways...
1210		 * if you mess this up you are about to crash horribly)
1211		 */
1212		dev_warn_once(gpu->dev->dev,
1213			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1214		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1215		ret = 0;
1216	} else {
1217		return ret;
1218	}
1219
1220out:
1221	/*
1222	 * Tell the GMU that we are done touching the GPU and it can start power
1223	 * management
1224	 */
1225	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1226
1227	if (a6xx_gpu->gmu.legacy) {
1228		/* Take the GMU out of its special boot mode */
1229		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1230	}
1231
1232	return ret;
1233}
1234
1235static int a6xx_hw_init(struct msm_gpu *gpu)
1236{
1237	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1238	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1239	int ret;
1240
1241	mutex_lock(&a6xx_gpu->gmu.lock);
1242	ret = hw_init(gpu);
1243	mutex_unlock(&a6xx_gpu->gmu.lock);
1244
1245	return ret;
1246}
1247
1248static void a6xx_dump(struct msm_gpu *gpu)
1249{
1250	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
1251			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1252	adreno_dump(gpu);
1253}
1254
1255#define VBIF_RESET_ACK_TIMEOUT	100
1256#define VBIF_RESET_ACK_MASK	0x00f0
1257
1258static void a6xx_recover(struct msm_gpu *gpu)
1259{
1260	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1261	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1262	int i, active_submits;
1263
1264	adreno_dump_info(gpu);
1265
1266	for (i = 0; i < 8; i++)
1267		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1268			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1269
1270	if (hang_debug)
1271		a6xx_dump(gpu);
1272
1273	/*
1274	 * Set the hung flag so that recovery-specific sequences are handled
1275	 * during the rpm suspend we are about to trigger
1276	 */
1277	a6xx_gpu->hung = true;
1278
1279	/* Halt SQE first */
1280	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1281
1282	/*
1283	 * Turn off keep alive that might have been enabled by the hang
1284	 * interrupt
1285	 */
1286	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
1287
1288	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1289
1290	/* active_submits won't change until we make a submission */
1291	mutex_lock(&gpu->active_lock);
1292	active_submits = gpu->active_submits;
1293
1294	/*
1295	 * Temporarily clear active_submits count to silence a WARN() in the
1296	 * runtime suspend cb
1297	 */
1298	gpu->active_submits = 0;
1299
1300	/* Drop the rpm refcount from active submits */
1301	if (active_submits)
1302		pm_runtime_put(&gpu->pdev->dev);
1303
1304	/* And the final one from recover worker */
1305	pm_runtime_put_sync(&gpu->pdev->dev);
1306
1307	/* Call into gpucc driver to poll for cx gdsc collapse */
1308	reset_control_reset(gpu->cx_collapse);
1309
1310	pm_runtime_use_autosuspend(&gpu->pdev->dev);
1311
1312	if (active_submits)
1313		pm_runtime_get(&gpu->pdev->dev);
1314
1315	pm_runtime_get_sync(&gpu->pdev->dev);
1316
1317	gpu->active_submits = active_submits;
1318	mutex_unlock(&gpu->active_lock);
1319
1320	msm_gpu_hw_init(gpu);
1321	a6xx_gpu->hung = false;
1322}
1323
1324static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1325{
1326	static const char *uche_clients[7] = {
1327		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1328	};
1329	u32 val;
1330
1331	if (mid < 1 || mid > 3)
1332		return "UNKNOWN";
1333
1334	/*
1335	 * The source of the data depends on the mid ID read from FSYNR1
1336	 * and the client ID read from the UCHE block
1337	 */
1338	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1339
1340	/* mid = 3 is most precise and refers to only one block per client */
1341	if (mid == 3)
1342		return uche_clients[val & 7];
1343
1344	/* For mid=2 the source is TP or VFD except when the client id is 0 */
1345	if (mid == 2)
1346		return ((val & 7) == 0) ? "TP" : "TP|VFD";
1347
1348	/* For mid=1 just return "UCHE" as a catchall for everything else */
1349	return "UCHE";
1350}
1351
1352static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1353{
1354	if (id == 0)
1355		return "CP";
1356	else if (id == 4)
1357		return "CCU";
1358	else if (id == 6)
1359		return "CDP Prefetch";
1360
1361	return a6xx_uche_fault_block(gpu, id);
1362}
1363
1364#define ARM_SMMU_FSR_TF                 BIT(1)
1365#define ARM_SMMU_FSR_PF			BIT(3)
1366#define ARM_SMMU_FSR_EF			BIT(4)
1367
1368static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1369{
1370	struct msm_gpu *gpu = arg;
1371	struct adreno_smmu_fault_info *info = data;
1372	const char *type = "UNKNOWN";
1373	const char *block;
1374	bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
1375
1376	/*
1377	 * If we aren't going to be resuming later from fault_worker, then do
1378	 * it now.
1379	 */
1380	if (!do_devcoredump) {
1381		gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
1382	}
1383
1384	/*
1385	 * Print a default message if we couldn't get the data from the
1386	 * adreno-smmu-priv
1387	 */
1388	if (!info) {
1389		pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n",
1390			iova, flags,
1391			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1392			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1393			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1394			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
1395
1396		return 0;
1397	}
1398
1399	if (info->fsr & ARM_SMMU_FSR_TF)
1400		type = "TRANSLATION";
1401	else if (info->fsr & ARM_SMMU_FSR_PF)
1402		type = "PERMISSION";
1403	else if (info->fsr & ARM_SMMU_FSR_EF)
1404		type = "EXTERNAL";
1405
1406	block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1407
1408	pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
1409			info->ttbr0, iova,
1410			flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
1411			type, block,
1412			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1413			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1414			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1415			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
1416
1417	if (do_devcoredump) {
1418		/* Turn off the hangcheck timer to keep it from bothering us */
1419		del_timer(&gpu->hangcheck_timer);
1420
1421		gpu->fault_info.ttbr0 = info->ttbr0;
1422		gpu->fault_info.iova  = iova;
1423		gpu->fault_info.flags = flags;
1424		gpu->fault_info.type  = type;
1425		gpu->fault_info.block = block;
1426
1427		kthread_queue_work(gpu->worker, &gpu->fault_work);
1428	}
1429
1430	return 0;
1431}
1432
1433static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1434{
1435	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1436
1437	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1438		u32 val;
1439
1440		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1441		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1442		dev_err_ratelimited(&gpu->pdev->dev,
1443			"CP | opcode error | possible opcode=0x%8.8X\n",
1444			val);
1445	}
1446
1447	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1448		dev_err_ratelimited(&gpu->pdev->dev,
1449			"CP ucode error interrupt\n");
1450
1451	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1452		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1453			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1454
1455	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1456		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1457
1458		dev_err_ratelimited(&gpu->pdev->dev,
1459			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1460			val & (1 << 20) ? "READ" : "WRITE",
1461			(val & 0x3ffff), val);
1462	}
1463
1464	if (status & A6XX_CP_INT_CP_AHB_ERROR)
1465		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1466
1467	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1468		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1469
1470	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1471		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1472
1473}
1474
1475static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1476{
1477	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1478	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1479	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1480
1481	/*
1482	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1483	 * but the fault handler will trigger the devcore dump, and we want
1484	 * to otherwise resume normally rather than killing the submit, so
1485	 * just bail.
1486	 */
1487	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1488		return;
1489
1490	/*
1491	 * Force the GPU to stay on until after we finish
1492	 * collecting information
1493	 */
1494	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1495
1496	DRM_DEV_ERROR(&gpu->pdev->dev,
1497		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1498		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1499		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1500		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1501		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1502		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1503		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1504		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1505		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1506
1507	/* Turn off the hangcheck timer to keep it from bothering us */
1508	del_timer(&gpu->hangcheck_timer);
1509
1510	kthread_queue_work(gpu->worker, &gpu->recover_work);
1511}
1512
1513static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1514{
1515	struct msm_drm_private *priv = gpu->dev->dev_private;
1516	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1517
1518	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1519
1520	if (priv->disable_err_irq)
1521		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1522
1523	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1524		a6xx_fault_detect_irq(gpu);
1525
1526	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1527		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1528
1529	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1530		a6xx_cp_hw_err_irq(gpu);
1531
1532	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1533		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1534
1535	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1536		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1537
1538	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1539		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1540
1541	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
1542		msm_gpu_retire(gpu);
1543
1544	return IRQ_HANDLED;
1545}
1546
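/*
 * Helpers for the system cache (LLC) control registers in the CX_MISC
 * block.  llc_mmio is mapped in a6xx_llc_slices_init(); register offsets
 * are in dwords, hence the "reg << 2".
 */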
1547static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
1548{
1549	return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
1550}
1551
1552static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
1553{
1554	msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
1555}
1556
1557static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1558{
1559	llcc_slice_deactivate(a6xx_gpu->llc_slice);
1560	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1561}
1562
1563static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1564{
1565	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1566	struct msm_gpu *gpu = &adreno_gpu->base;
1567	u32 cntl1_regval = 0;
1568
1569	if (IS_ERR(a6xx_gpu->llc_mmio))
1570		return;
1571
1572	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1573		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1574
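		/*
		 * Replicate the 5-bit slice ID (SCID) into each of the
		 * per-block fields of SYSTEM_CACHE_CNTL_1 / GBIF_SCACHE_CNTL1
		 * so the various GPU blocks all use the GPU LLC slice.
		 */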
1575		gpu_scid &= 0x1f;
1576		cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1577			       (gpu_scid << 15) | (gpu_scid << 20);
1578
1579		/* On the A660 family, the SCID programming for UCHE traffic is
1580		 * done in A6XX_GBIF_SCACHE_CNTL0[14:10]
1581		 */
1582		if (adreno_is_a660_family(adreno_gpu))
1583			gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1584				(1 << 8), (gpu_scid << 10) | (1 << 8));
1585	}
1586
1587	/*
1588	 * For targets with an MMU500, activate the slice but don't program the
1589	 * register.  The XBL will take care of that.
1590	 */
1591	if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1592		if (!a6xx_gpu->have_mmu500) {
1593			u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1594
1595			gpuhtw_scid &= 0x1f;
1596			cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1597		}
1598	}
1599
1600	if (!cntl1_regval)
1601		return;
1602
1603	/*
1604	 * Program the slice IDs for the various GPU blocks and GPU MMU
1605	 * pagetables
1606	 */
1607	if (!a6xx_gpu->have_mmu500) {
1608		a6xx_llc_write(a6xx_gpu,
1609			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1610
1611		/*
1612		 * Program cacheability overrides to not allocate cache
1613		 * lines on a write miss
1614		 */
1615		a6xx_llc_rmw(a6xx_gpu,
1616			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1617		return;
1618	}
1619
1620	gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1621}
1622
1623static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1624{
1625	llcc_slice_putd(a6xx_gpu->llc_slice);
1626	llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1627}
1628
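/*
 * Look up the GPU and GPU pagetable-walker LLCC slices and map the "cx_mem"
 * (CX_MISC) region used to program the slice IDs on targets without an
 * MMU500.  If neither slice is available, llc_mmio is set to an error
 * pointer so that a6xx_llc_activate() becomes a no-op.
 */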
1629static void a6xx_llc_slices_init(struct platform_device *pdev,
1630		struct a6xx_gpu *a6xx_gpu)
1631{
1632	struct device_node *phandle;
1633
1634	/*
1635	 * There is a different programming path for targets with an MMU500
1636	 * attached, so detect if that is the case.
1637	 */
1638	phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1639	a6xx_gpu->have_mmu500 = (phandle &&
1640		of_device_is_compatible(phandle, "arm,mmu-500"));
1641	of_node_put(phandle);
1642
1643	if (a6xx_gpu->have_mmu500)
1644		a6xx_gpu->llc_mmio = NULL;
1645	else
1646		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1647
1648	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1649	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1650
1651	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1652		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1653}
1654
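/*
 * Runtime PM resume: bring the GMU (and with it the GPU) back up, then
 * resume devfreq and re-activate the LLC slices.  Actual hardware init is
 * deferred via gpu->needs_hw_init.
 */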
1655static int a6xx_pm_resume(struct msm_gpu *gpu)
1656{
1657	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1658	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1659	int ret;
1660
1661	gpu->needs_hw_init = true;
1662
1663	trace_msm_gpu_resume(0);
1664
1665	mutex_lock(&a6xx_gpu->gmu.lock);
1666	ret = a6xx_gmu_resume(a6xx_gpu);
1667	mutex_unlock(&a6xx_gpu->gmu.lock);
1668	if (ret)
1669		return ret;
1670
1671	msm_devfreq_resume(gpu);
1672
1673	a6xx_llc_activate(a6xx_gpu);
1674
1675	return 0;
1676}
1677
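/*
 * Runtime PM suspend: the mirror of a6xx_pm_resume().  The cached shadow
 * RPTR values are zeroed since the CP read pointers start over when the
 * GPU is powered back up.
 */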
1678static int a6xx_pm_suspend(struct msm_gpu *gpu)
1679{
1680	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1681	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1682	int i, ret;
1683
1684	trace_msm_gpu_suspend(0);
1685
1686	a6xx_llc_deactivate(a6xx_gpu);
1687
1688	msm_devfreq_suspend(gpu);
1689
1690	mutex_lock(&a6xx_gpu->gmu.lock);
1691	ret = a6xx_gmu_stop(a6xx_gpu);
1692	mutex_unlock(&a6xx_gpu->gmu.lock);
1693	if (ret)
1694		return ret;
1695
1696	if (a6xx_gpu->shadow_bo)
1697		for (i = 0; i < gpu->nr_rings; i++)
1698			a6xx_gpu->shadow[i] = 0;
1699
1700	gpu->suspend_count++;
1701
1702	return 0;
1703}
1704
1705static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1706{
1707	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1708	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1709
1710	mutex_lock(&a6xx_gpu->gmu.lock);
1711
1712	/* Force the GPU power on so we can read this register */
1713	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1714
1715	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
1716
1717	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1718
1719	mutex_unlock(&a6xx_gpu->gmu.lock);
1720
1721	return 0;
1722}
1723
1724static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
1725{
1726	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1727	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1728
1729	return a6xx_gpu->cur_ring;
1730}
1731
1732static void a6xx_destroy(struct msm_gpu *gpu)
1733{
1734	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1735	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1736
1737	if (a6xx_gpu->sqe_bo) {
1738		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
1739		drm_gem_object_put(a6xx_gpu->sqe_bo);
1740	}
1741
1742	if (a6xx_gpu->shadow_bo) {
1743		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
1744		drm_gem_object_put(a6xx_gpu->shadow_bo);
1745	}
1746
1747	a6xx_llc_slices_destroy(a6xx_gpu);
1748
1749	a6xx_gmu_remove(a6xx_gpu);
1750
1751	adreno_gpu_cleanup(adreno_gpu);
1752
1753	kfree(a6xx_gpu);
1754}
1755
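/*
 * Busy-cycle accounting for devfreq: report the GMU XOCLK_0 power counter,
 * which ticks at the 19.2 MHz XO rate while the GPU is busy, along with the
 * matching sample rate.
 */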
1756static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1757{
1758	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1759	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1760	u64 busy_cycles;
1761
1762	/* 19.2 MHz */
1763	*out_sample_rate = 19200000;
1764
1765	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
1766			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
1767			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
1768
1769	return busy_cycles;
1770}
1771
1772static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
1773			      bool suspended)
1774{
1775	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1776	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1777
1778	mutex_lock(&a6xx_gpu->gmu.lock);
1779	a6xx_gmu_set_freq(gpu, opp, suspended);
1780	mutex_unlock(&a6xx_gpu->gmu.lock);
1781}
1782
1783static struct msm_gem_address_space *
1784a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
1785{
1786	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1787	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1788	unsigned long quirks = 0;
1789
1790	/*
1791	 * This allows the GPU to set the bus attributes required to use the
1792	 * system cache on behalf of the IOMMU page table walker.
1793	 */
1794	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1795		quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
1796
1797	return adreno_iommu_create_address_space(gpu, pdev, quirks);
1798}
1799
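/*
 * Create a per-context GPU address space backed by its own IOMMU pagetable,
 * starting at 0x100000000 with the size reported by
 * adreno_private_address_space_size().
 */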
1800static struct msm_gem_address_space *
1801a6xx_create_private_address_space(struct msm_gpu *gpu)
1802{
1803	struct msm_mmu *mmu;
1804
1805	mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
1806
1807	if (IS_ERR(mmu))
1808		return ERR_CAST(mmu);
1809
1810	return msm_gem_address_space_create(mmu,
1811		"gpu", 0x100000000ULL,
1812		adreno_private_address_space_size(gpu));
1813}
1814
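/*
 * Return the current ring read pointer: use the memory-backed shadow that
 * the CP keeps up to date (via CP_WHERE_AM_I, or automatically when
 * expanded APRIV is enabled) when available, otherwise fall back to reading
 * CP_RB_RPTR directly.
 */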
1815static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1816{
1817	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1818	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1819
1820	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
1821		return a6xx_gpu->shadow[ring->id];
1822
1823	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
1824}
1825
1826static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1827{
1828	struct msm_cp_state cp_state = {
1829		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1830		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1831		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1832		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
1833	};
1834	bool progress;
1835
1836	/*
1837	 * Adjust the remaining data to account for what has already been
1838	 * fetched from memory, but not yet consumed by the SQE.
1839	 *
1840	 * This is not *technically* correct: the amount buffered could
1841	 * exceed the IB size due to hw prefetching ahead, but:
1842	 *
1843	 * (1) We aren't trying to find the exact position, just whether
1844	 *     progress has been made
1845	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
1846	 *     to prevent prefetching into an unrelated submit.  (And
1847	 *     either way, at some point the ROQ will be full.)
1848	 */
1849	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
1850	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
1851
1852	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
1853
1854	ring->last_cp_state = cp_state;
1855
1856	return progress;
1857}
1858
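/*
 * Map the raw speed-bin fuse value to a bin number; fuse_to_supp_hw() below
 * turns that into the opp-supported-hw bitmask.  UINT_MAX marks an
 * unrecognized fuse value.
 */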
1859static u32 a618_get_speed_bin(u32 fuse)
1860{
1861	if (fuse == 0)
1862		return 0;
1863	else if (fuse == 169)
1864		return 1;
1865	else if (fuse == 174)
1866		return 2;
1867
1868	return UINT_MAX;
1869}
1870
1871static u32 a619_get_speed_bin(u32 fuse)
1872{
1873	if (fuse == 0)
1874		return 0;
1875	else if (fuse == 120)
1876		return 4;
1877	else if (fuse == 138)
1878		return 3;
1879	else if (fuse == 169)
1880		return 2;
1881	else if (fuse == 180)
1882		return 1;
1883
1884	return UINT_MAX;
1885}
1886
1887static u32 adreno_7c3_get_speed_bin(u32 fuse)
1888{
1889	if (fuse == 0)
1890		return 0;
1891	else if (fuse == 117)
1892		return 0;
1893	else if (fuse == 190)
1894		return 1;
1895
1896	return UINT_MAX;
1897}
1898
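/*
 * Translate the fuse value into the bitmask format expected by
 * devm_pm_opp_set_supported_hw() (bit N set for speed bin N).
 */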
1899static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
1900{
1901	u32 val = UINT_MAX;
1902
1903	if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev))
1904		val = a618_get_speed_bin(fuse);
1905
1906	if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, ANY_ID), rev))
1907		val = a619_get_speed_bin(fuse);
1908
1909	if (adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), rev))
1910		val = adreno_7c3_get_speed_bin(fuse);
1911
1912	if (val == UINT_MAX) {
1913		DRM_DEV_ERROR(dev,
1914			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
1915			fuse);
1916		return UINT_MAX;
1917	}
1918
1919	return (1 << val);
1920}
1921
1922static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
1923{
1924	u32 supp_hw;
1925	u32 speedbin;
1926	int ret;
1927
1928	ret = adreno_read_speedbin(dev, &speedbin);
1929	/*
1930	 * -ENOENT means that the platform doesn't support speedbin, which
1931	 * is fine
1932	 */
1933	if (ret == -ENOENT) {
1934		return 0;
1935	} else if (ret) {
1936		dev_err_probe(dev, ret,
1937			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
1938		return ret;
1939	}
1940
1941	supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
1942
1943	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
1944	if (ret)
1945		return ret;
1946
1947	return 0;
1948}
1949
1950static const struct adreno_gpu_funcs funcs = {
1951	.base = {
1952		.get_param = adreno_get_param,
1953		.set_param = adreno_set_param,
1954		.hw_init = a6xx_hw_init,
1955		.pm_suspend = a6xx_pm_suspend,
1956		.pm_resume = a6xx_pm_resume,
1957		.recover = a6xx_recover,
1958		.submit = a6xx_submit,
1959		.active_ring = a6xx_active_ring,
1960		.irq = a6xx_irq,
1961		.destroy = a6xx_destroy,
1962#if defined(CONFIG_DRM_MSM_GPU_STATE)
1963		.show = a6xx_show,
1964#endif
1965		.gpu_busy = a6xx_gpu_busy,
1966		.gpu_get_freq = a6xx_gmu_get_freq,
1967		.gpu_set_freq = a6xx_gpu_set_freq,
1968#if defined(CONFIG_DRM_MSM_GPU_STATE)
1969		.gpu_state_get = a6xx_gpu_state_get,
1970		.gpu_state_put = a6xx_gpu_state_put,
1971#endif
1972		.create_address_space = a6xx_create_address_space,
1973		.create_private_address_space = a6xx_create_private_address_space,
1974		.get_rptr = a6xx_get_rptr,
1975		.progress = a6xx_progress,
1976	},
1977	.get_timestamp = a6xx_get_timestamp,
1978};
1979
1980struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
1981{
1982	struct msm_drm_private *priv = dev->dev_private;
1983	struct platform_device *pdev = priv->gpu_pdev;
1984	struct adreno_platform_config *config = pdev->dev.platform_data;
1985	const struct adreno_info *info;
1986	struct device_node *node;
1987	struct a6xx_gpu *a6xx_gpu;
1988	struct adreno_gpu *adreno_gpu;
1989	struct msm_gpu *gpu;
1990	int ret;
1991
1992	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
1993	if (!a6xx_gpu)
1994		return ERR_PTR(-ENOMEM);
1995
1996	adreno_gpu = &a6xx_gpu->base;
1997	gpu = &adreno_gpu->base;
1998
1999	adreno_gpu->registers = NULL;
2000
2001	/*
2002	 * We need to know the platform type before calling into adreno_gpu_init
2003	 * so that the hw_apriv flag can be correctly set. Snoop into the info
2004	 * and grab the revision number.
2005	 */
2006	info = adreno_info(config->rev);
2007
2008	if (info && (info->revn == 650 || info->revn == 660 ||
2009			adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
2010		adreno_gpu->base.hw_apriv = true;
2011
2012	a6xx_llc_slices_init(pdev, a6xx_gpu);
2013
2014	ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
2015	if (ret) {
2016		a6xx_destroy(&(a6xx_gpu->base.base));
2017		return ERR_PTR(ret);
2018	}
2019
2020	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2021	if (ret) {
2022		a6xx_destroy(&(a6xx_gpu->base.base));
2023		return ERR_PTR(ret);
2024	}
2025
2026	/*
2027	 * For now only clamp to idle freq for devices where this is known not
2028	 * to cause power supply issues:
2029	 */
2030	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2031		gpu->clamp_to_idle = true;
2032
2033	/* Check if there is a GMU phandle and set it up */
2034	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2035
2036	/* FIXME: How do we gracefully handle this? */
2037	BUG_ON(!node);
2038
2039	ret = a6xx_gmu_init(a6xx_gpu, node);
2040	of_node_put(node);
2041	if (ret) {
2042		a6xx_destroy(&(a6xx_gpu->base.base));
2043		return ERR_PTR(ret);
2044	}
2045
2046	if (gpu->aspace)
2047		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2048				a6xx_fault_handler);
2049
2050	return gpu;
2051}