Linux Audio

Check our new training course

Loading...
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
   3
   4
   5#include "msm_gem.h"
   6#include "msm_mmu.h"
   7#include "msm_gpu_trace.h"
   8#include "a6xx_gpu.h"
   9#include "a6xx_gmu.xml.h"
  10
  11#include <linux/bitfield.h>
  12#include <linux/devfreq.h>
  13#include <linux/firmware/qcom/qcom_scm.h>
  14#include <linux/pm_domain.h>
  15#include <linux/soc/qcom/llcc-qcom.h>
  16
  17#define GPU_PAS_ID 13
  18
  19static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
  20{
  21	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  22	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  23
  24	/* Check that the GMU is idle */
  25	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_isidle(&a6xx_gpu->gmu))
  26		return false;
  27
  28	/* Check tha the CX master is idle */
  29	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
  30			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
  31		return false;
  32
  33	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
  34		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
  35}
  36
  37static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  38{
  39	/* wait for CP to drain ringbuffer: */
  40	if (!adreno_idle(gpu, ring))
  41		return false;
  42
  43	if (spin_until(_a6xx_check_idle(gpu))) {
  44		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
  45			gpu->name, __builtin_return_address(0),
  46			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
  47			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
  48			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
  49			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
  50		return false;
  51	}
  52
  53	return true;
  54}
  55
  56static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  57{
  58	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  59	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  60
  61	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
  62	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
  63		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
  64		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
  65		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
  66	}
  67}
  68
  69static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  70{
  71	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  72	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  73	uint32_t wptr;
  74	unsigned long flags;
  75
  76	update_shadow_rptr(gpu, ring);
  77
  78	spin_lock_irqsave(&ring->preempt_lock, flags);
  79
  80	/* Copy the shadow to the actual register */
  81	ring->cur = ring->next;
  82
  83	/* Make sure to wrap wptr if we need to */
  84	wptr = get_wptr(ring);
  85
  86	/* Update HW if this is the current ring and we are not in preempt*/
  87	if (!a6xx_in_preempt(a6xx_gpu)) {
  88		if (a6xx_gpu->cur_ring == ring)
  89			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
  90		else
  91			ring->restore_wptr = true;
  92	} else {
  93		ring->restore_wptr = true;
  94	}
  95
  96	spin_unlock_irqrestore(&ring->preempt_lock, flags);
 
 
 
 
 
  97}
  98
  99static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
 100		u64 iova)
 101{
 102	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
 103	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
 104		CP_REG_TO_MEM_0_CNT(2) |
 105		CP_REG_TO_MEM_0_64B);
 106	OUT_RING(ring, lower_32_bits(iova));
 107	OUT_RING(ring, upper_32_bits(iova));
 108}
 109
 110static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
 111		struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
 112{
 113	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
 114	struct msm_file_private *ctx = submit->queue->ctx;
 115	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 116	phys_addr_t ttbr;
 117	u32 asid;
 118	u64 memptr = rbmemptr(ring, ttbr0);
 119
 120	if (ctx->seqno == ring->cur_ctx_seqno)
 121		return;
 122
 123	if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
 124		return;
 125
 126	if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
 127		/* Wait for previous submit to complete before continuing: */
 128		OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
 129		OUT_RING(ring, 0);
 130		OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 131		OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 132		OUT_RING(ring, submit->seqno - 1);
 133	}
 134
 135	if (!sysprof) {
 136		if (!adreno_is_a7xx(adreno_gpu)) {
 137			/* Turn off protected mode to write to special registers */
 138			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 139			OUT_RING(ring, 0);
 140		}
 141
 142		OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
 143		OUT_RING(ring, 1);
 144	}
 145
 146	/* Execute the table update */
 147	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
 148	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
 149
 150	OUT_RING(ring,
 151		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
 152		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
 153	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
 154	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
 155
 156	/*
 157	 * Write the new TTBR0 to the memstore. This is good for debugging.
 158	 * Needed for preemption
 159	 */
 160	OUT_PKT7(ring, CP_MEM_WRITE, 5);
 161	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
 162	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
 163	OUT_RING(ring, lower_32_bits(ttbr));
 164	OUT_RING(ring, upper_32_bits(ttbr));
 165	OUT_RING(ring, ctx->seqno);
 166
 167	/*
 168	 * Sync both threads after switching pagetables and enable BR only
 169	 * to make sure BV doesn't race ahead while BR is still switching
 170	 * pagetables.
 171	 */
 172	if (adreno_is_a7xx(&a6xx_gpu->base)) {
 173		OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 174		OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
 175	}
 176
 177	/*
 178	 * And finally, trigger a uche flush to be sure there isn't anything
 179	 * lingering in that part of the GPU
 180	 */
 181
 182	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 183	OUT_RING(ring, CACHE_INVALIDATE);
 184
 185	if (!sysprof) {
 186		/*
 187		 * Wait for SRAM clear after the pgtable update, so the
 188		 * two can happen in parallel:
 189		 */
 190		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
 191		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
 192		OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
 193				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
 194		OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
 195		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
 196		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
 197		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
 198
 199		if (!adreno_is_a7xx(adreno_gpu)) {
 200			/* Re-enable protected mode: */
 201			OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 202			OUT_RING(ring, 1);
 203		}
 204	}
 205}
 206
 207static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 208{
 209	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
 210	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 211	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 212	struct msm_ringbuffer *ring = submit->ring;
 213	unsigned int i, ibs = 0;
 214
 215	a6xx_set_pagetable(a6xx_gpu, ring, submit);
 216
 217	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
 218		rbmemptr_stats(ring, index, cpcycles_start));
 219
 220	/*
 221	 * For PM4 the GMU register offsets are calculated from the base of the
 222	 * GPU registers so we need to add 0x1a800 to the register value on A630
 223	 * to get the right value from PM4.
 224	 */
 225	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
 226		rbmemptr_stats(ring, index, alwayson_start));
 227
 228	/* Invalidate CCU depth and color */
 229	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 230	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
 231
 232	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 233	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
 234
 235	/* Submit the commands */
 236	for (i = 0; i < submit->nr_cmds; i++) {
 237		switch (submit->cmd[i].type) {
 238		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 239			break;
 240		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 241			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
 242				break;
 243			fallthrough;
 244		case MSM_SUBMIT_CMD_BUF:
 245			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 246			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 247			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 248			OUT_RING(ring, submit->cmd[i].size);
 249			ibs++;
 250			break;
 251		}
 252
 253		/*
 254		 * Periodically update shadow-wptr if needed, so that we
 255		 * can see partial progress of submits with large # of
 256		 * cmds.. otherwise we could needlessly stall waiting for
 257		 * ringbuffer state, simply due to looking at a shadow
 258		 * rptr value that has not been updated
 259		 */
 260		if ((ibs % 32) == 0)
 261			update_shadow_rptr(gpu, ring);
 262	}
 263
 264	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
 265		rbmemptr_stats(ring, index, cpcycles_end));
 266	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
 267		rbmemptr_stats(ring, index, alwayson_end));
 268
 269	/* Write the fence to the scratch register */
 270	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
 271	OUT_RING(ring, submit->seqno);
 272
 273	/*
 274	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
 275	 * timestamp is written to the memory and then triggers the interrupt
 276	 */
 277	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 278	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
 279		CP_EVENT_WRITE_0_IRQ);
 280	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 281	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 282	OUT_RING(ring, submit->seqno);
 283
 284	trace_msm_gpu_submit_flush(submit,
 285		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
 286
 287	a6xx_flush(gpu, ring);
 288}
 289
 290static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring,
 291		struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue)
 292{
 293	u64 preempt_postamble;
 294
 295	OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12);
 296
 297	OUT_RING(ring, SMMU_INFO);
 298	/* don't save SMMU, we write the record from the kernel instead */
 299	OUT_RING(ring, 0);
 300	OUT_RING(ring, 0);
 301
 302	/* privileged and non secure buffer save */
 303	OUT_RING(ring, NON_SECURE_SAVE_ADDR);
 304	OUT_RING(ring, lower_32_bits(
 305		a6xx_gpu->preempt_iova[ring->id]));
 306	OUT_RING(ring, upper_32_bits(
 307		a6xx_gpu->preempt_iova[ring->id]));
 308
 309	/* user context buffer save, seems to be unnused by fw */
 310	OUT_RING(ring, NON_PRIV_SAVE_ADDR);
 311	OUT_RING(ring, 0);
 312	OUT_RING(ring, 0);
 313
 314	OUT_RING(ring, COUNTER);
 315	/* seems OK to set to 0 to disable it */
 316	OUT_RING(ring, 0);
 317	OUT_RING(ring, 0);
 318
 319	/* Emit postamble to clear perfcounters */
 320	preempt_postamble = a6xx_gpu->preempt_postamble_iova;
 321
 322	OUT_PKT7(ring, CP_SET_AMBLE, 3);
 323	OUT_RING(ring, lower_32_bits(preempt_postamble));
 324	OUT_RING(ring, upper_32_bits(preempt_postamble));
 325	OUT_RING(ring, CP_SET_AMBLE_2_DWORDS(
 326				 a6xx_gpu->preempt_postamble_len) |
 327			 CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE));
 328}
 329
 330static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 331{
 332	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
 333	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 334	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 335	struct msm_ringbuffer *ring = submit->ring;
 336	unsigned int i, ibs = 0;
 337
 338	/*
 339	 * Toggle concurrent binning for pagetable switch and set the thread to
 340	 * BR since only it can execute the pagetable switch packets.
 341	 */
 342	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 343	OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
 344
 345	a6xx_set_pagetable(a6xx_gpu, ring, submit);
 346
 347	/*
 348	 * If preemption is enabled, then set the pseudo register for the save
 349	 * sequence
 350	 */
 351	if (gpu->nr_rings > 1)
 352		a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
 353
 354	get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
 355		rbmemptr_stats(ring, index, cpcycles_start));
 356	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
 357		rbmemptr_stats(ring, index, alwayson_start));
 358
 359	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 360	OUT_RING(ring, CP_SET_THREAD_BOTH);
 361
 362	OUT_PKT7(ring, CP_SET_MARKER, 1);
 363	OUT_RING(ring, 0x101); /* IFPC disable */
 364
 365	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
 366		OUT_PKT7(ring, CP_SET_MARKER, 1);
 367		OUT_RING(ring, 0x00d); /* IB1LIST start */
 368	}
 369
 370	/* Submit the commands */
 371	for (i = 0; i < submit->nr_cmds; i++) {
 372		switch (submit->cmd[i].type) {
 373		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 374			break;
 375		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 376			if (ring->cur_ctx_seqno == submit->queue->ctx->seqno)
 377				break;
 378			fallthrough;
 379		case MSM_SUBMIT_CMD_BUF:
 380			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 381			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 382			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 383			OUT_RING(ring, submit->cmd[i].size);
 384			ibs++;
 385			break;
 386		}
 387
 388		/*
 389		 * Periodically update shadow-wptr if needed, so that we
 390		 * can see partial progress of submits with large # of
 391		 * cmds.. otherwise we could needlessly stall waiting for
 392		 * ringbuffer state, simply due to looking at a shadow
 393		 * rptr value that has not been updated
 394		 */
 395		if ((ibs % 32) == 0)
 396			update_shadow_rptr(gpu, ring);
 397	}
 398
 399	if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) {
 400		OUT_PKT7(ring, CP_SET_MARKER, 1);
 401		OUT_RING(ring, 0x00e); /* IB1LIST end */
 402	}
 403
 404	get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
 405		rbmemptr_stats(ring, index, cpcycles_end));
 406	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
 407		rbmemptr_stats(ring, index, alwayson_end));
 408
 409	/* Write the fence to the scratch register */
 410	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
 411	OUT_RING(ring, submit->seqno);
 412
 413	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 414	OUT_RING(ring, CP_SET_THREAD_BR);
 415
 416	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 417	OUT_RING(ring, CCU_INVALIDATE_DEPTH);
 418
 419	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 420	OUT_RING(ring, CCU_INVALIDATE_COLOR);
 421
 422	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 423	OUT_RING(ring, CP_SET_THREAD_BV);
 424
 425	/*
 426	 * Make sure the timestamp is committed once BV pipe is
 427	 * completely done with this submission.
 428	 */
 429	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 430	OUT_RING(ring, CACHE_CLEAN | BIT(27));
 431	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
 432	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
 433	OUT_RING(ring, submit->seqno);
 434
 435	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 436	OUT_RING(ring, CP_SET_THREAD_BR);
 437
 438	/*
 439	 * This makes sure that BR doesn't race ahead and commit
 440	 * timestamp to memstore while BV is still processing
 441	 * this submission.
 442	 */
 443	OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4);
 444	OUT_RING(ring, 0);
 445	OUT_RING(ring, lower_32_bits(rbmemptr(ring, bv_fence)));
 446	OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence)));
 447	OUT_RING(ring, submit->seqno);
 448
 449	a6xx_gpu->last_seqno[ring->id] = submit->seqno;
 450
 451	/* write the ringbuffer timestamp */
 452	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 453	OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27));
 454	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 455	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 456	OUT_RING(ring, submit->seqno);
 457
 458	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 459	OUT_RING(ring, CP_SET_THREAD_BOTH);
 460
 461	OUT_PKT7(ring, CP_SET_MARKER, 1);
 462	OUT_RING(ring, 0x100); /* IFPC enable */
 463
 464	/* If preemption is enabled */
 465	if (gpu->nr_rings > 1) {
 466		/* Yield the floor on command completion */
 467		OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 468
 469		/*
 470		 * If dword[2:1] are non zero, they specify an address for
 471		 * the CP to write the value of dword[3] to on preemption
 472		 * complete. Write 0 to skip the write
 473		 */
 474		OUT_RING(ring, 0x00);
 475		OUT_RING(ring, 0x00);
 476		/* Data value - not used if the address above is 0 */
 477		OUT_RING(ring, 0x01);
 478		/* generate interrupt on preemption completion */
 479		OUT_RING(ring, 0x00);
 480	}
 481
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 482
 483	trace_msm_gpu_submit_flush(submit,
 484		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 485
 486	a6xx_flush(gpu, ring);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 487
 488	/* Check to see if we need to start preemption */
 489	a6xx_preempt_trigger(gpu);
 490}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 491
 492static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
 493{
 494	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 495	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 496	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 497	const struct adreno_reglist *reg;
 498	unsigned int i;
 499	u32 cgc_delay, cgc_hyst;
 500	u32 val, clock_cntl_on;
 501
 502	if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu)))
 503		return;
 504
 505	if (adreno_is_a630(adreno_gpu))
 506		clock_cntl_on = 0x8aa8aa02;
 507	else if (adreno_is_a610(adreno_gpu))
 508		clock_cntl_on = 0xaaa8aa82;
 509	else if (adreno_is_a702(adreno_gpu))
 510		clock_cntl_on = 0xaaaaaa82;
 511	else
 512		clock_cntl_on = 0x8aa8aa82;
 513
 514	cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111;
 515	cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555;
 516
 517	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
 518			state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
 519	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL,
 520			state ? cgc_delay : 0);
 521	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL,
 522			state ? cgc_hyst : 0);
 523
 524	if (!adreno_gpu->info->a6xx->hwcg) {
 525		gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
 526		gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0);
 527
 528		if (state) {
 529			gpu_write(gpu, REG_A7XX_RBBM_CGC_P2S_TRIG_CMD, 1);
 530
 531			if (gpu_poll_timeout(gpu, REG_A7XX_RBBM_CGC_P2S_STATUS, val,
 532					     val & A7XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
 533				dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
 534				return;
 535			}
 536
 537			gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
 538		}
 539
 540		return;
 541	}
 542
 543	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
 544
 545	/* Don't re-program the registers if they are already correct */
 546	if ((!state && !val) || (state && (val == clock_cntl_on)))
 547		return;
 548
 549	/* Disable SP clock before programming HWCG registers */
 550	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
 551		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
 552
 553	for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++)
 554		gpu_write(gpu, reg->offset, state ? reg->value : 0);
 555
 556	/* Enable SP clock */
 557	if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu))
 558		gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
 559
 560	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
 561}
 562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 563static void a6xx_set_cp_protect(struct msm_gpu *gpu)
 564{
 565	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 566	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
 567	unsigned i;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 568
 569	/*
 570	 * Enable access protection to privileged registers, fault on an access
 571	 * protect violation and select the last span to protect from the start
 572	 * address all the way to the end of the register address space
 573	 */
 574	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL,
 575		  A6XX_CP_PROTECT_CNTL_ACCESS_PROT_EN |
 576		  A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN |
 577		  A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE);
 578
 579	for (i = 0; i < protect->count - 1; i++) {
 580		/* Intentionally skip writing to some registers */
 581		if (protect->regs[i])
 582			gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]);
 583	}
 584	/* last CP_PROTECT to have "infinite" length on the last entry */
 585	gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
 586}
 587
 588static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
 589{
 590	gpu->ubwc_config.rgb565_predicator = 0;
 591	gpu->ubwc_config.uavflagprd_inv = 0;
 592	gpu->ubwc_config.min_acc_len = 0;
 593	gpu->ubwc_config.ubwc_swizzle = 0x6;
 594	gpu->ubwc_config.macrotile_mode = 0;
 595	gpu->ubwc_config.highest_bank_bit = 15;
 596
 597	if (adreno_is_a610(gpu)) {
 598		gpu->ubwc_config.highest_bank_bit = 13;
 599		gpu->ubwc_config.min_acc_len = 1;
 600		gpu->ubwc_config.ubwc_swizzle = 0x7;
 601	}
 602
 603	if (adreno_is_a618(gpu))
 604		gpu->ubwc_config.highest_bank_bit = 14;
 605
 606	if (adreno_is_a619(gpu))
 607		/* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
 608		gpu->ubwc_config.highest_bank_bit = 13;
 609
 610	if (adreno_is_a619_holi(gpu))
 611		gpu->ubwc_config.highest_bank_bit = 13;
 612
 613	if (adreno_is_a621(gpu)) {
 614		gpu->ubwc_config.highest_bank_bit = 13;
 615		gpu->ubwc_config.amsbc = 1;
 616		gpu->ubwc_config.uavflagprd_inv = 2;
 617	}
 618
 619	if (adreno_is_a640_family(gpu))
 620		gpu->ubwc_config.amsbc = 1;
 
 621
 622	if (adreno_is_a680(gpu))
 623		gpu->ubwc_config.macrotile_mode = 1;
 624
 625	if (adreno_is_a650(gpu) ||
 626	    adreno_is_a660(gpu) ||
 627	    adreno_is_a690(gpu) ||
 628	    adreno_is_a730(gpu) ||
 629	    adreno_is_a740_family(gpu)) {
 630		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
 631		gpu->ubwc_config.highest_bank_bit = 16;
 632		gpu->ubwc_config.amsbc = 1;
 633		gpu->ubwc_config.rgb565_predicator = 1;
 634		gpu->ubwc_config.uavflagprd_inv = 2;
 635		gpu->ubwc_config.macrotile_mode = 1;
 636	}
 637
 638	if (adreno_is_a663(gpu)) {
 639		gpu->ubwc_config.highest_bank_bit = 13;
 640		gpu->ubwc_config.amsbc = 1;
 641		gpu->ubwc_config.rgb565_predicator = 1;
 642		gpu->ubwc_config.uavflagprd_inv = 2;
 643		gpu->ubwc_config.macrotile_mode = 1;
 644		gpu->ubwc_config.ubwc_swizzle = 0x4;
 645	}
 646
 647	if (adreno_is_7c3(gpu)) {
 648		gpu->ubwc_config.highest_bank_bit = 14;
 649		gpu->ubwc_config.amsbc = 1;
 650		gpu->ubwc_config.rgb565_predicator = 1;
 651		gpu->ubwc_config.uavflagprd_inv = 2;
 652		gpu->ubwc_config.macrotile_mode = 1;
 653	}
 654
 655	if (adreno_is_a702(gpu)) {
 656		gpu->ubwc_config.highest_bank_bit = 14;
 657		gpu->ubwc_config.min_acc_len = 1;
 658	}
 659}
 660
 661static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 662{
 663	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 664	/*
 665	 * We subtract 13 from the highest bank bit (13 is the minimum value
 666	 * allowed by hw) and write the lowest two bits of the remaining value
 667	 * as hbb_lo and the one above it as hbb_hi to the hardware.
 668	 */
 669	BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
 670	u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
 671	u32 hbb_hi = hbb >> 2;
 672	u32 hbb_lo = hbb & 3;
 673	u32 ubwc_mode = adreno_gpu->ubwc_config.ubwc_swizzle & 1;
 674	u32 level2_swizzling_dis = !(adreno_gpu->ubwc_config.ubwc_swizzle & 2);
 675
 676	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
 677		  level2_swizzling_dis << 12 |
 678		  adreno_gpu->ubwc_config.rgb565_predicator << 11 |
 679		  hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 |
 680		  adreno_gpu->ubwc_config.min_acc_len << 3 |
 681		  hbb_lo << 1 | ubwc_mode);
 682
 683	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
 684		  level2_swizzling_dis << 6 | hbb_hi << 4 |
 685		  adreno_gpu->ubwc_config.min_acc_len << 3 |
 686		  hbb_lo << 1 | ubwc_mode);
 687
 688	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
 689		  level2_swizzling_dis << 12 | hbb_hi << 10 |
 690		  adreno_gpu->ubwc_config.uavflagprd_inv << 4 |
 691		  adreno_gpu->ubwc_config.min_acc_len << 3 |
 692		  hbb_lo << 1 | ubwc_mode);
 693
 694	if (adreno_is_a7xx(adreno_gpu))
 695		gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL,
 696			  FIELD_PREP(GENMASK(8, 5), hbb_lo));
 697
 698	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
 699		  adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21);
 700
 701	gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
 702		  adreno_gpu->ubwc_config.macrotile_mode);
 703}
 704
 705static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
 706{
 707	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 708	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 709	const struct adreno_reglist_list *reglist;
 710	void *ptr = a6xx_gpu->pwrup_reglist_ptr;
 711	struct cpu_gpu_lock *lock = ptr;
 712	u32 *dest = (u32 *)&lock->regs[0];
 713	int i;
 714
 715	reglist = adreno_gpu->info->a6xx->pwrup_reglist;
 716
 717	lock->gpu_req = lock->cpu_req = lock->turn = 0;
 718	lock->ifpc_list_len = 0;
 719	lock->preemption_list_len = reglist->count;
 720
 721	/*
 722	 * For each entry in each of the lists, write the offset and the current
 723	 * register value into the GPU buffer
 724	 */
 725	for (i = 0; i < reglist->count; i++) {
 726		*dest++ = reglist->regs[i];
 727		*dest++ = gpu_read(gpu, reglist->regs[i]);
 728	}
 729
 730	/*
 731	 * The overall register list is composed of
 732	 * 1. Static IFPC-only registers
 733	 * 2. Static IFPC + preemption registers
 734	 * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects)
 735	 *
 736	 * The first two lists are static. Size of these lists are stored as
 737	 * number of pairs in ifpc_list_len and preemption_list_len
 738	 * respectively. With concurrent binning, Some of the perfcounter
 739	 * registers being virtualized, CP needs to know the pipe id to program
 740	 * the aperture inorder to restore the same. Thus, third list is a
 741	 * dynamic list with triplets as
 742	 * (<aperture, shifted 12 bits> <address> <data>), and the length is
 743	 * stored as number for triplets in dynamic_list_len.
 744	 */
 745	lock->dynamic_list_len = 0;
 746}
 747
 748static int a7xx_preempt_start(struct msm_gpu *gpu)
 749{
 750	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 751	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 752	struct msm_ringbuffer *ring = gpu->rb[0];
 753
 754	if (gpu->nr_rings <= 1)
 755		return 0;
 756
 757	/* Turn CP protection off */
 758	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 759	OUT_RING(ring, 0);
 760
 761	a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL);
 762
 763	/* Yield the floor on command completion */
 764	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
 765	OUT_RING(ring, 0x00);
 766	OUT_RING(ring, 0x00);
 767	OUT_RING(ring, 0x00);
 768	/* Generate interrupt on preemption completion */
 769	OUT_RING(ring, 0x00);
 770
 771	a6xx_flush(gpu, ring);
 772
 773	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
 774}
 775
 776static int a6xx_cp_init(struct msm_gpu *gpu)
 777{
 778	struct msm_ringbuffer *ring = gpu->rb[0];
 779
 780	OUT_PKT7(ring, CP_ME_INIT, 8);
 781
 782	OUT_RING(ring, 0x0000002f);
 783
 784	/* Enable multiple hardware contexts */
 785	OUT_RING(ring, 0x00000003);
 786
 787	/* Enable error detection */
 788	OUT_RING(ring, 0x20000000);
 789
 790	/* Don't enable header dump */
 791	OUT_RING(ring, 0x00000000);
 792	OUT_RING(ring, 0x00000000);
 793
 794	/* No workarounds enabled */
 795	OUT_RING(ring, 0x00000000);
 796
 797	/* Pad rest of the cmds with 0's */
 798	OUT_RING(ring, 0x00000000);
 799	OUT_RING(ring, 0x00000000);
 800
 801	a6xx_flush(gpu, ring);
 802	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
 803}
 804
 805static int a7xx_cp_init(struct msm_gpu *gpu)
 806{
 807	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 808	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 809	struct msm_ringbuffer *ring = gpu->rb[0];
 810	u32 mask;
 811
 812	/* Disable concurrent binning before sending CP init */
 813	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
 814	OUT_RING(ring, BIT(27));
 815
 816	OUT_PKT7(ring, CP_ME_INIT, 7);
 817
 818	/* Use multiple HW contexts */
 819	mask = BIT(0);
 820
 821	/* Enable error detection */
 822	mask |= BIT(1);
 823
 824	/* Set default reset state */
 825	mask |= BIT(3);
 826
 827	/* Disable save/restore of performance counters across preemption */
 828	mask |= BIT(6);
 829
 830	/* Enable the register init list with the spinlock */
 831	mask |= BIT(8);
 832
 833	OUT_RING(ring, mask);
 834
 835	/* Enable multiple hardware contexts */
 836	OUT_RING(ring, 0x00000003);
 837
 838	/* Enable error detection */
 839	OUT_RING(ring, 0x20000000);
 840
 841	/* Operation mode mask */
 842	OUT_RING(ring, 0x00000002);
 843
 844	/* *Don't* send a power up reg list for concurrent binning (TODO) */
 845	/* Lo address */
 846	OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova));
 847	/* Hi address */
 848	OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova));
 849	/* BIT(31) set => read the regs from the list */
 850	OUT_RING(ring, BIT(31));
 851
 852	a6xx_flush(gpu, ring);
 853	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
 854}
 855
 856/*
 857 * Check that the microcode version is new enough to include several key
 858 * security fixes. Return true if the ucode is safe.
 859 */
 860static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
 861		struct drm_gem_object *obj)
 862{
 863	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 864	struct msm_gpu *gpu = &adreno_gpu->base;
 865	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
 866	u32 *buf = msm_gem_get_vaddr(obj);
 867	bool ret = false;
 868
 869	if (IS_ERR(buf))
 870		return false;
 871
 872	/* A7xx is safe! */
 873	if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
 874		return true;
 875
 876	/*
 877	 * Targets up to a640 (a618, a630 and a640) need to check for a
 878	 * microcode version that is patched to support the whereami opcode or
 879	 * one that is new enough to include it by default.
 880	 *
 881	 * a650 tier targets don't need whereami but still need to be
 882	 * equal to or newer than 0.95 for other security fixes
 883	 *
 884	 * a660 targets have all the critical security fixes from the start
 885	 */
 886	if (!strcmp(sqe_name, "a630_sqe.fw")) {
 887		/*
 888		 * If the lowest nibble is 0xa that is an indication that this
 889		 * microcode has been patched. The actual version is in dword
 890		 * [3] but we only care about the patchlevel which is the lowest
 891		 * nibble of dword [3]
 892		 *
 893		 * Otherwise check that the firmware is greater than or equal
 894		 * to 1.90 which was the first version that had this fix built
 895		 * in
 896		 */
 897		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
 898			(buf[0] & 0xfff) >= 0x190) {
 899			a6xx_gpu->has_whereami = true;
 900			ret = true;
 901			goto out;
 902		}
 903
 904		DRM_DEV_ERROR(&gpu->pdev->dev,
 905			"a630 SQE ucode is too old. Have version %x need at least %x\n",
 906			buf[0] & 0xfff, 0x190);
 907	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
 908		if ((buf[0] & 0xfff) >= 0x095) {
 909			ret = true;
 910			goto out;
 911		}
 912
 913		DRM_DEV_ERROR(&gpu->pdev->dev,
 914			"a650 SQE ucode is too old. Have version %x need at least %x\n",
 915			buf[0] & 0xfff, 0x095);
 916	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
 917		ret = true;
 918	} else {
 919		DRM_DEV_ERROR(&gpu->pdev->dev,
 920			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
 921	}
 922out:
 923	msm_gem_put_vaddr(obj);
 924	return ret;
 925}
 926
 927static int a6xx_ucode_load(struct msm_gpu *gpu)
 928{
 929	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 930	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 931
 932	if (!a6xx_gpu->sqe_bo) {
 933		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
 934			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
 935
 936		if (IS_ERR(a6xx_gpu->sqe_bo)) {
 937			int ret = PTR_ERR(a6xx_gpu->sqe_bo);
 938
 939			a6xx_gpu->sqe_bo = NULL;
 940			DRM_DEV_ERROR(&gpu->pdev->dev,
 941				"Could not allocate SQE ucode: %d\n", ret);
 942
 943			return ret;
 944		}
 945
 946		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
 947		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
 948			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
 949			drm_gem_object_put(a6xx_gpu->sqe_bo);
 950
 951			a6xx_gpu->sqe_bo = NULL;
 952			return -EPERM;
 953		}
 954	}
 955
 956	/*
 957	 * Expanded APRIV and targets that support WHERE_AM_I both need a
 958	 * privileged buffer to store the RPTR shadow
 959	 */
 960	if ((adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) &&
 961	    !a6xx_gpu->shadow_bo) {
 962		a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
 963						      sizeof(u32) * gpu->nr_rings,
 964						      MSM_BO_WC | MSM_BO_MAP_PRIV,
 965						      gpu->aspace, &a6xx_gpu->shadow_bo,
 966						      &a6xx_gpu->shadow_iova);
 967
 968		if (IS_ERR(a6xx_gpu->shadow))
 969			return PTR_ERR(a6xx_gpu->shadow);
 970
 971		msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
 972	}
 973
 974	a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
 975							 MSM_BO_WC  | MSM_BO_MAP_PRIV,
 976							 gpu->aspace, &a6xx_gpu->pwrup_reglist_bo,
 977							 &a6xx_gpu->pwrup_reglist_iova);
 978
 979	if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
 980		return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr);
 981
 982	msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist");
 983
 984	return 0;
 985}
 986
 987static int a6xx_zap_shader_init(struct msm_gpu *gpu)
 988{
 989	static bool loaded;
 990	int ret;
 991
 992	if (loaded)
 993		return 0;
 994
 995	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
 996
 997	loaded = !ret;
 998	return ret;
 999}
1000
1001#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
1002		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
1003		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
1004		       A6XX_RBBM_INT_0_MASK_CP_IB2 | \
1005		       A6XX_RBBM_INT_0_MASK_CP_IB1 | \
1006		       A6XX_RBBM_INT_0_MASK_CP_RB | \
1007		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
1008		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
1009		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
1010		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
1011		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
1012
1013#define A7XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
1014		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
1015		       A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
1016		       A6XX_RBBM_INT_0_MASK_CP_SW | \
1017		       A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
1018		       A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
1019		       A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
1020		       A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
1021		       A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
1022		       A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
1023		       A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
1024		       A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
1025		       A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
1026		       A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1027
1028#define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \
1029			 A6XX_CP_APRIV_CNTL_RBFETCH | \
1030			 A6XX_CP_APRIV_CNTL_RBPRIVLEVEL | \
1031			 A6XX_CP_APRIV_CNTL_RBRPWB)
1032
1033#define A7XX_BR_APRIVMASK (A7XX_APRIV_MASK | \
1034			   A6XX_CP_APRIV_CNTL_CDREAD | \
1035			   A6XX_CP_APRIV_CNTL_CDWRITE)
1036
1037static int hw_init(struct msm_gpu *gpu)
1038{
1039	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1040	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1041	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1042	u64 gmem_range_min;
1043	unsigned int i;
1044	int ret;
1045
1046	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1047		/* Make sure the GMU keeps the GPU on while we set it up */
1048		ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1049		if (ret)
1050			return ret;
1051	}
1052
1053	/* Clear GBIF halt in case GX domain was not collapsed */
1054	if (adreno_is_a619_holi(adreno_gpu)) {
1055		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1056		gpu_read(gpu, REG_A6XX_GBIF_HALT);
1057
1058		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0);
1059		gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL);
1060	} else if (a6xx_has_gbif(adreno_gpu)) {
1061		gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
1062		gpu_read(gpu, REG_A6XX_GBIF_HALT);
1063
1064		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
1065		gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT);
1066	}
1067
1068	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
1069
1070	if (adreno_is_a619_holi(adreno_gpu))
1071		a6xx_sptprac_enable(gmu);
1072
1073	/*
1074	 * Disable the trusted memory range - we don't actually supported secure
1075	 * memory rendering at this point in time and we don't want to block off
1076	 * part of the virtual memory space.
1077	 */
1078	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
1079	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1080
1081	if (!adreno_is_a7xx(adreno_gpu)) {
1082		/* Turn on 64 bit addressing for all blocks */
1083		gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1084		gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1085		gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1086		gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1087		gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1088		gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1089		gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1090		gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1091		gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1092		gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1093		gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1094		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
1095	}
1096
1097	/* enable hardware clockgating */
1098	a6xx_set_hwcg(gpu, true);
1099
1100	/* VBIF/GBIF start*/
1101	if (adreno_is_a610_family(adreno_gpu) ||
1102	    adreno_is_a640_family(adreno_gpu) ||
1103	    adreno_is_a650_family(adreno_gpu) ||
1104	    adreno_is_a7xx(adreno_gpu)) {
1105		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1106		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1107		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1108		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1109		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL,
1110			  adreno_is_a7xx(adreno_gpu) ? 0x2120212 : 0x3);
1111	} else {
1112		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1113	}
1114
1115	if (adreno_is_a630(adreno_gpu))
1116		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1117
1118	if (adreno_is_a7xx(adreno_gpu))
1119		gpu_write(gpu, REG_A6XX_UCHE_GBIF_GX_CONFIG, 0x10240e0);
1120
1121	/* Make all blocks contribute to the GPU BUSY perf counter */
1122	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1123
1124	/* Disable L2 bypass in the UCHE */
1125	if (adreno_is_a7xx(adreno_gpu)) {
1126		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
1127		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
1128	} else {
1129		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu);
1130		gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
1131		gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
1132	}
1133
1134	if (!(adreno_is_a650_family(adreno_gpu) ||
1135	      adreno_is_a702(adreno_gpu) ||
1136	      adreno_is_a730(adreno_gpu))) {
1137		gmem_range_min = adreno_is_a740_family(adreno_gpu) ? SZ_16M : SZ_1M;
1138
 
1139		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1140		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN, gmem_range_min);
1141
1142		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX,
1143			gmem_range_min + adreno_gpu->info->gmem - 1);
1144	}
1145
1146	if (adreno_is_a7xx(adreno_gpu))
1147		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, BIT(23));
1148	else {
1149		gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1150		gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
1151	}
1152
1153	if (adreno_is_a640_family(adreno_gpu) || adreno_is_a650_family(adreno_gpu)) {
 
1154		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1155		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1156	} else if (adreno_is_a610_family(adreno_gpu)) {
1157		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x00800060);
1158		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x40201b16);
1159	} else if (!adreno_is_a7xx(adreno_gpu)) {
1160		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1161		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
1162	}
1163
1164	if (adreno_is_a660_family(adreno_gpu))
1165		gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1166
1167	/* Setting the mem pool size */
1168	if (adreno_is_a610(adreno_gpu)) {
1169		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
1170		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
1171	} else if (adreno_is_a702(adreno_gpu)) {
1172		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 64);
1173		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 63);
1174	} else if (!adreno_is_a7xx(adreno_gpu))
1175		gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
1176
1177
1178	/* Set the default primFifo threshold values */
1179	if (adreno_gpu->info->a6xx->prim_fifo_threshold)
1180		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL,
1181			  adreno_gpu->info->a6xx->prim_fifo_threshold);
 
 
 
 
 
 
1182
1183	/* Set the AHB default slave response to "ERROR" */
1184	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1185
1186	/* Turn on performance counters */
1187	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1188
1189	if (adreno_is_a7xx(adreno_gpu)) {
1190		/* Turn on the IFPC counter (countable 4 on XOCLK4) */
1191		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_1,
1192			  FIELD_PREP(GENMASK(7, 0), 0x4));
1193	}
1194
1195	/* Select CP0 to always count cycles */
1196	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1197
1198	a6xx_set_ubwc_config(gpu);
1199
1200	/* Enable fault detection */
1201	if (adreno_is_a730(adreno_gpu) ||
1202	    adreno_is_a740_family(adreno_gpu))
1203		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
1204	else if (adreno_is_a690(adreno_gpu))
1205		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x4fffff);
1206	else if (adreno_is_a619(adreno_gpu))
1207		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3fffff);
1208	else if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
1209		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x3ffff);
1210	else
1211		gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0x1fffff);
1212
1213	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1);
1214
1215	/* Set weights for bicubic filtering */
1216	if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
1217		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1218		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1219			0x3fe05ff4);
1220		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1221			0x3fa0ebee);
1222		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1223			0x3f5193ed);
1224		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1225			0x3f0243f0);
1226	}
1227
1228	/* Set up the CX GMU counter 0 to count busy ticks */
1229	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
1230
1231	/* Enable the power counter */
1232	gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, BIT(5));
1233	gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
1234
1235	/* Protect registers from the CP */
1236	a6xx_set_cp_protect(gpu);
1237
1238	if (adreno_is_a660_family(adreno_gpu)) {
1239		if (adreno_is_a690(adreno_gpu))
1240			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x00028801);
1241		else
1242			gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
1243		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
1244	} else if (adreno_is_a702(adreno_gpu)) {
1245		/* Something to do with the HLSQ cluster */
1246		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(24));
1247	}
1248
1249	if (adreno_is_a690(adreno_gpu))
1250		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90);
1251	/* Set dualQ + disable afull for A660 GPU */
1252	else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu))
1253		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
1254	else if (adreno_is_a7xx(adreno_gpu))
1255		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG,
1256			  FIELD_PREP(GENMASK(19, 16), 6) |
1257			  FIELD_PREP(GENMASK(15, 12), 6) |
1258			  FIELD_PREP(GENMASK(11, 8), 9) |
1259			  BIT(3) | BIT(2) |
1260			  FIELD_PREP(GENMASK(1, 0), 2));
1261
1262	/* Enable expanded apriv for targets that support it */
1263	if (gpu->hw_apriv) {
1264		if (adreno_is_a7xx(adreno_gpu)) {
1265			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1266				  A7XX_BR_APRIVMASK);
1267			gpu_write(gpu, REG_A7XX_CP_BV_APRIV_CNTL,
1268				  A7XX_APRIV_MASK);
1269			gpu_write(gpu, REG_A7XX_CP_LPAC_APRIV_CNTL,
1270				  A7XX_APRIV_MASK);
1271		} else
1272			gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1273				  BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1));
1274	}
1275
1276	if (adreno_is_a750(adreno_gpu)) {
1277		/* Disable ubwc merged UFC request feature */
1278		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19));
1279
1280		/* Enable TP flaghint and other performance settings */
1281		gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700);
1282	} else if (adreno_is_a7xx(adreno_gpu)) {
1283		/* Disable non-ubwc read reqs from passing write reqs */
1284		gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11));
1285	}
1286
1287	/* Enable interrupts */
1288	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK,
1289		  adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK);
1290
1291	ret = adreno_hw_init(gpu);
1292	if (ret)
1293		goto out;
1294
1295	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
 
 
1296
1297	/* Set the ringbuffer address */
1298	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1299
1300	/* Targets that support extended APRIV can use the RPTR shadow from
1301	 * hardware but all the other ones need to disable the feature. Targets
1302	 * that support the WHERE_AM_I opcode can use that instead
1303	 */
1304	if (adreno_gpu->base.hw_apriv)
1305		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1306	else
1307		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1308			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1309
1310	/* Configure the RPTR shadow if needed: */
1311	if (a6xx_gpu->shadow_bo) {
1312		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR,
1313			shadowptr(a6xx_gpu, gpu->rb[0]));
1314		for (unsigned int i = 0; i < gpu->nr_rings; i++)
1315			a6xx_gpu->shadow[i] = 0;
1316	}
1317
1318	/* ..which means "always" on A7xx, also for BV shadow */
1319	if (adreno_is_a7xx(adreno_gpu)) {
1320		gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR,
1321			    rbmemptr(gpu->rb[0], bv_rptr));
1322	}
 
 
1323
1324	a6xx_preempt_hw_init(gpu);
 
 
 
 
 
 
 
 
1325
1326	/* Always come up on rb 0 */
1327	a6xx_gpu->cur_ring = gpu->rb[0];
1328
1329	for (i = 0; i < gpu->nr_rings; i++)
1330		gpu->rb[i]->cur_ctx_seqno = 0;
1331
1332	/* Enable the SQE_to start the CP engine */
1333	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1334
1335	if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) {
1336		a7xx_patch_pwrup_reglist(gpu);
1337		a6xx_gpu->pwrup_reglist_emitted = true;
1338	}
1339
1340	ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu);
1341	if (ret)
1342		goto out;
1343
1344	/*
1345	 * Try to load a zap shader into the secure world. If successful
1346	 * we can use the CP to switch out of secure mode. If not then we
1347	 * have no resource but to try to switch ourselves out manually. If we
1348	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1349	 * be blocked and a permissions violation will soon follow.
1350	 */
1351	ret = a6xx_zap_shader_init(gpu);
1352	if (!ret) {
1353		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1354		OUT_RING(gpu->rb[0], 0x00000000);
1355
1356		a6xx_flush(gpu, gpu->rb[0]);
1357		if (!a6xx_idle(gpu, gpu->rb[0]))
1358			return -EINVAL;
1359	} else if (ret == -ENODEV) {
1360		/*
1361		 * This device does not use zap shader (but print a warning
1362		 * just in case someone got their dt wrong.. hopefully they
1363		 * have a debug UART to realize the error of their ways...
1364		 * if you mess this up you are about to crash horribly)
1365		 */
1366		dev_warn_once(gpu->dev->dev,
1367			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1368		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1369		ret = 0;
1370	} else {
1371		return ret;
1372	}
1373
1374out:
1375	if (adreno_has_gmu_wrapper(adreno_gpu))
1376		return ret;
1377
1378	/* Last step - yield the ringbuffer */
1379	a7xx_preempt_start(gpu);
1380
1381	/*
1382	 * Tell the GMU that we are done touching the GPU and it can start power
1383	 * management
1384	 */
1385	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1386
1387	if (a6xx_gpu->gmu.legacy) {
1388		/* Take the GMU out of its special boot mode */
1389		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1390	}
1391
1392	return ret;
1393}
1394
1395static int a6xx_hw_init(struct msm_gpu *gpu)
1396{
1397	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1398	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1399	int ret;
1400
1401	mutex_lock(&a6xx_gpu->gmu.lock);
1402	ret = hw_init(gpu);
1403	mutex_unlock(&a6xx_gpu->gmu.lock);
1404
1405	return ret;
1406}
1407
1408static void a6xx_dump(struct msm_gpu *gpu)
1409{
1410	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
1411			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1412	adreno_dump(gpu);
1413}
1414
 
 
 
1415static void a6xx_recover(struct msm_gpu *gpu)
1416{
1417	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1418	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1419	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1420	int i, active_submits;
1421
1422	adreno_dump_info(gpu);
1423
1424	for (i = 0; i < 8; i++)
1425		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1426			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1427
1428	if (hang_debug)
1429		a6xx_dump(gpu);
1430
1431	/*
1432	 * To handle recovery specific sequences during the rpm suspend we are
1433	 * about to trigger
1434	 */
1435	a6xx_gpu->hung = true;
1436
1437	/* Halt SQE first */
1438	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1439
 
 
 
 
 
 
1440	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1441
1442	/* active_submit won't change until we make a submission */
1443	mutex_lock(&gpu->active_lock);
1444	active_submits = gpu->active_submits;
1445
1446	/*
1447	 * Temporarily clear active_submits count to silence a WARN() in the
1448	 * runtime suspend cb
1449	 */
1450	gpu->active_submits = 0;
1451
1452	if (adreno_has_gmu_wrapper(adreno_gpu)) {
1453		/* Drain the outstanding traffic on memory buses */
1454		a6xx_bus_clear_pending_transactions(adreno_gpu, true);
1455
1456		/* Reset the GPU to a clean state */
1457		a6xx_gpu_sw_reset(gpu, true);
1458		a6xx_gpu_sw_reset(gpu, false);
1459	}
1460
1461	reinit_completion(&gmu->pd_gate);
1462	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
1463	dev_pm_genpd_synced_poweroff(gmu->cxpd);
1464
1465	/* Drop the rpm refcount from active submits */
1466	if (active_submits)
1467		pm_runtime_put(&gpu->pdev->dev);
1468
1469	/* And the final one from recover worker */
1470	pm_runtime_put_sync(&gpu->pdev->dev);
1471
1472	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
1473		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");
1474
1475	dev_pm_genpd_remove_notifier(gmu->cxpd);
1476
1477	pm_runtime_use_autosuspend(&gpu->pdev->dev);
1478
1479	if (active_submits)
1480		pm_runtime_get(&gpu->pdev->dev);
1481
1482	pm_runtime_get_sync(&gpu->pdev->dev);
1483
1484	gpu->active_submits = active_submits;
1485	mutex_unlock(&gpu->active_lock);
1486
1487	msm_gpu_hw_init(gpu);
1488	a6xx_gpu->hung = false;
1489}
1490
1491static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1492{
1493	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1494	static const char *uche_clients[7] = {
1495		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1496	};
1497	u32 val;
1498
1499	if (adreno_is_a7xx(adreno_gpu)) {
1500		if (mid != 1 && mid != 2 && mid != 3 && mid != 8)
1501			return "UNKNOWN";
1502	} else {
1503		if (mid < 1 || mid > 3)
1504			return "UNKNOWN";
1505	}
1506
1507	/*
1508	 * The source of the data depends on the mid ID read from FSYNR1.
1509	 * and the client ID read from the UCHE block
1510	 */
1511	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1512
1513	if (adreno_is_a7xx(adreno_gpu)) {
1514		/* Bit 3 for mid=3 indicates BR or BV */
1515		static const char *uche_clients_a7xx[16] = {
1516			"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
1517			"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
1518			"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
1519			"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
1520		};
1521
1522		/* LPAC has the same clients as BR and BV, but because it is
1523		 * compute-only some of them do not exist and there are holes
1524		 * in the array.
1525		 */
1526		static const char *uche_clients_lpac_a7xx[8] = {
1527			"-", "LPAC_SP", "-", "-",
1528			"LPAC_HLSQ", "-", "-", "LPAC_TP",
1529		};
1530
1531		val &= GENMASK(6, 0);
1532
1533		/* mid=3 refers to BR or BV */
1534		if (mid == 3) {
1535			if (val < ARRAY_SIZE(uche_clients_a7xx))
1536				return uche_clients_a7xx[val];
1537			else
1538				return "UCHE";
1539		}
1540
1541		/* mid=8 refers to LPAC */
1542		if (mid == 8) {
1543			if (val < ARRAY_SIZE(uche_clients_lpac_a7xx))
1544				return uche_clients_lpac_a7xx[val];
1545			else
1546				return "UCHE_LPAC";
1547		}
1548
1549		/* mid=2 is a catchall for everything else in LPAC */
1550		if (mid == 2)
1551			return "UCHE_LPAC";
1552
1553		/* mid=1 is a catchall for everything else in BR/BV */
1554		return "UCHE";
1555	} else if (adreno_is_a660_family(adreno_gpu)) {
1556		static const char *uche_clients_a660[8] = {
1557			"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ", "TP",
1558		};
1559
1560		static const char *uche_clients_a660_not[8] = {
1561			"not VFD", "not SP", "not VSC", "not VPC",
1562			"not HLSQ", "not PC", "not LRZ", "not TP",
1563		};
1564
1565		val &= GENMASK(6, 0);
1566
1567		if (mid == 3 && val < ARRAY_SIZE(uche_clients_a660))
1568			return uche_clients_a660[val];
1569
1570		if (mid == 1 && val < ARRAY_SIZE(uche_clients_a660_not))
1571			return uche_clients_a660_not[val];
1572
1573		return "UCHE";
1574	} else {
1575		/* mid = 3 is most precise and refers to only one block per client */
1576		if (mid == 3)
1577			return uche_clients[val & 7];
1578
1579		/* For mid=2 the source is TP or VFD except when the client id is 0 */
1580		if (mid == 2)
1581			return ((val & 7) == 0) ? "TP" : "TP|VFD";
1582
1583		/* For mid=1 just return "UCHE" as a catchall for everything else */
1584		return "UCHE";
1585	}
1586}
1587
1588static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1589{
1590	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1591
1592	if (id == 0)
1593		return "CP";
1594	else if (id == 4)
1595		return "CCU";
1596	else if (id == 6)
1597		return "CDP Prefetch";
1598	else if (id == 7)
1599		return "GMU";
1600	else if (id == 5 && adreno_is_a7xx(adreno_gpu))
1601		return "Flag cache";
1602
1603	return a6xx_uche_fault_block(gpu, id);
1604}
1605
 
 
 
 
1606static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1607{
1608	struct msm_gpu *gpu = arg;
1609	struct adreno_smmu_fault_info *info = data;
1610	const char *block = "unknown";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1611
1612	u32 scratch[] = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1613			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1614			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1615			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1616			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
1617	};
1618
1619	if (info)
1620		block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
 
 
 
 
 
 
 
1621
1622	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
 
 
 
1623}
1624
1625static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1626{
1627	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1628
1629	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1630		u32 val;
1631
1632		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1633		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1634		dev_err_ratelimited(&gpu->pdev->dev,
1635			"CP | opcode error | possible opcode=0x%8.8X\n",
1636			val);
1637	}
1638
1639	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1640		dev_err_ratelimited(&gpu->pdev->dev,
1641			"CP ucode error interrupt\n");
1642
1643	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1644		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1645			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1646
1647	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1648		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1649
1650		dev_err_ratelimited(&gpu->pdev->dev,
1651			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1652			val & (1 << 20) ? "READ" : "WRITE",
1653			(val & 0x3ffff), val);
1654	}
1655
1656	if (status & A6XX_CP_INT_CP_AHB_ERROR && !adreno_is_a7xx(to_adreno_gpu(gpu)))
1657		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1658
1659	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1660		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1661
1662	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1663		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1664
1665}
1666
1667static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1668{
1669	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1670	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1671	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1672
1673	/*
1674	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1675	 * but the fault handler will trigger the devcore dump, and we want
1676	 * to otherwise resume normally rather than killing the submit, so
1677	 * just bail.
1678	 */
1679	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1680		return;
1681
1682	/*
1683	 * Force the GPU to stay on until after we finish
1684	 * collecting information
1685	 */
1686	if (!adreno_has_gmu_wrapper(adreno_gpu))
1687		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
1688
1689	DRM_DEV_ERROR(&gpu->pdev->dev,
1690		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1691		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1692		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1693		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1694		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1695		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1696		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1697		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1698		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1699
1700	/* Turn off the hangcheck timer to keep it from bothering us */
1701	del_timer(&gpu->hangcheck_timer);
1702
1703	kthread_queue_work(gpu->worker, &gpu->recover_work);
1704}
1705
1706static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
1707{
1708	u32 status;
1709
1710	status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS);
1711	gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0);
1712
1713	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);
1714
1715	/*
1716	 * Ignore FASTBLEND violations, because the HW will silently fall back
1717	 * to legacy blending.
1718	 */
1719	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1720		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
1721		del_timer(&gpu->hangcheck_timer);
1722
1723		kthread_queue_work(gpu->worker, &gpu->recover_work);
1724	}
1725}
1726
1727static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1728{
1729	struct msm_drm_private *priv = gpu->dev->dev_private;
1730	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1731
1732	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1733
1734	if (priv->disable_err_irq)
1735		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1736
1737	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1738		a6xx_fault_detect_irq(gpu);
1739
1740	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1741		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1742
1743	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1744		a6xx_cp_hw_err_irq(gpu);
1745
1746	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1747		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1748
1749	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1750		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1751
1752	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1753		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1754
1755	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
1756		a7xx_sw_fuse_violation_irq(gpu);
1757
1758	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1759		msm_gpu_retire(gpu);
1760		a6xx_preempt_trigger(gpu);
1761	}
1762
1763	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
1764		a6xx_preempt_irq(gpu);
1765
1766	return IRQ_HANDLED;
1767}
1768
 
 
 
 
 
 
 
 
 
 
1769static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1770{
1771	llcc_slice_deactivate(a6xx_gpu->llc_slice);
1772	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1773}
1774
1775static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1776{
1777	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1778	struct msm_gpu *gpu = &adreno_gpu->base;
1779	u32 cntl1_regval = 0;
1780
1781	if (IS_ERR(a6xx_gpu->llc_mmio))
1782		return;
1783
1784	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1785		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1786
1787		gpu_scid &= 0x1f;
1788		cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1789			       (gpu_scid << 15) | (gpu_scid << 20);
1790
1791		/* On A660, the SCID programming for UCHE traffic is done in
1792		 * A6XX_GBIF_SCACHE_CNTL0[14:10]
1793		 */
1794		if (adreno_is_a660_family(adreno_gpu))
1795			gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1796				(1 << 8), (gpu_scid << 10) | (1 << 8));
1797	}
1798
1799	/*
1800	 * For targets with a MMU500, activate the slice but don't program the
1801	 * register.  The XBL will take care of that.
1802	 */
1803	if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1804		if (!a6xx_gpu->have_mmu500) {
1805			u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1806
1807			gpuhtw_scid &= 0x1f;
1808			cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1809		}
1810	}
1811
1812	if (!cntl1_regval)
1813		return;
1814
1815	/*
1816	 * Program the slice IDs for the various GPU blocks and GPU MMU
1817	 * pagetables
1818	 */
1819	if (!a6xx_gpu->have_mmu500) {
1820		a6xx_llc_write(a6xx_gpu,
1821			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1822
1823		/*
1824		 * Program cacheability overrides to not allocate cache
1825		 * lines on a write miss
1826		 */
1827		a6xx_llc_rmw(a6xx_gpu,
1828			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1829		return;
1830	}
1831
1832	gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1833}
1834
1835static void a7xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1836{
1837	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1838	struct msm_gpu *gpu = &adreno_gpu->base;
1839
1840	if (IS_ERR(a6xx_gpu->llc_mmio))
1841		return;
1842
1843	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1844		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1845
1846		gpu_scid &= GENMASK(4, 0);
1847
1848		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
1849			  FIELD_PREP(GENMASK(29, 25), gpu_scid) |
1850			  FIELD_PREP(GENMASK(24, 20), gpu_scid) |
1851			  FIELD_PREP(GENMASK(19, 15), gpu_scid) |
1852			  FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1853			  FIELD_PREP(GENMASK(9, 5), gpu_scid) |
1854			  FIELD_PREP(GENMASK(4, 0), gpu_scid));
1855
1856		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
1857			  FIELD_PREP(GENMASK(14, 10), gpu_scid) |
1858			  BIT(8));
1859	}
1860
1861	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
1862}
1863
1864static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1865{
1866	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1867	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1868		return;
1869
1870	llcc_slice_putd(a6xx_gpu->llc_slice);
1871	llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1872}
1873
1874static void a6xx_llc_slices_init(struct platform_device *pdev,
1875		struct a6xx_gpu *a6xx_gpu, bool is_a7xx)
1876{
1877	struct device_node *phandle;
1878
1879	/* No LLCC on non-RPMh (and by extension, non-GMU) SoCs */
1880	if (adreno_has_gmu_wrapper(&a6xx_gpu->base))
1881		return;
1882
1883	/*
1884	 * There is a different programming path for A6xx targets with an
1885	 * mmu500 attached, so detect if that is the case
1886	 */
1887	phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1888	a6xx_gpu->have_mmu500 = (phandle &&
1889		of_device_is_compatible(phandle, "arm,mmu-500"));
1890	of_node_put(phandle);
1891
1892	if (is_a7xx || !a6xx_gpu->have_mmu500)
1893		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1894	else
1895		a6xx_gpu->llc_mmio = NULL;
 
 
1896
1897	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1898	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1899
1900	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1901		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1902}
1903
1904static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
1905{
1906	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1907	struct msm_gpu *gpu = &adreno_gpu->base;
1908	u32 fuse_val;
1909	int ret;
1910
1911	if (adreno_is_a750(adreno_gpu)) {
1912		/*
1913		 * Assume that if qcom scm isn't available, that whatever
1914		 * replacement allows writing the fuse register ourselves.
1915		 * Users of alternative firmware need to make sure this
1916		 * register is writeable or indicate that it's not somehow.
1917		 * Print a warning because if you mess this up you're about to
1918		 * crash horribly.
1919		 */
1920		if (!qcom_scm_is_available()) {
1921			dev_warn_once(gpu->dev->dev,
1922				"SCM is not available, poking fuse register\n");
1923			a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE,
1924				A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
1925				A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND |
1926				A7XX_CX_MISC_SW_FUSE_VALUE_LPAC);
1927			adreno_gpu->has_ray_tracing = true;
1928			return 0;
1929		}
1930
1931		ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ |
1932					     QCOM_SCM_GPU_TSENSE_EN_REQ);
1933		if (ret)
1934			return ret;
1935
1936		/*
1937		 * On a750 raytracing may be disabled by the firmware, find out
1938		 * whether that's the case. The scm call above sets the fuse
1939		 * register.
1940		 */
1941		fuse_val = a6xx_llc_read(a6xx_gpu,
1942					 REG_A7XX_CX_MISC_SW_FUSE_VALUE);
1943		adreno_gpu->has_ray_tracing =
1944			!!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING);
1945	} else if (adreno_is_a740(adreno_gpu)) {
1946		/* Raytracing is always enabled on a740 */
1947		adreno_gpu->has_ray_tracing = true;
1948	}
1949
1950	return 0;
1951}
1952
1953
1954#define GBIF_CLIENT_HALT_MASK		BIT(0)
1955#define GBIF_ARB_HALT_MASK		BIT(1)
1956#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
1957#define VBIF_RESET_ACK_MASK		0xF0
1958#define GPR0_GBIF_HALT_REQUEST		0x1E0
1959
1960void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
1961{
1962	struct msm_gpu *gpu = &adreno_gpu->base;
1963
1964	if (adreno_is_a619_holi(adreno_gpu)) {
1965		gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, GPR0_GBIF_HALT_REQUEST);
1966		spin_until((gpu_read(gpu, REG_A6XX_RBBM_VBIF_GX_RESET_STATUS) &
1967				(VBIF_RESET_ACK_MASK)) == VBIF_RESET_ACK_MASK);
1968	} else if (!a6xx_has_gbif(adreno_gpu)) {
1969		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, VBIF_XIN_HALT_CTRL0_MASK);
1970		spin_until((gpu_read(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL1) &
1971				(VBIF_XIN_HALT_CTRL0_MASK)) == VBIF_XIN_HALT_CTRL0_MASK);
1972		gpu_write(gpu, REG_A6XX_VBIF_XIN_HALT_CTRL0, 0);
1973
1974		return;
1975	}
1976
1977	if (gx_off) {
1978		/* Halt the gx side of GBIF */
1979		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1);
1980		spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1);
1981	}
1982
1983	/* Halt new client requests on GBIF */
1984	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
1985	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
1986			(GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);
1987
1988	/* Halt all AXI requests on GBIF */
1989	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
1990	spin_until((gpu_read(gpu,  REG_A6XX_GBIF_HALT_ACK) &
1991			(GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);
1992
1993	/* The GBIF halt needs to be explicitly cleared */
1994	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
1995}
1996
1997void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
1998{
1999	/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
2000	if (adreno_is_a610(to_adreno_gpu(gpu)))
2001		return;
2002
2003	gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
2004	/* Perform a bogus read and add a brief delay to ensure ordering. */
2005	gpu_read(gpu, REG_A6XX_RBBM_SW_RESET_CMD);
2006	udelay(1);
2007
2008	/* The reset line needs to be asserted for at least 100 us */
2009	if (assert)
2010		udelay(100);
2011}
2012
2013static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
2014{
2015	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2016	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2017	int ret;
2018
2019	gpu->needs_hw_init = true;
2020
2021	trace_msm_gpu_resume(0);
2022
2023	mutex_lock(&a6xx_gpu->gmu.lock);
2024	ret = a6xx_gmu_resume(a6xx_gpu);
2025	mutex_unlock(&a6xx_gpu->gmu.lock);
2026	if (ret)
2027		return ret;
2028
2029	msm_devfreq_resume(gpu);
2030
2031	adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu);
2032
2033	return ret;
2034}
2035
2036static int a6xx_pm_resume(struct msm_gpu *gpu)
2037{
2038	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2039	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2040	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2041	unsigned long freq = gpu->fast_rate;
2042	struct dev_pm_opp *opp;
2043	int ret;
2044
2045	gpu->needs_hw_init = true;
2046
2047	trace_msm_gpu_resume(0);
2048
2049	mutex_lock(&a6xx_gpu->gmu.lock);
2050
2051	opp = dev_pm_opp_find_freq_ceil(&gpu->pdev->dev, &freq);
2052	if (IS_ERR(opp)) {
2053		ret = PTR_ERR(opp);
2054		goto err_set_opp;
2055	}
2056	dev_pm_opp_put(opp);
2057
2058	/* Set the core clock and bus bw, having VDD scaling in mind */
2059	dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
2060
2061	pm_runtime_resume_and_get(gmu->dev);
2062	pm_runtime_resume_and_get(gmu->gxpd);
2063
2064	ret = clk_bulk_prepare_enable(gpu->nr_clocks, gpu->grp_clks);
2065	if (ret)
2066		goto err_bulk_clk;
2067
2068	if (adreno_is_a619_holi(adreno_gpu))
2069		a6xx_sptprac_enable(gmu);
2070
2071	/* If anything goes south, tear the GPU down piece by piece.. */
2072	if (ret) {
2073err_bulk_clk:
2074		pm_runtime_put(gmu->gxpd);
2075		pm_runtime_put(gmu->dev);
2076		dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2077	}
2078err_set_opp:
2079	mutex_unlock(&a6xx_gpu->gmu.lock);
2080
2081	if (!ret)
2082		msm_devfreq_resume(gpu);
2083
2084	return ret;
2085}
2086
2087static int a6xx_gmu_pm_suspend(struct msm_gpu *gpu)
2088{
2089	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2090	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2091	int i, ret;
2092
2093	trace_msm_gpu_suspend(0);
2094
2095	a6xx_llc_deactivate(a6xx_gpu);
2096
2097	msm_devfreq_suspend(gpu);
2098
2099	mutex_lock(&a6xx_gpu->gmu.lock);
2100	ret = a6xx_gmu_stop(a6xx_gpu);
2101	mutex_unlock(&a6xx_gpu->gmu.lock);
2102	if (ret)
2103		return ret;
2104
2105	if (a6xx_gpu->shadow_bo)
2106		for (i = 0; i < gpu->nr_rings; i++)
2107			a6xx_gpu->shadow[i] = 0;
2108
2109	gpu->suspend_count++;
2110
2111	return 0;
2112}
2113
2114static int a6xx_pm_suspend(struct msm_gpu *gpu)
2115{
2116	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2117	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2118	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
2119	int i;
2120
2121	trace_msm_gpu_suspend(0);
2122
2123	msm_devfreq_suspend(gpu);
2124
2125	mutex_lock(&a6xx_gpu->gmu.lock);
2126
2127	/* Drain the outstanding traffic on memory buses */
2128	a6xx_bus_clear_pending_transactions(adreno_gpu, true);
2129
2130	if (adreno_is_a619_holi(adreno_gpu))
2131		a6xx_sptprac_disable(gmu);
2132
2133	clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
2134
2135	pm_runtime_put_sync(gmu->gxpd);
2136	dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
2137	pm_runtime_put_sync(gmu->dev);
2138
2139	mutex_unlock(&a6xx_gpu->gmu.lock);
2140
2141	if (a6xx_gpu->shadow_bo)
2142		for (i = 0; i < gpu->nr_rings; i++)
2143			a6xx_gpu->shadow[i] = 0;
2144
2145	gpu->suspend_count++;
2146
2147	return 0;
2148}
2149
2150static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2151{
2152	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2153	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2154
2155	mutex_lock(&a6xx_gpu->gmu.lock);
2156
2157	/* Force the GPU power on so we can read this register */
2158	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
2159
2160	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2161
2162	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
2163
2164	mutex_unlock(&a6xx_gpu->gmu.lock);
2165
2166	return 0;
2167}
2168
2169static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
2170{
2171	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
2172	return 0;
2173}
2174
2175static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
2176{
2177	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2178	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2179
2180	return a6xx_gpu->cur_ring;
2181}
2182
2183static void a6xx_destroy(struct msm_gpu *gpu)
2184{
2185	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2186	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2187
2188	if (a6xx_gpu->sqe_bo) {
2189		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
2190		drm_gem_object_put(a6xx_gpu->sqe_bo);
2191	}
2192
2193	if (a6xx_gpu->shadow_bo) {
2194		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
2195		drm_gem_object_put(a6xx_gpu->shadow_bo);
2196	}
2197
2198	a6xx_llc_slices_destroy(a6xx_gpu);
2199
2200	a6xx_gmu_remove(a6xx_gpu);
2201
2202	adreno_gpu_cleanup(adreno_gpu);
2203
2204	kfree(a6xx_gpu);
2205}
2206
2207static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
2208{
2209	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2210	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2211	u64 busy_cycles;
2212
2213	/* 19.2MHz */
2214	*out_sample_rate = 19200000;
2215
2216	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
2217			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
2218			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
2219
2220	return busy_cycles;
2221}
2222
2223static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
2224			      bool suspended)
2225{
2226	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2227	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2228
2229	mutex_lock(&a6xx_gpu->gmu.lock);
2230	a6xx_gmu_set_freq(gpu, opp, suspended);
2231	mutex_unlock(&a6xx_gpu->gmu.lock);
2232}
2233
2234static struct msm_gem_address_space *
2235a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
2236{
2237	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2238	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2239	unsigned long quirks = 0;
2240
2241	/*
2242	 * This allows GPU to set the bus attributes required to use system
2243	 * cache on behalf of the iommu page table walker.
2244	 */
2245	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice) &&
2246	    !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
2247		quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
2248
2249	return adreno_iommu_create_address_space(gpu, pdev, quirks);
2250}
2251
2252static struct msm_gem_address_space *
2253a6xx_create_private_address_space(struct msm_gpu *gpu)
2254{
2255	struct msm_mmu *mmu;
2256
2257	mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
2258
2259	if (IS_ERR(mmu))
2260		return ERR_CAST(mmu);
2261
2262	return msm_gem_address_space_create(mmu,
2263		"gpu", 0x100000000ULL,
2264		adreno_private_address_space_size(gpu));
2265}
2266
2267static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2268{
2269	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
2270	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
2271
2272	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
2273		return a6xx_gpu->shadow[ring->id];
2274
2275	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
2276}
2277
2278static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
2279{
2280	struct msm_cp_state cp_state = {
2281		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
2282		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
2283		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
2284		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
2285	};
2286	bool progress;
2287
2288	/*
2289	 * Adjust the remaining data to account for what has already been
2290	 * fetched from memory, but not yet consumed by the SQE.
2291	 *
2292	 * This is not *technically* correct, the amount buffered could
2293	 * exceed the IB size due to hw prefetching ahead, but:
2294	 *
2295	 * (1) We aren't trying to find the exact position, just whether
2296	 *     progress has been made
2297	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
2298	 *     to prevent prefetching into an unrelated submit.  (And
2299	 *     either way, at some point the ROQ will be full.)
2300	 */
2301	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB1) >> 16;
2302	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_ROQ_AVAIL_IB2) >> 16;
2303
2304	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
2305
2306	ring->last_cp_state = cp_state;
2307
2308	return progress;
2309}
2310
2311static u32 fuse_to_supp_hw(const struct adreno_info *info, u32 fuse)
2312{
2313	if (!info->speedbins)
2314		return UINT_MAX;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2315
2316	for (int i = 0; info->speedbins[i].fuse != SHRT_MAX; i++)
2317		if (info->speedbins[i].fuse == fuse)
2318			return BIT(info->speedbins[i].speedbin);
 
 
 
 
 
 
 
 
2319
2320	return UINT_MAX;
2321}
2322
2323static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2324{
2325	u32 supp_hw;
2326	u32 speedbin;
2327	int ret;
2328
2329	ret = adreno_read_speedbin(dev, &speedbin);
2330	/*
2331	 * -ENOENT means that the platform doesn't support speedbin which is
2332	 * fine
2333	 */
2334	if (ret == -ENOENT) {
2335		return 0;
2336	} else if (ret) {
2337		dev_err_probe(dev, ret,
2338			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
2339		return ret;
2340	}
2341
2342	supp_hw = fuse_to_supp_hw(info, speedbin);
2343
2344	if (supp_hw == UINT_MAX) {
2345		DRM_DEV_ERROR(dev,
2346			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
2347			speedbin);
2348		supp_hw = BIT(0); /* Default */
2349	}
2350
2351	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
2352	if (ret)
2353		return ret;
2354
2355	return 0;
2356}
2357
2358static const struct adreno_gpu_funcs funcs = {
2359	.base = {
2360		.get_param = adreno_get_param,
2361		.set_param = adreno_set_param,
2362		.hw_init = a6xx_hw_init,
2363		.ucode_load = a6xx_ucode_load,
2364		.pm_suspend = a6xx_gmu_pm_suspend,
2365		.pm_resume = a6xx_gmu_pm_resume,
2366		.recover = a6xx_recover,
2367		.submit = a6xx_submit,
2368		.active_ring = a6xx_active_ring,
2369		.irq = a6xx_irq,
2370		.destroy = a6xx_destroy,
2371#if defined(CONFIG_DRM_MSM_GPU_STATE)
2372		.show = a6xx_show,
2373#endif
2374		.gpu_busy = a6xx_gpu_busy,
2375		.gpu_get_freq = a6xx_gmu_get_freq,
2376		.gpu_set_freq = a6xx_gpu_set_freq,
2377#if defined(CONFIG_DRM_MSM_GPU_STATE)
2378		.gpu_state_get = a6xx_gpu_state_get,
2379		.gpu_state_put = a6xx_gpu_state_put,
2380#endif
2381		.create_address_space = a6xx_create_address_space,
2382		.create_private_address_space = a6xx_create_private_address_space,
2383		.get_rptr = a6xx_get_rptr,
2384		.progress = a6xx_progress,
2385	},
2386	.get_timestamp = a6xx_gmu_get_timestamp,
2387};
2388
2389static const struct adreno_gpu_funcs funcs_gmuwrapper = {
2390	.base = {
2391		.get_param = adreno_get_param,
2392		.set_param = adreno_set_param,
2393		.hw_init = a6xx_hw_init,
2394		.ucode_load = a6xx_ucode_load,
2395		.pm_suspend = a6xx_pm_suspend,
2396		.pm_resume = a6xx_pm_resume,
2397		.recover = a6xx_recover,
2398		.submit = a6xx_submit,
2399		.active_ring = a6xx_active_ring,
2400		.irq = a6xx_irq,
2401		.destroy = a6xx_destroy,
2402#if defined(CONFIG_DRM_MSM_GPU_STATE)
2403		.show = a6xx_show,
2404#endif
2405		.gpu_busy = a6xx_gpu_busy,
2406#if defined(CONFIG_DRM_MSM_GPU_STATE)
2407		.gpu_state_get = a6xx_gpu_state_get,
2408		.gpu_state_put = a6xx_gpu_state_put,
2409#endif
2410		.create_address_space = a6xx_create_address_space,
2411		.create_private_address_space = a6xx_create_private_address_space,
2412		.get_rptr = a6xx_get_rptr,
2413		.progress = a6xx_progress,
2414	},
2415	.get_timestamp = a6xx_get_timestamp,
2416};
2417
2418static const struct adreno_gpu_funcs funcs_a7xx = {
2419	.base = {
2420		.get_param = adreno_get_param,
2421		.set_param = adreno_set_param,
2422		.hw_init = a6xx_hw_init,
2423		.ucode_load = a6xx_ucode_load,
2424		.pm_suspend = a6xx_gmu_pm_suspend,
2425		.pm_resume = a6xx_gmu_pm_resume,
2426		.recover = a6xx_recover,
2427		.submit = a7xx_submit,
2428		.active_ring = a6xx_active_ring,
2429		.irq = a6xx_irq,
2430		.destroy = a6xx_destroy,
2431#if defined(CONFIG_DRM_MSM_GPU_STATE)
2432		.show = a6xx_show,
2433#endif
2434		.gpu_busy = a6xx_gpu_busy,
2435		.gpu_get_freq = a6xx_gmu_get_freq,
2436		.gpu_set_freq = a6xx_gpu_set_freq,
2437#if defined(CONFIG_DRM_MSM_GPU_STATE)
2438		.gpu_state_get = a6xx_gpu_state_get,
2439		.gpu_state_put = a6xx_gpu_state_put,
2440#endif
2441		.create_address_space = a6xx_create_address_space,
2442		.create_private_address_space = a6xx_create_private_address_space,
2443		.get_rptr = a6xx_get_rptr,
2444		.progress = a6xx_progress,
2445	},
2446	.get_timestamp = a6xx_gmu_get_timestamp,
2447};
2448
2449struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
2450{
2451	struct msm_drm_private *priv = dev->dev_private;
2452	struct platform_device *pdev = priv->gpu_pdev;
2453	struct adreno_platform_config *config = pdev->dev.platform_data;
 
2454	struct device_node *node;
2455	struct a6xx_gpu *a6xx_gpu;
2456	struct adreno_gpu *adreno_gpu;
2457	struct msm_gpu *gpu;
2458	extern int enable_preemption;
2459	bool is_a7xx;
2460	int ret;
2461
2462	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
2463	if (!a6xx_gpu)
2464		return ERR_PTR(-ENOMEM);
2465
2466	adreno_gpu = &a6xx_gpu->base;
2467	gpu = &adreno_gpu->base;
2468
2469	mutex_init(&a6xx_gpu->gmu.lock);
2470
2471	adreno_gpu->registers = NULL;
2472
2473	/* Check if there is a GMU phandle and set it up */
2474	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2475	/* FIXME: How do we gracefully handle this? */
2476	BUG_ON(!node);
2477
2478	adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
2479
2480	adreno_gpu->base.hw_apriv =
2481		!!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
2482
2483	/* gpu->info only gets assigned in adreno_gpu_init() */
2484	is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
2485		  config->info->family == ADRENO_7XX_GEN2 ||
2486		  config->info->family == ADRENO_7XX_GEN3;
2487
2488	a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
2489
2490	ret = a6xx_set_supported_hw(&pdev->dev, config->info);
2491	if (ret) {
2492		a6xx_llc_slices_destroy(a6xx_gpu);
2493		kfree(a6xx_gpu);
2494		return ERR_PTR(ret);
2495	}
2496
2497	if ((enable_preemption == 1) || (enable_preemption == -1 &&
2498	    (config->info->quirks & ADRENO_QUIRK_PREEMPTION)))
2499		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4);
2500	else if (is_a7xx)
2501		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1);
2502	else if (adreno_has_gmu_wrapper(adreno_gpu))
2503		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1);
2504	else
2505		ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
2506	if (ret) {
2507		a6xx_destroy(&(a6xx_gpu->base.base));
2508		return ERR_PTR(ret);
2509	}
2510
2511	/*
2512	 * For now only clamp to idle freq for devices where this is known not
2513	 * to cause power supply issues:
2514	 */
2515	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2516		priv->gpu_clamp_to_idle = true;
2517
2518	if (adreno_has_gmu_wrapper(adreno_gpu))
2519		ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
2520	else
2521		ret = a6xx_gmu_init(a6xx_gpu, node);
 
 
 
2522	of_node_put(node);
2523	if (ret) {
2524		a6xx_destroy(&(a6xx_gpu->base.base));
2525		return ERR_PTR(ret);
2526	}
2527
2528	if (adreno_is_a7xx(adreno_gpu)) {
2529		ret = a7xx_cx_mem_init(a6xx_gpu);
2530		if (ret) {
2531			a6xx_destroy(&(a6xx_gpu->base.base));
2532			return ERR_PTR(ret);
2533		}
2534	}
2535
2536	if (gpu->aspace)
2537		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2538				a6xx_fault_handler);
2539
2540	a6xx_calc_ubwc_config(adreno_gpu);
2541	/* Set up the preemption specific bits and pieces for each ringbuffer */
2542	a6xx_preempt_init(gpu);
2543
2544	return gpu;
2545}
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
   3
   4
   5#include "msm_gem.h"
   6#include "msm_mmu.h"
   7#include "msm_gpu_trace.h"
   8#include "a6xx_gpu.h"
   9#include "a6xx_gmu.xml.h"
  10
  11#include <linux/bitfield.h>
  12#include <linux/devfreq.h>
  13#include <linux/reset.h>
 
  14#include <linux/soc/qcom/llcc-qcom.h>
  15
  16#define GPU_PAS_ID 13
  17
  18static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
  19{
  20	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  21	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  22
  23	/* Check that the GMU is idle */
  24	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
  25		return false;
  26
  27	/* Check tha the CX master is idle */
  28	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS) &
  29			~A6XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
  30		return false;
  31
  32	return !(gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS) &
  33		A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
  34}
  35
  36static bool a6xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  37{
  38	/* wait for CP to drain ringbuffer: */
  39	if (!adreno_idle(gpu, ring))
  40		return false;
  41
  42	if (spin_until(_a6xx_check_idle(gpu))) {
  43		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
  44			gpu->name, __builtin_return_address(0),
  45			gpu_read(gpu, REG_A6XX_RBBM_STATUS),
  46			gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS),
  47			gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
  48			gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
  49		return false;
  50	}
  51
  52	return true;
  53}
  54
  55static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  56{
  57	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
  58	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
  59
  60	/* Expanded APRIV doesn't need to issue the WHERE_AM_I opcode */
  61	if (a6xx_gpu->has_whereami && !adreno_gpu->base.hw_apriv) {
  62		OUT_PKT7(ring, CP_WHERE_AM_I, 2);
  63		OUT_RING(ring, lower_32_bits(shadowptr(a6xx_gpu, ring)));
  64		OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
  65	}
  66}
  67
  68static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
  69{
 
 
  70	uint32_t wptr;
  71	unsigned long flags;
  72
  73	update_shadow_rptr(gpu, ring);
  74
  75	spin_lock_irqsave(&ring->preempt_lock, flags);
  76
  77	/* Copy the shadow to the actual register */
  78	ring->cur = ring->next;
  79
  80	/* Make sure to wrap wptr if we need to */
  81	wptr = get_wptr(ring);
  82
 
 
 
 
 
 
 
 
 
 
  83	spin_unlock_irqrestore(&ring->preempt_lock, flags);
  84
  85	/* Make sure everything is posted before making a decision */
  86	mb();
  87
  88	gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
  89}
  90
  91static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
  92		u64 iova)
  93{
  94	OUT_PKT7(ring, CP_REG_TO_MEM, 3);
  95	OUT_RING(ring, CP_REG_TO_MEM_0_REG(counter) |
  96		CP_REG_TO_MEM_0_CNT(2) |
  97		CP_REG_TO_MEM_0_64B);
  98	OUT_RING(ring, lower_32_bits(iova));
  99	OUT_RING(ring, upper_32_bits(iova));
 100}
 101
 102static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
 103		struct msm_ringbuffer *ring, struct msm_file_private *ctx)
 104{
 105	bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
 
 
 106	phys_addr_t ttbr;
 107	u32 asid;
 108	u64 memptr = rbmemptr(ring, ttbr0);
 109
 110	if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno)
 111		return;
 112
 113	if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
 114		return;
 115
 
 
 
 
 
 
 
 
 
 116	if (!sysprof) {
 117		/* Turn off protected mode to write to special registers */
 118		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 119		OUT_RING(ring, 0);
 
 
 120
 121		OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
 122		OUT_RING(ring, 1);
 123	}
 124
 125	/* Execute the table update */
 126	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
 127	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_0_TTBR0_LO(lower_32_bits(ttbr)));
 128
 129	OUT_RING(ring,
 130		CP_SMMU_TABLE_UPDATE_1_TTBR0_HI(upper_32_bits(ttbr)) |
 131		CP_SMMU_TABLE_UPDATE_1_ASID(asid));
 132	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_2_CONTEXTIDR(0));
 133	OUT_RING(ring, CP_SMMU_TABLE_UPDATE_3_CONTEXTBANK(0));
 134
 135	/*
 136	 * Write the new TTBR0 to the memstore. This is good for debugging.
 
 137	 */
 138	OUT_PKT7(ring, CP_MEM_WRITE, 4);
 139	OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
 140	OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
 141	OUT_RING(ring, lower_32_bits(ttbr));
 142	OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr));
 
 
 
 
 
 
 
 
 
 
 
 143
 144	/*
 145	 * And finally, trigger a uche flush to be sure there isn't anything
 146	 * lingering in that part of the GPU
 147	 */
 148
 149	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 150	OUT_RING(ring, CACHE_INVALIDATE);
 151
 152	if (!sysprof) {
 153		/*
 154		 * Wait for SRAM clear after the pgtable update, so the
 155		 * two can happen in parallel:
 156		 */
 157		OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
 158		OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
 159		OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
 160				REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
 161		OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
 162		OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
 163		OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
 164		OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
 165
 166		/* Re-enable protected mode: */
 167		OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
 168		OUT_RING(ring, 1);
 
 
 169	}
 170}
 171
 172static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 173{
 174	unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
 175	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 176	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 177	struct msm_ringbuffer *ring = submit->ring;
 178	unsigned int i, ibs = 0;
 179
 180	a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx);
 181
 182	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
 183		rbmemptr_stats(ring, index, cpcycles_start));
 184
 185	/*
 186	 * For PM4 the GMU register offsets are calculated from the base of the
 187	 * GPU registers so we need to add 0x1a800 to the register value on A630
 188	 * to get the right value from PM4.
 189	 */
 190	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
 191		rbmemptr_stats(ring, index, alwayson_start));
 192
 193	/* Invalidate CCU depth and color */
 194	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 195	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_DEPTH));
 196
 197	OUT_PKT7(ring, CP_EVENT_WRITE, 1);
 198	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(PC_CCU_INVALIDATE_COLOR));
 199
 200	/* Submit the commands */
 201	for (i = 0; i < submit->nr_cmds; i++) {
 202		switch (submit->cmd[i].type) {
 203		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
 204			break;
 205		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 206			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
 207				break;
 208			fallthrough;
 209		case MSM_SUBMIT_CMD_BUF:
 210			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
 211			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
 212			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
 213			OUT_RING(ring, submit->cmd[i].size);
 214			ibs++;
 215			break;
 216		}
 217
 218		/*
 219		 * Periodically update shadow-wptr if needed, so that we
 220		 * can see partial progress of submits with large # of
 221		 * cmds.. otherwise we could needlessly stall waiting for
 222		 * ringbuffer state, simply due to looking at a shadow
 223		 * rptr value that has not been updated
 224		 */
 225		if ((ibs % 32) == 0)
 226			update_shadow_rptr(gpu, ring);
 227	}
 228
 229	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
 230		rbmemptr_stats(ring, index, cpcycles_end));
 231	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
 232		rbmemptr_stats(ring, index, alwayson_end));
 233
 234	/* Write the fence to the scratch register */
 235	OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
 236	OUT_RING(ring, submit->seqno);
 237
 238	/*
 239	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
 240	 * timestamp is written to the memory and then triggers the interrupt
 241	 */
 242	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
 243	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
 244		CP_EVENT_WRITE_0_IRQ);
 245	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
 246	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
 247	OUT_RING(ring, submit->seqno);
 248
 249	trace_msm_gpu_submit_flush(submit,
 250		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
 251
 252	a6xx_flush(gpu, ring);
 253}
 254
 255/* For a615 family (a615, a616, a618 and a619) */
 256const struct adreno_reglist a615_hwcg[] = {
 257	{REG_A6XX_RBBM_CLOCK_CNTL_SP0,  0x02222222},
 258	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 259	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 260	{REG_A6XX_RBBM_CLOCK_HYST_SP0,  0x0000F3CF},
 261	{REG_A6XX_RBBM_CLOCK_CNTL_TP0,  0x02222222},
 262	{REG_A6XX_RBBM_CLOCK_CNTL_TP1,  0x02222222},
 263	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 264	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 265	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 266	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
 267	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 268	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
 269	{REG_A6XX_RBBM_CLOCK_HYST_TP0,  0x77777777},
 270	{REG_A6XX_RBBM_CLOCK_HYST_TP1,  0x77777777},
 271	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 272	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 273	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 274	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
 275	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 276	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
 277	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 278	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 279	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 280	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 281	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 282	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
 283	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 284	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
 285	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE,  0x22222222},
 286	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 287	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 288	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 289	{REG_A6XX_RBBM_CLOCK_HYST_UCHE,  0x00000004},
 290	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 291	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 292	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
 293	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002020},
 294	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
 295	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
 296	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
 297	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 298	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00},
 299	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00},
 300	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00},
 301	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
 302	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 303	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 304	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 305	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 306	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 307	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 308	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 309	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 310	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 311	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 312	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 313	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 314	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 315	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 316	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 317	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 318	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 319	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 320	{},
 321};
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 322
 323const struct adreno_reglist a630_hwcg[] = {
 324	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222},
 325	{REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222},
 326	{REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222},
 327	{REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222},
 328	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220},
 329	{REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220},
 330	{REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220},
 331	{REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220},
 332	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 333	{REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
 334	{REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
 335	{REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
 336	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf},
 337	{REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf},
 338	{REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf},
 339	{REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf},
 340	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
 341	{REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222},
 342	{REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222},
 343	{REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222},
 344	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 345	{REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
 346	{REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
 347	{REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
 348	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 349	{REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222},
 350	{REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222},
 351	{REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222},
 352	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 353	{REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222},
 354	{REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222},
 355	{REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222},
 356	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 357	{REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
 358	{REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
 359	{REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
 360	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 361	{REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
 362	{REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
 363	{REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
 364	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 365	{REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777},
 366	{REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777},
 367	{REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777},
 368	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 369	{REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777},
 370	{REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777},
 371	{REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777},
 372	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 373	{REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
 374	{REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
 375	{REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
 376	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 377	{REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
 378	{REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
 379	{REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
 380	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 381	{REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111},
 382	{REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111},
 383	{REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111},
 384	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 385	{REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111},
 386	{REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111},
 387	{REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111},
 388	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 389	{REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
 390	{REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
 391	{REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
 392	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 393	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 394	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 395	{REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
 396	{REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
 397	{REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
 398	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222},
 399	{REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222},
 400	{REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222},
 401	{REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222},
 402	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 403	{REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220},
 404	{REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220},
 405	{REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220},
 406	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00},
 407	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00},
 408	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00},
 409	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00},
 410	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022},
 411	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 412	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 413	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 414	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 415	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 416	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 417	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 418	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 419	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 420	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 421	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 422	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 423	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 424	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 425	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 426	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 427	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 428	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 429	{},
 430};
 431
 432const struct adreno_reglist a640_hwcg[] = {
 433	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 434	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 435	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 436	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 437	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
 438	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 439	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 440	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 441	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 442	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 443	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 444	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 445	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 446	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 447	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 448	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 449	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 450	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
 451	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 452	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 453	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022},
 454	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 455	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 456	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 457	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 458	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 459	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 460	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 461	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 462	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 463	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 464	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 465	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 466	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 467	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 468	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 469	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
 470	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
 471	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
 472	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
 473	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 474	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 475	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 476	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
 477	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
 478	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
 479	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 480	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 481	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 482	{},
 483};
 484
 485const struct adreno_reglist a650_hwcg[] = {
 486	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 487	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 488	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 489	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 490	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222},
 491	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 492	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 493	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 494	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 495	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 496	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 497	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 498	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 499	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 500	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 501	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 502	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 503	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
 504	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 505	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 506	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
 507	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 508	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 509	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 510	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 511	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 512	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 513	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 514	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 515	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 516	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 517	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 518	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 519	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 520	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 521	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 522	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
 523	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
 524	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
 525	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777},
 526	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 527	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 528	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 529	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
 530	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
 531	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
 532	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 533	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 534	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 535	{},
 536};
 537
 538const struct adreno_reglist a660_hwcg[] = {
 539	{REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
 540	{REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
 541	{REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
 542	{REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
 543	{REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
 544	{REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
 545	{REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222},
 546	{REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222},
 547	{REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
 548	{REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
 549	{REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111},
 550	{REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111},
 551	{REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
 552	{REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
 553	{REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777},
 554	{REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777},
 555	{REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
 556	{REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222},
 557	{REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220},
 558	{REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00},
 559	{REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022},
 560	{REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555},
 561	{REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011},
 562	{REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044},
 563	{REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
 564	{REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
 565	{REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222},
 566	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002},
 567	{REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222},
 568	{REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
 569	{REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 570	{REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
 571	{REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
 572	{REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
 573	{REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
 574	{REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
 575	{REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000},
 576	{REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222},
 577	{REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111},
 578	{REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000},
 579	{REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
 580	{REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004},
 581	{REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
 582	{REG_A6XX_RBBM_ISDB_CNT, 0x00000182},
 583	{REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000},
 584	{REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000},
 585	{REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222},
 586	{REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111},
 587	{REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555},
 588	{},
 589};
 590
 591static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
 592{
 593	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 594	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 595	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 596	const struct adreno_reglist *reg;
 597	unsigned int i;
 
 598	u32 val, clock_cntl_on;
 599
 600	if (!adreno_gpu->info->hwcg)
 601		return;
 602
 603	if (adreno_is_a630(adreno_gpu))
 604		clock_cntl_on = 0x8aa8aa02;
 
 
 
 
 605	else
 606		clock_cntl_on = 0x8aa8aa82;
 607
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 608	val = gpu_read(gpu, REG_A6XX_RBBM_CLOCK_CNTL);
 609
 610	/* Don't re-program the registers if they are already correct */
 611	if ((!state && !val) || (state && (val == clock_cntl_on)))
 612		return;
 613
 614	/* Disable SP clock before programming HWCG registers */
 615	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0);
 
 616
 617	for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++)
 618		gpu_write(gpu, reg->offset, state ? reg->value : 0);
 619
 620	/* Enable SP clock */
 621	gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 0, 1);
 
 622
 623	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
 624}
 625
 626/* For a615, a616, a618, a619, a630, a640 and a680 */
 627static const u32 a6xx_protect[] = {
 628	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
 629	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
 630	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
 631	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
 632	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
 633	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
 634	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
 635	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
 636	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
 637	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
 638	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
 639	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
 640	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
 641	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
 642	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
 643	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
 644	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
 645	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
 646	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
 647	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
 648	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
 649	A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
 650	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
 651	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
 652	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
 653	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
 654	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
 655	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
 656	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
 657	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
 658	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
 659	A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
 660};
 661
 662/* These are for a620 and a650 */
 663static const u32 a650_protect[] = {
 664	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
 665	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
 666	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
 667	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
 668	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
 669	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
 670	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
 671	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
 672	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
 673	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
 674	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
 675	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
 676	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
 677	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
 678	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
 679	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
 680	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
 681	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
 682	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
 683	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
 684	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
 685	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
 686	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
 687	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
 688	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
 689	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
 690	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
 691	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
 692	A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
 693	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
 694	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
 695	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
 696	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
 697	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
 698	A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
 699	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
 700	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
 701	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
 702	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
 703};
 704
 705/* These are for a635 and a660 */
 706static const u32 a660_protect[] = {
 707	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
 708	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
 709	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
 710	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
 711	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
 712	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
 713	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
 714	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
 715	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
 716	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
 717	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
 718	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
 719	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
 720	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
 721	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
 722	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
 723	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
 724	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
 725	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
 726	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
 727	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
 728	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
 729	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
 730	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
 731	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
 732	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
 733	A6XX_PROTECT_NORDWR(0x0ae50, 0x012f),
 734	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
 735	A6XX_PROTECT_NORDWR(0x0b608, 0x0006),
 736	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
 737	A6XX_PROTECT_NORDWR(0x0be20, 0x015f),
 738	A6XX_PROTECT_NORDWR(0x0d000, 0x05ff),
 739	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
 740	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
 741	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
 742	A6XX_PROTECT_NORDWR(0x1a400, 0x1fff),
 743	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
 744	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
 745	A6XX_PROTECT_NORDWR(0x1f860, 0x0000),
 746	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
 747	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
 748};
 749
 750static void a6xx_set_cp_protect(struct msm_gpu *gpu)
 751{
 752	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 753	const u32 *regs = a6xx_protect;
 754	unsigned i, count, count_max;
 755
 756	if (adreno_is_a650(adreno_gpu)) {
 757		regs = a650_protect;
 758		count = ARRAY_SIZE(a650_protect);
 759		count_max = 48;
 760		BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
 761	} else if (adreno_is_a660_family(adreno_gpu)) {
 762		regs = a660_protect;
 763		count = ARRAY_SIZE(a660_protect);
 764		count_max = 48;
 765		BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48);
 766	} else {
 767		regs = a6xx_protect;
 768		count = ARRAY_SIZE(a6xx_protect);
 769		count_max = 32;
 770		BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
 771	}
 772
 773	/*
 774	 * Enable access protection to privileged registers, fault on an access
 775	 * protect violation and select the last span to protect from the start
 776	 * address all the way to the end of the register address space
 777	 */
 778	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
 779
 780	for (i = 0; i < count - 1; i++)
 781		gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
 
 
 
 
 
 
 782	/* last CP_PROTECT to have "infinite" length on the last entry */
 783	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
 784}
 785
 786static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 787{
 788	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 789	u32 lower_bit = 2;
 790	u32 amsbc = 0;
 791	u32 rgb565_predicator = 0;
 792	u32 uavflagprd_inv = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 793
 794	/* a618 is using the hw default values */
 795	if (adreno_is_a618(adreno_gpu))
 796		return;
 797
 798	if (adreno_is_a640_family(adreno_gpu))
 799		amsbc = 1;
 800
 801	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) {
 
 
 
 
 802		/* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
 803		lower_bit = 3;
 804		amsbc = 1;
 805		rgb565_predicator = 1;
 806		uavflagprd_inv = 2;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 807	}
 
 808
 809	if (adreno_is_7c3(adreno_gpu)) {
 810		lower_bit = 1;
 811		amsbc = 1;
 812		rgb565_predicator = 1;
 813		uavflagprd_inv = 2;
 814	}
 
 
 
 
 
 
 
 
 815
 816	gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
 817		rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
 818	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
 
 
 
 
 
 
 
 
 
 819	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
 820		uavflagprd_inv << 4 | lower_bit << 1);
 821	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 822}
 823
 824static int a6xx_cp_init(struct msm_gpu *gpu)
 825{
 826	struct msm_ringbuffer *ring = gpu->rb[0];
 827
 828	OUT_PKT7(ring, CP_ME_INIT, 8);
 829
 830	OUT_RING(ring, 0x0000002f);
 831
 832	/* Enable multiple hardware contexts */
 833	OUT_RING(ring, 0x00000003);
 834
 835	/* Enable error detection */
 836	OUT_RING(ring, 0x20000000);
 837
 838	/* Don't enable header dump */
 839	OUT_RING(ring, 0x00000000);
 840	OUT_RING(ring, 0x00000000);
 841
 842	/* No workarounds enabled */
 843	OUT_RING(ring, 0x00000000);
 844
 845	/* Pad rest of the cmds with 0's */
 846	OUT_RING(ring, 0x00000000);
 847	OUT_RING(ring, 0x00000000);
 848
 849	a6xx_flush(gpu, ring);
 850	return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
 851}
 852
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 853/*
 854 * Check that the microcode version is new enough to include several key
 855 * security fixes. Return true if the ucode is safe.
 856 */
 857static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
 858		struct drm_gem_object *obj)
 859{
 860	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 861	struct msm_gpu *gpu = &adreno_gpu->base;
 862	const char *sqe_name = adreno_gpu->info->fw[ADRENO_FW_SQE];
 863	u32 *buf = msm_gem_get_vaddr(obj);
 864	bool ret = false;
 865
 866	if (IS_ERR(buf))
 867		return false;
 868
 
 
 
 
 869	/*
 870	 * Targets up to a640 (a618, a630 and a640) need to check for a
 871	 * microcode version that is patched to support the whereami opcode or
 872	 * one that is new enough to include it by default.
 873	 *
 874	 * a650 tier targets don't need whereami but still need to be
 875	 * equal to or newer than 0.95 for other security fixes
 876	 *
 877	 * a660 targets have all the critical security fixes from the start
 878	 */
 879	if (!strcmp(sqe_name, "a630_sqe.fw")) {
 880		/*
 881		 * If the lowest nibble is 0xa that is an indication that this
 882		 * microcode has been patched. The actual version is in dword
 883		 * [3] but we only care about the patchlevel which is the lowest
 884		 * nibble of dword [3]
 885		 *
 886		 * Otherwise check that the firmware is greater than or equal
 887		 * to 1.90 which was the first version that had this fix built
 888		 * in
 889		 */
 890		if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
 891			(buf[0] & 0xfff) >= 0x190) {
 892			a6xx_gpu->has_whereami = true;
 893			ret = true;
 894			goto out;
 895		}
 896
 897		DRM_DEV_ERROR(&gpu->pdev->dev,
 898			"a630 SQE ucode is too old. Have version %x need at least %x\n",
 899			buf[0] & 0xfff, 0x190);
 900	} else if (!strcmp(sqe_name, "a650_sqe.fw")) {
 901		if ((buf[0] & 0xfff) >= 0x095) {
 902			ret = true;
 903			goto out;
 904		}
 905
 906		DRM_DEV_ERROR(&gpu->pdev->dev,
 907			"a650 SQE ucode is too old. Have version %x need at least %x\n",
 908			buf[0] & 0xfff, 0x095);
 909	} else if (!strcmp(sqe_name, "a660_sqe.fw")) {
 910		ret = true;
 911	} else {
 912		DRM_DEV_ERROR(&gpu->pdev->dev,
 913			"unknown GPU, add it to a6xx_ucode_check_version()!!\n");
 914	}
 915out:
 916	msm_gem_put_vaddr(obj);
 917	return ret;
 918}
 919
 920static int a6xx_ucode_init(struct msm_gpu *gpu)
 921{
 922	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 923	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 924
 925	if (!a6xx_gpu->sqe_bo) {
 926		a6xx_gpu->sqe_bo = adreno_fw_create_bo(gpu,
 927			adreno_gpu->fw[ADRENO_FW_SQE], &a6xx_gpu->sqe_iova);
 928
 929		if (IS_ERR(a6xx_gpu->sqe_bo)) {
 930			int ret = PTR_ERR(a6xx_gpu->sqe_bo);
 931
 932			a6xx_gpu->sqe_bo = NULL;
 933			DRM_DEV_ERROR(&gpu->pdev->dev,
 934				"Could not allocate SQE ucode: %d\n", ret);
 935
 936			return ret;
 937		}
 938
 939		msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
 940		if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
 941			msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
 942			drm_gem_object_put(a6xx_gpu->sqe_bo);
 943
 944			a6xx_gpu->sqe_bo = NULL;
 945			return -EPERM;
 946		}
 947	}
 948
 949	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 950
 951	return 0;
 952}
 953
 954static int a6xx_zap_shader_init(struct msm_gpu *gpu)
 955{
 956	static bool loaded;
 957	int ret;
 958
 959	if (loaded)
 960		return 0;
 961
 962	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
 963
 964	loaded = !ret;
 965	return ret;
 966}
 967
 968#define A6XX_INT_MASK (A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
 969	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
 970	  A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
 971	  A6XX_RBBM_INT_0_MASK_CP_IB2 | \
 972	  A6XX_RBBM_INT_0_MASK_CP_IB1 | \
 973	  A6XX_RBBM_INT_0_MASK_CP_RB | \
 974	  A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
 975	  A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
 976	  A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
 977	  A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
 978	  A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 979
 980static int hw_init(struct msm_gpu *gpu)
 981{
 982	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 983	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 
 
 
 984	int ret;
 985
 986	/* Make sure the GMU keeps the GPU on while we set it up */
 987	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
 
 
 
 
 988
 989	/* Clear GBIF halt in case GX domain was not collapsed */
 990	if (a6xx_has_gbif(adreno_gpu))
 
 
 
 
 
 
 
 
 
 991		gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0);
 
 
 992
 993	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);
 994
 
 
 
 995	/*
 996	 * Disable the trusted memory range - we don't actually supported secure
 997	 * memory rendering at this point in time and we don't want to block off
 998	 * part of the virtual memory space.
 999	 */
1000	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
1001	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
1002
1003	/* Turn on 64 bit addressing for all blocks */
1004	gpu_write(gpu, REG_A6XX_CP_ADDR_MODE_CNTL, 0x1);
1005	gpu_write(gpu, REG_A6XX_VSC_ADDR_MODE_CNTL, 0x1);
1006	gpu_write(gpu, REG_A6XX_GRAS_ADDR_MODE_CNTL, 0x1);
1007	gpu_write(gpu, REG_A6XX_RB_ADDR_MODE_CNTL, 0x1);
1008	gpu_write(gpu, REG_A6XX_PC_ADDR_MODE_CNTL, 0x1);
1009	gpu_write(gpu, REG_A6XX_HLSQ_ADDR_MODE_CNTL, 0x1);
1010	gpu_write(gpu, REG_A6XX_VFD_ADDR_MODE_CNTL, 0x1);
1011	gpu_write(gpu, REG_A6XX_VPC_ADDR_MODE_CNTL, 0x1);
1012	gpu_write(gpu, REG_A6XX_UCHE_ADDR_MODE_CNTL, 0x1);
1013	gpu_write(gpu, REG_A6XX_SP_ADDR_MODE_CNTL, 0x1);
1014	gpu_write(gpu, REG_A6XX_TPL1_ADDR_MODE_CNTL, 0x1);
1015	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
 
 
1016
1017	/* enable hardware clockgating */
1018	a6xx_set_hwcg(gpu, true);
1019
1020	/* VBIF/GBIF start*/
1021	if (adreno_is_a640_family(adreno_gpu) ||
1022	    adreno_is_a650_family(adreno_gpu)) {
 
 
1023		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
1024		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
1025		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
1026		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1027		gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
1028		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
1029	} else {
1030		gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
1031	}
1032
1033	if (adreno_is_a630(adreno_gpu))
1034		gpu_write(gpu, REG_A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
1035
 
 
 
1036	/* Make all blocks contribute to the GPU BUSY perf counter */
1037	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);
1038
1039	/* Disable L2 bypass in the UCHE */
1040	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_LO, 0xffffffc0);
1041	gpu_write(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX_HI, 0x0001ffff);
1042	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_LO, 0xfffff000);
1043	gpu_write(gpu, REG_A6XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
1044	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_LO, 0xfffff000);
1045	gpu_write(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
 
 
 
 
 
 
 
1046
1047	if (!adreno_is_a650_family(adreno_gpu)) {
1048		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
1049		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
1050
1051		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
1052			0x00100000 + adreno_gpu->gmem - 1);
1053	}
1054
1055	gpu_write(gpu, REG_A6XX_UCHE_FILTER_CNTL, 0x804);
1056	gpu_write(gpu, REG_A6XX_UCHE_CACHE_WAYS, 0x4);
 
 
 
 
1057
1058	if (adreno_is_a640_family(adreno_gpu) ||
1059	    adreno_is_a650_family(adreno_gpu))
1060		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
1061	else
 
 
 
 
1062		gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2, 0x010000c0);
1063	gpu_write(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362c);
 
1064
1065	if (adreno_is_a660_family(adreno_gpu))
1066		gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
1067
1068	/* Setting the mem pool size */
1069	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128);
 
 
 
 
 
 
 
1070
1071	/* Setting the primFifo thresholds default values,
1072	 * and vccCacheSkipDis=1 bit (0x200) for A640 and newer
1073	*/
1074	if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
1075		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
1076	else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu))
1077		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200);
1078	else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu))
1079		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200);
1080	else
1081		gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000);
1082
1083	/* Set the AHB default slave response to "ERROR" */
1084	gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1);
1085
1086	/* Turn on performance counters */
1087	gpu_write(gpu, REG_A6XX_RBBM_PERFCTR_CNTL, 0x1);
1088
 
 
 
 
 
 
1089	/* Select CP0 to always count cycles */
1090	gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL(0), PERF_CP_ALWAYS_COUNT);
1091
1092	a6xx_set_ubwc_config(gpu);
1093
1094	/* Enable fault detection */
1095	gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL,
1096		(1 << 30) | 0x1fffff);
 
 
 
 
 
 
 
 
 
1097
1098	gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, 1);
1099
1100	/* Set weights for bicubic filtering */
1101	if (adreno_is_a650_family(adreno_gpu)) {
1102		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
1103		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
1104			0x3fe05ff4);
1105		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
1106			0x3fa0ebee);
1107		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
1108			0x3f5193ed);
1109		gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
1110			0x3f0243f0);
1111	}
1112
 
 
 
 
 
 
 
1113	/* Protect registers from the CP */
1114	a6xx_set_cp_protect(gpu);
1115
1116	if (adreno_is_a660_family(adreno_gpu)) {
1117		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, 0x1);
 
 
 
1118		gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x0);
 
 
 
1119	}
1120
 
 
1121	/* Set dualQ + disable afull for A660 GPU */
1122	if (adreno_is_a660(adreno_gpu))
1123		gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906);
 
 
 
 
 
 
 
1124
1125	/* Enable expanded apriv for targets that support it */
1126	if (gpu->hw_apriv) {
1127		gpu_write(gpu, REG_A6XX_CP_APRIV_CNTL,
1128			(1 << 6) | (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1129	}
1130
1131	/* Enable interrupts */
1132	gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, A6XX_INT_MASK);
 
1133
1134	ret = adreno_hw_init(gpu);
1135	if (ret)
1136		goto out;
1137
1138	ret = a6xx_ucode_init(gpu);
1139	if (ret)
1140		goto out;
1141
1142	/* Set the ringbuffer address */
1143	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
1144
1145	/* Targets that support extended APRIV can use the RPTR shadow from
1146	 * hardware but all the other ones need to disable the feature. Targets
1147	 * that support the WHERE_AM_I opcode can use that instead
1148	 */
1149	if (adreno_gpu->base.hw_apriv)
1150		gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);
1151	else
1152		gpu_write(gpu, REG_A6XX_CP_RB_CNTL,
1153			MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
1154
1155	/*
1156	 * Expanded APRIV and targets that support WHERE_AM_I both need a
1157	 * privileged buffer to store the RPTR shadow
1158	 */
 
 
 
1159
1160	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) {
1161		if (!a6xx_gpu->shadow_bo) {
1162			a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
1163				sizeof(u32) * gpu->nr_rings,
1164				MSM_BO_WC | MSM_BO_MAP_PRIV,
1165				gpu->aspace, &a6xx_gpu->shadow_bo,
1166				&a6xx_gpu->shadow_iova);
1167
1168			if (IS_ERR(a6xx_gpu->shadow))
1169				return PTR_ERR(a6xx_gpu->shadow);
1170
1171			msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow");
1172		}
1173
1174		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
1175			shadowptr(a6xx_gpu, gpu->rb[0]));
1176	}
1177
1178	/* Always come up on rb 0 */
1179	a6xx_gpu->cur_ring = gpu->rb[0];
1180
1181	gpu->cur_ctx_seqno = 0;
 
1182
1183	/* Enable the SQE_to start the CP engine */
1184	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1);
1185
1186	ret = a6xx_cp_init(gpu);
 
 
 
 
 
1187	if (ret)
1188		goto out;
1189
1190	/*
1191	 * Try to load a zap shader into the secure world. If successful
1192	 * we can use the CP to switch out of secure mode. If not then we
1193	 * have no resource but to try to switch ourselves out manually. If we
1194	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
1195	 * be blocked and a permissions violation will soon follow.
1196	 */
1197	ret = a6xx_zap_shader_init(gpu);
1198	if (!ret) {
1199		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
1200		OUT_RING(gpu->rb[0], 0x00000000);
1201
1202		a6xx_flush(gpu, gpu->rb[0]);
1203		if (!a6xx_idle(gpu, gpu->rb[0]))
1204			return -EINVAL;
1205	} else if (ret == -ENODEV) {
1206		/*
1207		 * This device does not use zap shader (but print a warning
1208		 * just in case someone got their dt wrong.. hopefully they
1209		 * have a debug UART to realize the error of their ways...
1210		 * if you mess this up you are about to crash horribly)
1211		 */
1212		dev_warn_once(gpu->dev->dev,
1213			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
1214		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
1215		ret = 0;
1216	} else {
1217		return ret;
1218	}
1219
1220out:
 
 
 
 
 
 
1221	/*
1222	 * Tell the GMU that we are done touching the GPU and it can start power
1223	 * management
1224	 */
1225	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1226
1227	if (a6xx_gpu->gmu.legacy) {
1228		/* Take the GMU out of its special boot mode */
1229		a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_BOOT_SLUMBER);
1230	}
1231
1232	return ret;
1233}
1234
1235static int a6xx_hw_init(struct msm_gpu *gpu)
1236{
1237	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1238	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1239	int ret;
1240
1241	mutex_lock(&a6xx_gpu->gmu.lock);
1242	ret = hw_init(gpu);
1243	mutex_unlock(&a6xx_gpu->gmu.lock);
1244
1245	return ret;
1246}
1247
1248static void a6xx_dump(struct msm_gpu *gpu)
1249{
1250	DRM_DEV_INFO(&gpu->pdev->dev, "status:   %08x\n",
1251			gpu_read(gpu, REG_A6XX_RBBM_STATUS));
1252	adreno_dump(gpu);
1253}
1254
1255#define VBIF_RESET_ACK_TIMEOUT	100
1256#define VBIF_RESET_ACK_MASK	0x00f0
1257
1258static void a6xx_recover(struct msm_gpu *gpu)
1259{
1260	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1261	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 
1262	int i, active_submits;
1263
1264	adreno_dump_info(gpu);
1265
1266	for (i = 0; i < 8; i++)
1267		DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
1268			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
1269
1270	if (hang_debug)
1271		a6xx_dump(gpu);
1272
1273	/*
1274	 * To handle recovery specific sequences during the rpm suspend we are
1275	 * about to trigger
1276	 */
1277	a6xx_gpu->hung = true;
1278
1279	/* Halt SQE first */
1280	gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
1281
1282	/*
1283	 * Turn off keep alive that might have been enabled by the hang
1284	 * interrupt
1285	 */
1286	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
1287
1288	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
1289
1290	/* active_submit won't change until we make a submission */
1291	mutex_lock(&gpu->active_lock);
1292	active_submits = gpu->active_submits;
1293
1294	/*
1295	 * Temporarily clear active_submits count to silence a WARN() in the
1296	 * runtime suspend cb
1297	 */
1298	gpu->active_submits = 0;
1299
 
 
 
 
 
 
 
 
 
 
 
 
 
1300	/* Drop the rpm refcount from active submits */
1301	if (active_submits)
1302		pm_runtime_put(&gpu->pdev->dev);
1303
1304	/* And the final one from recover worker */
1305	pm_runtime_put_sync(&gpu->pdev->dev);
1306
1307	/* Call into gpucc driver to poll for cx gdsc collapse */
1308	reset_control_reset(gpu->cx_collapse);
 
 
1309
1310	pm_runtime_use_autosuspend(&gpu->pdev->dev);
1311
1312	if (active_submits)
1313		pm_runtime_get(&gpu->pdev->dev);
1314
1315	pm_runtime_get_sync(&gpu->pdev->dev);
1316
1317	gpu->active_submits = active_submits;
1318	mutex_unlock(&gpu->active_lock);
1319
1320	msm_gpu_hw_init(gpu);
1321	a6xx_gpu->hung = false;
1322}
1323
1324static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
1325{
 
1326	static const char *uche_clients[7] = {
1327		"VFD", "SP", "VSC", "VPC", "HLSQ", "PC", "LRZ",
1328	};
1329	u32 val;
1330
1331	if (mid < 1 || mid > 3)
1332		return "UNKNOWN";
 
 
 
 
 
1333
1334	/*
1335	 * The source of the data depends on the mid ID read from FSYNR1.
1336	 * and the client ID read from the UCHE block
1337	 */
1338	val = gpu_read(gpu, REG_A6XX_UCHE_CLIENT_PF);
1339
1340	/* mid = 3 is most precise and refers to only one block per client */
1341	if (mid == 3)
1342		return uche_clients[val & 7];
1343
1344	/* For mid=2 the source is TP or VFD except when the client id is 0 */
1345	if (mid == 2)
1346		return ((val & 7) == 0) ? "TP" : "TP|VFD";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1347
1348	/* For mid=1 just return "UCHE" as a catchall for everything else */
1349	return "UCHE";
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1350}
1351
1352static const char *a6xx_fault_block(struct msm_gpu *gpu, u32 id)
1353{
 
 
1354	if (id == 0)
1355		return "CP";
1356	else if (id == 4)
1357		return "CCU";
1358	else if (id == 6)
1359		return "CDP Prefetch";
 
 
 
 
1360
1361	return a6xx_uche_fault_block(gpu, id);
1362}
1363
1364#define ARM_SMMU_FSR_TF                 BIT(1)
1365#define ARM_SMMU_FSR_PF			BIT(3)
1366#define ARM_SMMU_FSR_EF			BIT(4)
1367
1368static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
1369{
1370	struct msm_gpu *gpu = arg;
1371	struct adreno_smmu_fault_info *info = data;
1372	const char *type = "UNKNOWN";
1373	const char *block;
1374	bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);
1375
1376	/*
1377	 * If we aren't going to be resuming later from fault_worker, then do
1378	 * it now.
1379	 */
1380	if (!do_devcoredump) {
1381		gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
1382	}
1383
1384	/*
1385	 * Print a default message if we couldn't get the data from the
1386	 * adreno-smmu-priv
1387	 */
1388	if (!info) {
1389		pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n",
1390			iova, flags,
1391			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1392			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1393			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1394			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
1395
1396		return 0;
1397	}
1398
1399	if (info->fsr & ARM_SMMU_FSR_TF)
1400		type = "TRANSLATION";
1401	else if (info->fsr & ARM_SMMU_FSR_PF)
1402		type = "PERMISSION";
1403	else if (info->fsr & ARM_SMMU_FSR_EF)
1404		type = "EXTERNAL";
1405
1406	block = a6xx_fault_block(gpu, info->fsynr1 & 0xff);
1407
1408	pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
1409			info->ttbr0, iova,
1410			flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
1411			type, block,
1412			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
1413			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
1414			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
1415			gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)));
 
1416
1417	if (do_devcoredump) {
1418		/* Turn off the hangcheck timer to keep it from bothering us */
1419		del_timer(&gpu->hangcheck_timer);
1420
1421		gpu->fault_info.ttbr0 = info->ttbr0;
1422		gpu->fault_info.iova  = iova;
1423		gpu->fault_info.flags = flags;
1424		gpu->fault_info.type  = type;
1425		gpu->fault_info.block = block;
1426
1427		kthread_queue_work(gpu->worker, &gpu->fault_work);
1428	}
1429
1430	return 0;
1431}
1432
1433static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
1434{
1435	u32 status = gpu_read(gpu, REG_A6XX_CP_INTERRUPT_STATUS);
1436
1437	if (status & A6XX_CP_INT_CP_OPCODE_ERROR) {
1438		u32 val;
1439
1440		gpu_write(gpu, REG_A6XX_CP_SQE_STAT_ADDR, 1);
1441		val = gpu_read(gpu, REG_A6XX_CP_SQE_STAT_DATA);
1442		dev_err_ratelimited(&gpu->pdev->dev,
1443			"CP | opcode error | possible opcode=0x%8.8X\n",
1444			val);
1445	}
1446
1447	if (status & A6XX_CP_INT_CP_UCODE_ERROR)
1448		dev_err_ratelimited(&gpu->pdev->dev,
1449			"CP ucode error interrupt\n");
1450
1451	if (status & A6XX_CP_INT_CP_HW_FAULT_ERROR)
1452		dev_err_ratelimited(&gpu->pdev->dev, "CP | HW fault | status=0x%8.8X\n",
1453			gpu_read(gpu, REG_A6XX_CP_HW_FAULT));
1454
1455	if (status & A6XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
1456		u32 val = gpu_read(gpu, REG_A6XX_CP_PROTECT_STATUS);
1457
1458		dev_err_ratelimited(&gpu->pdev->dev,
1459			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
1460			val & (1 << 20) ? "READ" : "WRITE",
1461			(val & 0x3ffff), val);
1462	}
1463
1464	if (status & A6XX_CP_INT_CP_AHB_ERROR)
1465		dev_err_ratelimited(&gpu->pdev->dev, "CP AHB error interrupt\n");
1466
1467	if (status & A6XX_CP_INT_CP_VSD_PARITY_ERROR)
1468		dev_err_ratelimited(&gpu->pdev->dev, "CP VSD decoder parity error\n");
1469
1470	if (status & A6XX_CP_INT_CP_ILLEGAL_INSTR_ERROR)
1471		dev_err_ratelimited(&gpu->pdev->dev, "CP illegal instruction error\n");
1472
1473}
1474
1475static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
1476{
1477	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1478	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1479	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1480
1481	/*
1482	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
1483	 * but the fault handler will trigger the devcore dump, and we want
1484	 * to otherwise resume normally rather than killing the submit, so
1485	 * just bail.
1486	 */
1487	if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
1488		return;
1489
1490	/*
1491	 * Force the GPU to stay on until after we finish
1492	 * collecting information
1493	 */
1494	gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
 
1495
1496	DRM_DEV_ERROR(&gpu->pdev->dev,
1497		"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1498		ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
1499		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
1500		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
1501		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
1502		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1503		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1504		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1505		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
1506
1507	/* Turn off the hangcheck timer to keep it from bothering us */
1508	del_timer(&gpu->hangcheck_timer);
1509
1510	kthread_queue_work(gpu->worker, &gpu->recover_work);
1511}
1512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1513static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
1514{
1515	struct msm_drm_private *priv = gpu->dev->dev_private;
1516	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
1517
1518	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
1519
1520	if (priv->disable_err_irq)
1521		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
1522
1523	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
1524		a6xx_fault_detect_irq(gpu);
1525
1526	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR)
1527		dev_err_ratelimited(&gpu->pdev->dev, "CP | AHB bus error\n");
1528
1529	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1530		a6xx_cp_hw_err_irq(gpu);
1531
1532	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
1533		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");
1534
1535	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1536		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");
1537
1538	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1539		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");
1540
1541	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
 
 
 
1542		msm_gpu_retire(gpu);
 
 
 
 
 
1543
1544	return IRQ_HANDLED;
1545}
1546
1547static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
1548{
1549	return msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
1550}
1551
1552static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
1553{
1554	msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
1555}
1556
1557static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
1558{
1559	llcc_slice_deactivate(a6xx_gpu->llc_slice);
1560	llcc_slice_deactivate(a6xx_gpu->htw_llc_slice);
1561}
1562
1563static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
1564{
1565	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1566	struct msm_gpu *gpu = &adreno_gpu->base;
1567	u32 cntl1_regval = 0;
1568
1569	if (IS_ERR(a6xx_gpu->llc_mmio))
1570		return;
1571
1572	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
1573		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
1574
1575		gpu_scid &= 0x1f;
1576		cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
1577			       (gpu_scid << 15) | (gpu_scid << 20);
1578
1579		/* On A660, the SCID programming for UCHE traffic is done in
1580		 * A6XX_GBIF_SCACHE_CNTL0[14:10]
1581		 */
1582		if (adreno_is_a660_family(adreno_gpu))
1583			gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
1584				(1 << 8), (gpu_scid << 10) | (1 << 8));
1585	}
1586
1587	/*
1588	 * For targets with a MMU500, activate the slice but don't program the
1589	 * register.  The XBL will take care of that.
1590	 */
1591	if (!llcc_slice_activate(a6xx_gpu->htw_llc_slice)) {
1592		if (!a6xx_gpu->have_mmu500) {
1593			u32 gpuhtw_scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice);
1594
1595			gpuhtw_scid &= 0x1f;
1596			cntl1_regval |= FIELD_PREP(GENMASK(29, 25), gpuhtw_scid);
1597		}
1598	}
1599
1600	if (!cntl1_regval)
1601		return;
1602
1603	/*
1604	 * Program the slice IDs for the various GPU blocks and GPU MMU
1605	 * pagetables
1606	 */
1607	if (!a6xx_gpu->have_mmu500) {
1608		a6xx_llc_write(a6xx_gpu,
1609			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, cntl1_regval);
1610
1611		/*
1612		 * Program cacheability overrides to not allocate cache
1613		 * lines on a write miss
1614		 */
1615		a6xx_llc_rmw(a6xx_gpu,
1616			REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
1617		return;
1618	}
1619
1620	gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
1621}
1622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1623static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
1624{
 
 
 
 
1625	llcc_slice_putd(a6xx_gpu->llc_slice);
1626	llcc_slice_putd(a6xx_gpu->htw_llc_slice);
1627}
1628
1629static void a6xx_llc_slices_init(struct platform_device *pdev,
1630		struct a6xx_gpu *a6xx_gpu)
1631{
1632	struct device_node *phandle;
1633
 
 
 
 
1634	/*
1635	 * There is a different programming path for targets with an mmu500
1636	 * attached, so detect if that is the case
1637	 */
1638	phandle = of_parse_phandle(pdev->dev.of_node, "iommus", 0);
1639	a6xx_gpu->have_mmu500 = (phandle &&
1640		of_device_is_compatible(phandle, "arm,mmu-500"));
1641	of_node_put(phandle);
1642
1643	if (a6xx_gpu->have_mmu500)
 
 
1644		a6xx_gpu->llc_mmio = NULL;
1645	else
1646		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem");
1647
1648	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
1649	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
1650
1651	if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
1652		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
1653}
1654
1655static int a6xx_pm_resume(struct msm_gpu *gpu)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1656{
1657	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1658	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1659	int ret;
1660
1661	gpu->needs_hw_init = true;
1662
1663	trace_msm_gpu_resume(0);
1664
1665	mutex_lock(&a6xx_gpu->gmu.lock);
1666	ret = a6xx_gmu_resume(a6xx_gpu);
1667	mutex_unlock(&a6xx_gpu->gmu.lock);
1668	if (ret)
1669		return ret;
1670
1671	msm_devfreq_resume(gpu);
1672
1673	a6xx_llc_activate(a6xx_gpu);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1674
1675	return 0;
1676}
1677
1678static int a6xx_pm_suspend(struct msm_gpu *gpu)
1679{
1680	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1681	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1682	int i, ret;
1683
1684	trace_msm_gpu_suspend(0);
1685
1686	a6xx_llc_deactivate(a6xx_gpu);
1687
1688	msm_devfreq_suspend(gpu);
1689
1690	mutex_lock(&a6xx_gpu->gmu.lock);
1691	ret = a6xx_gmu_stop(a6xx_gpu);
1692	mutex_unlock(&a6xx_gpu->gmu.lock);
1693	if (ret)
1694		return ret;
1695
1696	if (a6xx_gpu->shadow_bo)
1697		for (i = 0; i < gpu->nr_rings; i++)
1698			a6xx_gpu->shadow[i] = 0;
1699
1700	gpu->suspend_count++;
1701
1702	return 0;
1703}
1704
1705static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1706{
1707	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1708	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1709
1710	mutex_lock(&a6xx_gpu->gmu.lock);
1711
1712	/* Force the GPU power on so we can read this register */
1713	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1714
1715	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
1716
1717	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
1718
1719	mutex_unlock(&a6xx_gpu->gmu.lock);
1720
1721	return 0;
1722}
1723
 
 
 
 
 
 
1724static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
1725{
1726	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1727	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1728
1729	return a6xx_gpu->cur_ring;
1730}
1731
1732static void a6xx_destroy(struct msm_gpu *gpu)
1733{
1734	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1735	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1736
1737	if (a6xx_gpu->sqe_bo) {
1738		msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
1739		drm_gem_object_put(a6xx_gpu->sqe_bo);
1740	}
1741
1742	if (a6xx_gpu->shadow_bo) {
1743		msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
1744		drm_gem_object_put(a6xx_gpu->shadow_bo);
1745	}
1746
1747	a6xx_llc_slices_destroy(a6xx_gpu);
1748
1749	a6xx_gmu_remove(a6xx_gpu);
1750
1751	adreno_gpu_cleanup(adreno_gpu);
1752
1753	kfree(a6xx_gpu);
1754}
1755
1756static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
1757{
1758	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1759	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1760	u64 busy_cycles;
1761
1762	/* 19.2MHz */
1763	*out_sample_rate = 19200000;
1764
1765	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
1766			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
1767			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
1768
1769	return busy_cycles;
1770}
1771
1772static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
1773			      bool suspended)
1774{
1775	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1776	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1777
1778	mutex_lock(&a6xx_gpu->gmu.lock);
1779	a6xx_gmu_set_freq(gpu, opp, suspended);
1780	mutex_unlock(&a6xx_gpu->gmu.lock);
1781}
1782
1783static struct msm_gem_address_space *
1784a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
1785{
1786	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1787	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1788	unsigned long quirks = 0;
1789
1790	/*
1791	 * This allows GPU to set the bus attributes required to use system
1792	 * cache on behalf of the iommu page table walker.
1793	 */
1794	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
 
1795		quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
1796
1797	return adreno_iommu_create_address_space(gpu, pdev, quirks);
1798}
1799
1800static struct msm_gem_address_space *
1801a6xx_create_private_address_space(struct msm_gpu *gpu)
1802{
1803	struct msm_mmu *mmu;
1804
1805	mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
1806
1807	if (IS_ERR(mmu))
1808		return ERR_CAST(mmu);
1809
1810	return msm_gem_address_space_create(mmu,
1811		"gpu", 0x100000000ULL,
1812		adreno_private_address_space_size(gpu));
1813}
1814
1815static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1816{
1817	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1818	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1819
1820	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
1821		return a6xx_gpu->shadow[ring->id];
1822
1823	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
1824}
1825
1826static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
1827{
1828	struct msm_cp_state cp_state = {
1829		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
1830		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
1831		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
1832		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
1833	};
1834	bool progress;
1835
1836	/*
1837	 * Adjust the remaining data to account for what has already been
1838	 * fetched from memory, but not yet consumed by the SQE.
1839	 *
1840	 * This is not *technically* correct, the amount buffered could
1841	 * exceed the IB size due to hw prefetching ahead, but:
1842	 *
1843	 * (1) We aren't trying to find the exact position, just whether
1844	 *     progress has been made
1845	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
1846	 *     to prevent prefetching into an unrelated submit.  (And
1847	 *     either way, at some point the ROQ will be full.)
1848	 */
1849	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
1850	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
1851
1852	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
1853
1854	ring->last_cp_state = cp_state;
1855
1856	return progress;
1857}
1858
1859static u32 a618_get_speed_bin(u32 fuse)
1860{
1861	if (fuse == 0)
1862		return 0;
1863	else if (fuse == 169)
1864		return 1;
1865	else if (fuse == 174)
1866		return 2;
1867
1868	return UINT_MAX;
1869}
1870
1871static u32 a619_get_speed_bin(u32 fuse)
1872{
1873	if (fuse == 0)
1874		return 0;
1875	else if (fuse == 120)
1876		return 4;
1877	else if (fuse == 138)
1878		return 3;
1879	else if (fuse == 169)
1880		return 2;
1881	else if (fuse == 180)
1882		return 1;
1883
1884	return UINT_MAX;
1885}
1886
1887static u32 adreno_7c3_get_speed_bin(u32 fuse)
1888{
1889	if (fuse == 0)
1890		return 0;
1891	else if (fuse == 117)
1892		return 0;
1893	else if (fuse == 190)
1894		return 1;
1895
1896	return UINT_MAX;
1897}
1898
1899static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
1900{
1901	u32 val = UINT_MAX;
1902
1903	if (adreno_cmp_rev(ADRENO_REV(6, 1, 8, ANY_ID), rev))
1904		val = a618_get_speed_bin(fuse);
1905
1906	if (adreno_cmp_rev(ADRENO_REV(6, 1, 9, ANY_ID), rev))
1907		val = a619_get_speed_bin(fuse);
1908
1909	if (adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), rev))
1910		val = adreno_7c3_get_speed_bin(fuse);
1911
1912	if (val == UINT_MAX) {
1913		DRM_DEV_ERROR(dev,
1914			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
1915			fuse);
1916		return UINT_MAX;
1917	}
1918
1919	return (1 << val);
1920}
1921
1922static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
1923{
1924	u32 supp_hw;
1925	u32 speedbin;
1926	int ret;
1927
1928	ret = adreno_read_speedbin(dev, &speedbin);
1929	/*
1930	 * -ENOENT means that the platform doesn't support speedbin which is
1931	 * fine
1932	 */
1933	if (ret == -ENOENT) {
1934		return 0;
1935	} else if (ret) {
1936		dev_err_probe(dev, ret,
1937			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
1938		return ret;
1939	}
1940
1941	supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
 
 
 
 
 
 
 
1942
1943	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
1944	if (ret)
1945		return ret;
1946
1947	return 0;
1948}
1949
1950static const struct adreno_gpu_funcs funcs = {
1951	.base = {
1952		.get_param = adreno_get_param,
1953		.set_param = adreno_set_param,
1954		.hw_init = a6xx_hw_init,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1955		.pm_suspend = a6xx_pm_suspend,
1956		.pm_resume = a6xx_pm_resume,
1957		.recover = a6xx_recover,
1958		.submit = a6xx_submit,
1959		.active_ring = a6xx_active_ring,
1960		.irq = a6xx_irq,
1961		.destroy = a6xx_destroy,
1962#if defined(CONFIG_DRM_MSM_GPU_STATE)
1963		.show = a6xx_show,
1964#endif
1965		.gpu_busy = a6xx_gpu_busy,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1966		.gpu_get_freq = a6xx_gmu_get_freq,
1967		.gpu_set_freq = a6xx_gpu_set_freq,
1968#if defined(CONFIG_DRM_MSM_GPU_STATE)
1969		.gpu_state_get = a6xx_gpu_state_get,
1970		.gpu_state_put = a6xx_gpu_state_put,
1971#endif
1972		.create_address_space = a6xx_create_address_space,
1973		.create_private_address_space = a6xx_create_private_address_space,
1974		.get_rptr = a6xx_get_rptr,
1975		.progress = a6xx_progress,
1976	},
1977	.get_timestamp = a6xx_get_timestamp,
1978};
1979
1980struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
1981{
1982	struct msm_drm_private *priv = dev->dev_private;
1983	struct platform_device *pdev = priv->gpu_pdev;
1984	struct adreno_platform_config *config = pdev->dev.platform_data;
1985	const struct adreno_info *info;
1986	struct device_node *node;
1987	struct a6xx_gpu *a6xx_gpu;
1988	struct adreno_gpu *adreno_gpu;
1989	struct msm_gpu *gpu;
 
 
1990	int ret;
1991
1992	a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
1993	if (!a6xx_gpu)
1994		return ERR_PTR(-ENOMEM);
1995
1996	adreno_gpu = &a6xx_gpu->base;
1997	gpu = &adreno_gpu->base;
1998
 
 
1999	adreno_gpu->registers = NULL;
2000
2001	/*
2002	 * We need to know the platform type before calling into adreno_gpu_init
2003	 * so that the hw_apriv flag can be correctly set. Snoop into the info
2004	 * and grab the revision number
2005	 */
2006	info = adreno_info(config->rev);
 
 
 
2007
2008	if (info && (info->revn == 650 || info->revn == 660 ||
2009			adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
2010		adreno_gpu->base.hw_apriv = true;
 
2011
2012	a6xx_llc_slices_init(pdev, a6xx_gpu);
2013
2014	ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
2015	if (ret) {
2016		a6xx_destroy(&(a6xx_gpu->base.base));
 
2017		return ERR_PTR(ret);
2018	}
2019
2020	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
 
 
 
 
 
 
 
 
2021	if (ret) {
2022		a6xx_destroy(&(a6xx_gpu->base.base));
2023		return ERR_PTR(ret);
2024	}
2025
2026	/*
2027	 * For now only clamp to idle freq for devices where this is known not
2028	 * to cause power supply issues:
2029	 */
2030	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
2031		gpu->clamp_to_idle = true;
2032
2033	/* Check if there is a GMU phandle and set it up */
2034	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
2035
2036	/* FIXME: How do we gracefully handle this? */
2037	BUG_ON(!node);
2038
2039	ret = a6xx_gmu_init(a6xx_gpu, node);
2040	of_node_put(node);
2041	if (ret) {
2042		a6xx_destroy(&(a6xx_gpu->base.base));
2043		return ERR_PTR(ret);
2044	}
2045
 
 
 
 
 
 
 
 
2046	if (gpu->aspace)
2047		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
2048				a6xx_fault_handler);
 
 
 
 
2049
2050	return gpu;
2051}