// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

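/*
 * IRQs unmasked at init: the RBBM/CP error conditions used for hang
 * detection, plus CACHE_FLUSH_TS, which fires once a submit's fence
 * value has been written back and drives retirement in a4xx_irq().
 */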
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

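/*
 * Write a submit into the ringbuffer: each IB becomes a CP_INDIRECT_BUFFER_PFE
 * packet (context-restore buffers are skipped when the context has not
 * changed), the seqno is stashed in a scratch register, and a CACHE_FLUSH_TS
 * event writes the fence value to memory and raises an IRQ.
 */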
static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The msm GPU pointer
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

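/*
 * Bring up the CP micro engine with a CP_ME_INIT packet.  The 17-dword
 * payload below is presumably carried over from the downstream driver;
 * the individual fields are not documented here.
 */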
static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
	return a4xx_idle(gpu);
}

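/*
 * One-time hardware init: program the VBIF/QoS settings per variant,
 * enable error reporting, hang detection and clock gating, set up CP
 * register protection, then load the PM4/PFP microcode and release the
 * micro engine from halt.
 */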
static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;
		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

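	/*
	 * Each CP_PROTECT value below appears to use the downstream
	 * ADRENO_PROTECT_RW()-style encoding (an assumption, not confirmed
	 * against a4xx documentation): read/write protect flags in bits
	 * 30:29, log2 of the range length in bits 28:24, and the base
	 * register offset in the low bits.  Roughly:
	 *
	 *	#define A4XX_PROTECT_RW(addr, len) \
	 *		(BIT(30) | BIT(29) | (ilog2(len) << 24) | ((addr) & 0x1ffff))
	 *
	 * so 0x62000010 would protect the 4 registers starting at 0x10.
	 */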
	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

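/*
 * Hang recovery: log the CP scratch registers (plus a full register dump
 * when hang_debug is set), pulse RBBM_SW_RESET_CMD -- the intervening
 * read back presumably ensures the write has posted before the reset is
 * deasserted -- and hand off to the common adreno_recover() path.
 */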
static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

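/*
 * IRQ handler: decode CP protect faults (bit 24 of CP_PROTECT_STATUS
 * distinguishes writes from reads, the low bits hold the faulting
 * address), ack all status bits, and retire completed submits.
 */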
static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

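/*
 * Register ranges captured in register dumps and crash state, as
 * inclusive [start, end] pairs terminated by a ~0 sentinel.
 */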
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};

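/* Capture the common adreno crash state plus the a4xx RBBM status. */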
static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

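/*
 * On A430 the SP/TP power rail is toggled by software: resume clears the
 * SW_COLLAPSE bit (0x778000 and 0x778001 differ only in that bit, the
 * rest being the default POWER_CNTL_IP values) and polls POWER_STATUS
 * until the rail reports on; suspend sets the bit again.
 */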
static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;
		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}
	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}
	return 0;
}

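/*
 * The timestamp comes from the CP_0 performance counter pair, which
 * a4xx_hw_init() points at the CP_ALWAYS_COUNT countable, giving a
 * free-running counter.
 */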
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};

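/*
 * Probe-time construction: allocate the a4xx_gpu wrapper, run the common
 * adreno init, select the per-variant register dump list, carve OCMEM out
 * for GMEM if needed, and vote the interconnect paths to maximum for now.
 */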
struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;

	/* if needed, allocate gmem: */
	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
				    &a4xx_gpu->ocmem);
	if (ret)
		goto fail;

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}