// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

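/*
 * Write one submit into the ringbuffer: emit the userspace command
 * buffers as indirect buffers, record the submit's sequence number, and
 * request a CACHE_FLUSH_TS event so the GPU writes the fence value and
 * raises an IRQ once it has passed this point.
 */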
static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

#if 0
	/* Dummy set-constant to trigger context rollover */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
	OUT_RING(ring, 0x00000000);
#endif

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

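/*
 * Bring up the CP micro engine: emit a CP_ME_INIT packet with the default
 * initialization values, then wait for the GPU to go idle.
 */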
static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
	return a3xx_idle(gpu);
}

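/*
 * One-time hardware bring-up: program the per-SoC VBIF configuration,
 * enable RBBM error reporting and hang detection, set up clock gating,
 * protect privileged CP register ranges, load the PM4/PFP microcode and
 * start the CP micro engine.
 */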
static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating. This allows AXI to run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

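/*
 * Recover from a hang: dump the CP scratch registers (plus the full
 * register dump if hang_debug is set), pulse RBBM_SW_RESET_CMD to
 * soft-reset the core, then hand off to the common adreno recovery path.
 */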
static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

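/*
 * IRQ handler: read and acknowledge the RBBM interrupt status, then let
 * the core retire any submits whose fence has been written.
 */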
static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

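/*
 * Register ranges (inclusive start/end pairs, terminated by the ~0
 * sentinel) dumped for debug output and GPU crash state.
 */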
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

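/*
 * Capture the common adreno GPU state plus the A3XX RBBM status register
 * for crash dumps and debugfs.
 */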
static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

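/*
 * The RPTR shadow is disabled in a3xx_hw_init(), so read the CP read
 * pointer straight from the register and cache it in the memptrs.
 */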
static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = a3xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_create_address_space,
		.get_rptr = a3xx_get_rptr,
	},
};

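/*
 * SP performance counters exposed through the msm_gpu perfcounter
 * interface (ALU activity).
 */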
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

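/*
 * Probe-time constructor: allocate the a3xx_gpu wrapper, register it with
 * the common adreno/msm_gpu layers, set up OCMEM for A330 and vote the
 * interconnect paths to their maximum bandwidth.
 */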
struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}