// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

#if 0
	/* Dummy set-constant to trigger context rollover */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
	OUT_RING(ring, 0x00000000);
#endif

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
	return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating. This allows the AXI bus to run
		 * at a higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

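/* Register ranges (inclusive start/end pairs) dumped for debugging,
 * terminated by the ~0 sentinel:
 */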
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

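/* The RPTR shadow is disabled in a3xx_hw_init() (AXXX_CP_RB_CNTL_NO_UPDATE),
 * so read the CP read pointer back from the register:
 */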
static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = a3xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a3xx_get_rptr,
	},
};

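/* Performance counters exposed via gpu->perfcntrs:
 * select register, sample (LO) register, counter select value, and name:
 */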
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}