1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3 */
4
5#include <linux/kernel.h>
6#include <linux/types.h>
7#include <linux/cpumask.h>
8#include <linux/qcom_scm.h>
9#include <linux/pm_opp.h>
10#include <linux/nvmem-consumer.h>
11#include <linux/slab.h>
12#include "msm_gem.h"
13#include "msm_mmu.h"
14#include "a5xx_gpu.h"
15
16extern bool hang_debug;
17static void a5xx_dump(struct msm_gpu *gpu);
18
19#define GPU_PAS_ID 13
20
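/*
 * Update the software copy of the write pointer and, if this is the active
 * ring and no preemption switch is in flight, poke the new value into
 * CP_RB_WPTR so the CP starts fetching the freshly written commands.
 */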
21static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22{
23 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 uint32_t wptr;
26 unsigned long flags;
27
28 spin_lock_irqsave(&ring->lock, flags);
29
30 /* Copy the shadow to the actual register */
31 ring->cur = ring->next;
32
33 /* Make sure to wrap wptr if we need to */
34 wptr = get_wptr(ring);
35
36 spin_unlock_irqrestore(&ring->lock, flags);
37
38 /* Make sure everything is posted before making a decision */
39 mb();
40
41 /* Update HW if this is the current ring and we are not in preempt */
42 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44}
45
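/*
 * Debug path used with CONFIG_DRM_MSM_GPU_SUDO: instead of pointing the CP
 * at the userspace buffers with indirect-buffer packets, copy the command
 * stream dword by dword into the ringbuffer itself, then wait for it to
 * complete and retire the submit manually.
 */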
46static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 struct msm_file_private *ctx)
48{
49 struct msm_drm_private *priv = gpu->dev->dev_private;
50 struct msm_ringbuffer *ring = submit->ring;
51 struct msm_gem_object *obj;
52 uint32_t *ptr, dwords;
53 unsigned int i, j;
54
55 for (i = 0; i < submit->nr_cmds; i++) {
56 switch (submit->cmd[i].type) {
57 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 break;
59 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 if (priv->lastctx == ctx)
61 break;
62 fallthrough;
63 case MSM_SUBMIT_CMD_BUF:
64 /* copy commands into RB: */
65 obj = submit->bos[submit->cmd[i].idx].obj;
66 dwords = submit->cmd[i].size;
67
68 ptr = msm_gem_get_vaddr(&obj->base);
69
70 /* _get_vaddr() shouldn't fail at this point,
71 * since we've already mapped it once in
72 * submit_reloc()
73 */
74 if (WARN_ON(!ptr))
75 return;
76
77 for (j = 0; j < dwords; j++) {
78 /* normally the OUT_PKTn() would wait
79 * for space for the packet. But since
80 * we just OUT_RING() the whole thing,
81 * need to call adreno_wait_ring()
82 * ourself:
83 */
84 adreno_wait_ring(ring, 1);
85 OUT_RING(ring, ptr[j]);
86 }
87
88 msm_gem_put_vaddr(&obj->base);
89
90 break;
91 }
92 }
93
94 a5xx_flush(gpu, ring);
95 a5xx_preempt_trigger(gpu);
96
97 /* we might not necessarily have a cmd from userspace to
98 * trigger an event to know that submit has completed, so
99 * do this manually:
100 */
101 a5xx_idle(gpu, ring);
102 ring->memptrs->fence = submit->seqno;
103 msm_gpu_retire(gpu);
104}
105
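/*
 * Normal submission path: program the preemption save record for this ring,
 * emit the indirect buffers, write the fence via a CACHE_FLUSH_TS event and
 * finish with a CONTEXT_SWITCH_YIELD so a pending preemption can take over.
 */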
106static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 struct msm_file_private *ctx)
108{
109 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 struct msm_drm_private *priv = gpu->dev->dev_private;
112 struct msm_ringbuffer *ring = submit->ring;
113 unsigned int i, ibs = 0;
114
115 if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 priv->lastctx = NULL;
117 a5xx_submit_in_rb(gpu, submit, ctx);
118 return;
119 }
120
121 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 OUT_RING(ring, 0x02);
123
124 /* Turn off protected mode to write to special registers */
125 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126 OUT_RING(ring, 0);
127
128 /* Set the save preemption record for the ring/command */
129 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132
133 /* Turn back on protected mode */
134 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 OUT_RING(ring, 1);
136
137 /* Enable local preemption for finegrain preemption */
138 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
139 OUT_RING(ring, 0x02);
140
141 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 OUT_RING(ring, 0x02);
144
145 /* Submit the commands */
146 for (i = 0; i < submit->nr_cmds; i++) {
147 switch (submit->cmd[i].type) {
148 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 break;
150 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 if (priv->lastctx == ctx)
152 break;
153 fallthrough;
154 case MSM_SUBMIT_CMD_BUF:
155 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 OUT_RING(ring, submit->cmd[i].size);
159 ibs++;
160 break;
161 }
162 }
163
164 /*
165 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 * are done rendering - otherwise a lucky preemption would start
167 * replaying from the last checkpoint
168 */
169 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170 OUT_RING(ring, 0);
171 OUT_RING(ring, 0);
172 OUT_RING(ring, 0);
173 OUT_RING(ring, 0);
174 OUT_RING(ring, 0);
175
176 /* Turn off IB level preemptions */
177 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 OUT_RING(ring, 0x01);
179
180 /* Write the fence to the scratch register */
181 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 OUT_RING(ring, submit->seqno);
183
184 /*
185 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 * timestamp is written to the memory and then triggers the interrupt
187 */
188 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
190 CP_EVENT_WRITE_0_IRQ);
191 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
192 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
193 OUT_RING(ring, submit->seqno);
194
195 /* Yield the floor on command completion */
196 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
197 /*
198 * If dword[2:1] are non zero, they specify an address for the CP to
199 * write the value of dword[3] to on preemption complete. Write 0 to
200 * skip the write
201 */
202 OUT_RING(ring, 0x00);
203 OUT_RING(ring, 0x00);
204 /* Data value - not used if the address above is 0 */
205 OUT_RING(ring, 0x01);
206 /* Set bit 0 to trigger an interrupt on preempt complete */
207 OUT_RING(ring, 0x01);
208
209 a5xx_flush(gpu, ring);
210
211 /* Check to see if we need to start preemption */
212 a5xx_preempt_trigger(gpu);
213}
214
215static const struct {
216 u32 offset;
217 u32 value;
218} a5xx_hwcg[] = {
219 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
220 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
221 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
222 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
223 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
224 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
225 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
226 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
227 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
228 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
229 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
230 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
231 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
232 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
233 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
234 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
235 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
236 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
237 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
238 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
239 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
240 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
241 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
242 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
243 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
244 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
245 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
246 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
247 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
248 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
249 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
250 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
251 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
252 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
253 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
254 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
255 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
256 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
257 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
258 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
259 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
260 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
261 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
262 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
263 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
264 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
265 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
266 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
267 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
268 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
269 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
270 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
271 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
272 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
273 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
274 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
275 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
276 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
277 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
278 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
279 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
280 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
281 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
282 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
283 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
284 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
285 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
286 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
287 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
288 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
289 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
290 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
291 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
292 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
293 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
294 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
295 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
296 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
297 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
298 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
299 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
300 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
301 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
302 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
303 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
304 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
305 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
306 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
307 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
308 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
309 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
310 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
311};
312
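/*
 * Apply (or clear) the static hardware clock gating configuration from the
 * table above, along with the top-level RBBM_CLOCK_CNTL and ISDB counter
 * settings.
 */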
313void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
314{
315 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
316 unsigned int i;
317
318 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
319 gpu_write(gpu, a5xx_hwcg[i].offset,
320 state ? a5xx_hwcg[i].value : 0);
321
322 if (adreno_is_a540(adreno_gpu)) {
323 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
324 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
325 }
326
327 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
328 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
329}
330
331static int a5xx_me_init(struct msm_gpu *gpu)
332{
333 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
334 struct msm_ringbuffer *ring = gpu->rb[0];
335
336 OUT_PKT7(ring, CP_ME_INIT, 8);
337
338 OUT_RING(ring, 0x0000002F);
339
340 /* Enable multiple hardware contexts */
341 OUT_RING(ring, 0x00000003);
342
343 /* Enable error detection */
344 OUT_RING(ring, 0x20000000);
345
346 /* Don't enable header dump */
347 OUT_RING(ring, 0x00000000);
348 OUT_RING(ring, 0x00000000);
349
350 /* Specify workarounds for various microcode issues */
351 if (adreno_is_a530(adreno_gpu)) {
352 /* Workaround for token end syncs
353 * Force a WFI after every direct-render 3D mode draw and every
354 * 2D mode 3 draw
355 */
356 OUT_RING(ring, 0x0000000B);
357 } else if (adreno_is_a510(adreno_gpu)) {
358 /* Workaround for token and syncs */
359 OUT_RING(ring, 0x00000001);
360 } else {
361 /* No workarounds enabled */
362 OUT_RING(ring, 0x00000000);
363 }
364
365 OUT_RING(ring, 0x00000000);
366 OUT_RING(ring, 0x00000000);
367
368 gpu->funcs->flush(gpu, ring);
369 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
370}
371
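/*
 * Prime the preemption machinery from ring 0: point the CP at this ring's
 * save record and enable local preemption and yielding. This is a no-op
 * when only a single ring is configured.
 */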
372static int a5xx_preempt_start(struct msm_gpu *gpu)
373{
374 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
375 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
376 struct msm_ringbuffer *ring = gpu->rb[0];
377
378 if (gpu->nr_rings == 1)
379 return 0;
380
381 /* Turn off protected mode to write to special registers */
382 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
383 OUT_RING(ring, 0);
384
385 /* Set the save preemption record for the ring/command */
386 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
387 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
389
390 /* Turn back on protected mode */
391 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
392 OUT_RING(ring, 1);
393
394 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
395 OUT_RING(ring, 0x00);
396
397 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
398 OUT_RING(ring, 0x01);
399
400 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
401 OUT_RING(ring, 0x01);
402
403 /* Yield the floor on command completion */
404 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
405 OUT_RING(ring, 0x00);
406 OUT_RING(ring, 0x00);
407 OUT_RING(ring, 0x01);
408 OUT_RING(ring, 0x01);
409
410 gpu->funcs->flush(gpu, ring);
411
412 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
413}
414
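/*
 * Create the PM4 and PFP firmware objects on first use and program their
 * GPU addresses into the CP instruction base registers.
 */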
415static int a5xx_ucode_init(struct msm_gpu *gpu)
416{
417 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
418 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
419 int ret;
420
421 if (!a5xx_gpu->pm4_bo) {
422 a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
423 adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
424
426 if (IS_ERR(a5xx_gpu->pm4_bo)) {
427 ret = PTR_ERR(a5xx_gpu->pm4_bo);
428 a5xx_gpu->pm4_bo = NULL;
429 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
430 ret);
431 return ret;
432 }
433
434 msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
435 }
436
437 if (!a5xx_gpu->pfp_bo) {
438 a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
439 adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
440
441 if (IS_ERR(a5xx_gpu->pfp_bo)) {
442 ret = PTR_ERR(a5xx_gpu->pfp_bo);
443 a5xx_gpu->pfp_bo = NULL;
444 DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
445 ret);
446 return ret;
447 }
448
449 msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
450 }
451
452 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
453 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
454
455 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
456 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
457
458 return 0;
459}
460
461#define SCM_GPU_ZAP_SHADER_RESUME 0
462
463static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
464{
465 int ret;
466
467 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
468 if (ret)
469 DRM_ERROR("%s: zap-shader resume failed: %d\n",
470 gpu->name, ret);
471
472 return ret;
473}
474
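/*
 * Load the zap shader into the secure world on the first initialization;
 * on later init cycles just ask the remote processor to resume the image
 * that is already loaded.
 */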
475static int a5xx_zap_shader_init(struct msm_gpu *gpu)
476{
477 static bool loaded;
478 int ret;
479
480 /*
481 * If the zap shader is already loaded into memory we just need to kick
482 * the remote processor to reinitialize it
483 */
484 if (loaded)
485 return a5xx_zap_shader_resume(gpu);
486
487 ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
488
489 loaded = !ret;
490 return ret;
491}
492
493#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
494 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
495 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
496 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
497 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
498 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
499 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
500 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
501 A5XX_RBBM_INT_0_MASK_CP_SW | \
502 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
503 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
504 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
505
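/*
 * Bring the hardware up from scratch: VBIF arbitration, fault detection,
 * performance counters, UCHE/GMEM ranges, CP protection, 64-bit addressing,
 * firmware and ringbuffer setup, power/GPMU init and finally the switch out
 * of secure mode.
 */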
506static int a5xx_hw_init(struct msm_gpu *gpu)
507{
508 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
509 int ret;
510
511 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
512
513 if (adreno_is_a540(adreno_gpu))
514 gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
515
516 /* Make all blocks contribute to the GPU BUSY perf counter */
517 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
518
519 /* Enable RBBM error reporting bits */
520 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
521
522 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
523 /*
524 * Mask out the activity signals from RB1-3 to avoid false
525 * positives
526 */
527
528 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
529 0xF0000000);
530 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
531 0xFFFFFFFF);
532 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
533 0xFFFFFFFF);
534 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
535 0xFFFFFFFF);
536 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
537 0xFFFFFFFF);
538 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
539 0xFFFFFFFF);
540 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
541 0xFFFFFFFF);
542 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
543 0xFFFFFFFF);
544 }
545
546 /* Enable fault detection */
547 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
548 (1 << 30) | 0xFFFF);
549
550 /* Turn on performance counters */
551 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
552
553 /* Select CP0 to always count cycles */
554 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
555
556 /* Select RBBM0 to countable 6 to get the busy status for devfreq */
557 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
558
559 /* Increase VFD cache access so LRZ and other data gets evicted less */
560 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
561
562 /* Disable L2 bypass in the UCHE */
563 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
564 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
565 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
566 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
567
568 /* Set the UCHE GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem - 1) */
569 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
570 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
571 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
572 0x00100000 + adreno_gpu->gmem - 1);
573 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
574
575 if (adreno_is_a510(adreno_gpu)) {
576 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
577 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
578 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
579 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
580 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
581 (0x200 << 11 | 0x200 << 22));
582 } else {
583 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
584 if (adreno_is_a530(adreno_gpu))
585 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
586 if (adreno_is_a540(adreno_gpu))
587 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
588 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
589 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
590 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
591 (0x400 << 11 | 0x300 << 22));
592 }
593
594 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
595 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
596
597 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
598
599 /* Enable USE_RETENTION_FLOPS */
600 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
601
602 /* Enable ME/PFP split notification */
603 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
604
605 /*
606 * On A5xx, the CCU can send a context_done event for a particular
607 * context to the UCHE, and ultimately to the CP, while a valid
608 * transaction for that context is still inside the CCU. The CP may
609 * then program config registers, causing the in-flight transaction in
610 * the CCU to be interpreted differently and faulting the GPU. This
611 * bug is fixed in the latest A510 revision; the fix is enabled by
612 * clearing bit[11] of RB_DBG_ECO_CNTL (the default of 1 leaves it
613 * disabled). On older A510 revisions this bit is unused.
614 */
615 if (adreno_is_a510(adreno_gpu))
616 gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
617
618 /* Enable HWCG */
619 a5xx_set_hwcg(gpu, true);
620
621 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
622
623 /* Set the highest bank bit */
624 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
625 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
626 if (adreno_is_a540(adreno_gpu))
627 gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
628
629 /* Protect registers from the CP */
630 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
631
632 /* RBBM */
633 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
634 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
635 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
636 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
637 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
638 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
639
640 /* Content protect */
641 gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
642 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
643 16));
644 gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
645 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
646
647 /* CP */
648 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
649 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
650 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
651 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
652
653 /* RB */
654 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
655 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
656
657 /* VPC */
658 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
659 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
660
661 /* UCHE */
662 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
663
664 if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
665 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
666 ADRENO_PROTECT_RW(0x10000, 0x8000));
667
668 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
669 /*
670 * Disable the trusted memory range - we don't actually support secure
671 * memory rendering at this point in time and we don't want to block off
672 * part of the virtual memory space.
673 */
674 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
675 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
676 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
677
678 /* Put the GPU into 64 bit by default */
679 gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
680 gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
681 gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
682 gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
683 gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
684 gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
685 gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
686 gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
687 gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
688 gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
689 gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
690 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
691
692 /*
693 * A VPC corner case with local memory load/kill leads to corrupt
694 * internal state. The normal disable does not work for all A5xx chips,
695 * so use the following register settings to disable it.
696 */
697 if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
698 gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
699 gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
700 }
701
702 ret = adreno_hw_init(gpu);
703 if (ret)
704 return ret;
705
706 if (!adreno_is_a510(adreno_gpu))
707 a5xx_gpmu_ucode_init(gpu);
708
709 ret = a5xx_ucode_init(gpu);
710 if (ret)
711 return ret;
712
713 /* Set the ringbuffer address */
714 gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
715 gpu->rb[0]->iova);
716
717 gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
718 MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
719
720 a5xx_preempt_hw_init(gpu);
721
722 /* Set the interrupt mask - only the sources in A5XX_INT_MASK are unmasked */
723 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
724
725 /* Clear ME_HALT to start the micro engine */
726 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
727 ret = a5xx_me_init(gpu);
728 if (ret)
729 return ret;
730
731 ret = a5xx_power_init(gpu);
732 if (ret)
733 return ret;
734
735 /*
736 * Send a pipeline event stat to get misbehaving counters to start
737 * ticking correctly
738 */
739 if (adreno_is_a530(adreno_gpu)) {
740 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
741 OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
742
743 gpu->funcs->flush(gpu, gpu->rb[0]);
744 if (!a5xx_idle(gpu, gpu->rb[0]))
745 return -EINVAL;
746 }
747
748 /*
749 * If the chip we are using supports loading a zap shader, try to
750 * load one into the secure world. If successful we can use the CP
751 * to switch out of secure mode. If not, we have no recourse but to
752 * try to switch ourselves out manually. If we guessed wrong then
753 * access to the RBBM_SECVID_TRUST_CNTL register will be blocked and
754 * a permissions violation will soon follow.
755 */
756 ret = a5xx_zap_shader_init(gpu);
757 if (!ret) {
758 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
759 OUT_RING(gpu->rb[0], 0x00000000);
760
761 gpu->funcs->flush(gpu, gpu->rb[0]);
762 if (!a5xx_idle(gpu, gpu->rb[0]))
763 return -EINVAL;
764 } else if (ret == -ENODEV) {
765 /*
766 * This device does not use a zap shader (but print a warning
767 * just in case someone got their DT wrong.. hopefully they
768 * have a debug UART to realize the error of their ways...
769 * if you mess this up you are about to crash horribly)
770 */
771 dev_warn_once(gpu->dev->dev,
772 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
773 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
774 } else {
775 return ret;
776 }
777
778 /* Last step - yield the ringbuffer */
779 a5xx_preempt_start(gpu);
780
781 return 0;
782}
783
784static void a5xx_recover(struct msm_gpu *gpu)
785{
786 int i;
787
788 adreno_dump_info(gpu);
789
790 for (i = 0; i < 8; i++) {
791 printk("CP_SCRATCH_REG%d: %u\n", i,
792 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
793 }
794
795 if (hang_debug)
796 a5xx_dump(gpu);
797
798 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
799 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
800 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
801 adreno_recover(gpu);
802}
803
804static void a5xx_destroy(struct msm_gpu *gpu)
805{
806 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
807 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
808
809 DBG("%s", gpu->name);
810
811 a5xx_preempt_fini(gpu);
812
813 if (a5xx_gpu->pm4_bo) {
814 msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
815 drm_gem_object_put(a5xx_gpu->pm4_bo);
816 }
817
818 if (a5xx_gpu->pfp_bo) {
819 msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
820 drm_gem_object_put(a5xx_gpu->pfp_bo);
821 }
822
823 if (a5xx_gpu->gpmu_bo) {
824 msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
825 drm_gem_object_put(a5xx_gpu->gpmu_bo);
826 }
827
828 adreno_gpu_cleanup(adreno_gpu);
829 kfree(a5xx_gpu);
830}
831
832static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
833{
834 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
835 return false;
836
837 /*
838 * Nearly every abnormality ends up pausing the GPU and triggering a
839 * fault so we can safely just watch for this one interrupt to fire
840 */
841 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
842 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
843}
844
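/*
 * Wait for the CP to drain the given ring (which must be the current one),
 * then poll until RBBM_STATUS reports idle with no hang-detect interrupt
 * pending, printing an error on timeout.
 */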
845bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
846{
847 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
848 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
849
850 if (ring != a5xx_gpu->cur_ring) {
851 WARN(1, "Tried to idle a non-current ringbuffer\n");
852 return false;
853 }
854
855 /* wait for CP to drain ringbuffer: */
856 if (!adreno_idle(gpu, ring))
857 return false;
858
859 if (spin_until(_a5xx_check_idle(gpu))) {
860 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
861 gpu->name, __builtin_return_address(0),
862 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
863 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
864 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
865 gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
866 return false;
867 }
868
869 return true;
870}
871
872static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
873{
874 struct msm_gpu *gpu = arg;
875 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
876 iova, flags,
877 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
878 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
879 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
880 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
881
882 return -EFAULT;
883}
884
885static void a5xx_cp_err_irq(struct msm_gpu *gpu)
886{
887 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
888
889 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
890 u32 val;
891
892 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
893
894 /*
895 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
896 * read it twice
897 */
898
899 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
900 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
901
902 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
903 val);
904 }
905
906 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
907 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
908 gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
909
910 if (status & A5XX_CP_INT_CP_DMA_ERROR)
911 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
912
913 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
914 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
915
916 dev_err_ratelimited(gpu->dev->dev,
917 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
918 val & (1 << 24) ? "WRITE" : "READ",
919 (val & 0xFFFFF) >> 2, val);
920 }
921
922 if (status & A5XX_CP_INT_CP_AHB_ERROR) {
923 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
924 const char *access[16] = { "reserved", "reserved",
925 "timestamp lo", "timestamp hi", "pfp read", "pfp write",
926 "", "", "me read", "me write", "", "", "crashdump read",
927 "crashdump write" };
928
929 dev_err_ratelimited(gpu->dev->dev,
930 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
931 status & 0xFFFFF, access[(status >> 24) & 0xF],
932 (status & (1 << 31)), status);
933 }
934}
935
936static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
937{
938 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
939 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
940
941 dev_err_ratelimited(gpu->dev->dev,
942 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
943 val & (1 << 28) ? "WRITE" : "READ",
944 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
945 (val >> 24) & 0xF);
946
947 /* Clear the error */
948 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
949
950 /* Clear the interrupt */
951 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
952 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
953 }
954
955 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
956 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
957
958 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
959 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
960 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
961
962 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
963 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
964 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
965
966 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
967 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
968 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
969
970 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
971 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
972
973 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
974 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
975}
976
977static void a5xx_uche_err_irq(struct msm_gpu *gpu)
978{
979 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
980
981 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
982
983 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
984 addr);
985}
986
987static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
988{
989 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
990}
991
992static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
993{
994 struct drm_device *dev = gpu->dev;
995 struct msm_drm_private *priv = dev->dev_private;
996 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
997
998 DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
999 ring ? ring->id : -1, ring ? ring->seqno : 0,
1000 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1001 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1002 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1003 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1004 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1005 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1006 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1007
1008 /* Turn off the hangcheck timer to keep it from bothering us */
1009 del_timer(&gpu->hangcheck_timer);
1010
1011 queue_work(priv->wq, &gpu->recover_work);
1012}
1013
1014#define RBBM_ERROR_MASK \
1015 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1016 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1017 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1018 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1019 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1020 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1021
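/*
 * Top-level interrupt handler: ack everything except RBBM_AHB_ERROR (which
 * is cleared in a5xx_rbbm_err_irq() once the source is handled) and fan the
 * status bits out to the per-block error handlers, the retire path and the
 * preemption code.
 */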
1022static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1023{
1024 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1025
1026 /*
1027 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1028 * before the source is cleared the interrupt will storm.
1029 */
1030 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1031 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1032
1033 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1034 if (status & RBBM_ERROR_MASK)
1035 a5xx_rbbm_err_irq(gpu, status);
1036
1037 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1038 a5xx_cp_err_irq(gpu);
1039
1040 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1041 a5xx_fault_detect_irq(gpu);
1042
1043 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1044 a5xx_uche_err_irq(gpu);
1045
1046 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1047 a5xx_gpmu_err_irq(gpu);
1048
1049 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1050 a5xx_preempt_trigger(gpu);
1051 msm_gpu_retire(gpu);
1052 }
1053
1054 if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1055 a5xx_preempt_irq(gpu);
1056
1057 return IRQ_HANDLED;
1058}
1059
1060static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1061 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1062 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1063 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1064 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1065 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1066 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1067 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1068 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1069};
1070
1071static const u32 a5xx_registers[] = {
1072 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1073 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1074 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1075 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1076 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1077 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1078 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1079 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1080 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1081 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1082 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1083 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1084 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1085 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1086 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1087 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1088 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1089 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1090 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1091 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1092 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1093 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1094 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1095 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1096 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1097 0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1098 0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1099 0xAC60, 0xAC60, ~0,
1100};
1101
1102static void a5xx_dump(struct msm_gpu *gpu)
1103{
1104 DRM_DEV_INFO(gpu->dev->dev, "status: %08x\n",
1105 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1106 adreno_dump(gpu);
1107}
1108
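/*
 * Power the GPU up: resume the core, then (on everything except A510) turn
 * on the GPMU-controlled RBCCU and SP power domains, polling their clock
 * status bits before declaring success.
 */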
1109static int a5xx_pm_resume(struct msm_gpu *gpu)
1110{
1111 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1112 int ret;
1113
1114 /* Turn on the core power */
1115 ret = msm_gpu_pm_resume(gpu);
1116 if (ret)
1117 return ret;
1118
1119 if (adreno_is_a510(adreno_gpu)) {
1120 /* Halt the sp_input_clk at HM level */
1121 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1122 a5xx_set_hwcg(gpu, true);
1123 /* Turn on sp_input_clk at HM level */
1124 gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1125 return 0;
1126 }
1127
1128 /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1129 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1130
1131 /* Wait 3 usecs before polling */
1132 udelay(3);
1133
1134 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1135 (1 << 20), (1 << 20));
1136 if (ret) {
1137 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1138 gpu->name,
1139 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1140 return ret;
1141 }
1142
1143 /* Turn on the SP domain */
1144 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1145 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1146 (1 << 20), (1 << 20));
1147 if (ret)
1148 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1149 gpu->name);
1150
1151 return ret;
1152}
1153
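/*
 * Power the GPU down: halt the VBIF XIN ports (three on A510, four
 * elsewhere), pulse the VBIF software reset and hand off to the generic
 * suspend path.
 */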
1154static int a5xx_pm_suspend(struct msm_gpu *gpu)
1155{
1156 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1157 u32 mask = 0xf;
1158
1159 /* A510 has 3 XIN ports in VBIF */
1160 if (adreno_is_a510(adreno_gpu))
1161 mask = 0x7;
1162
1163 /* Clear the VBIF pipe before shutting down */
1164 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1165 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1166 mask) == mask);
1167
1168 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1169
1170 /*
1171 * Reset the VBIF before power collapse to avoid issues with FIFO
1172 * entries
1173 */
1174 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1175 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1176
1177 return msm_gpu_pm_suspend(gpu);
1178}
1179
1180static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1181{
1182 *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1183 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1184
1185 return 0;
1186}
1187
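/*
 * The crashdumper is a small engine, driven through the CP_CRASH_* registers,
 * that executes a register capture script out of GPU memory; this tracks the
 * buffer that holds both the script and the captured data.
 */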
1188struct a5xx_crashdumper {
1189 void *ptr;
1190 struct drm_gem_object *bo;
1191 u64 iova;
1192};
1193
1194struct a5xx_gpu_state {
1195 struct msm_gpu_state base;
1196 u32 *hlsqregs;
1197};
1198
1199static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1200 struct a5xx_crashdumper *dumper)
1201{
1202 dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1203 SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1204 &dumper->bo, &dumper->iova);
1205
1206 if (!IS_ERR(dumper->ptr))
1207 msm_gem_object_set_name(dumper->bo, "crashdump");
1208
1209 return PTR_ERR_OR_ZERO(dumper->ptr);
1210}
1211
1212static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1213 struct a5xx_crashdumper *dumper)
1214{
1215 u32 val;
1216
1217 if (IS_ERR_OR_NULL(dumper->ptr))
1218 return -EINVAL;
1219
1220 gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1221 REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1222
1223 gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1224
1225 return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1226 val & 0x04, 100, 10000);
1227}
1228
1229/*
1230 * This is a list of the registers that need to be read through the HLSQ
1231 * aperture by the crashdumper. They are not normally accessible from
1232 * the CPU on a secure platform.
1233 */
1234static const struct {
1235 u32 type;
1236 u32 regoffset;
1237 u32 count;
1238} a5xx_hlsq_aperture_regs[] = {
1239 { 0x35, 0xe00, 0x32 }, /* HLSQ non-context */
1240 { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */
1241 { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */
1242 { 0x32, 0xe780, 0x62 }, /* HLSQ 3D context 0 */
1243 { 0x34, 0xef80, 0x62 }, /* HLSQ 3D context 1 */
1244 { 0x3f, 0x0ec0, 0x40 }, /* SP non-context */
1245 { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */
1246 { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */
1247 { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1248 { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1249 { 0x3a, 0x0f00, 0x1c }, /* TP non-context */
1250 { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */
1251 { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */
1252 { 0x39, 0xe700, 0x80 }, /* TP 3D context 0 */
1253 { 0x37, 0xef00, 0x80 }, /* TP 3D context 1 */
1254};
1255
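/*
 * Capture the HLSQ/SP/TP aperture registers with the crashdumper. The script
 * built below is a sequence of u64 pairs; as used here, a register write
 * entry is
 *
 *   { value << 8, (reg << 44) | (1 << 21) | 1 }
 *
 * and a read-into-buffer entry is
 *
 *   { destination iova, (reg << 44) | count }
 *
 * with a pair of zeros terminating the script.
 */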
1256static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1257 struct a5xx_gpu_state *a5xx_state)
1258{
1259 struct a5xx_crashdumper dumper = { 0 };
1260 u32 offset, count = 0;
1261 u64 *ptr;
1262 int i;
1263
1264 if (a5xx_crashdumper_init(gpu, &dumper))
1265 return;
1266
1267 /* The script will be written at offset 0 */
1268 ptr = dumper.ptr;
1269
1270 /* Start writing the data at offset 256k */
1271 offset = dumper.iova + (256 * SZ_1K);
1272
1273 /* Count how many additional registers to get from the HLSQ aperture */
1274 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1275 count += a5xx_hlsq_aperture_regs[i].count;
1276
1277 a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1278 if (!a5xx_state->hlsqregs)
1279 return;
1280
1281 /* Build the crashdump script */
1282 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1283 u32 type = a5xx_hlsq_aperture_regs[i].type;
1284 u32 c = a5xx_hlsq_aperture_regs[i].count;
1285
1286 /* Write the register to select the desired bank */
1287 *ptr++ = ((u64) type << 8);
1288 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1289 (1 << 21) | 1;
1290
1291 *ptr++ = offset;
1292 *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1293 | c;
1294
1295 offset += c * sizeof(u32);
1296 }
1297
1298 /* Write two zeros to close off the script */
1299 *ptr++ = 0;
1300 *ptr++ = 0;
1301
1302 if (a5xx_crashdumper_run(gpu, &dumper)) {
1303 kfree(a5xx_state->hlsqregs);
1304 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1305 return;
1306 }
1307
1308 /* Copy the data from the crashdumper to the state */
1309 memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1310 count * sizeof(u32));
1311
1312 msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1313}
1314
1315static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1316{
1317 struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1318 GFP_KERNEL);
1319
1320 if (!a5xx_state)
1321 return ERR_PTR(-ENOMEM);
1322
1323 /* Temporarily disable hardware clock gating before reading the hw */
1324 a5xx_set_hwcg(gpu, false);
1325
1326 /* First get the generic state from the adreno core */
1327 adreno_gpu_state_get(gpu, &(a5xx_state->base));
1328
1329 a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1330
1331 /* Get the HLSQ regs with the help of the crashdumper */
1332 a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1333
1334 a5xx_set_hwcg(gpu, true);
1335
1336 return &a5xx_state->base;
1337}
1338
1339static void a5xx_gpu_state_destroy(struct kref *kref)
1340{
1341 struct msm_gpu_state *state = container_of(kref,
1342 struct msm_gpu_state, ref);
1343 struct a5xx_gpu_state *a5xx_state = container_of(state,
1344 struct a5xx_gpu_state, base);
1345
1346 kfree(a5xx_state->hlsqregs);
1347
1348 adreno_gpu_state_destroy(state);
1349 kfree(a5xx_state);
1350}
1351
1352static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1353{
1354 if (IS_ERR_OR_NULL(state))
1355 return 1;
1356
1357 return kref_put(&state->ref, a5xx_gpu_state_destroy);
1358}
1359
1360
1361#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1362static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1363 struct drm_printer *p)
1364{
1365 int i, j;
1366 u32 pos = 0;
1367 struct a5xx_gpu_state *a5xx_state = container_of(state,
1368 struct a5xx_gpu_state, base);
1369
1370 if (IS_ERR_OR_NULL(state))
1371 return;
1372
1373 adreno_show(gpu, state, p);
1374
1375 /* Dump the additional a5xx HLSQ registers */
1376 if (!a5xx_state->hlsqregs)
1377 return;
1378
1379 drm_printf(p, "registers-hlsq:\n");
1380
1381 for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1382 u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1383 u32 c = a5xx_hlsq_aperture_regs[i].count;
1384
1385 for (j = 0; j < c; j++, pos++, o++) {
1386 /*
1387 * To keep the crashdump simple we pull the entire range
1388 * for each register type but not all of the registers
1389 * in the range are valid. Fortunately invalid registers
1390 * stick out like a sore thumb with a value of
1391 * 0xdeadbeef
1392 */
1393 if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1394 continue;
1395
1396 drm_printf(p, " - { offset: 0x%04x, value: 0x%08x }\n",
1397 o << 2, a5xx_state->hlsqregs[pos]);
1398 }
1399 }
1400}
1401#endif
1402
1403static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1404{
1405 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1406 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1407
1408 return a5xx_gpu->cur_ring;
1409}
1410
1411static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1412{
1413 u64 busy_cycles, busy_time;
1414
1415 /* Only read the gpu busy if the hardware is already active */
1416 if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1417 return 0;
1418
1419 busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1420 REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1421
1422 busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1423 do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1424
1425 gpu->devfreq.busy_cycles = busy_cycles;
1426
1427 pm_runtime_put(&gpu->pdev->dev);
1428
1429 if (WARN_ON(busy_time > ~0LU))
1430 return ~0LU;
1431
1432 return (unsigned long)busy_time;
1433}
1434
1435static const struct adreno_gpu_funcs funcs = {
1436 .base = {
1437 .get_param = adreno_get_param,
1438 .hw_init = a5xx_hw_init,
1439 .pm_suspend = a5xx_pm_suspend,
1440 .pm_resume = a5xx_pm_resume,
1441 .recover = a5xx_recover,
1442 .submit = a5xx_submit,
1443 .flush = a5xx_flush,
1444 .active_ring = a5xx_active_ring,
1445 .irq = a5xx_irq,
1446 .destroy = a5xx_destroy,
1447#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1448 .show = a5xx_show,
1449#endif
1450#if defined(CONFIG_DEBUG_FS)
1451 .debugfs_init = a5xx_debugfs_init,
1452#endif
1453 .gpu_busy = a5xx_gpu_busy,
1454 .gpu_state_get = a5xx_gpu_state_get,
1455 .gpu_state_put = a5xx_gpu_state_put,
1456 .create_address_space = adreno_iommu_create_address_space,
1457 },
1458 .get_timestamp = a5xx_get_timestamp,
1459};
1460
1461static void check_speed_bin(struct device *dev)
1462{
1463 struct nvmem_cell *cell;
1464 u32 val;
1465
1466 /*
1467 * If the OPP table specifies an opp-supported-hw property then we have
1468 * to set something with dev_pm_opp_set_supported_hw() or the table
1469 * doesn't get populated. So pick an arbitrary value that should
1470 * select the default frequencies but not conflict with any actual
1471 * speed bins.
1472 */
1473 val = 0x80;
1474
1475 cell = nvmem_cell_get(dev, "speed_bin");
1476
1477 if (!IS_ERR(cell)) {
1478 void *buf = nvmem_cell_read(cell, NULL);
1479
1480 if (!IS_ERR(buf)) {
1481 u8 bin = *((u8 *) buf);
1482
1483 val = (1 << bin);
1484 kfree(buf);
1485 }
1486
1487 nvmem_cell_put(cell);
1488 }
1489
1490 dev_pm_opp_set_supported_hw(dev, &val, 1);
1491}
1492
1493struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1494{
1495 struct msm_drm_private *priv = dev->dev_private;
1496 struct platform_device *pdev = priv->gpu_pdev;
1497 struct a5xx_gpu *a5xx_gpu = NULL;
1498 struct adreno_gpu *adreno_gpu;
1499 struct msm_gpu *gpu;
1500 int ret;
1501
1502 if (!pdev) {
1503 DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1504 return ERR_PTR(-ENXIO);
1505 }
1506
1507 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1508 if (!a5xx_gpu)
1509 return ERR_PTR(-ENOMEM);
1510
1511 adreno_gpu = &a5xx_gpu->base;
1512 gpu = &adreno_gpu->base;
1513
1514 adreno_gpu->registers = a5xx_registers;
1515 adreno_gpu->reg_offsets = a5xx_register_offsets;
1516
1517 a5xx_gpu->lm_leakage = 0x4E001A;
1518
1519 check_speed_bin(&pdev->dev);
1520
1521 /* Restricting nr_rings to 1 to temporarily disable preemption */
1522 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
1523 if (ret) {
1524 a5xx_destroy(&(a5xx_gpu->base.base));
1525 return ERR_PTR(ret);
1526 }
1527
1528 if (gpu->aspace)
1529 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1530
1531 /* Set up the preemption specific bits and pieces for each ringbuffer */
1532 a5xx_preempt_init(gpu);
1533
1534 return gpu;
1535}