Linux Audio

Check our new training course

Loading...
Note: File does not exist in v3.1.
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
   3
   4#include <linux/ascii85.h>
   5#include "msm_gem.h"
   6#include "a6xx_gpu.h"
   7#include "a6xx_gmu.h"
   8#include "a6xx_gpu_state.h"
   9#include "a6xx_gmu.xml.h"
  10
  11struct a6xx_gpu_state_obj {
  12	const void *handle;
  13	u32 *data;
  14};
  15
  16struct a6xx_gpu_state {
  17	struct msm_gpu_state base;
  18
  19	struct a6xx_gpu_state_obj *gmu_registers;
  20	int nr_gmu_registers;
  21
  22	struct a6xx_gpu_state_obj *registers;
  23	int nr_registers;
  24
  25	struct a6xx_gpu_state_obj *shaders;
  26	int nr_shaders;
  27
  28	struct a6xx_gpu_state_obj *clusters;
  29	int nr_clusters;
  30
  31	struct a6xx_gpu_state_obj *dbgahb_clusters;
  32	int nr_dbgahb_clusters;
  33
  34	struct a6xx_gpu_state_obj *indexed_regs;
  35	int nr_indexed_regs;
  36
  37	struct a6xx_gpu_state_obj *debugbus;
  38	int nr_debugbus;
  39
  40	struct a6xx_gpu_state_obj *vbif_debugbus;
  41
  42	struct a6xx_gpu_state_obj *cx_debugbus;
  43	int nr_cx_debugbus;
  44
  45	struct list_head objs;
  46};
  47
  48static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
  49{
  50	in[0] = val;
  51	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
  52
  53	return 2;
  54}
  55
  56static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
  57{
  58	in[0] = target;
  59	in[1] = (((u64) reg) << 44 | dwords);
  60
  61	return 2;
  62}
  63
  64static inline int CRASHDUMP_FINI(u64 *in)
  65{
  66	in[0] = 0;
  67	in[1] = 0;
  68
  69	return 2;
  70}
  71
  72struct a6xx_crashdumper {
  73	void *ptr;
  74	struct drm_gem_object *bo;
  75	u64 iova;
  76};
  77
  78struct a6xx_state_memobj {
  79	struct list_head node;
  80	unsigned long long data[];
  81};
  82
  83static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
  84{
  85	struct a6xx_state_memobj *obj =
  86		kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
  87
  88	if (!obj)
  89		return NULL;
  90
  91	list_add_tail(&obj->node, &a6xx_state->objs);
  92	return &obj->data;
  93}
  94
  95static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
  96		size_t size)
  97{
  98	void *dst = state_kcalloc(a6xx_state, 1, size);
  99
 100	if (dst)
 101		memcpy(dst, src, size);
 102	return dst;
 103}
 104
 105/*
 106 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 107 * the rest for the data
 108 */
 109#define A6XX_CD_DATA_OFFSET 8192
 110#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
 111
 112static int a6xx_crashdumper_init(struct msm_gpu *gpu,
 113		struct a6xx_crashdumper *dumper)
 114{
 115	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
 116		SZ_1M, MSM_BO_WC, gpu->aspace,
 117		&dumper->bo, &dumper->iova);
 118
 119	if (!IS_ERR(dumper->ptr))
 120		msm_gem_object_set_name(dumper->bo, "crashdump");
 121
 122	return PTR_ERR_OR_ZERO(dumper->ptr);
 123}
 124
 125static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 126		struct a6xx_crashdumper *dumper)
 127{
 128	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 129	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 130	u32 val;
 131	int ret;
 132
 133	if (IS_ERR_OR_NULL(dumper->ptr))
 134		return -EINVAL;
 135
 136	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
 137		return -EINVAL;
 138
 139	/* Make sure all pending memory writes are posted */
 140	wmb();
 141
 142	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
 143		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
 144
 145	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 146
 147	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
 148		val & 0x02, 100, 10000);
 149
 150	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
 151
 152	return ret;
 153}
 154
 155/* read a value from the GX debug bus */
 156static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
 157		u32 *data)
 158{
 159	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
 160		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
 161
 162	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
 163	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
 164	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
 165	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
 166
 167	/* Wait 1 us to make sure the data is flowing */
 168	udelay(1);
 169
 170	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 171	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 172
 173	return 2;
 174}
 175
 176#define cxdbg_write(ptr, offset, val) \
 177	msm_writel((val), (ptr) + ((offset) << 2))
 178
 179#define cxdbg_read(ptr, offset) \
 180	msm_readl((ptr) + ((offset) << 2))
 181
 182/* read a value from the CX debug bus */
 183static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset,
 184		u32 *data)
 185{
 186	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
 187		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
 188
 189	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
 190	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
 191	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
 192	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
 193
 194	/* Wait 1 us to make sure the data is flowing */
 195	udelay(1);
 196
 197	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 198	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 199
 200	return 2;
 201}
 202
 203/* Read a chunk of data from the VBIF debug bus */
 204static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
 205		u32 reg, int count, u32 *data)
 206{
 207	int i;
 208
 209	gpu_write(gpu, ctrl0, reg);
 210
 211	for (i = 0; i < count; i++) {
 212		gpu_write(gpu, ctrl1, i);
 213		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
 214	}
 215
 216	return count;
 217}
 218
 219#define AXI_ARB_BLOCKS 2
 220#define XIN_AXI_BLOCKS 5
 221#define XIN_CORE_BLOCKS 4
 222
 223#define VBIF_DEBUGBUS_BLOCK_SIZE \
 224	((16 * AXI_ARB_BLOCKS) + \
 225	 (18 * XIN_AXI_BLOCKS) + \
 226	 (12 * XIN_CORE_BLOCKS))
 227
 228static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
 229		struct a6xx_gpu_state *a6xx_state,
 230		struct a6xx_gpu_state_obj *obj)
 231{
 232	u32 clk, *ptr;
 233	int i;
 234
 235	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
 236		sizeof(u32));
 237	if (!obj->data)
 238		return;
 239
 240	obj->handle = NULL;
 241
 242	/* Get the current clock setting */
 243	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
 244
 245	/* Force on the bus so we can read it */
 246	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
 247		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
 248
 249	/* We will read from BUS2 first, so disable BUS1 */
 250	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
 251
 252	/* Enable the VBIF bus for reading */
 253	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
 254
 255	ptr = obj->data;
 256
 257	for (i = 0; i < AXI_ARB_BLOCKS; i++)
 258		ptr += vbif_debugbus_read(gpu,
 259			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 260			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 261			1 << (i + 16), 16, ptr);
 262
 263	for (i = 0; i < XIN_AXI_BLOCKS; i++)
 264		ptr += vbif_debugbus_read(gpu,
 265			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 266			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 267			1 << i, 18, ptr);
 268
 269	/* Stop BUS2 so we can turn on BUS1 */
 270	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
 271
 272	for (i = 0; i < XIN_CORE_BLOCKS; i++)
 273		ptr += vbif_debugbus_read(gpu,
 274			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
 275			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
 276			1 << i, 12, ptr);
 277
 278	/* Restore the VBIF clock setting */
 279	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
 280}
 281
 282static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
 283		struct a6xx_gpu_state *a6xx_state,
 284		const struct a6xx_debugbus_block *block,
 285		struct a6xx_gpu_state_obj *obj)
 286{
 287	int i;
 288	u32 *ptr;
 289
 290	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 291	if (!obj->data)
 292		return;
 293
 294	obj->handle = block;
 295
 296	for (ptr = obj->data, i = 0; i < block->count; i++)
 297		ptr += debugbus_read(gpu, block->id, i, ptr);
 298}
 299
 300static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
 301		struct a6xx_gpu_state *a6xx_state,
 302		const struct a6xx_debugbus_block *block,
 303		struct a6xx_gpu_state_obj *obj)
 304{
 305	int i;
 306	u32 *ptr;
 307
 308	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 309	if (!obj->data)
 310		return;
 311
 312	obj->handle = block;
 313
 314	for (ptr = obj->data, i = 0; i < block->count; i++)
 315		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
 316}
 317
 318static void a6xx_get_debugbus(struct msm_gpu *gpu,
 319		struct a6xx_gpu_state *a6xx_state)
 320{
 321	struct resource *res;
 322	void __iomem *cxdbg = NULL;
 323	int nr_debugbus_blocks;
 324
 325	/* Set up the GX debug bus */
 326
 327	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
 328		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 329
 330	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
 331		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 332
 333	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 334	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 335	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 336	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 337
 338	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
 339	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
 340
 341	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 342	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 343	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 344	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 345
 346	/* Set up the CX debug bus - it lives elsewhere in the system so do a
 347	 * temporary ioremap for the registers
 348	 */
 349	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
 350			"cx_dbgc");
 351
 352	if (res)
 353		cxdbg = ioremap(res->start, resource_size(res));
 354
 355	if (cxdbg) {
 356		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
 357			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 358
 359		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
 360			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 361
 362		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 363		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 364		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 365		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 366
 367		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
 368			0x76543210);
 369		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
 370			0xFEDCBA98);
 371
 372		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 373		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 374		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 375		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 376	}
 377
 378	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
 379		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
 380
 381	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
 382			sizeof(*a6xx_state->debugbus));
 383
 384	if (a6xx_state->debugbus) {
 385		int i;
 386
 387		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
 388			a6xx_get_debugbus_block(gpu,
 389				a6xx_state,
 390				&a6xx_debugbus_blocks[i],
 391				&a6xx_state->debugbus[i]);
 392
 393		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
 394
 395		/*
 396		 * GBIF has same debugbus as of other GPU blocks, fall back to
 397		 * default path if GPU uses GBIF, also GBIF uses exactly same
 398		 * ID as of VBIF.
 399		 */
 400		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
 401			a6xx_get_debugbus_block(gpu, a6xx_state,
 402				&a6xx_gbif_debugbus_block,
 403				&a6xx_state->debugbus[i]);
 404
 405			a6xx_state->nr_debugbus += 1;
 406		}
 407	}
 408
 409	/*  Dump the VBIF debugbus on applicable targets */
 410	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
 411		a6xx_state->vbif_debugbus =
 412			state_kcalloc(a6xx_state, 1,
 413					sizeof(*a6xx_state->vbif_debugbus));
 414
 415		if (a6xx_state->vbif_debugbus)
 416			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
 417					a6xx_state->vbif_debugbus);
 418	}
 419
 420	if (cxdbg) {
 421		a6xx_state->cx_debugbus =
 422			state_kcalloc(a6xx_state,
 423			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
 424			sizeof(*a6xx_state->cx_debugbus));
 425
 426		if (a6xx_state->cx_debugbus) {
 427			int i;
 428
 429			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
 430				a6xx_get_cx_debugbus_block(cxdbg,
 431					a6xx_state,
 432					&a6xx_cx_debugbus_blocks[i],
 433					&a6xx_state->cx_debugbus[i]);
 434
 435			a6xx_state->nr_cx_debugbus =
 436				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
 437		}
 438
 439		iounmap(cxdbg);
 440	}
 441}
 442
 443#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
 444
 445/* Read a data cluster from behind the AHB aperture */
 446static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 447		struct a6xx_gpu_state *a6xx_state,
 448		const struct a6xx_dbgahb_cluster *dbgahb,
 449		struct a6xx_gpu_state_obj *obj,
 450		struct a6xx_crashdumper *dumper)
 451{
 452	u64 *in = dumper->ptr;
 453	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 454	size_t datasize;
 455	int i, regcount = 0;
 456
 457	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 458		int j;
 459
 460		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 461			(dbgahb->statetype + i * 2) << 8);
 462
 463		for (j = 0; j < dbgahb->count; j += 2) {
 464			int count = RANGE(dbgahb->registers, j);
 465			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 466				dbgahb->registers[j] - (dbgahb->base >> 2);
 467
 468			in += CRASHDUMP_READ(in, offset, count, out);
 469
 470			out += count * sizeof(u32);
 471
 472			if (i == 0)
 473				regcount += count;
 474		}
 475	}
 476
 477	CRASHDUMP_FINI(in);
 478
 479	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 480
 481	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 482		return;
 483
 484	if (a6xx_crashdumper_run(gpu, dumper))
 485		return;
 486
 487	obj->handle = dbgahb;
 488	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 489		datasize);
 490}
 491
 492static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 493		struct a6xx_gpu_state *a6xx_state,
 494		struct a6xx_crashdumper *dumper)
 495{
 496	int i;
 497
 498	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 499		ARRAY_SIZE(a6xx_dbgahb_clusters),
 500		sizeof(*a6xx_state->dbgahb_clusters));
 501
 502	if (!a6xx_state->dbgahb_clusters)
 503		return;
 504
 505	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
 506
 507	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
 508		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
 509			&a6xx_dbgahb_clusters[i],
 510			&a6xx_state->dbgahb_clusters[i], dumper);
 511}
 512
 513/* Read a data cluster from the CP aperture with the crashdumper */
 514static void a6xx_get_cluster(struct msm_gpu *gpu,
 515		struct a6xx_gpu_state *a6xx_state,
 516		const struct a6xx_cluster *cluster,
 517		struct a6xx_gpu_state_obj *obj,
 518		struct a6xx_crashdumper *dumper)
 519{
 520	u64 *in = dumper->ptr;
 521	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 522	size_t datasize;
 523	int i, regcount = 0;
 524
 525	/* Some clusters need a selector register to be programmed too */
 526	if (cluster->sel_reg)
 527		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
 528
 529	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 530		int j;
 531
 532		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
 533			(cluster->id << 8) | (i << 4) | i);
 534
 535		for (j = 0; j < cluster->count; j += 2) {
 536			int count = RANGE(cluster->registers, j);
 537
 538			in += CRASHDUMP_READ(in, cluster->registers[j],
 539				count, out);
 540
 541			out += count * sizeof(u32);
 542
 543			if (i == 0)
 544				regcount += count;
 545		}
 546	}
 547
 548	CRASHDUMP_FINI(in);
 549
 550	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 551
 552	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 553		return;
 554
 555	if (a6xx_crashdumper_run(gpu, dumper))
 556		return;
 557
 558	obj->handle = cluster;
 559	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 560		datasize);
 561}
 562
 563static void a6xx_get_clusters(struct msm_gpu *gpu,
 564		struct a6xx_gpu_state *a6xx_state,
 565		struct a6xx_crashdumper *dumper)
 566{
 567	int i;
 568
 569	a6xx_state->clusters = state_kcalloc(a6xx_state,
 570		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
 571
 572	if (!a6xx_state->clusters)
 573		return;
 574
 575	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
 576
 577	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
 578		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
 579			&a6xx_state->clusters[i], dumper);
 580}
 581
 582/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
 583static void a6xx_get_shader_block(struct msm_gpu *gpu,
 584		struct a6xx_gpu_state *a6xx_state,
 585		const struct a6xx_shader_block *block,
 586		struct a6xx_gpu_state_obj *obj,
 587		struct a6xx_crashdumper *dumper)
 588{
 589	u64 *in = dumper->ptr;
 590	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
 591	int i;
 592
 593	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 594		return;
 595
 596	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
 597		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 598			(block->type << 8) | i);
 599
 600		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
 601			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
 602	}
 603
 604	CRASHDUMP_FINI(in);
 605
 606	if (a6xx_crashdumper_run(gpu, dumper))
 607		return;
 608
 609	obj->handle = block;
 610	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 611		datasize);
 612}
 613
 614static void a6xx_get_shaders(struct msm_gpu *gpu,
 615		struct a6xx_gpu_state *a6xx_state,
 616		struct a6xx_crashdumper *dumper)
 617{
 618	int i;
 619
 620	a6xx_state->shaders = state_kcalloc(a6xx_state,
 621		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
 622
 623	if (!a6xx_state->shaders)
 624		return;
 625
 626	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
 627
 628	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
 629		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
 630			&a6xx_state->shaders[i], dumper);
 631}
 632
 633/* Read registers from behind the HLSQ aperture with the crashdumper */
 634static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
 635		struct a6xx_gpu_state *a6xx_state,
 636		const struct a6xx_registers *regs,
 637		struct a6xx_gpu_state_obj *obj,
 638		struct a6xx_crashdumper *dumper)
 639
 640{
 641	u64 *in = dumper->ptr;
 642	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 643	int i, regcount = 0;
 644
 645	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
 646
 647	for (i = 0; i < regs->count; i += 2) {
 648		u32 count = RANGE(regs->registers, i);
 649		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 650			regs->registers[i] - (regs->val0 >> 2);
 651
 652		in += CRASHDUMP_READ(in, offset, count, out);
 653
 654		out += count * sizeof(u32);
 655		regcount += count;
 656	}
 657
 658	CRASHDUMP_FINI(in);
 659
 660	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 661		return;
 662
 663	if (a6xx_crashdumper_run(gpu, dumper))
 664		return;
 665
 666	obj->handle = regs;
 667	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 668		regcount * sizeof(u32));
 669}
 670
 671/* Read a block of registers using the crashdumper */
 672static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
 673		struct a6xx_gpu_state *a6xx_state,
 674		const struct a6xx_registers *regs,
 675		struct a6xx_gpu_state_obj *obj,
 676		struct a6xx_crashdumper *dumper)
 677
 678{
 679	u64 *in = dumper->ptr;
 680	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 681	int i, regcount = 0;
 682
 683	/* Some blocks might need to program a selector register first */
 684	if (regs->val0)
 685		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
 686
 687	for (i = 0; i < regs->count; i += 2) {
 688		u32 count = RANGE(regs->registers, i);
 689
 690		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
 691
 692		out += count * sizeof(u32);
 693		regcount += count;
 694	}
 695
 696	CRASHDUMP_FINI(in);
 697
 698	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 699		return;
 700
 701	if (a6xx_crashdumper_run(gpu, dumper))
 702		return;
 703
 704	obj->handle = regs;
 705	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 706		regcount * sizeof(u32));
 707}
 708
 709/* Read a block of registers via AHB */
 710static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
 711		struct a6xx_gpu_state *a6xx_state,
 712		const struct a6xx_registers *regs,
 713		struct a6xx_gpu_state_obj *obj)
 714{
 715	int i, regcount = 0, index = 0;
 716
 717	for (i = 0; i < regs->count; i += 2)
 718		regcount += RANGE(regs->registers, i);
 719
 720	obj->handle = (const void *) regs;
 721	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 722	if (!obj->data)
 723		return;
 724
 725	for (i = 0; i < regs->count; i += 2) {
 726		u32 count = RANGE(regs->registers, i);
 727		int j;
 728
 729		for (j = 0; j < count; j++)
 730			obj->data[index++] = gpu_read(gpu,
 731				regs->registers[i] + j);
 732	}
 733}
 734
 735/* Read a block of GMU registers */
 736static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
 737		struct a6xx_gpu_state *a6xx_state,
 738		const struct a6xx_registers *regs,
 739		struct a6xx_gpu_state_obj *obj,
 740		bool rscc)
 741{
 742	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 743	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 744	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 745	int i, regcount = 0, index = 0;
 746
 747	for (i = 0; i < regs->count; i += 2)
 748		regcount += RANGE(regs->registers, i);
 749
 750	obj->handle = (const void *) regs;
 751	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 752	if (!obj->data)
 753		return;
 754
 755	for (i = 0; i < regs->count; i += 2) {
 756		u32 count = RANGE(regs->registers, i);
 757		int j;
 758
 759		for (j = 0; j < count; j++) {
 760			u32 offset = regs->registers[i] + j;
 761			u32 val;
 762
 763			if (rscc)
 764				val = gmu_read_rscc(gmu, offset);
 765			else
 766				val = gmu_read(gmu, offset);
 767
 768			obj->data[index++] = val;
 769		}
 770	}
 771}
 772
 773static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
 774		struct a6xx_gpu_state *a6xx_state)
 775{
 776	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 777	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 778
 779	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
 780		2, sizeof(*a6xx_state->gmu_registers));
 781
 782	if (!a6xx_state->gmu_registers)
 783		return;
 784
 785	a6xx_state->nr_gmu_registers = 2;
 786
 787	/* Get the CX GMU registers from AHB */
 788	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
 789		&a6xx_state->gmu_registers[0], false);
 790	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
 791		&a6xx_state->gmu_registers[1], true);
 792
 793	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 794		return;
 795
 796	/* Set the fence to ALLOW mode so we can access the registers */
 797	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
 798
 799	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
 800		&a6xx_state->gmu_registers[2], false);
 801}
 802
 803#define A6XX_GBIF_REGLIST_SIZE   1
 804static void a6xx_get_registers(struct msm_gpu *gpu,
 805		struct a6xx_gpu_state *a6xx_state,
 806		struct a6xx_crashdumper *dumper)
 807{
 808	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
 809		ARRAY_SIZE(a6xx_reglist) +
 810		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
 811	int index = 0;
 812	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 813
 814	a6xx_state->registers = state_kcalloc(a6xx_state,
 815		count, sizeof(*a6xx_state->registers));
 816
 817	if (!a6xx_state->registers)
 818		return;
 819
 820	a6xx_state->nr_registers = count;
 821
 822	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
 823		a6xx_get_ahb_gpu_registers(gpu,
 824			a6xx_state, &a6xx_ahb_reglist[i],
 825			&a6xx_state->registers[index++]);
 826
 827	if (a6xx_has_gbif(adreno_gpu))
 828		a6xx_get_ahb_gpu_registers(gpu,
 829				a6xx_state, &a6xx_gbif_reglist,
 830				&a6xx_state->registers[index++]);
 831	else
 832		a6xx_get_ahb_gpu_registers(gpu,
 833				a6xx_state, &a6xx_vbif_reglist,
 834				&a6xx_state->registers[index++]);
 835	if (!dumper) {
 836		/*
 837		 * We can't use the crashdumper when the SMMU is stalled,
 838		 * because the GPU has no memory access until we resume
 839		 * translation (but we don't want to do that until after
 840		 * we have captured as much useful GPU state as possible).
 841		 * So instead collect registers via the CPU:
 842		 */
 843		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 844			a6xx_get_ahb_gpu_registers(gpu,
 845				a6xx_state, &a6xx_reglist[i],
 846				&a6xx_state->registers[index++]);
 847		return;
 848	}
 849
 850	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 851		a6xx_get_crashdumper_registers(gpu,
 852			a6xx_state, &a6xx_reglist[i],
 853			&a6xx_state->registers[index++],
 854			dumper);
 855
 856	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
 857		a6xx_get_crashdumper_hlsq_registers(gpu,
 858			a6xx_state, &a6xx_hlsq_reglist[i],
 859			&a6xx_state->registers[index++],
 860			dumper);
 861}
 862
 863/* Read a block of data from an indexed register pair */
 864static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
 865		struct a6xx_gpu_state *a6xx_state,
 866		const struct a6xx_indexed_registers *indexed,
 867		struct a6xx_gpu_state_obj *obj)
 868{
 869	int i;
 870
 871	obj->handle = (const void *) indexed;
 872	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
 873	if (!obj->data)
 874		return;
 875
 876	/* All the indexed banks start at address 0 */
 877	gpu_write(gpu, indexed->addr, 0);
 878
 879	/* Read the data - each read increments the internal address by 1 */
 880	for (i = 0; i < indexed->count; i++)
 881		obj->data[i] = gpu_read(gpu, indexed->data);
 882}
 883
 884static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
 885		struct a6xx_gpu_state *a6xx_state)
 886{
 887	u32 mempool_size;
 888	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
 889	int i;
 890
 891	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
 892		sizeof(*a6xx_state->indexed_regs));
 893	if (!a6xx_state->indexed_regs)
 894		return;
 895
 896	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
 897		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
 898			&a6xx_state->indexed_regs[i]);
 899
 900	/* Set the CP mempool size to 0 to stabilize it while dumping */
 901	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
 902	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
 903
 904	/* Get the contents of the CP mempool */
 905	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
 906		&a6xx_state->indexed_regs[i]);
 907
 908	/*
 909	 * Offset 0x2000 in the mempool is the size - copy the saved size over
 910	 * so the data is consistent
 911	 */
 912	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
 913
 914	/* Restore the size in the hardware */
 915	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
 916
 917	a6xx_state->nr_indexed_regs = count;
 918}
 919
 920struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 921{
 922	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
 923	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 924	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 925	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
 926		GFP_KERNEL);
 927	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
 928			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
 929
 930	if (!a6xx_state)
 931		return ERR_PTR(-ENOMEM);
 932
 933	INIT_LIST_HEAD(&a6xx_state->objs);
 934
 935	/* Get the generic state from the adreno core */
 936	adreno_gpu_state_get(gpu, &a6xx_state->base);
 937
 938	a6xx_get_gmu_registers(gpu, a6xx_state);
 939
 940	/* If GX isn't on the rest of the data isn't going to be accessible */
 941	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 942		return &a6xx_state->base;
 943
 944	/* Get the banks of indexed registers */
 945	a6xx_get_indexed_registers(gpu, a6xx_state);
 946
 947	/*
 948	 * Try to initialize the crashdumper, if we are not dumping state
 949	 * with the SMMU stalled.  The crashdumper needs memory access to
 950	 * write out GPU state, so we need to skip this when the SMMU is
 951	 * stalled in response to an iova fault
 952	 */
 953	if (!stalled && !a6xx_crashdumper_init(gpu, &_dumper)) {
 954		dumper = &_dumper;
 955	}
 956
 957	a6xx_get_registers(gpu, a6xx_state, dumper);
 958
 959	if (dumper) {
 960		a6xx_get_shaders(gpu, a6xx_state, dumper);
 961		a6xx_get_clusters(gpu, a6xx_state, dumper);
 962		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
 963
 964		msm_gem_kernel_put(dumper->bo, gpu->aspace, true);
 965	}
 966
 967	if (snapshot_debugbus)
 968		a6xx_get_debugbus(gpu, a6xx_state);
 969
 970	return  &a6xx_state->base;
 971}
 972
 973static void a6xx_gpu_state_destroy(struct kref *kref)
 974{
 975	struct a6xx_state_memobj *obj, *tmp;
 976	struct msm_gpu_state *state = container_of(kref,
 977			struct msm_gpu_state, ref);
 978	struct a6xx_gpu_state *a6xx_state = container_of(state,
 979			struct a6xx_gpu_state, base);
 980
 981	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
 982		kfree(obj);
 983
 984	adreno_gpu_state_destroy(state);
 985	kfree(a6xx_state);
 986}
 987
 988int a6xx_gpu_state_put(struct msm_gpu_state *state)
 989{
 990	if (IS_ERR_OR_NULL(state))
 991		return 1;
 992
 993	return kref_put(&state->ref, a6xx_gpu_state_destroy);
 994}
 995
 996static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
 997		struct drm_printer *p)
 998{
 999	int i, index = 0;
1000
1001	if (!data)
1002		return;
1003
1004	for (i = 0; i < count; i += 2) {
1005		u32 count = RANGE(registers, i);
1006		u32 offset = registers[i];
1007		int j;
1008
1009		for (j = 0; j < count; index++, offset++, j++) {
1010			if (data[index] == 0xdeafbead)
1011				continue;
1012
1013			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1014				offset << 2, data[index]);
1015		}
1016	}
1017}
1018
1019static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1020{
1021	char out[ASCII85_BUFSZ];
1022	long i, l, datalen = 0;
1023
1024	for (i = 0; i < len >> 2; i++) {
1025		if (data[i])
1026			datalen = (i + 1) << 2;
1027	}
1028
1029	if (datalen == 0)
1030		return;
1031
1032	drm_puts(p, "    data: !!ascii85 |\n");
1033	drm_puts(p, "      ");
1034
1035
1036	l = ascii85_encode_len(datalen);
1037
1038	for (i = 0; i < l; i++)
1039		drm_puts(p, ascii85_encode(data[i], out));
1040
1041	drm_puts(p, "\n");
1042}
1043
/* Print the @fmt prefix, then @name, then a newline; @fmt is emitted as-is */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);

	drm_puts(p, "\n");
}
1050
1051static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1052		struct drm_printer *p)
1053{
1054	const struct a6xx_shader_block *block = obj->handle;
1055	int i;
1056
1057	if (!obj->handle)
1058		return;
1059
1060	print_name(p, "  - type: ", block->name);
1061
1062	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1063		drm_printf(p, "    - bank: %d\n", i);
1064		drm_printf(p, "      size: %d\n", block->size);
1065
1066		if (!obj->data)
1067			continue;
1068
1069		print_ascii85(p, block->size << 2,
1070			obj->data + (block->size * i));
1071	}
1072}
1073
1074static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1075		struct drm_printer *p)
1076{
1077	int ctx, index = 0;
1078
1079	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1080		int j;
1081
1082		drm_printf(p, "    - context: %d\n", ctx);
1083
1084		for (j = 0; j < size; j += 2) {
1085			u32 count = RANGE(registers, j);
1086			u32 offset = registers[j];
1087			int k;
1088
1089			for (k = 0; k < count; index++, offset++, k++) {
1090				if (data[index] == 0xdeafbead)
1091					continue;
1092
1093				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1094					offset << 2, data[index]);
1095			}
1096		}
1097	}
1098}
1099
1100static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1101		struct drm_printer *p)
1102{
1103	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1104
1105	if (dbgahb) {
1106		print_name(p, "  - cluster-name: ", dbgahb->name);
1107		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1108			obj->data, p);
1109	}
1110}
1111
1112static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1113		struct drm_printer *p)
1114{
1115	const struct a6xx_cluster *cluster = obj->handle;
1116
1117	if (cluster) {
1118		print_name(p, "  - cluster-name: ", cluster->name);
1119		a6xx_show_cluster_data(cluster->registers, cluster->count,
1120			obj->data, p);
1121	}
1122}
1123
1124static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1125		struct drm_printer *p)
1126{
1127	const struct a6xx_indexed_registers *indexed = obj->handle;
1128
1129	if (!indexed)
1130		return;
1131
1132	print_name(p, "  - regs-name: ", indexed->name);
1133	drm_printf(p, "    dwords: %d\n", indexed->count);
1134
1135	print_ascii85(p, indexed->count << 2, obj->data);
1136}
1137
1138static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1139		u32 *data, struct drm_printer *p)
1140{
1141	if (block) {
1142		print_name(p, "  - debugbus-block: ", block->name);
1143
1144		/*
1145		 * count for regular debugbus data is in quadwords,
1146		 * but print the size in dwords for consistency
1147		 */
1148		drm_printf(p, "    count: %d\n", block->count << 1);
1149
1150		print_ascii85(p, block->count << 3, data);
1151	}
1152}
1153
1154static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1155		struct drm_printer *p)
1156{
1157	int i;
1158
1159	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1160		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1161
1162		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1163	}
1164
1165	if (a6xx_state->vbif_debugbus) {
1166		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1167
1168		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1169		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1170
1171		/* vbif debugbus data is in dwords.  Confusing, huh? */
1172		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1173	}
1174
1175	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1176		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1177
1178		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1179	}
1180}
1181
1182void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1183		struct drm_printer *p)
1184{
1185	struct a6xx_gpu_state *a6xx_state = container_of(state,
1186			struct a6xx_gpu_state, base);
1187	int i;
1188
1189	if (IS_ERR_OR_NULL(state))
1190		return;
1191
1192	adreno_show(gpu, state, p);
1193
1194	drm_puts(p, "registers:\n");
1195	for (i = 0; i < a6xx_state->nr_registers; i++) {
1196		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1197		const struct a6xx_registers *regs = obj->handle;
1198
1199		if (!obj->handle)
1200			continue;
1201
1202		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1203	}
1204
1205	drm_puts(p, "registers-gmu:\n");
1206	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1207		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1208		const struct a6xx_registers *regs = obj->handle;
1209
1210		if (!obj->handle)
1211			continue;
1212
1213		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1214	}
1215
1216	drm_puts(p, "indexed-registers:\n");
1217	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1218		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1219
1220	drm_puts(p, "shader-blocks:\n");
1221	for (i = 0; i < a6xx_state->nr_shaders; i++)
1222		a6xx_show_shader(&a6xx_state->shaders[i], p);
1223
1224	drm_puts(p, "clusters:\n");
1225	for (i = 0; i < a6xx_state->nr_clusters; i++)
1226		a6xx_show_cluster(&a6xx_state->clusters[i], p);
1227
1228	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1229		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
1230
1231	drm_puts(p, "debugbus:\n");
1232	a6xx_show_debugbus(a6xx_state, p);
1233}