   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
   3
   4#include <linux/ascii85.h>
   5#include "msm_gem.h"
   6#include "a6xx_gpu.h"
   7#include "a6xx_gmu.h"
   8#include "a6xx_gpu_state.h"
   9#include "a6xx_gmu.xml.h"
  10
  11/* Ignore diagnostics about register tables that we aren't using yet. We don't
  12 * want to modify these headers too much from their original source.
  13 */
  14#pragma GCC diagnostic push
  15#pragma GCC diagnostic ignored "-Wunused-variable"
  16
  17#include "adreno_gen7_0_0_snapshot.h"
  18#include "adreno_gen7_2_0_snapshot.h"
  19
  20#pragma GCC diagnostic pop
  21
  22struct a6xx_gpu_state_obj {
  23	const void *handle;
  24	u32 *data;
  25};
  26
  27struct a6xx_gpu_state {
  28	struct msm_gpu_state base;
  29
  30	struct a6xx_gpu_state_obj *gmu_registers;
  31	int nr_gmu_registers;
  32
  33	struct a6xx_gpu_state_obj *registers;
  34	int nr_registers;
  35
  36	struct a6xx_gpu_state_obj *shaders;
  37	int nr_shaders;
  38
  39	struct a6xx_gpu_state_obj *clusters;
  40	int nr_clusters;
  41
  42	struct a6xx_gpu_state_obj *dbgahb_clusters;
  43	int nr_dbgahb_clusters;
  44
  45	struct a6xx_gpu_state_obj *indexed_regs;
  46	int nr_indexed_regs;
  47
  48	struct a6xx_gpu_state_obj *debugbus;
  49	int nr_debugbus;
  50
  51	struct a6xx_gpu_state_obj *vbif_debugbus;
  52
  53	struct a6xx_gpu_state_obj *cx_debugbus;
  54	int nr_cx_debugbus;
  55
  56	struct msm_gpu_state_bo *gmu_log;
  57	struct msm_gpu_state_bo *gmu_hfi;
  58	struct msm_gpu_state_bo *gmu_debug;
  59
  60	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
  61
  62	struct list_head objs;
  63
  64	bool gpu_initialized;
  65};
  66
  67static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
  68{
  69	in[0] = val;
  70	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
  71
  72	return 2;
  73}
  74
  75static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
  76{
  77	in[0] = target;
  78	in[1] = (((u64) reg) << 44 | dwords);
  79
  80	return 2;
  81}
  82
  83static inline int CRASHDUMP_FINI(u64 *in)
  84{
  85	in[0] = 0;
  86	in[1] = 0;
  87
  88	return 2;
  89}
  90
  91struct a6xx_crashdumper {
  92	void *ptr;
  93	struct drm_gem_object *bo;
  94	u64 iova;
  95};
  96
  97struct a6xx_state_memobj {
  98	struct list_head node;
  99	unsigned long long data[];
 100};
 101
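/* Allocate zeroed memory that is tracked and freed along with the GPU state */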
 102static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
 103{
 104	struct a6xx_state_memobj *obj =
 105		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
 106
 107	if (!obj)
 108		return NULL;
 109
 110	list_add_tail(&obj->node, &a6xx_state->objs);
 111	return &obj->data;
 112}
 113
 114static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
 115		size_t size)
 116{
 117	void *dst = state_kcalloc(a6xx_state, 1, size);
 118
 119	if (dst)
 120		memcpy(dst, src, size);
 121	return dst;
 122}
 123
 124/*
 125 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 126 * the rest for the data
 127 */
 128#define A6XX_CD_DATA_OFFSET 8192
 129#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
 130
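/* Allocate and map the 1MB scratch buffer used by the CP crashdumper */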
 131static int a6xx_crashdumper_init(struct msm_gpu *gpu,
 132		struct a6xx_crashdumper *dumper)
 133{
 134	dumper->ptr = msm_gem_kernel_new(gpu->dev,
 135		SZ_1M, MSM_BO_WC, gpu->aspace,
 136		&dumper->bo, &dumper->iova);
 137
 138	if (!IS_ERR(dumper->ptr))
 139		msm_gem_object_set_name(dumper->bo, "crashdump");
 140
 141	return PTR_ERR_OR_ZERO(dumper->ptr);
 142}
 143
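/* Point the CP at the crashdump script, kick it off, and poll for completion */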
 144static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 145		struct a6xx_crashdumper *dumper)
 146{
 147	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 148	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 149	u32 val;
 150	int ret;
 151
 152	if (IS_ERR_OR_NULL(dumper->ptr))
 153		return -EINVAL;
 154
 155	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
 156		return -EINVAL;
 157
 158	/* Make sure all pending memory writes are posted */
 159	wmb();
 160
 161	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
 162
 163	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 164
 165	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
 166		val & 0x02, 100, 10000);
 167
 168	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
 169
 170	return ret;
 171}
 172
 173/* read a value from the GX debug bus */
 174static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
 175		u32 *data)
 176{
 177	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
 178		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
 179
 180	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
 181	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
 182	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
 183	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
 184
 185	/* Wait 1 us to make sure the data is flowing */
 186	udelay(1);
 187
 188	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 189	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 190
 191	return 2;
 192}
 193
 194#define cxdbg_write(ptr, offset, val) \
 195	msm_writel((val), (ptr) + ((offset) << 2))
 196
 197#define cxdbg_read(ptr, offset) \
 198	msm_readl((ptr) + ((offset) << 2))
 199
 200/* read a value from the CX debug bus */
 201static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
 202		u32 *data)
 203{
 204	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
 205		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
 206
 207	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
 208	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
 209	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
 210	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
 211
 212	/* Wait 1 us to make sure the data is flowing */
 213	udelay(1);
 214
 215	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 216	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 217
 218	return 2;
 219}
 220
 221/* Read a chunk of data from the VBIF debug bus */
 222static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
 223		u32 reg, int count, u32 *data)
 224{
 225	int i;
 226
 227	gpu_write(gpu, ctrl0, reg);
 228
 229	for (i = 0; i < count; i++) {
 230		gpu_write(gpu, ctrl1, i);
 231		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
 232	}
 233
 234	return count;
 235}
 236
 237#define AXI_ARB_BLOCKS 2
 238#define XIN_AXI_BLOCKS 5
 239#define XIN_CORE_BLOCKS 4
 240
 241#define VBIF_DEBUGBUS_BLOCK_SIZE \
 242	((16 * AXI_ARB_BLOCKS) + \
 243	 (18 * XIN_AXI_BLOCKS) + \
 244	 (12 * XIN_CORE_BLOCKS))
 245
 246static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
 247		struct a6xx_gpu_state *a6xx_state,
 248		struct a6xx_gpu_state_obj *obj)
 249{
 250	u32 clk, *ptr;
 251	int i;
 252
 253	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
 254		sizeof(u32));
 255	if (!obj->data)
 256		return;
 257
 258	obj->handle = NULL;
 259
 260	/* Get the current clock setting */
 261	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
 262
 263	/* Force on the bus so we can read it */
 264	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
 265		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
 266
 267	/* We will read from BUS2 first, so disable BUS1 */
 268	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
 269
 270	/* Enable the VBIF bus for reading */
 271	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
 272
 273	ptr = obj->data;
 274
 275	for (i = 0; i < AXI_ARB_BLOCKS; i++)
 276		ptr += vbif_debugbus_read(gpu,
 277			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 278			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 279			1 << (i + 16), 16, ptr);
 280
 281	for (i = 0; i < XIN_AXI_BLOCKS; i++)
 282		ptr += vbif_debugbus_read(gpu,
 283			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 284			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 285			1 << i, 18, ptr);
 286
 287	/* Stop BUS2 so we can turn on BUS1 */
 288	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
 289
 290	for (i = 0; i < XIN_CORE_BLOCKS; i++)
 291		ptr += vbif_debugbus_read(gpu,
 292			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
 293			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
 294			1 << i, 12, ptr);
 295
 296	/* Restore the VBIF clock setting */
 297	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
 298}
 299
 300static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
 301		struct a6xx_gpu_state *a6xx_state,
 302		const struct a6xx_debugbus_block *block,
 303		struct a6xx_gpu_state_obj *obj)
 304{
 305	int i;
 306	u32 *ptr;
 307
 308	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 309	if (!obj->data)
 310		return;
 311
 312	obj->handle = block;
 313
 314	for (ptr = obj->data, i = 0; i < block->count; i++)
 315		ptr += debugbus_read(gpu, block->id, i, ptr);
 316}
 317
 318static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
 319		struct a6xx_gpu_state *a6xx_state,
 320		const struct a6xx_debugbus_block *block,
 321		struct a6xx_gpu_state_obj *obj)
 322{
 323	int i;
 324	u32 *ptr;
 325
 326	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 327	if (!obj->data)
 328		return;
 329
 330	obj->handle = block;
 331
 332	for (ptr = obj->data, i = 0; i < block->count; i++)
 333		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
 334}
 335
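/* Capture the GX debugbus blocks, plus the GBIF and a650-specific ones where present */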
 336static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
 337		struct a6xx_gpu_state *a6xx_state)
 338{
 339	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
 340		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
 341
 342	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
 343		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
 344
 345	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
 346			sizeof(*a6xx_state->debugbus));
 347
 348	if (a6xx_state->debugbus) {
 349		int i;
 350
 351		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
 352			a6xx_get_debugbus_block(gpu,
 353				a6xx_state,
 354				&a6xx_debugbus_blocks[i],
 355				&a6xx_state->debugbus[i]);
 356
 357		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
 358
 359		/*
 360		 * GBIF has the same debugbus interface as the other GPU blocks,
 361		 * so fall back to the default path when the GPU uses GBIF. Note
 362		 * that GBIF uses exactly the same block ID as VBIF.
 363		 */
 364		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
 365			a6xx_get_debugbus_block(gpu, a6xx_state,
 366				&a6xx_gbif_debugbus_block,
 367				&a6xx_state->debugbus[i]);
 368
 369			a6xx_state->nr_debugbus += 1;
 370		}
 371
 372
 373		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
 374			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
 375				a6xx_get_debugbus_block(gpu,
 376					a6xx_state,
 377					&a650_debugbus_blocks[i],
 378					&a6xx_state->debugbus[i]);
 379		}
 380	}
 381}
 382
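/* Capture the a7xx (a730/a740 family) debugbus blocks, including GBIF */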
 383static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
 384		struct a6xx_gpu_state *a6xx_state)
 385{
 386	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 387	int debugbus_blocks_count, total_debugbus_blocks;
 388	const u32 *debugbus_blocks;
 389	int i;
 390
 391	if (adreno_is_a730(adreno_gpu)) {
 392		debugbus_blocks = gen7_0_0_debugbus_blocks;
 393		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
 394	} else {
 395		BUG_ON(!adreno_is_a740_family(adreno_gpu));
 396		debugbus_blocks = gen7_2_0_debugbus_blocks;
 397		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
 398	}
 399
 400	total_debugbus_blocks = debugbus_blocks_count +
 401		ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
 402
 403	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
 404			sizeof(*a6xx_state->debugbus));
 405
 406	if (a6xx_state->debugbus) {
 407		for (i = 0; i < debugbus_blocks_count; i++) {
 408			a6xx_get_debugbus_block(gpu,
 409				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
 410				&a6xx_state->debugbus[i]);
 411		}
 412
 413		for (i = 0; i < ARRAY_SIZE(a7xx_gbif_debugbus_blocks); i++) {
 414			a6xx_get_debugbus_block(gpu,
 415				a6xx_state, &a7xx_gbif_debugbus_blocks[i],
 416				&a6xx_state->debugbus[i + debugbus_blocks_count]);
 417		}
 418	}
 419
 420}
 421
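/* Program the GX and CX debugbus controllers and snapshot all of their blocks */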
 422static void a6xx_get_debugbus(struct msm_gpu *gpu,
 423		struct a6xx_gpu_state *a6xx_state)
 424{
 425	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 426	struct resource *res;
 427	void __iomem *cxdbg = NULL;
 428
 429	/* Set up the GX debug bus */
 430
 431	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
 432		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 433
 434	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
 435		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 436
 437	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 438	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 439	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 440	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 441
 442	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
 443	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
 444
 445	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 446	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 447	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 448	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 449
 450	/* Set up the CX debug bus - it lives elsewhere in the system so do a
 451	 * temporary ioremap for the registers
 452	 */
 453	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
 454			"cx_dbgc");
 455
 456	if (res)
 457		cxdbg = ioremap(res->start, resource_size(res));
 458
 459	if (cxdbg) {
 460		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
 461			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 462
 463		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
 464			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 465
 466		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 467		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 468		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 469		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 470
 471		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
 472			0x76543210);
 473		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
 474			0xFEDCBA98);
 475
 476		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 477		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 478		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 479		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 480	}
 481
 482	if (adreno_is_a7xx(adreno_gpu)) {
 483		a7xx_get_debugbus_blocks(gpu, a6xx_state);
 484	} else {
 485		a6xx_get_debugbus_blocks(gpu, a6xx_state);
 486	}
 487
 488	/*  Dump the VBIF debugbus on applicable targets */
 489	if (!a6xx_has_gbif(adreno_gpu)) {
 490		a6xx_state->vbif_debugbus =
 491			state_kcalloc(a6xx_state, 1,
 492					sizeof(*a6xx_state->vbif_debugbus));
 493
 494		if (a6xx_state->vbif_debugbus)
 495			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
 496					a6xx_state->vbif_debugbus);
 497	}
 498
 499	if (cxdbg) {
 500		unsigned nr_cx_debugbus_blocks;
 501		const struct a6xx_debugbus_block *cx_debugbus_blocks;
 502
 503		if (adreno_is_a7xx(adreno_gpu)) {
 504			BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
 505			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
 506			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
 507		} else {
 508			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
 509			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
 510		}
 511
 512		a6xx_state->cx_debugbus =
 513			state_kcalloc(a6xx_state,
 514			nr_cx_debugbus_blocks,
 515			sizeof(*a6xx_state->cx_debugbus));
 516
 517		if (a6xx_state->cx_debugbus) {
 518			int i;
 519
 520			for (i = 0; i < nr_cx_debugbus_blocks; i++)
 521				a6xx_get_cx_debugbus_block(cxdbg,
 522					a6xx_state,
 523					&cx_debugbus_blocks[i],
 524					&a6xx_state->cx_debugbus[i]);
 525
 526			a6xx_state->nr_cx_debugbus =
 527				nr_cx_debugbus_blocks;
 528		}
 529
 530		iounmap(cxdbg);
 531	}
 532}
 533
 534#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
 535
 536/* Read a data cluster from behind the AHB aperture */
 537static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 538		struct a6xx_gpu_state *a6xx_state,
 539		const struct a6xx_dbgahb_cluster *dbgahb,
 540		struct a6xx_gpu_state_obj *obj,
 541		struct a6xx_crashdumper *dumper)
 542{
 543	u64 *in = dumper->ptr;
 544	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 545	size_t datasize;
 546	int i, regcount = 0;
 547
 548	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 549		int j;
 550
 551		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 552			(dbgahb->statetype + i * 2) << 8);
 553
 554		for (j = 0; j < dbgahb->count; j += 2) {
 555			int count = RANGE(dbgahb->registers, j);
 556			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 557				dbgahb->registers[j] - (dbgahb->base >> 2);
 558
 559			in += CRASHDUMP_READ(in, offset, count, out);
 560
 561			out += count * sizeof(u32);
 562
 563			if (i == 0)
 564				regcount += count;
 565		}
 566	}
 567
 568	CRASHDUMP_FINI(in);
 569
 570	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 571
 572	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 573		return;
 574
 575	if (a6xx_crashdumper_run(gpu, dumper))
 576		return;
 577
 578	obj->handle = dbgahb;
 579	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 580		datasize);
 581}
 582
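/* Read an a7xx SP/TP cluster from behind the AHB aperture with the crashdumper */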
 583static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 584		struct a6xx_gpu_state *a6xx_state,
 585		const struct gen7_sptp_cluster_registers *dbgahb,
 586		struct a6xx_gpu_state_obj *obj,
 587		struct a6xx_crashdumper *dumper)
 588{
 589	u64 *in = dumper->ptr;
 590	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 591	size_t datasize;
 592	int i, regcount = 0;
 593
 594	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
 595		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
 596		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
 597		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
 598
 599	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
 600		int count = RANGE(dbgahb->regs, i);
 601		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
 602			dbgahb->regs[i] - dbgahb->regbase;
 603
 604		in += CRASHDUMP_READ(in, offset, count, out);
 605
 606		out += count * sizeof(u32);
 607		regcount += count;
 608	}
 609
 610	CRASHDUMP_FINI(in);
 611
 612	datasize = regcount * sizeof(u32);
 613
 614	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 615		return;
 616
 617	if (a6xx_crashdumper_run(gpu, dumper))
 618		return;
 619
 620	obj->handle = dbgahb;
 621	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 622		datasize);
 623}
 624
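/* Snapshot all of the HLSQ DBGAHB clusters with the crashdumper */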
 625static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 626		struct a6xx_gpu_state *a6xx_state,
 627		struct a6xx_crashdumper *dumper)
 628{
 629	int i;
 630
 631	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 632		ARRAY_SIZE(a6xx_dbgahb_clusters),
 633		sizeof(*a6xx_state->dbgahb_clusters));
 634
 635	if (!a6xx_state->dbgahb_clusters)
 636		return;
 637
 638	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
 639
 640	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
 641		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
 642			&a6xx_dbgahb_clusters[i],
 643			&a6xx_state->dbgahb_clusters[i], dumper);
 644}
 645
 646static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 647		struct a6xx_gpu_state *a6xx_state,
 648		struct a6xx_crashdumper *dumper)
 649{
 650	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 651	int i;
 652	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
 653	unsigned dbgahb_clusters_size;
 654
 655	if (adreno_is_a730(adreno_gpu)) {
 656		dbgahb_clusters = gen7_0_0_sptp_clusters;
 657		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
 658	} else {
 659		BUG_ON(!adreno_is_a740_family(adreno_gpu));
 660		dbgahb_clusters = gen7_2_0_sptp_clusters;
 661		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
 662	}
 663
 664	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 665		dbgahb_clusters_size,
 666		sizeof(*a6xx_state->dbgahb_clusters));
 667
 668	if (!a6xx_state->dbgahb_clusters)
 669		return;
 670
 671	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
 672
 673	for (i = 0; i < dbgahb_clusters_size; i++)
 674		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
 675			&dbgahb_clusters[i],
 676			&a6xx_state->dbgahb_clusters[i], dumper);
 677}
 678
 679/* Read a data cluster from the CP aperture with the crashdumper */
 680static void a6xx_get_cluster(struct msm_gpu *gpu,
 681		struct a6xx_gpu_state *a6xx_state,
 682		const struct a6xx_cluster *cluster,
 683		struct a6xx_gpu_state_obj *obj,
 684		struct a6xx_crashdumper *dumper)
 685{
 686	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 687	u64 *in = dumper->ptr;
 688	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 689	size_t datasize;
 690	int i, regcount = 0;
 691	u32 id = cluster->id;
 692
 693	/* Skip registers that are not present on older generations */
 694	if (!adreno_is_a660_family(adreno_gpu) &&
 695			cluster->registers == a660_fe_cluster)
 696		return;
 697
 698	if (adreno_is_a650_family(adreno_gpu) &&
 699			cluster->registers == a6xx_ps_cluster)
 700		id = CLUSTER_VPC_PS;
 701
 702	/* Some clusters need a selector register to be programmed too */
 703	if (cluster->sel_reg)
 704		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
 705
 706	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 707		int j;
 708
 709		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
 710			(id << 8) | (i << 4) | i);
 711
 712		for (j = 0; j < cluster->count; j += 2) {
 713			int count = RANGE(cluster->registers, j);
 714
 715			in += CRASHDUMP_READ(in, cluster->registers[j],
 716				count, out);
 717
 718			out += count * sizeof(u32);
 719
 720			if (i == 0)
 721				regcount += count;
 722		}
 723	}
 724
 725	CRASHDUMP_FINI(in);
 726
 727	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 728
 729	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 730		return;
 731
 732	if (a6xx_crashdumper_run(gpu, dumper))
 733		return;
 734
 735	obj->handle = cluster;
 736	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 737		datasize);
 738}
 739
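/* Read an a7xx data cluster from the CP aperture with the crashdumper */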
 740static void a7xx_get_cluster(struct msm_gpu *gpu,
 741		struct a6xx_gpu_state *a6xx_state,
 742		const struct gen7_cluster_registers *cluster,
 743		struct a6xx_gpu_state_obj *obj,
 744		struct a6xx_crashdumper *dumper)
 745{
 746	u64 *in = dumper->ptr;
 747	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 748	size_t datasize;
 749	int i, regcount = 0;
 750
 751	/* Some clusters need a selector register to be programmed too */
 752	if (cluster->sel)
 753		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
 754
 755	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
 756		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
 757		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
 758		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
 759
 760	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
 761		int count = RANGE(cluster->regs, i);
 762
 763		in += CRASHDUMP_READ(in, cluster->regs[i],
 764			count, out);
 765
 766		out += count * sizeof(u32);
 767		regcount += count;
 768	}
 769
 770	CRASHDUMP_FINI(in);
 771
 772	datasize = regcount * sizeof(u32);
 773
 774	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 775		return;
 776
 777	if (a6xx_crashdumper_run(gpu, dumper))
 778		return;
 779
 780	obj->handle = cluster;
 781	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 782		datasize);
 783}
 784
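/* Snapshot each CP data cluster with the crashdumper */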
 785static void a6xx_get_clusters(struct msm_gpu *gpu,
 786		struct a6xx_gpu_state *a6xx_state,
 787		struct a6xx_crashdumper *dumper)
 788{
 789	int i;
 790
 791	a6xx_state->clusters = state_kcalloc(a6xx_state,
 792		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
 793
 794	if (!a6xx_state->clusters)
 795		return;
 796
 797	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
 798
 799	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
 800		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
 801			&a6xx_state->clusters[i], dumper);
 802}
 803
 804static void a7xx_get_clusters(struct msm_gpu *gpu,
 805		struct a6xx_gpu_state *a6xx_state,
 806		struct a6xx_crashdumper *dumper)
 807{
 808	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 809	int i;
 810	const struct gen7_cluster_registers *clusters;
 811	unsigned clusters_size;
 812
 813	if (adreno_is_a730(adreno_gpu)) {
 814		clusters = gen7_0_0_clusters;
 815		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
 816	} else {
 817		BUG_ON(!adreno_is_a740_family(adreno_gpu));
 818		clusters = gen7_2_0_clusters;
 819		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
 820	}
 821
 822	a6xx_state->clusters = state_kcalloc(a6xx_state,
 823		clusters_size, sizeof(*a6xx_state->clusters));
 824
 825	if (!a6xx_state->clusters)
 826		return;
 827
 828	a6xx_state->nr_clusters = clusters_size;
 829
 830	for (i = 0; i < clusters_size; i++)
 831		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
 832			&a6xx_state->clusters[i], dumper);
 833}
 834
 835/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
 836static void a6xx_get_shader_block(struct msm_gpu *gpu,
 837		struct a6xx_gpu_state *a6xx_state,
 838		const struct a6xx_shader_block *block,
 839		struct a6xx_gpu_state_obj *obj,
 840		struct a6xx_crashdumper *dumper)
 841{
 842	u64 *in = dumper->ptr;
 843	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 844	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
 845	int i;
 846
 847	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 848		return;
 849
 850	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
 851		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 852			(block->type << 8) | i);
 853
 854		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
 855			block->size, out);
 856
 857		out += block->size * sizeof(u32);
 858	}
 859
 860	CRASHDUMP_FINI(in);
 861
 862	if (a6xx_crashdumper_run(gpu, dumper))
 863		return;
 864
 865	obj->handle = block;
 866	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 867		datasize);
 868}
 869
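/* Read an a7xx shader / debug block through the SP aperture with the crashdumper */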
 870static void a7xx_get_shader_block(struct msm_gpu *gpu,
 871		struct a6xx_gpu_state *a6xx_state,
 872		const struct gen7_shader_block *block,
 873		struct a6xx_gpu_state_obj *obj,
 874		struct a6xx_crashdumper *dumper)
 875{
 876	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 877	u64 *in = dumper->ptr;
 878	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 879	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
 880	int i, j;
 881
 882	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 883		return;
 884
 885	if (adreno_is_a730(adreno_gpu)) {
 886		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
 887	}
 888
 889	for (i = 0; i < block->num_sps; i++) {
 890		for (j = 0; j < block->num_usptps; j++) {
 891			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
 892				A7XX_SP_READ_SEL_LOCATION(block->location) |
 893				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
 894				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
 895				A7XX_SP_READ_SEL_USPTP(j) |
 896				A7XX_SP_READ_SEL_SPTP(i));
 897
 898			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
 899				block->size, out);
 900
 901			out += block->size * sizeof(u32);
 902		}
 903	}
 904
 905	CRASHDUMP_FINI(in);
 906
 907	if (a6xx_crashdumper_run(gpu, dumper))
 908		goto out;
 909
 910	obj->handle = block;
 911	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 912		datasize);
 913
 914out:
 915	if (adreno_is_a730(adreno_gpu)) {
 916		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
 917	}
 918}
 919
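/* Snapshot every shader block with the crashdumper */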
 920static void a6xx_get_shaders(struct msm_gpu *gpu,
 921		struct a6xx_gpu_state *a6xx_state,
 922		struct a6xx_crashdumper *dumper)
 923{
 924	int i;
 925
 926	a6xx_state->shaders = state_kcalloc(a6xx_state,
 927		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
 928
 929	if (!a6xx_state->shaders)
 930		return;
 931
 932	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
 933
 934	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
 935		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
 936			&a6xx_state->shaders[i], dumper);
 937}
 938
 939static void a7xx_get_shaders(struct msm_gpu *gpu,
 940		struct a6xx_gpu_state *a6xx_state,
 941		struct a6xx_crashdumper *dumper)
 942{
 943	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 944	const struct gen7_shader_block *shader_blocks;
 945	unsigned num_shader_blocks;
 946	int i;
 947
 948	if (adreno_is_a730(adreno_gpu)) {
 949		shader_blocks = gen7_0_0_shader_blocks;
 950		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
 951	} else {
 952		BUG_ON(!adreno_is_a740_family(adreno_gpu));
 953		shader_blocks = gen7_2_0_shader_blocks;
 954		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
 955	}
 956
 957	a6xx_state->shaders = state_kcalloc(a6xx_state,
 958		num_shader_blocks, sizeof(*a6xx_state->shaders));
 959
 960	if (!a6xx_state->shaders)
 961		return;
 962
 963	a6xx_state->nr_shaders = num_shader_blocks;
 964
 965	for (i = 0; i < num_shader_blocks; i++)
 966		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
 967			&a6xx_state->shaders[i], dumper);
 968}
 969
 970/* Read registers from behind the HLSQ aperture with the crashdumper */
 971static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
 972		struct a6xx_gpu_state *a6xx_state,
 973		const struct a6xx_registers *regs,
 974		struct a6xx_gpu_state_obj *obj,
 975		struct a6xx_crashdumper *dumper)
 976
 977{
 978	u64 *in = dumper->ptr;
 979	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 980	int i, regcount = 0;
 981
 982	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
 983
 984	for (i = 0; i < regs->count; i += 2) {
 985		u32 count = RANGE(regs->registers, i);
 986		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 987			regs->registers[i] - (regs->val0 >> 2);
 988
 989		in += CRASHDUMP_READ(in, offset, count, out);
 990
 991		out += count * sizeof(u32);
 992		regcount += count;
 993	}
 994
 995	CRASHDUMP_FINI(in);
 996
 997	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 998		return;
 999
1000	if (a6xx_crashdumper_run(gpu, dumper))
1001		return;
1002
1003	obj->handle = regs;
1004	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1005		regcount * sizeof(u32));
1006}
1007
1008/* Read a block of registers using the crashdumper */
1009static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1010		struct a6xx_gpu_state *a6xx_state,
1011		const struct a6xx_registers *regs,
1012		struct a6xx_gpu_state_obj *obj,
1013		struct a6xx_crashdumper *dumper)
1014
1015{
1016	u64 *in = dumper->ptr;
1017	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1018	int i, regcount = 0;
1019
1020	/* Skip unsupported registers on older generations */
1021	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1022			(regs->registers == a660_registers))
1023		return;
1024
1025	/* Some blocks might need to program a selector register first */
1026	if (regs->val0)
1027		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1028
1029	for (i = 0; i < regs->count; i += 2) {
1030		u32 count = RANGE(regs->registers, i);
1031
1032		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1033
1034		out += count * sizeof(u32);
1035		regcount += count;
1036	}
1037
1038	CRASHDUMP_FINI(in);
1039
1040	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1041		return;
1042
1043	if (a6xx_crashdumper_run(gpu, dumper))
1044		return;
1045
1046	obj->handle = regs;
1047	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1048		regcount * sizeof(u32));
1049}
1050
1051static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1052		struct a6xx_gpu_state *a6xx_state,
1053		const struct gen7_reg_list *regs,
1054		struct a6xx_gpu_state_obj *obj,
1055		struct a6xx_crashdumper *dumper)
1056
1057{
1058	u64 *in = dumper->ptr;
1059	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1060	int i, regcount = 0;
1061
1062	/* Some blocks might need to program a selector register first */
1063	if (regs->sel)
1064		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1065
1066	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1067		u32 count = RANGE(regs->regs, i);
1068
1069		in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1070
1071		out += count * sizeof(u32);
1072		regcount += count;
1073	}
1074
1075	CRASHDUMP_FINI(in);
1076
1077	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1078		return;
1079
1080	if (a6xx_crashdumper_run(gpu, dumper))
1081		return;
1082
1083	obj->handle = regs->regs;
1084	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1085		regcount * sizeof(u32));
1086}
1087
1088
1089/* Read a block of registers via AHB */
1090static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1091		struct a6xx_gpu_state *a6xx_state,
1092		const struct a6xx_registers *regs,
1093		struct a6xx_gpu_state_obj *obj)
1094{
1095	int i, regcount = 0, index = 0;
1096
1097	/* Skip unsupported registers on older generations */
1098	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1099			(regs->registers == a660_registers))
1100		return;
1101
1102	for (i = 0; i < regs->count; i += 2)
1103		regcount += RANGE(regs->registers, i);
1104
1105	obj->handle = (const void *) regs;
1106	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1107	if (!obj->data)
1108		return;
1109
1110	for (i = 0; i < regs->count; i += 2) {
1111		u32 count = RANGE(regs->registers, i);
1112		int j;
1113
1114		for (j = 0; j < count; j++)
1115			obj->data[index++] = gpu_read(gpu,
1116				regs->registers[i] + j);
1117	}
1118}
1119
1120static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1121		struct a6xx_gpu_state *a6xx_state,
1122		const u32 *regs,
1123		struct a6xx_gpu_state_obj *obj)
1124{
1125	int i, regcount = 0, index = 0;
1126
1127	for (i = 0; regs[i] != UINT_MAX; i += 2)
1128		regcount += RANGE(regs, i);
1129
1130	obj->handle = (const void *) regs;
1131	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1132	if (!obj->data)
1133		return;
1134
1135	for (i = 0; regs[i] != UINT_MAX; i += 2) {
1136		u32 count = RANGE(regs, i);
1137		int j;
1138
1139		for (j = 0; j < count; j++)
1140			obj->data[index++] = gpu_read(gpu, regs[i] + j);
1141	}
1142}
1143
1144static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1145		struct a6xx_gpu_state *a6xx_state,
1146		const struct gen7_reg_list *regs,
1147		struct a6xx_gpu_state_obj *obj)
1148{
1149	if (regs->sel)
1150		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1151
1152	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1153}
1154
1155/* Read a block of GMU registers */
1156static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1157		struct a6xx_gpu_state *a6xx_state,
1158		const struct a6xx_registers *regs,
1159		struct a6xx_gpu_state_obj *obj,
1160		bool rscc)
1161{
1162	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1163	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1164	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1165	int i, regcount = 0, index = 0;
1166
1167	for (i = 0; i < regs->count; i += 2)
1168		regcount += RANGE(regs->registers, i);
1169
1170	obj->handle = (const void *) regs;
1171	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1172	if (!obj->data)
1173		return;
1174
1175	for (i = 0; i < regs->count; i += 2) {
1176		u32 count = RANGE(regs->registers, i);
1177		int j;
1178
1179		for (j = 0; j < count; j++) {
1180			u32 offset = regs->registers[i] + j;
1181			u32 val;
1182
1183			if (rscc)
1184				val = gmu_read_rscc(gmu, offset);
1185			else
1186				val = gmu_read(gmu, offset);
1187
1188			obj->data[index++] = val;
1189		}
1190	}
1191}
1192
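/* Snapshot the GMU register blocks; the last one requires GX to be powered */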
1193static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1194		struct a6xx_gpu_state *a6xx_state)
1195{
1196	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1197	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1198
1199	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1200		3, sizeof(*a6xx_state->gmu_registers));
1201
1202	if (!a6xx_state->gmu_registers)
1203		return;
1204
1205	a6xx_state->nr_gmu_registers = 3;
1206
1207	/* Get the CX GMU registers from AHB */
1208	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1209		&a6xx_state->gmu_registers[0], false);
1210	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1211		&a6xx_state->gmu_registers[1], true);
1212
1213	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1214		return;
1215
1216	/* Set the fence to ALLOW mode so we can access the registers */
1217	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1218
1219	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1220		&a6xx_state->gmu_registers[2], false);
1221}
1222
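/* Copy the contents of a GMU buffer object into the snapshot */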
1223static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1224		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1225{
1226	struct msm_gpu_state_bo *snapshot;
1227
1228	if (!bo->size)
1229		return NULL;
1230
1231	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1232	if (!snapshot)
1233		return NULL;
1234
1235	snapshot->iova = bo->iova;
1236	snapshot->size = bo->size;
1237	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1238	if (!snapshot->data)
1239		return NULL;
1240
1241	memcpy(snapshot->data, bo->virt, bo->size);
1242
1243	return snapshot;
1244}
1245
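/* Save the recent per-queue HFI history from the GMU */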
1246static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1247					  struct a6xx_gpu_state *a6xx_state)
1248{
1249	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1250	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1251	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1252	unsigned i, j;
1253
1254	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1255
1256	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1257		struct a6xx_hfi_queue *queue = &gmu->queues[i];
1258		for (j = 0; j < HFI_HISTORY_SZ; j++) {
1259			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1260			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1261		}
1262	}
1263}
1264
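/* Snapshot the GPU register blocks, via the CPU or the crashdumper */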
1265#define A6XX_REGLIST_SIZE        1
1266#define A6XX_GBIF_REGLIST_SIZE   1
1267static void a6xx_get_registers(struct msm_gpu *gpu,
1268		struct a6xx_gpu_state *a6xx_state,
1269		struct a6xx_crashdumper *dumper)
1270{
1271	int i, count = A6XX_REGLIST_SIZE +
1272		ARRAY_SIZE(a6xx_reglist) +
1273		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1274	int index = 0;
1275	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1276
1277	a6xx_state->registers = state_kcalloc(a6xx_state,
1278		count, sizeof(*a6xx_state->registers));
1279
1280	if (!a6xx_state->registers)
1281		return;
1282
1283	a6xx_state->nr_registers = count;
1284
1285	a6xx_get_ahb_gpu_registers(gpu,
1286		a6xx_state, &a6xx_ahb_reglist,
1287		&a6xx_state->registers[index++]);
1288
1289	if (a6xx_has_gbif(adreno_gpu))
1290		a6xx_get_ahb_gpu_registers(gpu,
1291				a6xx_state, &a6xx_gbif_reglist,
1292				&a6xx_state->registers[index++]);
1293	else
1294		a6xx_get_ahb_gpu_registers(gpu,
1295				a6xx_state, &a6xx_vbif_reglist,
1296				&a6xx_state->registers[index++]);
1297	if (!dumper) {
1298		/*
1299		 * We can't use the crashdumper when the SMMU is stalled,
1300		 * because the GPU has no memory access until we resume
1301		 * translation (but we don't want to do that until after
1302		 * we have captured as much useful GPU state as possible).
1303		 * So instead collect registers via the CPU:
1304		 */
1305		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1306			a6xx_get_ahb_gpu_registers(gpu,
1307				a6xx_state, &a6xx_reglist[i],
1308				&a6xx_state->registers[index++]);
1309		return;
1310	}
1311
1312	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1313		a6xx_get_crashdumper_registers(gpu,
1314			a6xx_state, &a6xx_reglist[i],
1315			&a6xx_state->registers[index++],
1316			dumper);
1317
1318	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1319		a6xx_get_crashdumper_hlsq_registers(gpu,
1320			a6xx_state, &a6xx_hlsq_reglist[i],
1321			&a6xx_state->registers[index++],
1322			dumper);
1323}
1324
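/* Snapshot the a7xx register blocks, via the CPU or the crashdumper */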
1325#define A7XX_PRE_CRASHDUMPER_SIZE    1
1326#define A7XX_POST_CRASHDUMPER_SIZE   1
1327static void a7xx_get_registers(struct msm_gpu *gpu,
1328		struct a6xx_gpu_state *a6xx_state,
1329		struct a6xx_crashdumper *dumper)
1330{
1331	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1332	int i, count;
1333	int index = 0;
1334	const u32 *pre_crashdumper_regs;
1335	const struct gen7_reg_list *reglist;
1336
1337	if (adreno_is_a730(adreno_gpu)) {
1338		reglist = gen7_0_0_reg_list;
1339		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1340	} else {
1341		BUG_ON(!adreno_is_a740_family(adreno_gpu));
1342		reglist = gen7_2_0_reg_list;
1343		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1344	}
1345
1346	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1347
1348	/* The downstream reglist contains registers in other memory regions
1349	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
1350	 * offsets and map them to read them on the CPU. For now only read the
1351	 * first region which is the main one.
1352	 */
1353	if (dumper) {
1354		for (i = 0; reglist[i].regs; i++)
1355			count++;
1356	} else {
1357		count++;
1358	}
1359
1360	a6xx_state->registers = state_kcalloc(a6xx_state,
1361		count, sizeof(*a6xx_state->registers));
1362
1363	if (!a6xx_state->registers)
1364		return;
1365
1366	a6xx_state->nr_registers = count;
1367
1368	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1369		&a6xx_state->registers[index++]);
1370
1371	if (!dumper) {
1372		a7xx_get_ahb_gpu_reglist(gpu,
1373			a6xx_state, &reglist[0],
1374			&a6xx_state->registers[index++]);
1375		return;
1376	}
1377
1378	for (i = 0; reglist[i].regs; i++)
1379		a7xx_get_crashdumper_registers(gpu,
1380			a6xx_state, &reglist[i],
1381			&a6xx_state->registers[index++],
1382			dumper);
1383}
1384
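/* Read the registers that are only captured after the crashdumper has run */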
1385static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1386		struct a6xx_gpu_state *a6xx_state)
1387{
1388	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1389	const u32 *regs;
1390
1391	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1392	regs = gen7_0_0_post_crashdumper_registers;
1393
1394	a7xx_get_ahb_gpu_registers(gpu,
1395		a6xx_state, regs,
1396		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
1397}
1398
1399static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1400{
1401	/* The value at [16:31] is in 4dword units. Convert it to dwords */
1402	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1403}
1404
1405static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1406{
1407	/*
1408	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1409	 * That register however is not directly accessible from APSS on A7xx.
1410	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1411	 */
1412	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1413
1414	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1415}
1416
1417/* Read a block of data from an indexed register pair */
1418static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1419		struct a6xx_gpu_state *a6xx_state,
1420		struct a6xx_indexed_registers *indexed,
1421		struct a6xx_gpu_state_obj *obj)
1422{
1423	int i;
1424
1425	obj->handle = (const void *) indexed;
1426	if (indexed->count_fn)
1427		indexed->count = indexed->count_fn(gpu);
1428
1429	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
1430	if (!obj->data)
1431		return;
1432
1433	/* All the indexed banks start at address 0 */
1434	gpu_write(gpu, indexed->addr, 0);
1435
1436	/* Read the data - each read increments the internal address by 1 */
1437	for (i = 0; i < indexed->count; i++)
1438		obj->data[i] = gpu_read(gpu, indexed->data);
1439}
1440
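/* Snapshot the indexed register banks, including the CP mempool contents */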
1441static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1442		struct a6xx_gpu_state *a6xx_state)
1443{
1444	u32 mempool_size;
1445	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1446	int i;
1447
1448	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1449		sizeof(*a6xx_state->indexed_regs));
1450	if (!a6xx_state->indexed_regs)
1451		return;
1452
1453	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1454		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1455			&a6xx_state->indexed_regs[i]);
1456
1457	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1458		u32 val;
1459
1460		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1461		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1462
1463		/* Get the contents of the CP mempool */
1464		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1465			&a6xx_state->indexed_regs[i]);
1466
1467		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1468		a6xx_state->nr_indexed_regs = count;
1469		return;
1470	}
1471
1472	/* Set the CP mempool size to 0 to stabilize it while dumping */
1473	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1474	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1475
1476	/* Get the contents of the CP mempool */
1477	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1478		&a6xx_state->indexed_regs[i]);
1479
1480	/*
1481	 * Offset 0x2000 in the mempool is the size - copy the saved size over
1482	 * so the data is consistent
1483	 */
1484	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1485
1486	/* Restore the size in the hardware */
1487	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1488}
1489
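/* Snapshot the a7xx indexed register banks and the CP_BV mempool */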
1490static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1491		struct a6xx_gpu_state *a6xx_state)
1492{
1493	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1494	int i, indexed_count, mempool_count;
1495
1496	BUG_ON(!(adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)));
1497	indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1498	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1499
1500	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1501					indexed_count + mempool_count,
1502					sizeof(*a6xx_state->indexed_regs));
1503	if (!a6xx_state->indexed_regs)
1504		return;
1505
1506	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1507
1508	/* First read the common regs */
1509	for (i = 0; i < indexed_count; i++)
1510		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_indexed_reglist[i],
1511			&a6xx_state->indexed_regs[i]);
1512
1513	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1514	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1515
1516	/* Get the contents of the CP_BV mempool */
1517	for (i = 0; i < mempool_count; i++)
1518		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1519			&a6xx_state->indexed_regs[indexed_count + i]);
1520
1521	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1522	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1523	return;
1524}
1525
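/* Capture a full GPU state snapshot for a crash dump */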
1526struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1527{
1528	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1529	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1530	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1531	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1532		GFP_KERNEL);
1533	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1534			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1535
1536	if (!a6xx_state)
1537		return ERR_PTR(-ENOMEM);
1538
1539	INIT_LIST_HEAD(&a6xx_state->objs);
1540
1541	/* Get the generic state from the adreno core */
1542	adreno_gpu_state_get(gpu, &a6xx_state->base);
1543
1544	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1545		a6xx_get_gmu_registers(gpu, a6xx_state);
1546
1547		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1548		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1549		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1550
1551		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1552	}
1553
 1554	/* If GX isn't on, the rest of the data isn't going to be accessible */
1555	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1556		return &a6xx_state->base;
1557
1558	/* Get the banks of indexed registers */
1559	if (adreno_is_a7xx(adreno_gpu))
1560		a7xx_get_indexed_registers(gpu, a6xx_state);
1561	else
1562		a6xx_get_indexed_registers(gpu, a6xx_state);
1563
1564	/*
1565	 * Try to initialize the crashdumper, if we are not dumping state
1566	 * with the SMMU stalled.  The crashdumper needs memory access to
1567	 * write out GPU state, so we need to skip this when the SMMU is
1568	 * stalled in response to an iova fault
1569	 */
1570	if (!stalled && !gpu->needs_hw_init &&
1571	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1572		dumper = &_dumper;
1573	}
1574
1575	if (adreno_is_a7xx(adreno_gpu)) {
1576		a7xx_get_registers(gpu, a6xx_state, dumper);
1577
1578		if (dumper) {
1579			a7xx_get_shaders(gpu, a6xx_state, dumper);
1580			a7xx_get_clusters(gpu, a6xx_state, dumper);
1581			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1582
1583			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1584		}
1585
1586		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1587	} else {
1588		a6xx_get_registers(gpu, a6xx_state, dumper);
1589
1590		if (dumper) {
1591			a6xx_get_shaders(gpu, a6xx_state, dumper);
1592			a6xx_get_clusters(gpu, a6xx_state, dumper);
1593			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1594
1595			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1596		}
1597	}
1598
1599	if (snapshot_debugbus)
1600		a6xx_get_debugbus(gpu, a6xx_state);
1601
1602	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1603
1604	return  &a6xx_state->base;
1605}
1606
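/* Free the snapshot along with every allocation tracked on its object list */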
1607static void a6xx_gpu_state_destroy(struct kref *kref)
1608{
1609	struct a6xx_state_memobj *obj, *tmp;
1610	struct msm_gpu_state *state = container_of(kref,
1611			struct msm_gpu_state, ref);
1612	struct a6xx_gpu_state *a6xx_state = container_of(state,
1613			struct a6xx_gpu_state, base);
1614
1615	if (a6xx_state->gmu_log)
1616		kvfree(a6xx_state->gmu_log->data);
1617
1618	if (a6xx_state->gmu_hfi)
1619		kvfree(a6xx_state->gmu_hfi->data);
1620
1621	if (a6xx_state->gmu_debug)
1622		kvfree(a6xx_state->gmu_debug->data);
1623
1624	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1625		list_del(&obj->node);
1626		kvfree(obj);
1627	}
1628
1629	adreno_gpu_state_destroy(state);
1630	kfree(a6xx_state);
1631}
1632
1633int a6xx_gpu_state_put(struct msm_gpu_state *state)
1634{
1635	if (IS_ERR_OR_NULL(state))
1636		return 1;
1637
1638	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1639}
1640
1641static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1642		struct drm_printer *p)
1643{
1644	int i, index = 0;
1645
1646	if (!data)
1647		return;
1648
1649	for (i = 0; i < count; i += 2) {
1650		u32 count = RANGE(registers, i);
1651		u32 offset = registers[i];
1652		int j;
1653
1654		for (j = 0; j < count; index++, offset++, j++) {
1655			if (data[index] == 0xdeafbead)
1656				continue;
1657
1658			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1659				offset << 2, data[index]);
1660		}
1661	}
1662}
1663
1664static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1665		struct drm_printer *p, unsigned indent)
1666{
1667	int i, index = 0;
1668
1669	for (i = 0; registers[i] != UINT_MAX; i += 2) {
1670		u32 count = RANGE(registers, i);
1671		u32 offset = registers[i];
1672		int j;
1673
1674		for (j = 0; j < count; index++, offset++, j++) {
1675			int k;
1676
1677			if (data[index] == 0xdeafbead)
1678				continue;
1679
1680			for (k = 0; k < indent; k++)
1681				drm_printf(p, "  ");
1682			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1683				offset << 2, data[index]);
1684		}
1685	}
1686}
1687
1688static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1689{
1690	a7xx_show_registers_indented(registers, data, p, 1);
1691}
1692
1693static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1694{
1695	char out[ASCII85_BUFSZ];
1696	long i, l, datalen = 0;
1697
1698	for (i = 0; i < len >> 2; i++) {
1699		if (data[i])
1700			datalen = (i + 1) << 2;
1701	}
1702
1703	if (datalen == 0)
1704		return;
1705
1706	drm_puts(p, "    data: !!ascii85 |\n");
1707	drm_puts(p, "      ");
1708
1709
1710	l = ascii85_encode_len(datalen);
1711
1712	for (i = 0; i < l; i++)
1713		drm_puts(p, ascii85_encode(data[i], out));
1714
1715	drm_puts(p, "\n");
1716}
1717
1718static void print_name(struct drm_printer *p, const char *fmt, const char *name)
1719{
1720	drm_puts(p, fmt);
1721	drm_puts(p, name);
1722	drm_puts(p, "\n");
1723}
1724
1725static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1726		struct drm_printer *p)
1727{
1728	const struct a6xx_shader_block *block = obj->handle;
1729	int i;
1730
1731	if (!obj->handle)
1732		return;
1733
1734	print_name(p, "  - type: ", block->name);
1735
1736	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1737		drm_printf(p, "    - bank: %d\n", i);
1738		drm_printf(p, "      size: %d\n", block->size);
1739
1740		if (!obj->data)
1741			continue;
1742
1743		print_ascii85(p, block->size << 2,
1744			obj->data + (block->size * i));
1745	}
1746}
1747
1748static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1749		struct drm_printer *p)
1750{
1751	const struct gen7_shader_block *block = obj->handle;
1752	int i, j;
1753	u32 *data = obj->data;
1754
1755	if (!obj->handle)
1756		return;
1757
1758	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
1759	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
1760
1761	for (i = 0; i < block->num_sps; i++) {
1762		drm_printf(p, "      - sp: %d\n", i);
1763
1764		for (j = 0; j < block->num_usptps; j++) {
1765			drm_printf(p, "        - usptp: %d\n", j);
1766			drm_printf(p, "          size: %d\n", block->size);
1767
1768			if (!obj->data)
1769				continue;
1770
1771			print_ascii85(p, block->size << 2, data);
1772
1773			data += block->size;
1774		}
1775	}
1776}
1777
1778static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1779		struct drm_printer *p)
1780{
1781	int ctx, index = 0;
1782
1783	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1784		int j;
1785
1786		drm_printf(p, "    - context: %d\n", ctx);
1787
1788		for (j = 0; j < size; j += 2) {
1789			u32 count = RANGE(registers, j);
1790			u32 offset = registers[j];
1791			int k;
1792
1793			for (k = 0; k < count; index++, offset++, k++) {
1794				if (data[index] == 0xdeafbead)
1795					continue;
1796
1797				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1798					offset << 2, data[index]);
1799			}
1800		}
1801	}
1802}
1803
1804static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1805		struct drm_printer *p)
1806{
1807	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1808
1809	if (dbgahb) {
1810		print_name(p, "  - cluster-name: ", dbgahb->name);
1811		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1812			obj->data, p);
1813	}
1814}
1815
1816static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1817		struct drm_printer *p)
1818{
1819	const struct a6xx_cluster *cluster = obj->handle;
1820
1821	if (cluster) {
1822		print_name(p, "  - cluster-name: ", cluster->name);
1823		a6xx_show_cluster_data(cluster->registers, cluster->count,
1824			obj->data, p);
1825	}
1826}
1827
1828static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1829		struct drm_printer *p)
1830{
1831	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1832
1833	if (dbgahb) {
1834		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1835		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1836		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
1837		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1838	}
1839}
1840
1841static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1842		struct drm_printer *p)
1843{
1844	const struct gen7_cluster_registers *cluster = obj->handle;
1845
1846	if (cluster) {
1847		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1848
1849		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1850		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1851		drm_printf(p, "      - context: %d\n", context);
1852		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1853	}
1854}
1855
1856static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1857		struct drm_printer *p)
1858{
1859	const struct a6xx_indexed_registers *indexed = obj->handle;
1860
1861	if (!indexed)
1862		return;
1863
1864	print_name(p, "  - regs-name: ", indexed->name);
1865	drm_printf(p, "    dwords: %d\n", indexed->count);
1866
1867	print_ascii85(p, indexed->count << 2, obj->data);
1868}
1869
1870static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1871		u32 *data, struct drm_printer *p)
1872{
1873	if (block) {
1874		print_name(p, "  - debugbus-block: ", block->name);
1875
1876		/*
1877		 * count for regular debugbus data is in quadwords,
1878		 * but print the size in dwords for consistency
1879		 */
1880		drm_printf(p, "    count: %d\n", block->count << 1);
1881
1882		print_ascii85(p, block->count << 3, data);
1883	}
1884}
1885
1886static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1887		struct drm_printer *p)
1888{
1889	int i;
1890
1891	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1892		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1893
1894		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1895	}
1896
1897	if (a6xx_state->vbif_debugbus) {
1898		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1899
1900		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1901		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1902
1903		/* vbif debugbus data is in dwords.  Confusing, huh? */
1904		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1905	}
1906
1907	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1908		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1909
1910		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1911	}
1912}
1913
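/* Print the captured GPU state to the provided printer */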
1914void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1915		struct drm_printer *p)
1916{
1917	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1918	struct a6xx_gpu_state *a6xx_state = container_of(state,
1919			struct a6xx_gpu_state, base);
1920	int i;
1921
1922	if (IS_ERR_OR_NULL(state))
1923		return;
1924
1925	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1926
1927	adreno_show(gpu, state, p);
1928
1929	drm_puts(p, "gmu-log:\n");
1930	if (a6xx_state->gmu_log) {
1931		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1932
1933		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1934		drm_printf(p, "    size: %zu\n", gmu_log->size);
1935		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1936				&gmu_log->encoded);
1937	}
1938
1939	drm_puts(p, "gmu-hfi:\n");
1940	if (a6xx_state->gmu_hfi) {
1941		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1942		unsigned i, j;
1943
1944		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1945		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1946		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1947			drm_printf(p, "    queue-history[%u]:", i);
1948			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1949				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1950			}
1951			drm_printf(p, "\n");
1952		}
1953		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1954				&gmu_hfi->encoded);
1955	}
1956
1957	drm_puts(p, "gmu-debug:\n");
1958	if (a6xx_state->gmu_debug) {
1959		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1960
1961		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1962		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1963		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1964				&gmu_debug->encoded);
1965	}
1966
1967	drm_puts(p, "registers:\n");
1968	for (i = 0; i < a6xx_state->nr_registers; i++) {
1969		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1970
1971		if (!obj->handle)
1972			continue;
1973
1974		if (adreno_is_a7xx(adreno_gpu)) {
1975			a7xx_show_registers(obj->handle, obj->data, p);
1976		} else {
1977			const struct a6xx_registers *regs = obj->handle;
1978
1979			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1980		}
1981	}
1982
1983	drm_puts(p, "registers-gmu:\n");
1984	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1985		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1986		const struct a6xx_registers *regs = obj->handle;
1987
1988		if (!obj->handle)
1989			continue;
1990
1991		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1992	}
1993
1994	drm_puts(p, "indexed-registers:\n");
1995	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1996		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1997
1998	drm_puts(p, "shader-blocks:\n");
1999	for (i = 0; i < a6xx_state->nr_shaders; i++) {
2000		if (adreno_is_a7xx(adreno_gpu))
2001			a7xx_show_shader(&a6xx_state->shaders[i], p);
2002		else
2003			a6xx_show_shader(&a6xx_state->shaders[i], p);
2004	}
2005
2006	drm_puts(p, "clusters:\n");
2007	for (i = 0; i < a6xx_state->nr_clusters; i++) {
2008		if (adreno_is_a7xx(adreno_gpu))
2009			a7xx_show_cluster(&a6xx_state->clusters[i], p);
2010		else
2011			a6xx_show_cluster(&a6xx_state->clusters[i], p);
2012	}
2013
2014	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2015		if (adreno_is_a7xx(adreno_gpu))
2016			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2017		else
2018			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2019	}
2020
2021	drm_puts(p, "debugbus:\n");
2022	a6xx_show_debugbus(a6xx_state, p);
2023}