Linux Audio

Check our new training course

Linux BSP upgrade and security maintenance

Need help to get security updates for your Linux BSP?
Loading...
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
   3
   4#include <linux/ascii85.h>
   5#include "msm_gem.h"
   6#include "a6xx_gpu.h"
   7#include "a6xx_gmu.h"
   8#include "a6xx_gpu_state.h"
   9#include "a6xx_gmu.xml.h"
  10
  11static const unsigned int *gen7_0_0_external_core_regs[] __always_unused;
  12static const unsigned int *gen7_2_0_external_core_regs[] __always_unused;
  13static const unsigned int *gen7_9_0_external_core_regs[] __always_unused;
  14static struct gen7_sptp_cluster_registers gen7_9_0_sptp_clusters[] __always_unused;
  15static const u32 gen7_9_0_cx_debugbus_blocks[] __always_unused;
  16
  17#include "adreno_gen7_0_0_snapshot.h"
  18#include "adreno_gen7_2_0_snapshot.h"
  19#include "adreno_gen7_9_0_snapshot.h"
  20
  21struct a6xx_gpu_state_obj {
  22	const void *handle;
  23	u32 *data;
  24	u32 count;	/* optional, used when count potentially read from hw */
  25};
  26
  27struct a6xx_gpu_state {
  28	struct msm_gpu_state base;
  29
  30	struct a6xx_gpu_state_obj *gmu_registers;
  31	int nr_gmu_registers;
  32
  33	struct a6xx_gpu_state_obj *registers;
  34	int nr_registers;
  35
  36	struct a6xx_gpu_state_obj *shaders;
  37	int nr_shaders;
  38
  39	struct a6xx_gpu_state_obj *clusters;
  40	int nr_clusters;
  41
  42	struct a6xx_gpu_state_obj *dbgahb_clusters;
  43	int nr_dbgahb_clusters;
  44
  45	struct a6xx_gpu_state_obj *indexed_regs;
  46	int nr_indexed_regs;
  47
  48	struct a6xx_gpu_state_obj *debugbus;
  49	int nr_debugbus;
  50
  51	struct a6xx_gpu_state_obj *vbif_debugbus;
  52
  53	struct a6xx_gpu_state_obj *cx_debugbus;
  54	int nr_cx_debugbus;
  55
  56	struct msm_gpu_state_bo *gmu_log;
  57	struct msm_gpu_state_bo *gmu_hfi;
  58	struct msm_gpu_state_bo *gmu_debug;
  59
  60	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
  61
  62	struct list_head objs;
  63
  64	bool gpu_initialized;
  65};
  66
  67static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
  68{
  69	in[0] = val;
  70	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
  71
  72	return 2;
  73}
  74
  75static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
  76{
  77	in[0] = target;
  78	in[1] = (((u64) reg) << 44 | dwords);
  79
  80	return 2;
  81}
  82
  83static inline int CRASHDUMP_FINI(u64 *in)
  84{
  85	in[0] = 0;
  86	in[1] = 0;
  87
  88	return 2;
  89}
  90
  91struct a6xx_crashdumper {
  92	void *ptr;
  93	struct drm_gem_object *bo;
  94	u64 iova;
  95};
  96
  97struct a6xx_state_memobj {
  98	struct list_head node;
  99	unsigned long long data[];
 100};
 101
 102static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
 103{
 104	struct a6xx_state_memobj *obj =
 105		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
 106
 107	if (!obj)
 108		return NULL;
 109
 110	list_add_tail(&obj->node, &a6xx_state->objs);
 111	return &obj->data;
 112}
 113
 114static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
 115		size_t size)
 116{
 117	void *dst = state_kcalloc(a6xx_state, 1, size);
 118
 119	if (dst)
 120		memcpy(dst, src, size);
 121	return dst;
 122}
 123
 124/*
 125 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 126 * the rest for the data
 127 */
 128#define A6XX_CD_DATA_OFFSET 8192
 129#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
 130
 131static int a6xx_crashdumper_init(struct msm_gpu *gpu,
 132		struct a6xx_crashdumper *dumper)
 133{
 134	dumper->ptr = msm_gem_kernel_new(gpu->dev,
 135		SZ_1M, MSM_BO_WC, gpu->aspace,
 136		&dumper->bo, &dumper->iova);
 137
 138	if (!IS_ERR(dumper->ptr))
 139		msm_gem_object_set_name(dumper->bo, "crashdump");
 140
 141	return PTR_ERR_OR_ZERO(dumper->ptr);
 142}
 143
 144static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 145		struct a6xx_crashdumper *dumper)
 146{
 147	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 148	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 149	u32 val;
 150	int ret;
 151
 152	if (IS_ERR_OR_NULL(dumper->ptr))
 153		return -EINVAL;
 154
 155	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
 156		return -EINVAL;
 157
 158	/* Make sure all pending memory writes are posted */
 159	wmb();
 160
 161	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE, dumper->iova);
 162
 163	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 164
 165	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
 166		val & 0x02, 100, 10000);
 167
 168	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
 169
 170	return ret;
 171}
 172
 173/* read a value from the GX debug bus */
 174static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
 175		u32 *data)
 176{
 177	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
 178		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
 179
 180	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
 181	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
 182	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
 183	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
 184
 185	/* Wait 1 us to make sure the data is flowing */
 186	udelay(1);
 187
 188	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 189	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 190
 191	return 2;
 192}
 193
 194#define cxdbg_write(ptr, offset, val) \
 195	writel((val), (ptr) + ((offset) << 2))
 196
 197#define cxdbg_read(ptr, offset) \
 198	readl((ptr) + ((offset) << 2))
 199
 200/* read a value from the CX debug bus */
 201static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
 202		u32 *data)
 203{
 204	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
 205		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
 206
 207	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
 208	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
 209	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
 210	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
 211
 212	/* Wait 1 us to make sure the data is flowing */
 213	udelay(1);
 214
 215	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 216	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 217
 218	return 2;
 219}
 220
 221/* Read a chunk of data from the VBIF debug bus */
 222static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
 223		u32 reg, int count, u32 *data)
 224{
 225	int i;
 226
 227	gpu_write(gpu, ctrl0, reg);
 228
 229	for (i = 0; i < count; i++) {
 230		gpu_write(gpu, ctrl1, i);
 231		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
 232	}
 233
 234	return count;
 235}
 236
 237#define AXI_ARB_BLOCKS 2
 238#define XIN_AXI_BLOCKS 5
 239#define XIN_CORE_BLOCKS 4
 240
 241#define VBIF_DEBUGBUS_BLOCK_SIZE \
 242	((16 * AXI_ARB_BLOCKS) + \
 243	 (18 * XIN_AXI_BLOCKS) + \
 244	 (12 * XIN_CORE_BLOCKS))
 245
 246static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
 247		struct a6xx_gpu_state *a6xx_state,
 248		struct a6xx_gpu_state_obj *obj)
 249{
 250	u32 clk, *ptr;
 251	int i;
 252
 253	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
 254		sizeof(u32));
 255	if (!obj->data)
 256		return;
 257
 258	obj->handle = NULL;
 259
 260	/* Get the current clock setting */
 261	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
 262
 263	/* Force on the bus so we can read it */
 264	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
 265		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
 266
 267	/* We will read from BUS2 first, so disable BUS1 */
 268	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
 269
 270	/* Enable the VBIF bus for reading */
 271	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
 272
 273	ptr = obj->data;
 274
 275	for (i = 0; i < AXI_ARB_BLOCKS; i++)
 276		ptr += vbif_debugbus_read(gpu,
 277			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 278			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 279			1 << (i + 16), 16, ptr);
 280
 281	for (i = 0; i < XIN_AXI_BLOCKS; i++)
 282		ptr += vbif_debugbus_read(gpu,
 283			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 284			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 285			1 << i, 18, ptr);
 286
 287	/* Stop BUS2 so we can turn on BUS1 */
 288	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
 289
 290	for (i = 0; i < XIN_CORE_BLOCKS; i++)
 291		ptr += vbif_debugbus_read(gpu,
 292			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
 293			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
 294			1 << i, 12, ptr);
 295
 296	/* Restore the VBIF clock setting */
 297	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
 298}
 299
 300static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
 301		struct a6xx_gpu_state *a6xx_state,
 302		const struct a6xx_debugbus_block *block,
 303		struct a6xx_gpu_state_obj *obj)
 304{
 305	int i;
 306	u32 *ptr;
 307
 308	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 309	if (!obj->data)
 310		return;
 311
 312	obj->handle = block;
 313
 314	for (ptr = obj->data, i = 0; i < block->count; i++)
 315		ptr += debugbus_read(gpu, block->id, i, ptr);
 316}
 317
 318static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
 319		struct a6xx_gpu_state *a6xx_state,
 320		const struct a6xx_debugbus_block *block,
 321		struct a6xx_gpu_state_obj *obj)
 322{
 323	int i;
 324	u32 *ptr;
 325
 326	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 327	if (!obj->data)
 328		return;
 329
 330	obj->handle = block;
 331
 332	for (ptr = obj->data, i = 0; i < block->count; i++)
 333		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
 334}
 335
 336static void a6xx_get_debugbus_blocks(struct msm_gpu *gpu,
 337		struct a6xx_gpu_state *a6xx_state)
 338{
 339	int nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
 340		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
 341
 342	if (adreno_is_a650_family(to_adreno_gpu(gpu)))
 343		nr_debugbus_blocks += ARRAY_SIZE(a650_debugbus_blocks);
 344
 345	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
 346			sizeof(*a6xx_state->debugbus));
 347
 348	if (a6xx_state->debugbus) {
 349		int i;
 350
 351		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
 352			a6xx_get_debugbus_block(gpu,
 353				a6xx_state,
 354				&a6xx_debugbus_blocks[i],
 355				&a6xx_state->debugbus[i]);
 356
 357		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
 358
 359		/*
 360		 * GBIF has same debugbus as of other GPU blocks, fall back to
 361		 * default path if GPU uses GBIF, also GBIF uses exactly same
 362		 * ID as of VBIF.
 363		 */
 364		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
 365			a6xx_get_debugbus_block(gpu, a6xx_state,
 366				&a6xx_gbif_debugbus_block,
 367				&a6xx_state->debugbus[i]);
 368
 369			a6xx_state->nr_debugbus += 1;
 370		}
 371
 372
 373		if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
 374			for (i = 0; i < ARRAY_SIZE(a650_debugbus_blocks); i++)
 375				a6xx_get_debugbus_block(gpu,
 376					a6xx_state,
 377					&a650_debugbus_blocks[i],
 378					&a6xx_state->debugbus[i]);
 379		}
 380	}
 381}
 382
 383static void a7xx_get_debugbus_blocks(struct msm_gpu *gpu,
 384		struct a6xx_gpu_state *a6xx_state)
 385{
 386	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 387	int debugbus_blocks_count, gbif_debugbus_blocks_count, total_debugbus_blocks;
 388	const u32 *debugbus_blocks, *gbif_debugbus_blocks;
 389	int i;
 390
 391	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
 392		debugbus_blocks = gen7_0_0_debugbus_blocks;
 393		debugbus_blocks_count = ARRAY_SIZE(gen7_0_0_debugbus_blocks);
 394		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
 395		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
 396	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
 397		debugbus_blocks = gen7_2_0_debugbus_blocks;
 398		debugbus_blocks_count = ARRAY_SIZE(gen7_2_0_debugbus_blocks);
 399		gbif_debugbus_blocks = a7xx_gbif_debugbus_blocks;
 400		gbif_debugbus_blocks_count = ARRAY_SIZE(a7xx_gbif_debugbus_blocks);
 401	} else {
 402		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
 403		debugbus_blocks = gen7_9_0_debugbus_blocks;
 404		debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_debugbus_blocks);
 405		gbif_debugbus_blocks = gen7_9_0_gbif_debugbus_blocks;
 406		gbif_debugbus_blocks_count = ARRAY_SIZE(gen7_9_0_gbif_debugbus_blocks);
 407	}
 408
 409	total_debugbus_blocks = debugbus_blocks_count + gbif_debugbus_blocks_count;
 410
 411	a6xx_state->debugbus = state_kcalloc(a6xx_state, total_debugbus_blocks,
 412			sizeof(*a6xx_state->debugbus));
 413
 414	if (a6xx_state->debugbus) {
 415		for (i = 0; i < debugbus_blocks_count; i++) {
 416			a6xx_get_debugbus_block(gpu,
 417				a6xx_state, &a7xx_debugbus_blocks[debugbus_blocks[i]],
 418				&a6xx_state->debugbus[i]);
 419		}
 420
 421		for (i = 0; i < gbif_debugbus_blocks_count; i++) {
 422			a6xx_get_debugbus_block(gpu,
 423				a6xx_state, &a7xx_debugbus_blocks[gbif_debugbus_blocks[i]],
 424				&a6xx_state->debugbus[i + debugbus_blocks_count]);
 425		}
 426	}
 427
 428}
 429
 430static void a6xx_get_debugbus(struct msm_gpu *gpu,
 431		struct a6xx_gpu_state *a6xx_state)
 432{
 433	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 434	struct resource *res;
 435	void __iomem *cxdbg = NULL;
 
 436
 437	/* Set up the GX debug bus */
 438
 439	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
 440		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 441
 442	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
 443		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 444
 445	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 446	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 447	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 448	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 449
 450	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
 451	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
 452
 453	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 454	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 455	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 456	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 457
 458	/* Set up the CX debug bus - it lives elsewhere in the system so do a
 459	 * temporary ioremap for the registers
 460	 */
 461	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
 462			"cx_dbgc");
 463
 464	if (res)
 465		cxdbg = ioremap(res->start, resource_size(res));
 466
 467	if (cxdbg) {
 468		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
 469			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 470
 471		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
 472			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 473
 474		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 475		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 476		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 477		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 478
 479		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
 480			0x76543210);
 481		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
 482			0xFEDCBA98);
 483
 484		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 485		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 486		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 487		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 488	}
 489
 490	if (adreno_is_a7xx(adreno_gpu)) {
 491		a7xx_get_debugbus_blocks(gpu, a6xx_state);
 492	} else {
 493		a6xx_get_debugbus_blocks(gpu, a6xx_state);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 494	}
 495
 496	/*  Dump the VBIF debugbus on applicable targets */
 497	if (!a6xx_has_gbif(adreno_gpu)) {
 498		a6xx_state->vbif_debugbus =
 499			state_kcalloc(a6xx_state, 1,
 500					sizeof(*a6xx_state->vbif_debugbus));
 501
 502		if (a6xx_state->vbif_debugbus)
 503			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
 504					a6xx_state->vbif_debugbus);
 505	}
 506
 507	if (cxdbg) {
 508		unsigned nr_cx_debugbus_blocks;
 509		const struct a6xx_debugbus_block *cx_debugbus_blocks;
 510
 511		if (adreno_is_a7xx(adreno_gpu)) {
 512			BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
 513			cx_debugbus_blocks = a7xx_cx_debugbus_blocks;
 514			nr_cx_debugbus_blocks = ARRAY_SIZE(a7xx_cx_debugbus_blocks);
 515		} else {
 516			cx_debugbus_blocks = a6xx_cx_debugbus_blocks;
 517			nr_cx_debugbus_blocks = ARRAY_SIZE(a6xx_cx_debugbus_blocks);
 518		}
 519
 520		a6xx_state->cx_debugbus =
 521			state_kcalloc(a6xx_state,
 522			nr_cx_debugbus_blocks,
 523			sizeof(*a6xx_state->cx_debugbus));
 524
 525		if (a6xx_state->cx_debugbus) {
 526			int i;
 527
 528			for (i = 0; i < nr_cx_debugbus_blocks; i++)
 529				a6xx_get_cx_debugbus_block(cxdbg,
 530					a6xx_state,
 531					&cx_debugbus_blocks[i],
 532					&a6xx_state->cx_debugbus[i]);
 533
 534			a6xx_state->nr_cx_debugbus =
 535				nr_cx_debugbus_blocks;
 536		}
 537
 538		iounmap(cxdbg);
 539	}
 540}
 541
 542#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
 543
 544/* Read a data cluster from behind the AHB aperture */
 545static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 546		struct a6xx_gpu_state *a6xx_state,
 547		const struct a6xx_dbgahb_cluster *dbgahb,
 548		struct a6xx_gpu_state_obj *obj,
 549		struct a6xx_crashdumper *dumper)
 550{
 551	u64 *in = dumper->ptr;
 552	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 553	size_t datasize;
 554	int i, regcount = 0;
 555
 556	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 557		int j;
 558
 559		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 560			(dbgahb->statetype + i * 2) << 8);
 561
 562		for (j = 0; j < dbgahb->count; j += 2) {
 563			int count = RANGE(dbgahb->registers, j);
 564			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 565				dbgahb->registers[j] - (dbgahb->base >> 2);
 566
 567			in += CRASHDUMP_READ(in, offset, count, out);
 568
 569			out += count * sizeof(u32);
 570
 571			if (i == 0)
 572				regcount += count;
 573		}
 574	}
 575
 576	CRASHDUMP_FINI(in);
 577
 578	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 579
 580	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 581		return;
 582
 583	if (a6xx_crashdumper_run(gpu, dumper))
 584		return;
 585
 586	obj->handle = dbgahb;
 587	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 588		datasize);
 589}
 590
 591static void a7xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 592		struct a6xx_gpu_state *a6xx_state,
 593		const struct gen7_sptp_cluster_registers *dbgahb,
 594		struct a6xx_gpu_state_obj *obj,
 595		struct a6xx_crashdumper *dumper)
 596{
 597	u64 *in = dumper->ptr;
 598	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 599	size_t datasize;
 600	int i, regcount = 0;
 601
 602	in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
 603		A7XX_SP_READ_SEL_LOCATION(dbgahb->location_id) |
 604		A7XX_SP_READ_SEL_PIPE(dbgahb->pipe_id) |
 605		A7XX_SP_READ_SEL_STATETYPE(dbgahb->statetype));
 606
 607	for (i = 0; dbgahb->regs[i] != UINT_MAX; i += 2) {
 608		int count = RANGE(dbgahb->regs, i);
 609		u32 offset = REG_A7XX_SP_AHB_READ_APERTURE +
 610			dbgahb->regs[i] - dbgahb->regbase;
 611
 612		in += CRASHDUMP_READ(in, offset, count, out);
 613
 614		out += count * sizeof(u32);
 615		regcount += count;
 616	}
 617
 618	CRASHDUMP_FINI(in);
 619
 620	datasize = regcount * sizeof(u32);
 621
 622	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 623		return;
 624
 625	if (a6xx_crashdumper_run(gpu, dumper))
 626		return;
 627
 628	obj->handle = dbgahb;
 629	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 630		datasize);
 631}
 632
 633static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 634		struct a6xx_gpu_state *a6xx_state,
 635		struct a6xx_crashdumper *dumper)
 636{
 637	int i;
 638
 639	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 640		ARRAY_SIZE(a6xx_dbgahb_clusters),
 641		sizeof(*a6xx_state->dbgahb_clusters));
 642
 643	if (!a6xx_state->dbgahb_clusters)
 644		return;
 645
 646	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
 647
 648	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
 649		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
 650			&a6xx_dbgahb_clusters[i],
 651			&a6xx_state->dbgahb_clusters[i], dumper);
 652}
 653
 654static void a7xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 655		struct a6xx_gpu_state *a6xx_state,
 656		struct a6xx_crashdumper *dumper)
 657{
 658	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 659	int i;
 660	const struct gen7_sptp_cluster_registers *dbgahb_clusters;
 661	unsigned dbgahb_clusters_size;
 662
 663	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
 664		dbgahb_clusters = gen7_0_0_sptp_clusters;
 665		dbgahb_clusters_size = ARRAY_SIZE(gen7_0_0_sptp_clusters);
 666	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
 667		dbgahb_clusters = gen7_2_0_sptp_clusters;
 668		dbgahb_clusters_size = ARRAY_SIZE(gen7_2_0_sptp_clusters);
 669	} else {
 670		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
 671		dbgahb_clusters = gen7_9_0_sptp_clusters;
 672		dbgahb_clusters_size = ARRAY_SIZE(gen7_9_0_sptp_clusters);
 673	}
 674
 675	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 676		dbgahb_clusters_size,
 677		sizeof(*a6xx_state->dbgahb_clusters));
 678
 679	if (!a6xx_state->dbgahb_clusters)
 680		return;
 681
 682	a6xx_state->nr_dbgahb_clusters = dbgahb_clusters_size;
 683
 684	for (i = 0; i < dbgahb_clusters_size; i++)
 685		a7xx_get_dbgahb_cluster(gpu, a6xx_state,
 686			&dbgahb_clusters[i],
 687			&a6xx_state->dbgahb_clusters[i], dumper);
 688}
 689
 690/* Read a data cluster from the CP aperture with the crashdumper */
 691static void a6xx_get_cluster(struct msm_gpu *gpu,
 692		struct a6xx_gpu_state *a6xx_state,
 693		const struct a6xx_cluster *cluster,
 694		struct a6xx_gpu_state_obj *obj,
 695		struct a6xx_crashdumper *dumper)
 696{
 697	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 698	u64 *in = dumper->ptr;
 699	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 700	size_t datasize;
 701	int i, regcount = 0;
 702	u32 id = cluster->id;
 703
 704	/* Skip registers that are not present on older generation */
 705	if (!adreno_is_a660_family(adreno_gpu) &&
 706			cluster->registers == a660_fe_cluster)
 707		return;
 708
 709	if (adreno_is_a650_family(adreno_gpu) &&
 710			cluster->registers == a6xx_ps_cluster)
 711		id = CLUSTER_VPC_PS;
 712
 713	/* Some clusters need a selector register to be programmed too */
 714	if (cluster->sel_reg)
 715		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
 716
 717	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 718		int j;
 719
 720		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
 721			(id << 8) | (i << 4) | i);
 722
 723		for (j = 0; j < cluster->count; j += 2) {
 724			int count = RANGE(cluster->registers, j);
 725
 726			in += CRASHDUMP_READ(in, cluster->registers[j],
 727				count, out);
 728
 729			out += count * sizeof(u32);
 730
 731			if (i == 0)
 732				regcount += count;
 733		}
 734	}
 735
 736	CRASHDUMP_FINI(in);
 737
 738	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 739
 740	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 741		return;
 742
 743	if (a6xx_crashdumper_run(gpu, dumper))
 744		return;
 745
 746	obj->handle = cluster;
 747	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 748		datasize);
 749}
 750
 751static void a7xx_get_cluster(struct msm_gpu *gpu,
 752		struct a6xx_gpu_state *a6xx_state,
 753		const struct gen7_cluster_registers *cluster,
 754		struct a6xx_gpu_state_obj *obj,
 755		struct a6xx_crashdumper *dumper)
 756{
 757	u64 *in = dumper->ptr;
 758	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 759	size_t datasize;
 760	int i, regcount = 0;
 761
 762	/* Some clusters need a selector register to be programmed too */
 763	if (cluster->sel)
 764		in += CRASHDUMP_WRITE(in, cluster->sel->cd_reg, cluster->sel->val);
 765
 766	in += CRASHDUMP_WRITE(in, REG_A7XX_CP_APERTURE_CNTL_CD,
 767		A7XX_CP_APERTURE_CNTL_CD_PIPE(cluster->pipe_id) |
 768		A7XX_CP_APERTURE_CNTL_CD_CLUSTER(cluster->cluster_id) |
 769		A7XX_CP_APERTURE_CNTL_CD_CONTEXT(cluster->context_id));
 770
 771	for (i = 0; cluster->regs[i] != UINT_MAX; i += 2) {
 772		int count = RANGE(cluster->regs, i);
 773
 774		in += CRASHDUMP_READ(in, cluster->regs[i],
 775			count, out);
 776
 777		out += count * sizeof(u32);
 778		regcount += count;
 779	}
 780
 781	CRASHDUMP_FINI(in);
 782
 783	datasize = regcount * sizeof(u32);
 784
 785	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 786		return;
 787
 788	if (a6xx_crashdumper_run(gpu, dumper))
 789		return;
 790
 791	obj->handle = cluster;
 792	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 793		datasize);
 794}
 795
 796static void a6xx_get_clusters(struct msm_gpu *gpu,
 797		struct a6xx_gpu_state *a6xx_state,
 798		struct a6xx_crashdumper *dumper)
 799{
 800	int i;
 801
 802	a6xx_state->clusters = state_kcalloc(a6xx_state,
 803		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
 804
 805	if (!a6xx_state->clusters)
 806		return;
 807
 808	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
 809
 810	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
 811		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
 812			&a6xx_state->clusters[i], dumper);
 813}
 814
 815static void a7xx_get_clusters(struct msm_gpu *gpu,
 816		struct a6xx_gpu_state *a6xx_state,
 817		struct a6xx_crashdumper *dumper)
 818{
 819	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 820	int i;
 821	const struct gen7_cluster_registers *clusters;
 822	unsigned clusters_size;
 823
 824	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
 825		clusters = gen7_0_0_clusters;
 826		clusters_size = ARRAY_SIZE(gen7_0_0_clusters);
 827	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
 828		clusters = gen7_2_0_clusters;
 829		clusters_size = ARRAY_SIZE(gen7_2_0_clusters);
 830	} else {
 831		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
 832		clusters = gen7_9_0_clusters;
 833		clusters_size = ARRAY_SIZE(gen7_9_0_clusters);
 834	}
 835
 836	a6xx_state->clusters = state_kcalloc(a6xx_state,
 837		clusters_size, sizeof(*a6xx_state->clusters));
 838
 839	if (!a6xx_state->clusters)
 840		return;
 841
 842	a6xx_state->nr_clusters = clusters_size;
 843
 844	for (i = 0; i < clusters_size; i++)
 845		a7xx_get_cluster(gpu, a6xx_state, &clusters[i],
 846			&a6xx_state->clusters[i], dumper);
 847}
 848
 849/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
 850static void a6xx_get_shader_block(struct msm_gpu *gpu,
 851		struct a6xx_gpu_state *a6xx_state,
 852		const struct a6xx_shader_block *block,
 853		struct a6xx_gpu_state_obj *obj,
 854		struct a6xx_crashdumper *dumper)
 855{
 856	u64 *in = dumper->ptr;
 857	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 858	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
 859	int i;
 860
 861	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 862		return;
 863
 864	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
 865		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 866			(block->type << 8) | i);
 867
 868		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
 869			block->size, out);
 870
 871		out += block->size * sizeof(u32);
 872	}
 873
 874	CRASHDUMP_FINI(in);
 875
 876	if (a6xx_crashdumper_run(gpu, dumper))
 877		return;
 878
 879	obj->handle = block;
 880	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 881		datasize);
 882}
 883
 884static void a7xx_get_shader_block(struct msm_gpu *gpu,
 885		struct a6xx_gpu_state *a6xx_state,
 886		const struct gen7_shader_block *block,
 887		struct a6xx_gpu_state_obj *obj,
 888		struct a6xx_crashdumper *dumper)
 889{
 890	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 891	u64 *in = dumper->ptr;
 892	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 893	size_t datasize = block->size * block->num_sps * block->num_usptps * sizeof(u32);
 894	int i, j;
 895
 896	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 897		return;
 898
 899	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
 900		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 3);
 901	}
 902
 903	for (i = 0; i < block->num_sps; i++) {
 904		for (j = 0; j < block->num_usptps; j++) {
 905			in += CRASHDUMP_WRITE(in, REG_A7XX_SP_READ_SEL,
 906				A7XX_SP_READ_SEL_LOCATION(block->location) |
 907				A7XX_SP_READ_SEL_PIPE(block->pipeid) |
 908				A7XX_SP_READ_SEL_STATETYPE(block->statetype) |
 909				A7XX_SP_READ_SEL_USPTP(j) |
 910				A7XX_SP_READ_SEL_SPTP(i));
 911
 912			in += CRASHDUMP_READ(in, REG_A7XX_SP_AHB_READ_APERTURE,
 913				block->size, out);
 914
 915			out += block->size * sizeof(u32);
 916		}
 917	}
 918
 919	CRASHDUMP_FINI(in);
 920
 921	if (a6xx_crashdumper_run(gpu, dumper))
 922		goto out;
 923
 924	obj->handle = block;
 925	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 926		datasize);
 927
 928out:
 929	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
 930		gpu_rmw(gpu, REG_A7XX_SP_DBG_CNTL, GENMASK(1, 0), 0);
 931	}
 932}
 933
 934static void a6xx_get_shaders(struct msm_gpu *gpu,
 935		struct a6xx_gpu_state *a6xx_state,
 936		struct a6xx_crashdumper *dumper)
 937{
 938	int i;
 939
 940	a6xx_state->shaders = state_kcalloc(a6xx_state,
 941		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
 942
 943	if (!a6xx_state->shaders)
 944		return;
 945
 946	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
 947
 948	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
 949		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
 950			&a6xx_state->shaders[i], dumper);
 951}
 952
 953static void a7xx_get_shaders(struct msm_gpu *gpu,
 954		struct a6xx_gpu_state *a6xx_state,
 955		struct a6xx_crashdumper *dumper)
 956{
 957	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 958	const struct gen7_shader_block *shader_blocks;
 959	unsigned num_shader_blocks;
 960	int i;
 961
 962	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
 963		shader_blocks = gen7_0_0_shader_blocks;
 964		num_shader_blocks = ARRAY_SIZE(gen7_0_0_shader_blocks);
 965	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
 966		shader_blocks = gen7_2_0_shader_blocks;
 967		num_shader_blocks = ARRAY_SIZE(gen7_2_0_shader_blocks);
 968	} else {
 969		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
 970		shader_blocks = gen7_9_0_shader_blocks;
 971		num_shader_blocks = ARRAY_SIZE(gen7_9_0_shader_blocks);
 972	}
 973
 974	a6xx_state->shaders = state_kcalloc(a6xx_state,
 975		num_shader_blocks, sizeof(*a6xx_state->shaders));
 976
 977	if (!a6xx_state->shaders)
 978		return;
 979
 980	a6xx_state->nr_shaders = num_shader_blocks;
 981
 982	for (i = 0; i < num_shader_blocks; i++)
 983		a7xx_get_shader_block(gpu, a6xx_state, &shader_blocks[i],
 984			&a6xx_state->shaders[i], dumper);
 985}
 986
 987/* Read registers from behind the HLSQ aperture with the crashdumper */
 988static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
 989		struct a6xx_gpu_state *a6xx_state,
 990		const struct a6xx_registers *regs,
 991		struct a6xx_gpu_state_obj *obj,
 992		struct a6xx_crashdumper *dumper)
 993
 994{
 995	u64 *in = dumper->ptr;
 996	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 997	int i, regcount = 0;
 998
 999	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
1000
1001	for (i = 0; i < regs->count; i += 2) {
1002		u32 count = RANGE(regs->registers, i);
1003		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
1004			regs->registers[i] - (regs->val0 >> 2);
1005
1006		in += CRASHDUMP_READ(in, offset, count, out);
1007
1008		out += count * sizeof(u32);
1009		regcount += count;
1010	}
1011
1012	CRASHDUMP_FINI(in);
1013
1014	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1015		return;
1016
1017	if (a6xx_crashdumper_run(gpu, dumper))
1018		return;
1019
1020	obj->handle = regs;
1021	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1022		regcount * sizeof(u32));
1023}
1024
1025/* Read a block of registers using the crashdumper */
1026static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
1027		struct a6xx_gpu_state *a6xx_state,
1028		const struct a6xx_registers *regs,
1029		struct a6xx_gpu_state_obj *obj,
1030		struct a6xx_crashdumper *dumper)
1031
1032{
1033	u64 *in = dumper->ptr;
1034	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1035	int i, regcount = 0;
1036
1037	/* Skip unsupported registers on older generations */
1038	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1039			(regs->registers == a660_registers))
1040		return;
1041
1042	/* Some blocks might need to program a selector register first */
1043	if (regs->val0)
1044		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
1045
1046	for (i = 0; i < regs->count; i += 2) {
1047		u32 count = RANGE(regs->registers, i);
1048
1049		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
1050
1051		out += count * sizeof(u32);
1052		regcount += count;
1053	}
1054
1055	CRASHDUMP_FINI(in);
1056
1057	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1058		return;
1059
1060	if (a6xx_crashdumper_run(gpu, dumper))
1061		return;
1062
1063	obj->handle = regs;
1064	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1065		regcount * sizeof(u32));
1066}
1067
1068static void a7xx_get_crashdumper_registers(struct msm_gpu *gpu,
1069		struct a6xx_gpu_state *a6xx_state,
1070		const struct gen7_reg_list *regs,
1071		struct a6xx_gpu_state_obj *obj,
1072		struct a6xx_crashdumper *dumper)
1073
1074{
1075	u64 *in = dumper->ptr;
1076	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
1077	int i, regcount = 0;
1078
1079	/* Some blocks might need to program a selector register first */
1080	if (regs->sel)
1081		in += CRASHDUMP_WRITE(in, regs->sel->cd_reg, regs->sel->val);
1082
1083	for (i = 0; regs->regs[i] != UINT_MAX; i += 2) {
1084		u32 count = RANGE(regs->regs, i);
1085
1086		in += CRASHDUMP_READ(in, regs->regs[i], count, out);
1087
1088		out += count * sizeof(u32);
1089		regcount += count;
1090	}
1091
1092	CRASHDUMP_FINI(in);
1093
1094	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
1095		return;
1096
1097	if (a6xx_crashdumper_run(gpu, dumper))
1098		return;
1099
1100	obj->handle = regs->regs;
1101	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
1102		regcount * sizeof(u32));
1103}
1104
1105
1106/* Read a block of registers via AHB */
1107static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1108		struct a6xx_gpu_state *a6xx_state,
1109		const struct a6xx_registers *regs,
1110		struct a6xx_gpu_state_obj *obj)
1111{
1112	int i, regcount = 0, index = 0;
1113
1114	/* Skip unsupported registers on older generations */
1115	if (!adreno_is_a660_family(to_adreno_gpu(gpu)) &&
1116			(regs->registers == a660_registers))
1117		return;
1118
1119	for (i = 0; i < regs->count; i += 2)
1120		regcount += RANGE(regs->registers, i);
1121
1122	obj->handle = (const void *) regs;
1123	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1124	if (!obj->data)
1125		return;
1126
1127	for (i = 0; i < regs->count; i += 2) {
1128		u32 count = RANGE(regs->registers, i);
1129		int j;
1130
1131		for (j = 0; j < count; j++)
1132			obj->data[index++] = gpu_read(gpu,
1133				regs->registers[i] + j);
1134	}
1135}
1136
1137static void a7xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
1138		struct a6xx_gpu_state *a6xx_state,
1139		const u32 *regs,
1140		struct a6xx_gpu_state_obj *obj)
1141{
1142	int i, regcount = 0, index = 0;
1143
1144	for (i = 0; regs[i] != UINT_MAX; i += 2)
1145		regcount += RANGE(regs, i);
1146
1147	obj->handle = (const void *) regs;
1148	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1149	if (!obj->data)
1150		return;
1151
1152	for (i = 0; regs[i] != UINT_MAX; i += 2) {
1153		u32 count = RANGE(regs, i);
1154		int j;
1155
1156		for (j = 0; j < count; j++)
1157			obj->data[index++] = gpu_read(gpu, regs[i] + j);
1158	}
1159}
1160
1161static void a7xx_get_ahb_gpu_reglist(struct msm_gpu *gpu,
1162		struct a6xx_gpu_state *a6xx_state,
1163		const struct gen7_reg_list *regs,
1164		struct a6xx_gpu_state_obj *obj)
1165{
1166	if (regs->sel)
1167		gpu_write(gpu, regs->sel->host_reg, regs->sel->val);
1168
1169	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, regs->regs, obj);
1170}
1171
1172/* Read a block of GMU registers */
1173static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
1174		struct a6xx_gpu_state *a6xx_state,
1175		const struct a6xx_registers *regs,
1176		struct a6xx_gpu_state_obj *obj,
1177		bool rscc)
1178{
1179	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1180	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1181	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1182	int i, regcount = 0, index = 0;
1183
1184	for (i = 0; i < regs->count; i += 2)
1185		regcount += RANGE(regs->registers, i);
1186
1187	obj->handle = (const void *) regs;
1188	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
1189	if (!obj->data)
1190		return;
1191
1192	for (i = 0; i < regs->count; i += 2) {
1193		u32 count = RANGE(regs->registers, i);
1194		int j;
1195
1196		for (j = 0; j < count; j++) {
1197			u32 offset = regs->registers[i] + j;
1198			u32 val;
1199
1200			if (rscc)
1201				val = gmu_read_rscc(gmu, offset);
1202			else
1203				val = gmu_read(gmu, offset);
1204
1205			obj->data[index++] = val;
1206		}
1207	}
1208}
1209
1210static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
1211		struct a6xx_gpu_state *a6xx_state)
1212{
1213	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1214	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1215
1216	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
1217		3, sizeof(*a6xx_state->gmu_registers));
1218
1219	if (!a6xx_state->gmu_registers)
1220		return;
1221
1222	a6xx_state->nr_gmu_registers = 3;
1223
1224	/* Get the CX GMU registers from AHB */
1225	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
1226		&a6xx_state->gmu_registers[0], false);
1227	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
1228		&a6xx_state->gmu_registers[1], true);
1229
1230	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1231		return;
1232
1233	/* Set the fence to ALLOW mode so we can access the registers */
1234	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1235
1236	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
1237		&a6xx_state->gmu_registers[2], false);
1238}
1239
1240static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
1241		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
1242{
1243	struct msm_gpu_state_bo *snapshot;
1244
1245	if (!bo->size)
1246		return NULL;
1247
1248	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
1249	if (!snapshot)
1250		return NULL;
1251
1252	snapshot->iova = bo->iova;
1253	snapshot->size = bo->size;
1254	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
1255	if (!snapshot->data)
1256		return NULL;
1257
1258	memcpy(snapshot->data, bo->virt, bo->size);
1259
1260	return snapshot;
1261}
1262
1263static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
1264					  struct a6xx_gpu_state *a6xx_state)
1265{
1266	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1267	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1268	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1269	unsigned i, j;
1270
1271	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
1272
1273	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
1274		struct a6xx_hfi_queue *queue = &gmu->queues[i];
1275		for (j = 0; j < HFI_HISTORY_SZ; j++) {
1276			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
1277			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
1278		}
1279	}
1280}
1281
1282#define A6XX_REGLIST_SIZE        1
1283#define A6XX_GBIF_REGLIST_SIZE   1
1284static void a6xx_get_registers(struct msm_gpu *gpu,
1285		struct a6xx_gpu_state *a6xx_state,
1286		struct a6xx_crashdumper *dumper)
1287{
1288	int i, count = A6XX_REGLIST_SIZE +
1289		ARRAY_SIZE(a6xx_reglist) +
1290		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
1291	int index = 0;
1292	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1293
1294	a6xx_state->registers = state_kcalloc(a6xx_state,
1295		count, sizeof(*a6xx_state->registers));
1296
1297	if (!a6xx_state->registers)
1298		return;
1299
1300	a6xx_state->nr_registers = count;
1301
1302	a6xx_get_ahb_gpu_registers(gpu,
1303		a6xx_state, &a6xx_ahb_reglist,
1304		&a6xx_state->registers[index++]);
 
1305
1306	if (a6xx_has_gbif(adreno_gpu))
1307		a6xx_get_ahb_gpu_registers(gpu,
1308				a6xx_state, &a6xx_gbif_reglist,
1309				&a6xx_state->registers[index++]);
1310	else
1311		a6xx_get_ahb_gpu_registers(gpu,
1312				a6xx_state, &a6xx_vbif_reglist,
1313				&a6xx_state->registers[index++]);
1314	if (!dumper) {
1315		/*
1316		 * We can't use the crashdumper when the SMMU is stalled,
1317		 * because the GPU has no memory access until we resume
1318		 * translation (but we don't want to do that until after
1319		 * we have captured as much useful GPU state as possible).
1320		 * So instead collect registers via the CPU:
1321		 */
1322		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1323			a6xx_get_ahb_gpu_registers(gpu,
1324				a6xx_state, &a6xx_reglist[i],
1325				&a6xx_state->registers[index++]);
1326		return;
1327	}
1328
1329	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
1330		a6xx_get_crashdumper_registers(gpu,
1331			a6xx_state, &a6xx_reglist[i],
1332			&a6xx_state->registers[index++],
1333			dumper);
1334
1335	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
1336		a6xx_get_crashdumper_hlsq_registers(gpu,
1337			a6xx_state, &a6xx_hlsq_reglist[i],
1338			&a6xx_state->registers[index++],
1339			dumper);
1340}
1341
1342#define A7XX_PRE_CRASHDUMPER_SIZE    1
1343#define A7XX_POST_CRASHDUMPER_SIZE   1
1344static void a7xx_get_registers(struct msm_gpu *gpu,
1345		struct a6xx_gpu_state *a6xx_state,
1346		struct a6xx_crashdumper *dumper)
1347{
1348	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1349	int i, count;
1350	int index = 0;
1351	const u32 *pre_crashdumper_regs;
1352	const struct gen7_reg_list *reglist;
1353
1354	if (adreno_gpu->info->family == ADRENO_7XX_GEN1) {
1355		reglist = gen7_0_0_reg_list;
1356		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1357	} else if (adreno_gpu->info->family == ADRENO_7XX_GEN2) {
1358		reglist = gen7_2_0_reg_list;
1359		pre_crashdumper_regs = gen7_0_0_pre_crashdumper_gpu_registers;
1360	} else {
1361		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
1362		reglist = gen7_9_0_reg_list;
1363		pre_crashdumper_regs = gen7_9_0_pre_crashdumper_gpu_registers;
1364	}
1365
1366	count = A7XX_PRE_CRASHDUMPER_SIZE + A7XX_POST_CRASHDUMPER_SIZE;
1367
1368	/* The downstream reglist contains registers in other memory regions
1369	 * (cx_misc/cx_mem and cx_dbgc) and we need to plumb through their
1370	 * offsets and map them to read them on the CPU. For now only read the
1371	 * first region which is the main one.
1372	 */
1373	if (dumper) {
1374		for (i = 0; reglist[i].regs; i++)
1375			count++;
1376	} else {
1377		count++;
1378	}
1379
1380	a6xx_state->registers = state_kcalloc(a6xx_state,
1381		count, sizeof(*a6xx_state->registers));
1382
1383	if (!a6xx_state->registers)
1384		return;
1385
1386	a6xx_state->nr_registers = count;
1387
1388	a7xx_get_ahb_gpu_registers(gpu, a6xx_state, pre_crashdumper_regs,
1389		&a6xx_state->registers[index++]);
1390
1391	if (!dumper) {
1392		a7xx_get_ahb_gpu_reglist(gpu,
1393			a6xx_state, &reglist[0],
1394			&a6xx_state->registers[index++]);
1395		return;
1396	}
1397
1398	for (i = 0; reglist[i].regs; i++)
1399		a7xx_get_crashdumper_registers(gpu,
1400			a6xx_state, &reglist[i],
1401			&a6xx_state->registers[index++],
1402			dumper);
1403}
1404
1405static void a7xx_get_post_crashdumper_registers(struct msm_gpu *gpu,
1406		struct a6xx_gpu_state *a6xx_state)
1407{
1408	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1409	const u32 *regs;
1410
1411	BUG_ON(adreno_gpu->info->family > ADRENO_7XX_GEN3);
1412	regs = gen7_0_0_post_crashdumper_registers;
1413
1414	a7xx_get_ahb_gpu_registers(gpu,
1415		a6xx_state, regs,
1416		&a6xx_state->registers[a6xx_state->nr_registers - 1]);
1417}
1418
1419static u32 a6xx_get_cp_roq_size(struct msm_gpu *gpu)
1420{
1421	/* The value at [16:31] is in 4dword units. Convert it to dwords */
1422	return gpu_read(gpu, REG_A6XX_CP_ROQ_THRESHOLDS_2) >> 14;
1423}
1424
1425static u32 a7xx_get_cp_roq_size(struct msm_gpu *gpu)
1426{
1427	/*
1428	 * The value at CP_ROQ_THRESHOLDS_2[20:31] is in 4dword units.
1429	 * That register however is not directly accessible from APSS on A7xx.
1430	 * Program the SQE_UCODE_DBG_ADDR with offset=0x70d3 and read the value.
1431	 */
1432	gpu_write(gpu, REG_A6XX_CP_SQE_UCODE_DBG_ADDR, 0x70d3);
1433
1434	return 4 * (gpu_read(gpu, REG_A6XX_CP_SQE_UCODE_DBG_DATA) >> 20);
1435}
1436
1437/* Read a block of data from an indexed register pair */
1438static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
1439		struct a6xx_gpu_state *a6xx_state,
1440		const struct a6xx_indexed_registers *indexed,
1441		struct a6xx_gpu_state_obj *obj)
1442{
1443	u32 count = indexed->count;
1444	int i;
1445
1446	obj->handle = (const void *) indexed;
1447	if (indexed->count_fn)
1448		count = indexed->count_fn(gpu);
1449
1450	obj->data = state_kcalloc(a6xx_state, count, sizeof(u32));
1451	obj->count = count;
1452	if (!obj->data)
1453		return;
1454
1455	/* All the indexed banks start at address 0 */
1456	gpu_write(gpu, indexed->addr, 0);
1457
1458	/* Read the data - each read increments the internal address by 1 */
1459	for (i = 0; i < count; i++)
1460		obj->data[i] = gpu_read(gpu, indexed->data);
1461}
1462
1463static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
1464		struct a6xx_gpu_state *a6xx_state)
1465{
1466	u32 mempool_size;
1467	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
1468	int i;
1469
1470	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
1471		sizeof(*a6xx_state->indexed_regs));
1472	if (!a6xx_state->indexed_regs)
1473		return;
1474
1475	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
1476		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
1477			&a6xx_state->indexed_regs[i]);
1478
1479	if (adreno_is_a650_family(to_adreno_gpu(gpu))) {
1480		u32 val;
1481
1482		val = gpu_read(gpu, REG_A6XX_CP_CHICKEN_DBG);
1483		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val | 4);
1484
1485		/* Get the contents of the CP mempool */
1486		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1487			&a6xx_state->indexed_regs[i]);
1488
1489		gpu_write(gpu, REG_A6XX_CP_CHICKEN_DBG, val);
1490		a6xx_state->nr_indexed_regs = count;
1491		return;
1492	}
1493
1494	/* Set the CP mempool size to 0 to stabilize it while dumping */
1495	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
1496	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
1497
1498	/* Get the contents of the CP mempool */
1499	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
1500		&a6xx_state->indexed_regs[i]);
1501
1502	/*
1503	 * Offset 0x2000 in the mempool is the size - copy the saved size over
1504	 * so the data is consistent
1505	 */
1506	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
1507
1508	/* Restore the size in the hardware */
1509	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
1510}
1511
1512static void a7xx_get_indexed_registers(struct msm_gpu *gpu,
1513		struct a6xx_gpu_state *a6xx_state)
1514{
1515	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1516	const struct a6xx_indexed_registers *indexed_regs;
1517	int i, indexed_count, mempool_count;
1518
1519	if (adreno_gpu->info->family <= ADRENO_7XX_GEN2) {
1520		indexed_regs = a7xx_indexed_reglist;
1521		indexed_count = ARRAY_SIZE(a7xx_indexed_reglist);
1522	} else {
1523		BUG_ON(adreno_gpu->info->family != ADRENO_7XX_GEN3);
1524		indexed_regs = gen7_9_0_cp_indexed_reg_list;
1525		indexed_count = ARRAY_SIZE(gen7_9_0_cp_indexed_reg_list);
1526	}
1527
1528	mempool_count = ARRAY_SIZE(a7xx_cp_bv_mempool_indexed);
1529
1530	a6xx_state->indexed_regs = state_kcalloc(a6xx_state,
1531					indexed_count + mempool_count,
1532					sizeof(*a6xx_state->indexed_regs));
1533	if (!a6xx_state->indexed_regs)
1534		return;
1535
1536	a6xx_state->nr_indexed_regs = indexed_count + mempool_count;
1537
1538	/* First read the common regs */
1539	for (i = 0; i < indexed_count; i++)
1540		a6xx_get_indexed_regs(gpu, a6xx_state, &indexed_regs[i],
1541			&a6xx_state->indexed_regs[i]);
1542
1543	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, 0, BIT(2));
1544	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, 0, BIT(2));
1545
1546	/* Get the contents of the CP_BV mempool */
1547	for (i = 0; i < mempool_count; i++)
1548		a6xx_get_indexed_regs(gpu, a6xx_state, &a7xx_cp_bv_mempool_indexed[i],
1549			&a6xx_state->indexed_regs[indexed_count + i]);
1550
1551	gpu_rmw(gpu, REG_A6XX_CP_CHICKEN_DBG, BIT(2), 0);
1552	gpu_rmw(gpu, REG_A7XX_CP_BV_CHICKEN_DBG, BIT(2), 0);
1553	return;
1554}
1555
1556struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
1557{
1558	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
1559	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1560	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
1561	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
1562		GFP_KERNEL);
1563	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
1564			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
1565
1566	if (!a6xx_state)
1567		return ERR_PTR(-ENOMEM);
1568
1569	INIT_LIST_HEAD(&a6xx_state->objs);
1570
1571	/* Get the generic state from the adreno core */
1572	adreno_gpu_state_get(gpu, &a6xx_state->base);
1573
1574	if (!adreno_has_gmu_wrapper(adreno_gpu)) {
1575		a6xx_get_gmu_registers(gpu, a6xx_state);
1576
1577		a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
1578		a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
1579		a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
1580
1581		a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
1582	}
1583
1584	/* If GX isn't on the rest of the data isn't going to be accessible */
1585	if (!adreno_has_gmu_wrapper(adreno_gpu) && !a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
1586		return &a6xx_state->base;
1587
1588	/* Get the banks of indexed registers */
1589	if (adreno_is_a7xx(adreno_gpu))
1590		a7xx_get_indexed_registers(gpu, a6xx_state);
1591	else
1592		a6xx_get_indexed_registers(gpu, a6xx_state);
1593
1594	/*
1595	 * Try to initialize the crashdumper, if we are not dumping state
1596	 * with the SMMU stalled.  The crashdumper needs memory access to
1597	 * write out GPU state, so we need to skip this when the SMMU is
1598	 * stalled in response to an iova fault
1599	 */
1600	if (!stalled && !gpu->needs_hw_init &&
1601	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1602		dumper = &_dumper;
1603	}
1604
1605	if (adreno_is_a7xx(adreno_gpu)) {
1606		a7xx_get_registers(gpu, a6xx_state, dumper);
1607
1608		if (dumper) {
1609			a7xx_get_shaders(gpu, a6xx_state, dumper);
1610			a7xx_get_clusters(gpu, a6xx_state, dumper);
1611			a7xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1612
1613			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1614		}
1615
1616		a7xx_get_post_crashdumper_registers(gpu, a6xx_state);
1617	} else {
1618		a6xx_get_registers(gpu, a6xx_state, dumper);
1619
1620		if (dumper) {
1621			a6xx_get_shaders(gpu, a6xx_state, dumper);
1622			a6xx_get_clusters(gpu, a6xx_state, dumper);
1623			a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
1624
1625			msm_gem_kernel_put(dumper->bo, gpu->aspace);
1626		}
1627	}
1628
1629	if (snapshot_debugbus)
1630		a6xx_get_debugbus(gpu, a6xx_state);
1631
1632	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1633
1634	return  &a6xx_state->base;
1635}
1636
1637static void a6xx_gpu_state_destroy(struct kref *kref)
1638{
1639	struct a6xx_state_memobj *obj, *tmp;
1640	struct msm_gpu_state *state = container_of(kref,
1641			struct msm_gpu_state, ref);
1642	struct a6xx_gpu_state *a6xx_state = container_of(state,
1643			struct a6xx_gpu_state, base);
1644
1645	if (a6xx_state->gmu_log)
1646		kvfree(a6xx_state->gmu_log->data);
1647
1648	if (a6xx_state->gmu_hfi)
1649		kvfree(a6xx_state->gmu_hfi->data);
1650
1651	if (a6xx_state->gmu_debug)
1652		kvfree(a6xx_state->gmu_debug->data);
1653
1654	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1655		list_del(&obj->node);
1656		kvfree(obj);
1657	}
1658
1659	adreno_gpu_state_destroy(state);
1660	kfree(a6xx_state);
1661}
1662
1663int a6xx_gpu_state_put(struct msm_gpu_state *state)
1664{
1665	if (IS_ERR_OR_NULL(state))
1666		return 1;
1667
1668	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1669}
1670
1671static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1672		struct drm_printer *p)
1673{
1674	int i, index = 0;
1675
1676	if (!data)
1677		return;
1678
1679	for (i = 0; i < count; i += 2) {
1680		u32 count = RANGE(registers, i);
1681		u32 offset = registers[i];
1682		int j;
1683
1684		for (j = 0; j < count; index++, offset++, j++) {
1685			if (data[index] == 0xdeafbead)
1686				continue;
1687
1688			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1689				offset << 2, data[index]);
1690		}
1691	}
1692}
1693
1694static void a7xx_show_registers_indented(const u32 *registers, u32 *data,
1695		struct drm_printer *p, unsigned indent)
1696{
1697	int i, index = 0;
1698
1699	for (i = 0; registers[i] != UINT_MAX; i += 2) {
1700		u32 count = RANGE(registers, i);
1701		u32 offset = registers[i];
1702		int j;
1703
1704		for (j = 0; j < count; index++, offset++, j++) {
1705			int k;
1706
1707			if (data[index] == 0xdeafbead)
1708				continue;
1709
1710			for (k = 0; k < indent; k++)
1711				drm_printf(p, "  ");
1712			drm_printf(p, "- { offset: 0x%06x, value: 0x%08x }\n",
1713				offset << 2, data[index]);
1714		}
1715	}
1716}
1717
1718static void a7xx_show_registers(const u32 *registers, u32 *data, struct drm_printer *p)
1719{
1720	a7xx_show_registers_indented(registers, data, p, 1);
1721}
1722
1723static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1724{
1725	char out[ASCII85_BUFSZ];
1726	long i, l, datalen = 0;
1727
1728	for (i = 0; i < len >> 2; i++) {
1729		if (data[i])
1730			datalen = (i + 1) << 2;
1731	}
1732
1733	if (datalen == 0)
1734		return;
1735
1736	drm_puts(p, "    data: !!ascii85 |\n");
1737	drm_puts(p, "      ");
1738
1739
1740	l = ascii85_encode_len(datalen);
1741
1742	for (i = 0; i < l; i++)
1743		drm_puts(p, ascii85_encode(data[i], out));
1744
1745	drm_puts(p, "\n");
1746}
1747
/*
 * Emit @fmt, then @name, then a newline.  @fmt is written verbatim with
 * drm_puts(); it is a fixed prefix, not a printf format string.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	/* prefix */
	drm_puts(p, fmt);
	/* value */
	drm_puts(p, name);
	/* line terminator */
	drm_puts(p, "\n");
}
1754
1755static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1756		struct drm_printer *p)
1757{
1758	const struct a6xx_shader_block *block = obj->handle;
1759	int i;
1760
1761	if (!obj->handle)
1762		return;
1763
1764	print_name(p, "  - type: ", block->name);
1765
1766	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1767		drm_printf(p, "    - bank: %d\n", i);
1768		drm_printf(p, "      size: %d\n", block->size);
1769
1770		if (!obj->data)
1771			continue;
1772
1773		print_ascii85(p, block->size << 2,
1774			obj->data + (block->size * i));
1775	}
1776}
1777
1778static void a7xx_show_shader(struct a6xx_gpu_state_obj *obj,
1779		struct drm_printer *p)
1780{
1781	const struct gen7_shader_block *block = obj->handle;
1782	int i, j;
1783	u32 *data = obj->data;
1784
1785	if (!obj->handle)
1786		return;
1787
1788	print_name(p, "  - type: ", a7xx_statetype_names[block->statetype]);
1789	print_name(p, "    - pipe: ", a7xx_pipe_names[block->pipeid]);
1790
1791	for (i = 0; i < block->num_sps; i++) {
1792		drm_printf(p, "      - sp: %d\n", i);
1793
1794		for (j = 0; j < block->num_usptps; j++) {
1795			drm_printf(p, "        - usptp: %d\n", j);
1796			drm_printf(p, "          size: %d\n", block->size);
1797
1798			if (!obj->data)
1799				continue;
1800
1801			print_ascii85(p, block->size << 2, data);
1802
1803			data += block->size;
1804		}
1805	}
1806}
1807
1808static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1809		struct drm_printer *p)
1810{
1811	int ctx, index = 0;
1812
1813	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1814		int j;
1815
1816		drm_printf(p, "    - context: %d\n", ctx);
1817
1818		for (j = 0; j < size; j += 2) {
1819			u32 count = RANGE(registers, j);
1820			u32 offset = registers[j];
1821			int k;
1822
1823			for (k = 0; k < count; index++, offset++, k++) {
1824				if (data[index] == 0xdeafbead)
1825					continue;
1826
1827				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1828					offset << 2, data[index]);
1829			}
1830		}
1831	}
1832}
1833
1834static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1835		struct drm_printer *p)
1836{
1837	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1838
1839	if (dbgahb) {
1840		print_name(p, "  - cluster-name: ", dbgahb->name);
1841		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1842			obj->data, p);
1843	}
1844}
1845
1846static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1847		struct drm_printer *p)
1848{
1849	const struct a6xx_cluster *cluster = obj->handle;
1850
1851	if (cluster) {
1852		print_name(p, "  - cluster-name: ", cluster->name);
1853		a6xx_show_cluster_data(cluster->registers, cluster->count,
1854			obj->data, p);
1855	}
1856}
1857
1858static void a7xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1859		struct drm_printer *p)
1860{
1861	const struct gen7_sptp_cluster_registers *dbgahb = obj->handle;
1862
1863	if (dbgahb) {
1864		print_name(p, "  - pipe: ", a7xx_pipe_names[dbgahb->pipe_id]);
1865		print_name(p, "    - cluster-name: ", a7xx_cluster_names[dbgahb->cluster_id]);
1866		drm_printf(p, "      - context: %d\n", dbgahb->context_id);
1867		a7xx_show_registers_indented(dbgahb->regs, obj->data, p, 4);
1868	}
1869}
1870
1871static void a7xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1872		struct drm_printer *p)
1873{
1874	const struct gen7_cluster_registers *cluster = obj->handle;
1875
1876	if (cluster) {
1877		int context = (cluster->context_id == STATE_FORCE_CTXT_1) ? 1 : 0;
1878
1879		print_name(p, "  - pipe: ", a7xx_pipe_names[cluster->pipe_id]);
1880		print_name(p, "    - cluster-name: ", a7xx_cluster_names[cluster->cluster_id]);
1881		drm_printf(p, "      - context: %d\n", context);
1882		a7xx_show_registers_indented(cluster->regs, obj->data, p, 4);
1883	}
1884}
1885
1886static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1887		struct drm_printer *p)
1888{
1889	const struct a6xx_indexed_registers *indexed = obj->handle;
1890
1891	if (!indexed)
1892		return;
1893
1894	print_name(p, "  - regs-name: ", indexed->name);
1895	drm_printf(p, "    dwords: %d\n", obj->count);
1896
1897	print_ascii85(p, obj->count << 2, obj->data);
1898}
1899
1900static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1901		u32 *data, struct drm_printer *p)
1902{
1903	if (block) {
1904		print_name(p, "  - debugbus-block: ", block->name);
1905
1906		/*
1907		 * count for regular debugbus data is in quadwords,
1908		 * but print the size in dwords for consistency
1909		 */
1910		drm_printf(p, "    count: %d\n", block->count << 1);
1911
1912		print_ascii85(p, block->count << 3, data);
1913	}
1914}
1915
1916static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1917		struct drm_printer *p)
1918{
1919	int i;
1920
1921	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1922		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1923
1924		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1925	}
1926
1927	if (a6xx_state->vbif_debugbus) {
1928		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1929
1930		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1931		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1932
1933		/* vbif debugbus data is in dwords.  Confusing, huh? */
1934		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1935	}
1936
1937	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1938		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1939
1940		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1941	}
1942}
1943
1944void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1945		struct drm_printer *p)
1946{
1947	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1948	struct a6xx_gpu_state *a6xx_state = container_of(state,
1949			struct a6xx_gpu_state, base);
1950	int i;
1951
1952	if (IS_ERR_OR_NULL(state))
1953		return;
1954
1955	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1956
1957	adreno_show(gpu, state, p);
1958
1959	drm_puts(p, "gmu-log:\n");
1960	if (a6xx_state->gmu_log) {
1961		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1962
1963		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1964		drm_printf(p, "    size: %zu\n", gmu_log->size);
1965		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1966				&gmu_log->encoded);
1967	}
1968
1969	drm_puts(p, "gmu-hfi:\n");
1970	if (a6xx_state->gmu_hfi) {
1971		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1972		unsigned i, j;
1973
1974		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1975		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1976		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1977			drm_printf(p, "    queue-history[%u]:", i);
1978			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1979				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1980			}
1981			drm_printf(p, "\n");
1982		}
1983		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1984				&gmu_hfi->encoded);
1985	}
1986
1987	drm_puts(p, "gmu-debug:\n");
1988	if (a6xx_state->gmu_debug) {
1989		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1990
1991		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1992		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1993		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1994				&gmu_debug->encoded);
1995	}
1996
1997	drm_puts(p, "registers:\n");
1998	for (i = 0; i < a6xx_state->nr_registers; i++) {
1999		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
 
2000
2001		if (!obj->handle)
2002			continue;
2003
2004		if (adreno_is_a7xx(adreno_gpu)) {
2005			a7xx_show_registers(obj->handle, obj->data, p);
2006		} else {
2007			const struct a6xx_registers *regs = obj->handle;
2008
2009			a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2010		}
2011	}
2012
2013	drm_puts(p, "registers-gmu:\n");
2014	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
2015		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
2016		const struct a6xx_registers *regs = obj->handle;
2017
2018		if (!obj->handle)
2019			continue;
2020
2021		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
2022	}
2023
2024	drm_puts(p, "indexed-registers:\n");
2025	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
2026		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
2027
2028	drm_puts(p, "shader-blocks:\n");
2029	for (i = 0; i < a6xx_state->nr_shaders; i++) {
2030		if (adreno_is_a7xx(adreno_gpu))
2031			a7xx_show_shader(&a6xx_state->shaders[i], p);
2032		else
2033			a6xx_show_shader(&a6xx_state->shaders[i], p);
2034	}
2035
2036	drm_puts(p, "clusters:\n");
2037	for (i = 0; i < a6xx_state->nr_clusters; i++) {
2038		if (adreno_is_a7xx(adreno_gpu))
2039			a7xx_show_cluster(&a6xx_state->clusters[i], p);
2040		else
2041			a6xx_show_cluster(&a6xx_state->clusters[i], p);
2042	}
2043
2044	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++) {
2045		if (adreno_is_a7xx(adreno_gpu))
2046			a7xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2047		else
2048			a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
2049	}
2050
2051	drm_puts(p, "debugbus:\n");
2052	a6xx_show_debugbus(a6xx_state, p);
2053}
v6.2
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. */
   3
   4#include <linux/ascii85.h>
   5#include "msm_gem.h"
   6#include "a6xx_gpu.h"
   7#include "a6xx_gmu.h"
   8#include "a6xx_gpu_state.h"
   9#include "a6xx_gmu.xml.h"
  10
 
 
 
 
 
 
 
 
 
 
  11struct a6xx_gpu_state_obj {
  12	const void *handle;
  13	u32 *data;
 
  14};
  15
  16struct a6xx_gpu_state {
  17	struct msm_gpu_state base;
  18
  19	struct a6xx_gpu_state_obj *gmu_registers;
  20	int nr_gmu_registers;
  21
  22	struct a6xx_gpu_state_obj *registers;
  23	int nr_registers;
  24
  25	struct a6xx_gpu_state_obj *shaders;
  26	int nr_shaders;
  27
  28	struct a6xx_gpu_state_obj *clusters;
  29	int nr_clusters;
  30
  31	struct a6xx_gpu_state_obj *dbgahb_clusters;
  32	int nr_dbgahb_clusters;
  33
  34	struct a6xx_gpu_state_obj *indexed_regs;
  35	int nr_indexed_regs;
  36
  37	struct a6xx_gpu_state_obj *debugbus;
  38	int nr_debugbus;
  39
  40	struct a6xx_gpu_state_obj *vbif_debugbus;
  41
  42	struct a6xx_gpu_state_obj *cx_debugbus;
  43	int nr_cx_debugbus;
  44
  45	struct msm_gpu_state_bo *gmu_log;
  46	struct msm_gpu_state_bo *gmu_hfi;
  47	struct msm_gpu_state_bo *gmu_debug;
  48
  49	s32 hfi_queue_history[2][HFI_HISTORY_SZ];
  50
  51	struct list_head objs;
  52
  53	bool gpu_initialized;
  54};
  55
  56static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
  57{
  58	in[0] = val;
  59	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
  60
  61	return 2;
  62}
  63
  64static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
  65{
  66	in[0] = target;
  67	in[1] = (((u64) reg) << 44 | dwords);
  68
  69	return 2;
  70}
  71
  72static inline int CRASHDUMP_FINI(u64 *in)
  73{
  74	in[0] = 0;
  75	in[1] = 0;
  76
  77	return 2;
  78}
  79
  80struct a6xx_crashdumper {
  81	void *ptr;
  82	struct drm_gem_object *bo;
  83	u64 iova;
  84};
  85
  86struct a6xx_state_memobj {
  87	struct list_head node;
  88	unsigned long long data[];
  89};
  90
  91static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
  92{
  93	struct a6xx_state_memobj *obj =
  94		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
  95
  96	if (!obj)
  97		return NULL;
  98
  99	list_add_tail(&obj->node, &a6xx_state->objs);
 100	return &obj->data;
 101}
 102
 103static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
 104		size_t size)
 105{
 106	void *dst = state_kcalloc(a6xx_state, 1, size);
 107
 108	if (dst)
 109		memcpy(dst, src, size);
 110	return dst;
 111}
 112
 113/*
 114 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 115 * the rest for the data
 116 */
 117#define A6XX_CD_DATA_OFFSET 8192
 118#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
 119
 120static int a6xx_crashdumper_init(struct msm_gpu *gpu,
 121		struct a6xx_crashdumper *dumper)
 122{
 123	dumper->ptr = msm_gem_kernel_new(gpu->dev,
 124		SZ_1M, MSM_BO_WC, gpu->aspace,
 125		&dumper->bo, &dumper->iova);
 126
 127	if (!IS_ERR(dumper->ptr))
 128		msm_gem_object_set_name(dumper->bo, "crashdump");
 129
 130	return PTR_ERR_OR_ZERO(dumper->ptr);
 131}
 132
 133static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 134		struct a6xx_crashdumper *dumper)
 135{
 136	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 137	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 138	u32 val;
 139	int ret;
 140
 141	if (IS_ERR_OR_NULL(dumper->ptr))
 142		return -EINVAL;
 143
 144	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
 145		return -EINVAL;
 146
 147	/* Make sure all pending memory writes are posted */
 148	wmb();
 149
 150	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
 151
 152	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 153
 154	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
 155		val & 0x02, 100, 10000);
 156
 157	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
 158
 159	return ret;
 160}
 161
 162/* read a value from the GX debug bus */
 163static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
 164		u32 *data)
 165{
 166	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
 167		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
 168
 169	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
 170	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
 171	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
 172	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
 173
 174	/* Wait 1 us to make sure the data is flowing */
 175	udelay(1);
 176
 177	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 178	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 179
 180	return 2;
 181}
 182
 183#define cxdbg_write(ptr, offset, val) \
 184	msm_writel((val), (ptr) + ((offset) << 2))
 185
 186#define cxdbg_read(ptr, offset) \
 187	msm_readl((ptr) + ((offset) << 2))
 188
 189/* read a value from the CX debug bus */
 190static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
 191		u32 *data)
 192{
 193	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
 194		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
 195
 196	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
 197	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
 198	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
 199	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
 200
 201	/* Wait 1 us to make sure the data is flowing */
 202	udelay(1);
 203
 204	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
 205	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
 206
 207	return 2;
 208}
 209
 210/* Read a chunk of data from the VBIF debug bus */
 211static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
 212		u32 reg, int count, u32 *data)
 213{
 214	int i;
 215
 216	gpu_write(gpu, ctrl0, reg);
 217
 218	for (i = 0; i < count; i++) {
 219		gpu_write(gpu, ctrl1, i);
 220		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
 221	}
 222
 223	return count;
 224}
 225
 226#define AXI_ARB_BLOCKS 2
 227#define XIN_AXI_BLOCKS 5
 228#define XIN_CORE_BLOCKS 4
 229
 230#define VBIF_DEBUGBUS_BLOCK_SIZE \
 231	((16 * AXI_ARB_BLOCKS) + \
 232	 (18 * XIN_AXI_BLOCKS) + \
 233	 (12 * XIN_CORE_BLOCKS))
 234
 235static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
 236		struct a6xx_gpu_state *a6xx_state,
 237		struct a6xx_gpu_state_obj *obj)
 238{
 239	u32 clk, *ptr;
 240	int i;
 241
 242	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
 243		sizeof(u32));
 244	if (!obj->data)
 245		return;
 246
 247	obj->handle = NULL;
 248
 249	/* Get the current clock setting */
 250	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
 251
 252	/* Force on the bus so we can read it */
 253	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
 254		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
 255
 256	/* We will read from BUS2 first, so disable BUS1 */
 257	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
 258
 259	/* Enable the VBIF bus for reading */
 260	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
 261
 262	ptr = obj->data;
 263
 264	for (i = 0; i < AXI_ARB_BLOCKS; i++)
 265		ptr += vbif_debugbus_read(gpu,
 266			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 267			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 268			1 << (i + 16), 16, ptr);
 269
 270	for (i = 0; i < XIN_AXI_BLOCKS; i++)
 271		ptr += vbif_debugbus_read(gpu,
 272			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
 273			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
 274			1 << i, 18, ptr);
 275
 276	/* Stop BUS2 so we can turn on BUS1 */
 277	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
 278
 279	for (i = 0; i < XIN_CORE_BLOCKS; i++)
 280		ptr += vbif_debugbus_read(gpu,
 281			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
 282			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
 283			1 << i, 12, ptr);
 284
 285	/* Restore the VBIF clock setting */
 286	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
 287}
 288
 289static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
 290		struct a6xx_gpu_state *a6xx_state,
 291		const struct a6xx_debugbus_block *block,
 292		struct a6xx_gpu_state_obj *obj)
 293{
 294	int i;
 295	u32 *ptr;
 296
 297	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 298	if (!obj->data)
 299		return;
 300
 301	obj->handle = block;
 302
 303	for (ptr = obj->data, i = 0; i < block->count; i++)
 304		ptr += debugbus_read(gpu, block->id, i, ptr);
 305}
 306
 307static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
 308		struct a6xx_gpu_state *a6xx_state,
 309		const struct a6xx_debugbus_block *block,
 310		struct a6xx_gpu_state_obj *obj)
 311{
 312	int i;
 313	u32 *ptr;
 314
 315	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
 316	if (!obj->data)
 317		return;
 318
 319	obj->handle = block;
 320
 321	for (ptr = obj->data, i = 0; i < block->count; i++)
 322		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
 323}
 324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 325static void a6xx_get_debugbus(struct msm_gpu *gpu,
 326		struct a6xx_gpu_state *a6xx_state)
 327{
 
 328	struct resource *res;
 329	void __iomem *cxdbg = NULL;
 330	int nr_debugbus_blocks;
 331
 332	/* Set up the GX debug bus */
 333
 334	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
 335		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 336
 337	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
 338		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 339
 340	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 341	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 342	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 343	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 344
 345	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
 346	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
 347
 348	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 349	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 350	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 351	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 352
 353	/* Set up the CX debug bus - it lives elsewhere in the system so do a
 354	 * temporary ioremap for the registers
 355	 */
 356	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
 357			"cx_dbgc");
 358
 359	if (res)
 360		cxdbg = ioremap(res->start, resource_size(res));
 361
 362	if (cxdbg) {
 363		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLT,
 364			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
 365
 366		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_CNTLM,
 367			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
 368
 369		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0, 0);
 370		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1, 0);
 371		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2, 0);
 372		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3, 0);
 373
 374		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0,
 375			0x76543210);
 376		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1,
 377			0xFEDCBA98);
 378
 379		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0, 0);
 380		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1, 0);
 381		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2, 0);
 382		cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3, 0);
 383	}
 384
 385	nr_debugbus_blocks = ARRAY_SIZE(a6xx_debugbus_blocks) +
 386		(a6xx_has_gbif(to_adreno_gpu(gpu)) ? 1 : 0);
 387
 388	a6xx_state->debugbus = state_kcalloc(a6xx_state, nr_debugbus_blocks,
 389			sizeof(*a6xx_state->debugbus));
 390
 391	if (a6xx_state->debugbus) {
 392		int i;
 393
 394		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
 395			a6xx_get_debugbus_block(gpu,
 396				a6xx_state,
 397				&a6xx_debugbus_blocks[i],
 398				&a6xx_state->debugbus[i]);
 399
 400		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
 401
 402		/*
 403		 * GBIF has same debugbus as of other GPU blocks, fall back to
 404		 * default path if GPU uses GBIF, also GBIF uses exactly same
 405		 * ID as of VBIF.
 406		 */
 407		if (a6xx_has_gbif(to_adreno_gpu(gpu))) {
 408			a6xx_get_debugbus_block(gpu, a6xx_state,
 409				&a6xx_gbif_debugbus_block,
 410				&a6xx_state->debugbus[i]);
 411
 412			a6xx_state->nr_debugbus += 1;
 413		}
 414	}
 415
 416	/*  Dump the VBIF debugbus on applicable targets */
 417	if (!a6xx_has_gbif(to_adreno_gpu(gpu))) {
 418		a6xx_state->vbif_debugbus =
 419			state_kcalloc(a6xx_state, 1,
 420					sizeof(*a6xx_state->vbif_debugbus));
 421
 422		if (a6xx_state->vbif_debugbus)
 423			a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
 424					a6xx_state->vbif_debugbus);
 425	}
 426
 427	if (cxdbg) {
 
 
 
 
 
 
 
 
 
 
 
 
 428		a6xx_state->cx_debugbus =
 429			state_kcalloc(a6xx_state,
 430			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
 431			sizeof(*a6xx_state->cx_debugbus));
 432
 433		if (a6xx_state->cx_debugbus) {
 434			int i;
 435
 436			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
 437				a6xx_get_cx_debugbus_block(cxdbg,
 438					a6xx_state,
 439					&a6xx_cx_debugbus_blocks[i],
 440					&a6xx_state->cx_debugbus[i]);
 441
 442			a6xx_state->nr_cx_debugbus =
 443				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
 444		}
 445
 446		iounmap(cxdbg);
 447	}
 448}
 449
 450#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
 451
 452/* Read a data cluster from behind the AHB aperture */
 453static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
 454		struct a6xx_gpu_state *a6xx_state,
 455		const struct a6xx_dbgahb_cluster *dbgahb,
 456		struct a6xx_gpu_state_obj *obj,
 457		struct a6xx_crashdumper *dumper)
 458{
 459	u64 *in = dumper->ptr;
 460	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 461	size_t datasize;
 462	int i, regcount = 0;
 463
 464	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 465		int j;
 466
 467		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 468			(dbgahb->statetype + i * 2) << 8);
 469
 470		for (j = 0; j < dbgahb->count; j += 2) {
 471			int count = RANGE(dbgahb->registers, j);
 472			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 473				dbgahb->registers[j] - (dbgahb->base >> 2);
 474
 475			in += CRASHDUMP_READ(in, offset, count, out);
 476
 477			out += count * sizeof(u32);
 478
 479			if (i == 0)
 480				regcount += count;
 481		}
 482	}
 483
 484	CRASHDUMP_FINI(in);
 485
 486	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 487
 488	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 489		return;
 490
 491	if (a6xx_crashdumper_run(gpu, dumper))
 492		return;
 493
 494	obj->handle = dbgahb;
 495	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 496		datasize);
 497}
 498
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 499static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
 500		struct a6xx_gpu_state *a6xx_state,
 501		struct a6xx_crashdumper *dumper)
 502{
 503	int i;
 504
 505	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
 506		ARRAY_SIZE(a6xx_dbgahb_clusters),
 507		sizeof(*a6xx_state->dbgahb_clusters));
 508
 509	if (!a6xx_state->dbgahb_clusters)
 510		return;
 511
 512	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
 513
 514	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
 515		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
 516			&a6xx_dbgahb_clusters[i],
 517			&a6xx_state->dbgahb_clusters[i], dumper);
 518}
 519
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 520/* Read a data cluster from the CP aperture with the crashdumper */
 521static void a6xx_get_cluster(struct msm_gpu *gpu,
 522		struct a6xx_gpu_state *a6xx_state,
 523		const struct a6xx_cluster *cluster,
 524		struct a6xx_gpu_state_obj *obj,
 525		struct a6xx_crashdumper *dumper)
 526{
 
 527	u64 *in = dumper->ptr;
 528	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 529	size_t datasize;
 530	int i, regcount = 0;
 
 
 
 
 
 
 
 
 
 
 531
 532	/* Some clusters need a selector register to be programmed too */
 533	if (cluster->sel_reg)
 534		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
 535
 536	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
 537		int j;
 538
 539		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
 540			(cluster->id << 8) | (i << 4) | i);
 541
 542		for (j = 0; j < cluster->count; j += 2) {
 543			int count = RANGE(cluster->registers, j);
 544
 545			in += CRASHDUMP_READ(in, cluster->registers[j],
 546				count, out);
 547
 548			out += count * sizeof(u32);
 549
 550			if (i == 0)
 551				regcount += count;
 552		}
 553	}
 554
 555	CRASHDUMP_FINI(in);
 556
 557	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
 558
 559	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 560		return;
 561
 562	if (a6xx_crashdumper_run(gpu, dumper))
 563		return;
 564
 565	obj->handle = cluster;
 566	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 567		datasize);
 568}
 569
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 570static void a6xx_get_clusters(struct msm_gpu *gpu,
 571		struct a6xx_gpu_state *a6xx_state,
 572		struct a6xx_crashdumper *dumper)
 573{
 574	int i;
 575
 576	a6xx_state->clusters = state_kcalloc(a6xx_state,
 577		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));
 578
 579	if (!a6xx_state->clusters)
 580		return;
 581
 582	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
 583
 584	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
 585		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
 586			&a6xx_state->clusters[i], dumper);
 587}
 588
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 589/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
 590static void a6xx_get_shader_block(struct msm_gpu *gpu,
 591		struct a6xx_gpu_state *a6xx_state,
 592		const struct a6xx_shader_block *block,
 593		struct a6xx_gpu_state_obj *obj,
 594		struct a6xx_crashdumper *dumper)
 595{
 596	u64 *in = dumper->ptr;
 
 597	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
 598	int i;
 599
 600	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
 601		return;
 602
 603	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
 604		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
 605			(block->type << 8) | i);
 606
 607		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
 608			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
 
 
 609	}
 610
 611	CRASHDUMP_FINI(in);
 612
 613	if (a6xx_crashdumper_run(gpu, dumper))
 614		return;
 615
 616	obj->handle = block;
 617	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 618		datasize);
 619}
 620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 621static void a6xx_get_shaders(struct msm_gpu *gpu,
 622		struct a6xx_gpu_state *a6xx_state,
 623		struct a6xx_crashdumper *dumper)
 624{
 625	int i;
 626
 627	a6xx_state->shaders = state_kcalloc(a6xx_state,
 628		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));
 629
 630	if (!a6xx_state->shaders)
 631		return;
 632
 633	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
 634
 635	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
 636		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
 637			&a6xx_state->shaders[i], dumper);
 638}
 639
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 640/* Read registers from behind the HLSQ aperture with the crashdumper */
 641static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
 642		struct a6xx_gpu_state *a6xx_state,
 643		const struct a6xx_registers *regs,
 644		struct a6xx_gpu_state_obj *obj,
 645		struct a6xx_crashdumper *dumper)
 646
 647{
 648	u64 *in = dumper->ptr;
 649	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 650	int i, regcount = 0;
 651
 652	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
 653
 654	for (i = 0; i < regs->count; i += 2) {
 655		u32 count = RANGE(regs->registers, i);
 656		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
 657			regs->registers[i] - (regs->val0 >> 2);
 658
 659		in += CRASHDUMP_READ(in, offset, count, out);
 660
 661		out += count * sizeof(u32);
 662		regcount += count;
 663	}
 664
 665	CRASHDUMP_FINI(in);
 666
 667	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 668		return;
 669
 670	if (a6xx_crashdumper_run(gpu, dumper))
 671		return;
 672
 673	obj->handle = regs;
 674	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 675		regcount * sizeof(u32));
 676}
 677
 678/* Read a block of registers using the crashdumper */
 679static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
 680		struct a6xx_gpu_state *a6xx_state,
 681		const struct a6xx_registers *regs,
 682		struct a6xx_gpu_state_obj *obj,
 683		struct a6xx_crashdumper *dumper)
 684
 685{
 686	u64 *in = dumper->ptr;
 687	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
 688	int i, regcount = 0;
 689
 
 
 
 
 
 690	/* Some blocks might need to program a selector register first */
 691	if (regs->val0)
 692		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
 693
 694	for (i = 0; i < regs->count; i += 2) {
 695		u32 count = RANGE(regs->registers, i);
 696
 697		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
 698
 699		out += count * sizeof(u32);
 700		regcount += count;
 701	}
 702
 703	CRASHDUMP_FINI(in);
 704
 705	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
 706		return;
 707
 708	if (a6xx_crashdumper_run(gpu, dumper))
 709		return;
 710
 711	obj->handle = regs;
 712	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
 713		regcount * sizeof(u32));
 714}
 715
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 716/* Read a block of registers via AHB */
 717static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
 718		struct a6xx_gpu_state *a6xx_state,
 719		const struct a6xx_registers *regs,
 720		struct a6xx_gpu_state_obj *obj)
 721{
 722	int i, regcount = 0, index = 0;
 723
 
 
 
 
 
 724	for (i = 0; i < regs->count; i += 2)
 725		regcount += RANGE(regs->registers, i);
 726
 727	obj->handle = (const void *) regs;
 728	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 729	if (!obj->data)
 730		return;
 731
 732	for (i = 0; i < regs->count; i += 2) {
 733		u32 count = RANGE(regs->registers, i);
 734		int j;
 735
 736		for (j = 0; j < count; j++)
 737			obj->data[index++] = gpu_read(gpu,
 738				regs->registers[i] + j);
 739	}
 740}
 741
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 742/* Read a block of GMU registers */
 743static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
 744		struct a6xx_gpu_state *a6xx_state,
 745		const struct a6xx_registers *regs,
 746		struct a6xx_gpu_state_obj *obj,
 747		bool rscc)
 748{
 749	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 750	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 751	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 752	int i, regcount = 0, index = 0;
 753
 754	for (i = 0; i < regs->count; i += 2)
 755		regcount += RANGE(regs->registers, i);
 756
 757	obj->handle = (const void *) regs;
 758	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
 759	if (!obj->data)
 760		return;
 761
 762	for (i = 0; i < regs->count; i += 2) {
 763		u32 count = RANGE(regs->registers, i);
 764		int j;
 765
 766		for (j = 0; j < count; j++) {
 767			u32 offset = regs->registers[i] + j;
 768			u32 val;
 769
 770			if (rscc)
 771				val = gmu_read_rscc(gmu, offset);
 772			else
 773				val = gmu_read(gmu, offset);
 774
 775			obj->data[index++] = val;
 776		}
 777	}
 778}
 779
 780static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
 781		struct a6xx_gpu_state *a6xx_state)
 782{
 783	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 784	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 785
 786	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
 787		3, sizeof(*a6xx_state->gmu_registers));
 788
 789	if (!a6xx_state->gmu_registers)
 790		return;
 791
 792	a6xx_state->nr_gmu_registers = 3;
 793
 794	/* Get the CX GMU registers from AHB */
 795	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
 796		&a6xx_state->gmu_registers[0], false);
 797	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
 798		&a6xx_state->gmu_registers[1], true);
 799
 800	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 801		return;
 802
 803	/* Set the fence to ALLOW mode so we can access the registers */
 804	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
 805
 806	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[2],
 807		&a6xx_state->gmu_registers[2], false);
 808}
 809
 810static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
 811		struct a6xx_gpu_state *a6xx_state, struct a6xx_gmu_bo *bo)
 812{
 813	struct msm_gpu_state_bo *snapshot;
 814
 815	if (!bo->size)
 816		return NULL;
 817
 818	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
 819	if (!snapshot)
 820		return NULL;
 821
 822	snapshot->iova = bo->iova;
 823	snapshot->size = bo->size;
 824	snapshot->data = kvzalloc(snapshot->size, GFP_KERNEL);
 825	if (!snapshot->data)
 826		return NULL;
 827
 828	memcpy(snapshot->data, bo->virt, bo->size);
 829
 830	return snapshot;
 831}
 832
 833static void a6xx_snapshot_gmu_hfi_history(struct msm_gpu *gpu,
 834					  struct a6xx_gpu_state *a6xx_state)
 835{
 836	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 837	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 838	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
 839	unsigned i, j;
 840
 841	BUILD_BUG_ON(ARRAY_SIZE(gmu->queues) != ARRAY_SIZE(a6xx_state->hfi_queue_history));
 842
 843	for (i = 0; i < ARRAY_SIZE(gmu->queues); i++) {
 844		struct a6xx_hfi_queue *queue = &gmu->queues[i];
 845		for (j = 0; j < HFI_HISTORY_SZ; j++) {
 846			unsigned idx = (j + queue->history_idx) % HFI_HISTORY_SZ;
 847			a6xx_state->hfi_queue_history[i][j] = queue->history[idx];
 848		}
 849	}
 850}
 851
 
 852#define A6XX_GBIF_REGLIST_SIZE   1
 853static void a6xx_get_registers(struct msm_gpu *gpu,
 854		struct a6xx_gpu_state *a6xx_state,
 855		struct a6xx_crashdumper *dumper)
 856{
 857	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
 858		ARRAY_SIZE(a6xx_reglist) +
 859		ARRAY_SIZE(a6xx_hlsq_reglist) + A6XX_GBIF_REGLIST_SIZE;
 860	int index = 0;
 861	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 862
 863	a6xx_state->registers = state_kcalloc(a6xx_state,
 864		count, sizeof(*a6xx_state->registers));
 865
 866	if (!a6xx_state->registers)
 867		return;
 868
 869	a6xx_state->nr_registers = count;
 870
 871	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
 872		a6xx_get_ahb_gpu_registers(gpu,
 873			a6xx_state, &a6xx_ahb_reglist[i],
 874			&a6xx_state->registers[index++]);
 875
 876	if (a6xx_has_gbif(adreno_gpu))
 877		a6xx_get_ahb_gpu_registers(gpu,
 878				a6xx_state, &a6xx_gbif_reglist,
 879				&a6xx_state->registers[index++]);
 880	else
 881		a6xx_get_ahb_gpu_registers(gpu,
 882				a6xx_state, &a6xx_vbif_reglist,
 883				&a6xx_state->registers[index++]);
 884	if (!dumper) {
 885		/*
 886		 * We can't use the crashdumper when the SMMU is stalled,
 887		 * because the GPU has no memory access until we resume
 888		 * translation (but we don't want to do that until after
 889		 * we have captured as much useful GPU state as possible).
 890		 * So instead collect registers via the CPU:
 891		 */
 892		for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 893			a6xx_get_ahb_gpu_registers(gpu,
 894				a6xx_state, &a6xx_reglist[i],
 895				&a6xx_state->registers[index++]);
 896		return;
 897	}
 898
 899	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
 900		a6xx_get_crashdumper_registers(gpu,
 901			a6xx_state, &a6xx_reglist[i],
 902			&a6xx_state->registers[index++],
 903			dumper);
 904
 905	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
 906		a6xx_get_crashdumper_hlsq_registers(gpu,
 907			a6xx_state, &a6xx_hlsq_reglist[i],
 908			&a6xx_state->registers[index++],
 909			dumper);
 910}
 911
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 912/* Read a block of data from an indexed register pair */
 913static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
 914		struct a6xx_gpu_state *a6xx_state,
 915		const struct a6xx_indexed_registers *indexed,
 916		struct a6xx_gpu_state_obj *obj)
 917{
 
 918	int i;
 919
 920	obj->handle = (const void *) indexed;
 921	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
 
 
 
 
 922	if (!obj->data)
 923		return;
 924
 925	/* All the indexed banks start at address 0 */
 926	gpu_write(gpu, indexed->addr, 0);
 927
 928	/* Read the data - each read increments the internal address by 1 */
 929	for (i = 0; i < indexed->count; i++)
 930		obj->data[i] = gpu_read(gpu, indexed->data);
 931}
 932
 933static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
 934		struct a6xx_gpu_state *a6xx_state)
 935{
 936	u32 mempool_size;
 937	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
 938	int i;
 939
 940	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
 941		sizeof(*a6xx_state->indexed_regs));
 942	if (!a6xx_state->indexed_regs)
 943		return;
 944
 945	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
 946		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
 947			&a6xx_state->indexed_regs[i]);
 948
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 949	/* Set the CP mempool size to 0 to stabilize it while dumping */
 950	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
 951	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
 952
 953	/* Get the contents of the CP mempool */
 954	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
 955		&a6xx_state->indexed_regs[i]);
 956
 957	/*
 958	 * Offset 0x2000 in the mempool is the size - copy the saved size over
 959	 * so the data is consistent
 960	 */
 961	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
 962
 963	/* Restore the size in the hardware */
 964	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 965
 966	a6xx_state->nr_indexed_regs = count;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 967}
 968
 969struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
 970{
 971	struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL;
 972	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 973	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 974	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
 975		GFP_KERNEL);
 976	bool stalled = !!(gpu_read(gpu, REG_A6XX_RBBM_STATUS3) &
 977			A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT);
 978
 979	if (!a6xx_state)
 980		return ERR_PTR(-ENOMEM);
 981
 982	INIT_LIST_HEAD(&a6xx_state->objs);
 983
 984	/* Get the generic state from the adreno core */
 985	adreno_gpu_state_get(gpu, &a6xx_state->base);
 986
 987	a6xx_get_gmu_registers(gpu, a6xx_state);
 
 988
 989	a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log);
 990	a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
 991	a6xx_state->gmu_debug = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.debug);
 992
 993	a6xx_snapshot_gmu_hfi_history(gpu, a6xx_state);
 
 994
 995	/* If GX isn't on the rest of the data isn't going to be accessible */
 996	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
 997		return &a6xx_state->base;
 998
 999	/* Get the banks of indexed registers */
1000	a6xx_get_indexed_registers(gpu, a6xx_state);
 
 
 
1001
1002	/*
1003	 * Try to initialize the crashdumper, if we are not dumping state
1004	 * with the SMMU stalled.  The crashdumper needs memory access to
1005	 * write out GPU state, so we need to skip this when the SMMU is
1006	 * stalled in response to an iova fault
1007	 */
1008	if (!stalled && !gpu->needs_hw_init &&
1009	    !a6xx_crashdumper_init(gpu, &_dumper)) {
1010		dumper = &_dumper;
1011	}
1012
1013	a6xx_get_registers(gpu, a6xx_state, dumper);
 
1014
1015	if (dumper) {
1016		a6xx_get_shaders(gpu, a6xx_state, dumper);
1017		a6xx_get_clusters(gpu, a6xx_state, dumper);
1018		a6xx_get_dbgahb_clusters(gpu, a6xx_state, dumper);
 
 
 
 
 
 
 
 
 
 
 
 
1019
1020		msm_gem_kernel_put(dumper->bo, gpu->aspace);
 
1021	}
1022
1023	if (snapshot_debugbus)
1024		a6xx_get_debugbus(gpu, a6xx_state);
1025
1026	a6xx_state->gpu_initialized = !gpu->needs_hw_init;
1027
1028	return  &a6xx_state->base;
1029}
1030
1031static void a6xx_gpu_state_destroy(struct kref *kref)
1032{
1033	struct a6xx_state_memobj *obj, *tmp;
1034	struct msm_gpu_state *state = container_of(kref,
1035			struct msm_gpu_state, ref);
1036	struct a6xx_gpu_state *a6xx_state = container_of(state,
1037			struct a6xx_gpu_state, base);
1038
1039	if (a6xx_state->gmu_log)
1040		kvfree(a6xx_state->gmu_log->data);
1041
1042	if (a6xx_state->gmu_hfi)
1043		kvfree(a6xx_state->gmu_hfi->data);
1044
1045	if (a6xx_state->gmu_debug)
1046		kvfree(a6xx_state->gmu_debug->data);
1047
1048	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
1049		list_del(&obj->node);
1050		kvfree(obj);
1051	}
1052
1053	adreno_gpu_state_destroy(state);
1054	kfree(a6xx_state);
1055}
1056
1057int a6xx_gpu_state_put(struct msm_gpu_state *state)
1058{
1059	if (IS_ERR_OR_NULL(state))
1060		return 1;
1061
1062	return kref_put(&state->ref, a6xx_gpu_state_destroy);
1063}
1064
1065static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
1066		struct drm_printer *p)
1067{
1068	int i, index = 0;
1069
1070	if (!data)
1071		return;
1072
1073	for (i = 0; i < count; i += 2) {
1074		u32 count = RANGE(registers, i);
1075		u32 offset = registers[i];
1076		int j;
1077
1078		for (j = 0; j < count; index++, offset++, j++) {
1079			if (data[index] == 0xdeafbead)
1080				continue;
1081
1082			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
1083				offset << 2, data[index]);
1084		}
1085	}
1086}
1087
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1088static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
1089{
1090	char out[ASCII85_BUFSZ];
1091	long i, l, datalen = 0;
1092
1093	for (i = 0; i < len >> 2; i++) {
1094		if (data[i])
1095			datalen = (i + 1) << 2;
1096	}
1097
1098	if (datalen == 0)
1099		return;
1100
1101	drm_puts(p, "    data: !!ascii85 |\n");
1102	drm_puts(p, "      ");
1103
1104
1105	l = ascii85_encode_len(datalen);
1106
1107	for (i = 0; i < l; i++)
1108		drm_puts(p, ascii85_encode(data[i], out));
1109
1110	drm_puts(p, "\n");
1111}
1112
/*
 * Emit @fmt, then @name, then a newline.
 *
 * Both strings go through drm_puts(), so neither is interpreted as a
 * printf-style format.
 */
static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);

	drm_puts(p, "\n");
}
1119
1120static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
1121		struct drm_printer *p)
1122{
1123	const struct a6xx_shader_block *block = obj->handle;
1124	int i;
1125
1126	if (!obj->handle)
1127		return;
1128
1129	print_name(p, "  - type: ", block->name);
1130
1131	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
1132		drm_printf(p, "    - bank: %d\n", i);
1133		drm_printf(p, "      size: %d\n", block->size);
1134
1135		if (!obj->data)
1136			continue;
1137
1138		print_ascii85(p, block->size << 2,
1139			obj->data + (block->size * i));
1140	}
1141}
1142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1143static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
1144		struct drm_printer *p)
1145{
1146	int ctx, index = 0;
1147
1148	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
1149		int j;
1150
1151		drm_printf(p, "    - context: %d\n", ctx);
1152
1153		for (j = 0; j < size; j += 2) {
1154			u32 count = RANGE(registers, j);
1155			u32 offset = registers[j];
1156			int k;
1157
1158			for (k = 0; k < count; index++, offset++, k++) {
1159				if (data[index] == 0xdeafbead)
1160					continue;
1161
1162				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
1163					offset << 2, data[index]);
1164			}
1165		}
1166	}
1167}
1168
1169static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
1170		struct drm_printer *p)
1171{
1172	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
1173
1174	if (dbgahb) {
1175		print_name(p, "  - cluster-name: ", dbgahb->name);
1176		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
1177			obj->data, p);
1178	}
1179}
1180
1181static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
1182		struct drm_printer *p)
1183{
1184	const struct a6xx_cluster *cluster = obj->handle;
1185
1186	if (cluster) {
1187		print_name(p, "  - cluster-name: ", cluster->name);
1188		a6xx_show_cluster_data(cluster->registers, cluster->count,
1189			obj->data, p);
1190	}
1191}
1192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1193static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
1194		struct drm_printer *p)
1195{
1196	const struct a6xx_indexed_registers *indexed = obj->handle;
1197
1198	if (!indexed)
1199		return;
1200
1201	print_name(p, "  - regs-name: ", indexed->name);
1202	drm_printf(p, "    dwords: %d\n", indexed->count);
1203
1204	print_ascii85(p, indexed->count << 2, obj->data);
1205}
1206
1207static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
1208		u32 *data, struct drm_printer *p)
1209{
1210	if (block) {
1211		print_name(p, "  - debugbus-block: ", block->name);
1212
1213		/*
1214		 * count for regular debugbus data is in quadwords,
1215		 * but print the size in dwords for consistency
1216		 */
1217		drm_printf(p, "    count: %d\n", block->count << 1);
1218
1219		print_ascii85(p, block->count << 3, data);
1220	}
1221}
1222
1223static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
1224		struct drm_printer *p)
1225{
1226	int i;
1227
1228	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
1229		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
1230
1231		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1232	}
1233
1234	if (a6xx_state->vbif_debugbus) {
1235		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
1236
1237		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
1238		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
1239
1240		/* vbif debugbus data is in dwords.  Confusing, huh? */
1241		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
1242	}
1243
1244	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
1245		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
1246
1247		a6xx_show_debugbus_block(obj->handle, obj->data, p);
1248	}
1249}
1250
1251void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1252		struct drm_printer *p)
1253{
 
1254	struct a6xx_gpu_state *a6xx_state = container_of(state,
1255			struct a6xx_gpu_state, base);
1256	int i;
1257
1258	if (IS_ERR_OR_NULL(state))
1259		return;
1260
1261	drm_printf(p, "gpu-initialized: %d\n", a6xx_state->gpu_initialized);
1262
1263	adreno_show(gpu, state, p);
1264
1265	drm_puts(p, "gmu-log:\n");
1266	if (a6xx_state->gmu_log) {
1267		struct msm_gpu_state_bo *gmu_log = a6xx_state->gmu_log;
1268
1269		drm_printf(p, "    iova: 0x%016llx\n", gmu_log->iova);
1270		drm_printf(p, "    size: %zu\n", gmu_log->size);
1271		adreno_show_object(p, &gmu_log->data, gmu_log->size,
1272				&gmu_log->encoded);
1273	}
1274
1275	drm_puts(p, "gmu-hfi:\n");
1276	if (a6xx_state->gmu_hfi) {
1277		struct msm_gpu_state_bo *gmu_hfi = a6xx_state->gmu_hfi;
1278		unsigned i, j;
1279
1280		drm_printf(p, "    iova: 0x%016llx\n", gmu_hfi->iova);
1281		drm_printf(p, "    size: %zu\n", gmu_hfi->size);
1282		for (i = 0; i < ARRAY_SIZE(a6xx_state->hfi_queue_history); i++) {
1283			drm_printf(p, "    queue-history[%u]:", i);
1284			for (j = 0; j < HFI_HISTORY_SZ; j++) {
1285				drm_printf(p, " %d", a6xx_state->hfi_queue_history[i][j]);
1286			}
1287			drm_printf(p, "\n");
1288		}
1289		adreno_show_object(p, &gmu_hfi->data, gmu_hfi->size,
1290				&gmu_hfi->encoded);
1291	}
1292
1293	drm_puts(p, "gmu-debug:\n");
1294	if (a6xx_state->gmu_debug) {
1295		struct msm_gpu_state_bo *gmu_debug = a6xx_state->gmu_debug;
1296
1297		drm_printf(p, "    iova: 0x%016llx\n", gmu_debug->iova);
1298		drm_printf(p, "    size: %zu\n", gmu_debug->size);
1299		adreno_show_object(p, &gmu_debug->data, gmu_debug->size,
1300				&gmu_debug->encoded);
1301	}
1302
1303	drm_puts(p, "registers:\n");
1304	for (i = 0; i < a6xx_state->nr_registers; i++) {
1305		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
1306		const struct a6xx_registers *regs = obj->handle;
1307
1308		if (!obj->handle)
1309			continue;
1310
1311		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
 
 
 
 
 
 
1312	}
1313
1314	drm_puts(p, "registers-gmu:\n");
1315	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
1316		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
1317		const struct a6xx_registers *regs = obj->handle;
1318
1319		if (!obj->handle)
1320			continue;
1321
1322		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
1323	}
1324
1325	drm_puts(p, "indexed-registers:\n");
1326	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
1327		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
1328
1329	drm_puts(p, "shader-blocks:\n");
1330	for (i = 0; i < a6xx_state->nr_shaders; i++)
1331		a6xx_show_shader(&a6xx_state->shaders[i], p);
 
 
 
 
1332
1333	drm_puts(p, "clusters:\n");
1334	for (i = 0; i < a6xx_state->nr_clusters; i++)
1335		a6xx_show_cluster(&a6xx_state->clusters[i], p);
 
 
 
 
1336
1337	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
1338		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
 
 
 
 
1339
1340	drm_puts(p, "debugbus:\n");
1341	a6xx_show_debugbus(a6xx_state, p);
1342}