// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */

#include <linux/ascii85.h>
#include "msm_gem.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.h"
#include "a6xx_gpu_state.h"
#include "a6xx_gmu.xml.h"

struct a6xx_gpu_state_obj {
	const void *handle;
	u32 *data;
};

struct a6xx_gpu_state {
	struct msm_gpu_state base;

	struct a6xx_gpu_state_obj *gmu_registers;
	int nr_gmu_registers;

	struct a6xx_gpu_state_obj *registers;
	int nr_registers;

	struct a6xx_gpu_state_obj *shaders;
	int nr_shaders;

	struct a6xx_gpu_state_obj *clusters;
	int nr_clusters;

	struct a6xx_gpu_state_obj *dbgahb_clusters;
	int nr_dbgahb_clusters;

	struct a6xx_gpu_state_obj *indexed_regs;
	int nr_indexed_regs;

	struct a6xx_gpu_state_obj *debugbus;
	int nr_debugbus;

	struct a6xx_gpu_state_obj *vbif_debugbus;

	struct a6xx_gpu_state_obj *cx_debugbus;
	int nr_cx_debugbus;

	struct list_head objs;
};

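/*
 * Helpers to build a crashdumper script. Each script entry is a pair of
 * 64-bit words: the second word encodes the register dword offset in bits
 * 63:44 along with a count, and the first word holds either the value to
 * write or the target iova for a read. For example, reading two dwords
 * from register offset 0x800 into 'iova' emits the pair
 * { iova, ((u64) 0x800 << 44) | 2 }, and a pair of zeros terminates the
 * script. Bit 21 of the second word appears to select a write operation.
 */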
static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
{
	in[0] = val;
	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);

	return 2;
}

static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
{
	in[0] = target;
	in[1] = (((u64) reg) << 44 | dwords);

	return 2;
}

static inline int CRASHDUMP_FINI(u64 *in)
{
	in[0] = 0;
	in[1] = 0;

	return 2;
}

struct a6xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a6xx_state_memobj {
	struct list_head node;
	unsigned long long data[];
};

static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
{
	struct a6xx_state_memobj *obj =
		kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;

	list_add_tail(&obj->node, &a6xx_state->objs);
	return &obj->data;
}

static void *state_kmemdup(struct a6xx_gpu_state *a6xx_state, void *src,
		size_t size)
{
	void *dst = state_kcalloc(a6xx_state, 1, size);

	if (dst)
		memcpy(dst, src, size);
	return dst;
}

/*
 * Allocate 1MB for the crashdumper scratch region - 8k for the script and
 * the rest for the data
 */
#define A6XX_CD_DATA_OFFSET 8192
#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)

static int a6xx_crashdumper_init(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	if (!IS_ERR(dumper->ptr))
		msm_gem_object_set_name(dumper->bo, "crashdump");

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a6xx_crashdumper_run(struct msm_gpu *gpu,
		struct a6xx_crashdumper *dumper)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;
	int ret;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
		return -EINVAL;

	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
		val & 0x02, 100, 10000);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);

	return ret;
}

/* read a value from the GX debug bus */
static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

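/*
 * The CX debug bus registers are not part of the GPU's own register space,
 * so they are reached through a bare ioremap'd pointer. These helpers turn
 * the dword-based register offsets into the byte offsets that
 * msm_readl/msm_writel expect.
 */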
#define cxdbg_write(ptr, offset, val) \
	msm_writel((val), (ptr) + ((offset) << 2))

#define cxdbg_read(ptr, offset) \
	msm_readl((ptr) + ((offset) << 2))

/* read a value from the CX debug bus */
static int cx_debugbus_read(void __iomem *cxdbg, u32 block, u32 offset,
		u32 *data)
{
	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);

	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);

	/* Wait 1 us to make sure the data is flowing */
	udelay(1);

	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);

	return 2;
}

/* Read a chunk of data from the VBIF debug bus */
static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
		u32 reg, int count, u32 *data)
{
	int i;

	gpu_write(gpu, ctrl0, reg);

	for (i = 0; i < count; i++) {
		gpu_write(gpu, ctrl1, i);
		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
	}

	return count;
}

#define AXI_ARB_BLOCKS 2
#define XIN_AXI_BLOCKS 5
#define XIN_CORE_BLOCKS 4

#define VBIF_DEBUGBUS_BLOCK_SIZE \
	((16 * AXI_ARB_BLOCKS) + \
	 (18 * XIN_AXI_BLOCKS) + \
	 (12 * XIN_CORE_BLOCKS))

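/*
 * Worked out, the VBIF snapshot below is (16 * 2) + (18 * 5) + (12 * 4) =
 * 170 dwords: 16 per AXI arbiter block, 18 per XIN AXI block and 12 per
 * XIN core block.
 */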
static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_gpu_state_obj *obj)
{
	u32 clk, *ptr;
	int i;

	obj->data = state_kcalloc(a6xx_state, VBIF_DEBUGBUS_BLOCK_SIZE,
		sizeof(u32));
	if (!obj->data)
		return;

	obj->handle = NULL;

	/* Get the current clock setting */
	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);

	/* Force on the bus so we can read it */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);

	/* We will read from BUS2 first, so disable BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);

	/* Enable the VBIF bus for reading */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);

	ptr = obj->data;

	for (i = 0; i < AXI_ARB_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << (i + 16), 16, ptr);

	for (i = 0; i < XIN_AXI_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
			1 << i, 18, ptr);

	/* Stop BUS2 so we can turn on BUS1 */
	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);

	for (i = 0; i < XIN_CORE_BLOCKS; i++)
		ptr += vbif_debugbus_read(gpu,
			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
			1 << i, 12, ptr);

	/* Restore the VBIF clock setting */
	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
}

static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += debugbus_read(gpu, block->id, i, ptr);
}

static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_debugbus_block *block,
		struct a6xx_gpu_state_obj *obj)
{
	int i;
	u32 *ptr;

	obj->data = state_kcalloc(a6xx_state, block->count, sizeof(u64));
	if (!obj->data)
		return;

	obj->handle = block;

	for (ptr = obj->data, i = 0; i < block->count; i++)
		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
}

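/*
 * Sample all of the debug buses: the GX and CX buses first have to be
 * programmed (segment and byte-lane selection, with the IVTL and MASKL
 * registers cleared) before the individual blocks can be read back.
 */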
static void a6xx_get_debugbus(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct resource *res;
	void __iomem *cxdbg = NULL;

	/* Set up the GX debug bus */

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);

	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);

	/* Set up the CX debug bus - it lives elsewhere in the system so do a
	 * temporary ioremap for the registers
	 */
	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
			"cx_dbgc");

	if (res)
		cxdbg = ioremap(res->start, resource_size(res));

	if (cxdbg) {
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));

		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));

		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);

		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
			0x76543210);
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
			0xFEDCBA98);

		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
	}

	a6xx_state->debugbus = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_debugbus_blocks),
		sizeof(*a6xx_state->debugbus));

	if (a6xx_state->debugbus) {
		int i;

		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
			a6xx_get_debugbus_block(gpu,
				a6xx_state,
				&a6xx_debugbus_blocks[i],
				&a6xx_state->debugbus[i]);

		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
	}

	a6xx_state->vbif_debugbus =
		state_kcalloc(a6xx_state, 1,
			sizeof(*a6xx_state->vbif_debugbus));

	if (a6xx_state->vbif_debugbus)
		a6xx_get_vbif_debugbus_block(gpu, a6xx_state,
			a6xx_state->vbif_debugbus);

	if (cxdbg) {
		a6xx_state->cx_debugbus =
			state_kcalloc(a6xx_state,
			ARRAY_SIZE(a6xx_cx_debugbus_blocks),
			sizeof(*a6xx_state->cx_debugbus));

		if (a6xx_state->cx_debugbus) {
			int i;

			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
				a6xx_get_cx_debugbus_block(cxdbg,
					a6xx_state,
					&a6xx_cx_debugbus_blocks[i],
					&a6xx_state->cx_debugbus[i]);

			a6xx_state->nr_cx_debugbus =
				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
		}

		iounmap(cxdbg);
	}
}

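/*
 * Register lists are flat arrays of (first, last) dword offset pairs;
 * RANGE() gives the inclusive register count of pair 'a'.
 */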
#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)

/* Read a data cluster from behind the AHB aperture */
static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_dbgahb_cluster *dbgahb,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(dbgahb->statetype + i * 2) << 8);

		for (j = 0; j < dbgahb->count; j += 2) {
			int count = RANGE(dbgahb->registers, j);
			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
				dbgahb->registers[j] - (dbgahb->base >> 2);

			in += CRASHDUMP_READ(in, offset, count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = dbgahb;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->dbgahb_clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_dbgahb_clusters),
		sizeof(*a6xx_state->dbgahb_clusters));

	if (!a6xx_state->dbgahb_clusters)
		return;

	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
		a6xx_get_dbgahb_cluster(gpu, a6xx_state,
			&a6xx_dbgahb_clusters[i],
			&a6xx_state->dbgahb_clusters[i], dumper);
}

/* Read a data cluster from the CP aperture with the crashdumper */
static void a6xx_get_cluster(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_cluster *cluster,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize;
	int i, regcount = 0;

	/* Some clusters need a selector register to be programmed too */
	if (cluster->sel_reg)
		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);

	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
		int j;

		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
			(cluster->id << 8) | (i << 4) | i);

		for (j = 0; j < cluster->count; j += 2) {
			int count = RANGE(cluster->registers, j);

			in += CRASHDUMP_READ(in, cluster->registers[j],
				count, out);

			out += count * sizeof(u32);

			if (i == 0)
				regcount += count;
		}
	}

	CRASHDUMP_FINI(in);

	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = cluster;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_clusters(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->clusters = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_clusters), sizeof(*a6xx_state->clusters));

	if (!a6xx_state->clusters)
		return;

	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);

	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
		a6xx_get_cluster(gpu, a6xx_state, &a6xx_clusters[i],
			&a6xx_state->clusters[i], dumper);
}

/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
static void a6xx_get_shader_block(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_shader_block *block,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
	int i;

	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
		return;

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
			(block->type << 8) | i);

		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
			block->size, out);

		/* Advance so bank i lands at obj->data + (block->size * i) */
		out += block->size * sizeof(u32);
	}

	CRASHDUMP_FINI(in);

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = block;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		datasize);
}

static void a6xx_get_shaders(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i;

	a6xx_state->shaders = state_kcalloc(a6xx_state,
		ARRAY_SIZE(a6xx_shader_blocks), sizeof(*a6xx_state->shaders));

	if (!a6xx_state->shaders)
		return;

	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);

	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
		a6xx_get_shader_block(gpu, a6xx_state, &a6xx_shader_blocks[i],
			&a6xx_state->shaders[i], dumper);
}

/* Read registers from behind the HLSQ aperture with the crashdumper */
static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
			regs->registers[i] - (regs->val0 >> 2);

		in += CRASHDUMP_READ(in, offset, count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers using the crashdumper */
static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj,
		struct a6xx_crashdumper *dumper)
{
	u64 *in = dumper->ptr;
	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
	int i, regcount = 0;

	/* Some blocks might need to program a selector register first */
	if (regs->val0)
		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);

		in += CRASHDUMP_READ(in, regs->registers[i], count, out);

		out += count * sizeof(u32);
		regcount += count;
	}

	CRASHDUMP_FINI(in);

	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
		return;

	if (a6xx_crashdumper_run(gpu, dumper))
		return;

	obj->handle = regs;
	obj->data = state_kmemdup(a6xx_state, dumper->ptr + A6XX_CD_DATA_OFFSET,
		regcount * sizeof(u32));
}

/* Read a block of registers via AHB */
static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gpu_read(gpu,
				regs->registers[i] + j);
	}
}

/* Read a block of GMU registers */
static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_registers *regs,
		struct a6xx_gpu_state_obj *obj)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i, regcount = 0, index = 0;

	for (i = 0; i < regs->count; i += 2)
		regcount += RANGE(regs->registers, i);

	obj->handle = (const void *) regs;
	obj->data = state_kcalloc(a6xx_state, regcount, sizeof(u32));
	if (!obj->data)
		return;

	for (i = 0; i < regs->count; i += 2) {
		u32 count = RANGE(regs->registers, i);
		int j;

		for (j = 0; j < count; j++)
			obj->data[index++] = gmu_read(gmu,
				regs->registers[i] + j);
	}
}

static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
		2, sizeof(*a6xx_state->gmu_registers));

	if (!a6xx_state->gmu_registers)
		return;

	a6xx_state->nr_gmu_registers = 2;

	/* Get the CX GMU registers from AHB */
	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
		&a6xx_state->gmu_registers[0]);

	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return;

	/* Set the fence to ALLOW mode so we can access the registers */
	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);

	_a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[1],
		&a6xx_state->gmu_registers[1]);
}

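/*
 * Capture all of the register blocks: the AHB-visible lists are read
 * directly, while everything else goes through the crashdumper because it
 * sits behind a selector register or the HLSQ aperture.
 */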
static void a6xx_get_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		struct a6xx_crashdumper *dumper)
{
	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
		ARRAY_SIZE(a6xx_reglist) +
		ARRAY_SIZE(a6xx_hlsq_reglist);
	int index = 0;

	a6xx_state->registers = state_kcalloc(a6xx_state,
		count, sizeof(*a6xx_state->registers));

	if (!a6xx_state->registers)
		return;

	a6xx_state->nr_registers = count;

	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
		a6xx_get_ahb_gpu_registers(gpu,
			a6xx_state, &a6xx_ahb_reglist[i],
			&a6xx_state->registers[index++]);

	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
		a6xx_get_crashdumper_registers(gpu,
			a6xx_state, &a6xx_reglist[i],
			&a6xx_state->registers[index++],
			dumper);

	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
		a6xx_get_crashdumper_hlsq_registers(gpu,
			a6xx_state, &a6xx_hlsq_reglist[i],
			&a6xx_state->registers[index++],
			dumper);
}

/* Read a block of data from an indexed register pair */
static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state,
		const struct a6xx_indexed_registers *indexed,
		struct a6xx_gpu_state_obj *obj)
{
	int i;

	obj->handle = (const void *) indexed;
	obj->data = state_kcalloc(a6xx_state, indexed->count, sizeof(u32));
	if (!obj->data)
		return;

	/* All the indexed banks start at address 0 */
	gpu_write(gpu, indexed->addr, 0);

	/* Read the data - each read increments the internal address by 1 */
	for (i = 0; i < indexed->count; i++)
		obj->data[i] = gpu_read(gpu, indexed->data);
}

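/*
 * Capture each bank in a6xx_indexed_reglist plus one extra slot for the
 * CP mempool, which needs special care: its size register is temporarily
 * set to 0 so the pool contents hold still while being read.
 */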
static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
		struct a6xx_gpu_state *a6xx_state)
{
	u32 mempool_size;
	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
	int i;

	a6xx_state->indexed_regs = state_kcalloc(a6xx_state, count,
		sizeof(*a6xx_state->indexed_regs));
	if (!a6xx_state->indexed_regs)
		return;

	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
		a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_indexed_reglist[i],
			&a6xx_state->indexed_regs[i]);

	/* Set the CP mempool size to 0 to stabilize it while dumping */
	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);

	/* Get the contents of the CP mempool */
	a6xx_get_indexed_regs(gpu, a6xx_state, &a6xx_cp_mempool_indexed,
		&a6xx_state->indexed_regs[i]);

	/*
	 * Offset 0x2000 in the mempool is the size - copy the saved size over
	 * so the data is consistent
	 */
	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;

	/* Restore the size in the hardware */
	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);

	a6xx_state->nr_indexed_regs = count;
}

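/*
 * Top-level capture path: always grab the generic adreno state and the
 * GMU registers, but only touch the rest of the hardware (indexed
 * registers, crashdumper blocks, debug buses) when the GX domain is
 * powered.
 */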
struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a6xx_crashdumper dumper = { 0 };
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
		GFP_KERNEL);

	if (!a6xx_state)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&a6xx_state->objs);

	/* Get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &a6xx_state->base);

	a6xx_get_gmu_registers(gpu, a6xx_state);

	/* If GX isn't on the rest of the data isn't going to be accessible */
	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
		return &a6xx_state->base;

	/* Get the banks of indexed registers */
	a6xx_get_indexed_registers(gpu, a6xx_state);

	/* Try to initialize the crashdumper */
	if (!a6xx_crashdumper_init(gpu, &dumper)) {
		a6xx_get_registers(gpu, a6xx_state, &dumper);
		a6xx_get_shaders(gpu, a6xx_state, &dumper);
		a6xx_get_clusters(gpu, a6xx_state, &dumper);
		a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);

		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
	}

	a6xx_get_debugbus(gpu, a6xx_state);

	return &a6xx_state->base;
}

static void a6xx_gpu_state_destroy(struct kref *kref)
{
	struct a6xx_state_memobj *obj, *tmp;
	struct msm_gpu_state *state = container_of(kref,
			struct msm_gpu_state, ref);
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);

	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
		kfree(obj);

	adreno_gpu_state_destroy(state);
	kfree(a6xx_state);
}

int a6xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a6xx_gpu_state_destroy);
}

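/*
 * Print a block of registers in YAML form. Entries still holding the
 * 0xdeafbead marker (presumably slots the dumper never filled in) are
 * skipped, and the stored dword offsets are converted to byte offsets
 * for display.
 */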
static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
		struct drm_printer *p)
{
	int i, index = 0;

	if (!data)
		return;

	for (i = 0; i < count; i += 2) {
		u32 count = RANGE(registers, i);
		u32 offset = registers[i];
		int j;

		for (j = 0; j < count; index++, offset++, j++) {
			if (data[index] == 0xdeafbead)
				continue;

			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
				offset << 2, data[index]);
		}
	}
}

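/*
 * Emit a buffer as an ascii85 YAML scalar. Trailing zero dwords are
 * trimmed from the encoded length first, so an all-zero buffer produces
 * no output at all.
 */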
static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
{
	char out[ASCII85_BUFSZ];
	long i, l, datalen = 0;

	for (i = 0; i < len >> 2; i++) {
		if (data[i])
			datalen = (i + 1) << 2;
	}

	if (datalen == 0)
		return;

	drm_puts(p, "    data: !!ascii85 |\n");
	drm_puts(p, "      ");

	l = ascii85_encode_len(datalen);

	for (i = 0; i < l; i++)
		drm_puts(p, ascii85_encode(data[i], out));

	drm_puts(p, "\n");
}

static void print_name(struct drm_printer *p, const char *fmt, const char *name)
{
	drm_puts(p, fmt);
	drm_puts(p, name);
	drm_puts(p, "\n");
}

static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_shader_block *block = obj->handle;
	int i;

	if (!obj->handle)
		return;

	print_name(p, "  - type: ", block->name);

	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
		drm_printf(p, "    - bank: %d\n", i);
		drm_printf(p, "      size: %d\n", block->size);

		if (!obj->data)
			continue;

		print_ascii85(p, block->size << 2,
			obj->data + (block->size * i));
	}
}

static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
		struct drm_printer *p)
{
	int ctx, index = 0;

	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
		int j;

		drm_printf(p, "    - context: %d\n", ctx);

		for (j = 0; j < size; j += 2) {
			u32 count = RANGE(registers, j);
			u32 offset = registers[j];
			int k;

			for (k = 0; k < count; index++, offset++, k++) {
				if (data[index] == 0xdeafbead)
					continue;

				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
					offset << 2, data[index]);
			}
		}
	}
}

static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;

	if (dbgahb) {
		print_name(p, "  - cluster-name: ", dbgahb->name);
		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
			obj->data, p);
	}
}

static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_cluster *cluster = obj->handle;

	if (cluster) {
		print_name(p, "  - cluster-name: ", cluster->name);
		a6xx_show_cluster_data(cluster->registers, cluster->count,
			obj->data, p);
	}
}

static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
		struct drm_printer *p)
{
	const struct a6xx_indexed_registers *indexed = obj->handle;

	if (!indexed)
		return;

	print_name(p, "  - regs-name: ", indexed->name);
	drm_printf(p, "    dwords: %d\n", indexed->count);

	print_ascii85(p, indexed->count << 2, obj->data);
}

static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
		u32 *data, struct drm_printer *p)
{
	if (block) {
		print_name(p, "  - debugbus-block: ", block->name);

		/*
		 * count for regular debugbus data is in quadwords,
		 * but print the size in dwords for consistency
		 */
		drm_printf(p, "    count: %d\n", block->count << 1);

		print_ascii85(p, block->count << 3, data);
	}
}

static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
		struct drm_printer *p)
{
	int i;

	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}

	if (a6xx_state->vbif_debugbus) {
		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;

		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);

		/* vbif debugbus data is in dwords.  Confusing, huh? */
		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
	}

	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];

		a6xx_show_debugbus_block(obj->handle, obj->data, p);
	}
}

void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct a6xx_gpu_state *a6xx_state = container_of(state,
			struct a6xx_gpu_state, base);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	drm_puts(p, "registers:\n");
	for (i = 0; i < a6xx_state->nr_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "registers-gmu:\n");
	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
		const struct a6xx_registers *regs = obj->handle;

		if (!obj->handle)
			continue;

		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
	}

	drm_puts(p, "indexed-registers:\n");
	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);

	drm_puts(p, "shader-blocks:\n");
	for (i = 0; i < a6xx_state->nr_shaders; i++)
		a6xx_show_shader(&a6xx_state->shaders[i], p);

	drm_puts(p, "clusters:\n");
	for (i = 0; i < a6xx_state->nr_clusters; i++)
		a6xx_show_cluster(&a6xx_state->clusters[i], p);

	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);

	drm_puts(p, "debugbus:\n");
	a6xx_show_debugbus(a6xx_state, p);
}