// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#ifdef CONFIG_ARM_ARCH_TIMER
#include <asm/arch_timer.h>
#endif

#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/firmware.h>
#include <linux/iopoll.h>
#include <linux/iosys-map.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>

#include "panthor_device.h"
#include "panthor_fw.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"

#define CSF_FW_NAME "mali_csffw.bin"

#define PING_INTERVAL_MS			12000
#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
#define IDLE_HYSTERESIS_US			800
#define PWROFF_HYSTERESIS_US			10000
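
/*
 * Worked example (illustrative, not driver code): the progress timeout is
 * programmed in units of 1024 cycles (see PROGRESS_TIMEOUT_SCALE_SHIFT and
 * panthor_fw_init_global_iface()):
 *
 *	PROGRESS_TIMEOUT_CYCLES = 5 * 500 * 1024 * 1024 = 2,621,440,000 cycles
 *	progress_timer = PROGRESS_TIMEOUT_CYCLES >> 10 = 2,560,000
 *
 * At a hypothetical 500 MHz GPU clock, that is roughly
 * 2,621,440,000 / 500,000,000 ~= 5.24 seconds before the FW flags a
 * progress timeout.
 */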

/**
 * struct panthor_fw_binary_hdr - Firmware binary header.
 */
struct panthor_fw_binary_hdr {
	/** @magic: Magic value to check binary validity. */
	u32 magic;
#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e

	/** @minor: Minor FW version. */
	u8 minor;

	/** @major: Major FW version. */
	u8 major;
#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0

	/** @padding1: MBZ. */
	u16 padding1;

	/** @version_hash: FW version hash. */
	u32 version_hash;

	/** @padding2: MBZ. */
	u32 padding2;

	/** @size: FW binary size. */
	u32 size;
};

/**
 * enum panthor_fw_binary_entry_type - Firmware binary entry type
 */
enum panthor_fw_binary_entry_type {
	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,

	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,

	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,

	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,

	/**
	 * @CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA: Metadata about how
	 * the FW binary was built.
	 */
	CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA = 6
};

#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)
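
/*
 * Illustrative decode (not driver code): an entry header word of
 * 0x80001000 breaks down as:
 *
 *	CSF_FW_BINARY_ENTRY_TYPE(0x80001000) == 0x00 (IFACE)
 *	CSF_FW_BINARY_ENTRY_SIZE(0x80001000) == 0x10 (16 bytes, header included)
 *	CSF_FW_BINARY_ENTRY_OPTIONAL set, CSF_FW_BINARY_ENTRY_UPDATE clear
 *
 * panthor_fw_load_entry() relies on this layout to size its sub-iterator.
 */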

#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD					BIT(0)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR					BIT(1)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX					BIT(2)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE			(0 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED			(1 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT	(2 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT		(3 << 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK			GENMASK(4, 3)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT				BIT(5)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED				BIT(30)
#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO				BIT(31)

#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS			\
	(CSF_FW_BINARY_IFACE_ENTRY_RD_RD |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_WR |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_EX |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK |			\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED |				\
	 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)

/**
 * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
 */
struct panthor_fw_binary_section_entry_hdr {
	/** @flags: Section flags. */
	u32 flags;

	/** @va: MCU virtual range to map this binary section to. */
	struct {
		/** @start: Start address. */
		u32 start;

		/** @end: End address. */
		u32 end;
	} va;

	/** @data: Data to initialize the FW section with. */
	struct {
		/** @start: Start offset in the FW binary. */
		u32 start;

		/** @end: End offset in the FW binary. */
		u32 end;
	} data;
};

struct panthor_fw_build_info_hdr {
	/** @meta_start: Offset of the build info data in the FW binary */
	u32 meta_start;
	/** @meta_size: Size of the build info data in the FW binary */
	u32 meta_size;
};

/**
 * struct panthor_fw_binary_iter - Firmware binary iterator
 *
 * Used to parse a firmware binary.
 */
struct panthor_fw_binary_iter {
	/** @data: FW binary data. */
	const void *data;

	/** @size: FW binary size. */
	size_t size;

	/** @offset: Iterator offset. */
	size_t offset;
};

/**
 * struct panthor_fw_section - FW section
 */
struct panthor_fw_section {
	/** @node: Used to keep track of FW sections. */
	struct list_head node;

	/** @flags: Section flags, as encoded in the FW binary. */
	u32 flags;

	/** @mem: Section memory. */
	struct panthor_kernel_bo *mem;

	/**
	 * @name: Name of the section, as specified in the binary.
	 *
	 * Can be NULL.
	 */
	const char *name;

	/**
	 * @data: Initial data copied to the FW memory.
	 *
	 * We keep data around so we can reload sections after a reset.
	 */
	struct {
		/** @buf: Buffer used to store init data. */
		const void *buf;

		/** @size: Size of @buf in bytes. */
		size_t size;
	} data;
};

#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL

#define MIN_CS_PER_CSG				8
#define MIN_CSGS				3
#define MAX_CSG_PRIO				0xf

#define CSF_IFACE_VERSION(major, minor, patch)	\
	(((major) << 24) | ((minor) << 16) | (patch))
#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)
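
/*
 * Illustrative packing (not driver code): CSF_IFACE_VERSION(1, 1, 0)
 * evaluates to 0x01010000, and the accessors invert it:
 *
 *	CSF_IFACE_VERSION_MAJOR(0x01010000) == 1
 *	CSF_IFACE_VERSION_MINOR(0x01010000) == 1
 *	CSF_IFACE_VERSION_PATCH(0x01010000) == 0
 *
 * panthor_get_instr_features() uses this encoding to gate on v1.1.0.
 */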

#define CSF_GROUP_CONTROL_OFFSET		0x1000
#define CSF_STREAM_CONTROL_OFFSET		0x40
#define CSF_UNPRESERVED_REG_COUNT		4

/**
 * struct panthor_fw_iface - FW interfaces
 */
struct panthor_fw_iface {
	/** @global: Global interface. */
	struct panthor_fw_global_iface global;

	/** @groups: Group slot interfaces. */
	struct panthor_fw_csg_iface groups[MAX_CSGS];

	/** @streams: Command stream slot interfaces. */
	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
};

/**
 * struct panthor_fw - Firmware management
 */
struct panthor_fw {
	/** @vm: MCU VM. */
	struct panthor_vm *vm;

	/** @sections: List of FW sections. */
	struct list_head sections;

	/** @shared_section: The section containing the FW interfaces. */
	struct panthor_fw_section *shared_section;

	/** @iface: FW interfaces. */
	struct panthor_fw_iface iface;

	/** @watchdog: Collection of fields relating to the FW watchdog. */
	struct {
		/** @ping_work: Delayed work used to ping the FW. */
		struct delayed_work ping_work;
	} watchdog;

	/**
	 * @req_waitqueue: FW request waitqueue.
	 *
	 * Every time a request is sent to a command stream group or the global
	 * interface, the caller will first busy wait for the request to be
	 * acknowledged, and then fall back to a sleeping wait.
	 *
	 * This wait queue is here to support the sleeping wait flavor.
	 */
	wait_queue_head_t req_waitqueue;

	/** @booted: True if the FW is booted. */
	bool booted;

	/**
	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
	 *
	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
	 *
	 * Any time the firmware is properly suspended, a fast reset can take place.
	 * On the other hand, if the halt operation failed, the driver will reload
	 * all sections to make sure we start from a fresh state.
	 */
	bool fast_reset;

	/** @irq: Job irq data. */
	struct panthor_irq irq;
};

struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
{
	return ptdev->fw->vm;
}

/**
 * panthor_fw_get_glb_iface() - Get the global interface
 * @ptdev: Device.
 *
 * Return: The global interface.
 */
struct panthor_fw_global_iface *
panthor_fw_get_glb_iface(struct panthor_device *ptdev)
{
	return &ptdev->fw->iface.global;
}

/**
 * panthor_fw_get_csg_iface() - Get a command stream group slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 *
 * Return: The command stream group slot interface.
 */
struct panthor_fw_csg_iface *
panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
		return NULL;

	return &ptdev->fw->iface.groups[csg_slot];
}

/**
 * panthor_fw_get_cs_iface() - Get a command stream slot interface
 * @ptdev: Device.
 * @csg_slot: Index of the command stream group slot.
 * @cs_slot: Index of the command stream slot.
 *
 * Return: The command stream slot interface.
 */
struct panthor_fw_cs_iface *
panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
{
	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG))
		return NULL;

	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
}

/**
 * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
 * @ptdev: Device.
 * @timeout_us: Timeout expressed in micro-seconds.
 *
 * The FW has two timer sources: the GPU counter or the arch-timer. We need
 * to express timeouts in terms of number of cycles and specify which
 * timer source should be used.
 *
 * Return: A value suitable for timeout fields in the global interface.
 */
static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
{
	bool use_cycle_counter = false;
	u32 timer_rate = 0;
	u64 mod_cycles;

#ifdef CONFIG_ARM_ARCH_TIMER
	timer_rate = arch_timer_get_cntfrq();
#endif

	if (!timer_rate) {
		use_cycle_counter = true;
		timer_rate = clk_get_rate(ptdev->clks.core);
	}

	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
		/* We couldn't get a valid clock rate, let's just pick the
		 * maximum value so the FW still handles the core
		 * power on/off requests.
		 */
		return GLB_TIMER_VAL(~0) |
		       GLB_TIMER_SOURCE_GPU_COUNTER;
	}

	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
				      1000000ull << 10);
	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
		mod_cycles = GLB_TIMER_VAL(~0);

	return GLB_TIMER_VAL(mod_cycles) |
	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
}
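
/*
 * Worked example (illustrative, not driver code): with a hypothetical
 * 19.2 MHz arch-timer and timeout_us = PWROFF_HYSTERESIS_US (10000):
 *
 *	mod_cycles = DIV_ROUND_UP(10000 * 19200000, 1000000 << 10)
 *		   = DIV_ROUND_UP(192000000000, 1024000000)
 *		   = 188
 *
 * i.e. the timeout is stored in units of 1024 timer cycles, and
 * GLB_TIMER_SOURCE_GPU_COUNTER is left cleared because the arch-timer
 * provided a valid rate.
 */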

static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
				       struct panthor_fw_binary_iter *iter,
				       void *out, size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware too small\n");
		return -EINVAL;
	}

	memcpy(out, iter->data + iter->offset, size);
	iter->offset = new_offset;
	return 0;
}

static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
					   struct panthor_fw_binary_iter *iter,
					   struct panthor_fw_binary_iter *sub_iter,
					   size_t size)
{
	size_t new_offset = iter->offset + size;

	if (new_offset > iter->size || new_offset < iter->offset) {
		drm_err(&ptdev->base, "Firmware entry too long\n");
		return -EINVAL;
	}

	sub_iter->offset = 0;
	sub_iter->data = iter->data + iter->offset;
	sub_iter->size = size;
	iter->offset = new_offset;
	return 0;
}
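
/*
 * Minimal sketch of how the two iterators nest (illustrative, not driver
 * code; this mirrors panthor_fw_load_entry() below): the main iterator
 * walks entry headers, and each entry's payload is parsed through a
 * bounded sub-iterator so a corrupt entry size can't make us read past
 * the entry:
 *
 *	struct panthor_fw_binary_iter iter = { .data = fw->data, .size = fw->size };
 *	struct panthor_fw_binary_iter eiter;
 *	u32 ehdr;
 *
 *	panthor_fw_binary_iter_read(ptdev, &iter, &ehdr, sizeof(ehdr));
 *	panthor_fw_binary_sub_iter_init(ptdev, &iter, &eiter,
 *					CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr));
 *	// reads through &eiter are now clamped to this entry's payload
 */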

static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
					struct panthor_fw_section *section)
{
	bool was_mapped = !!section->mem->kmap;
	int ret;

	if (!section->data.size &&
	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
		return;

	ret = panthor_kernel_bo_vmap(section->mem);
	if (drm_WARN_ON(&ptdev->base, ret))
		return;

	memcpy(section->mem->kmap, section->data.buf, section->data.size);
	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
		memset(section->mem->kmap + section->data.size, 0,
		       panthor_kernel_bo_size(section->mem) - section->data.size);
	}

	if (!was_mapped)
		panthor_kernel_bo_vunmap(section->mem);
}

/**
 * panthor_fw_alloc_queue_iface_mem() - Allocate ring-buffer interfaces.
 * @ptdev: Device.
 * @input: Pointer holding the input interface on success.
 * Should be ignored on failure.
 * @output: Pointer holding the output interface on success.
 * Should be ignored on failure.
 * @input_fw_va: Pointer holding the input interface FW VA on success.
 * Should be ignored on failure.
 * @output_fw_va: Pointer holding the output interface FW VA on success.
 * Should be ignored on failure.
 *
 * Allocates panthor_fw_ringbuf_{input,output}_iface interfaces. The input
 * interface is at offset 0, and the output interface at offset 4096.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
				 struct panthor_fw_ringbuf_input_iface **input,
				 const struct panthor_fw_ringbuf_output_iface **output,
				 u32 *input_fw_va, u32 *output_fw_va)
{
	struct panthor_kernel_bo *mem;
	int ret;

	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
				       DRM_PANTHOR_BO_NO_MMAP,
				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
				       PANTHOR_VM_KERNEL_AUTO_VA);
	if (IS_ERR(mem))
		return mem;

	ret = panthor_kernel_bo_vmap(mem);
	if (ret) {
		panthor_kernel_bo_destroy(mem);
		return ERR_PTR(ret);
	}

	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
	*input = mem->kmap;
	*output = mem->kmap + SZ_4K;
	*input_fw_va = panthor_kernel_bo_gpuva(mem);
	*output_fw_va = *input_fw_va + SZ_4K;

	return mem;
}
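
/*
 * Caller-side sketch (illustrative, not driver code; the real callers
 * live in the scheduler code): one 8k BO yields both halves of a queue
 * interface, with the FW VAs handed to the MCU and the CPU pointers kept
 * by the driver:
 *
 *	struct panthor_fw_ringbuf_input_iface *in;
 *	const struct panthor_fw_ringbuf_output_iface *out;
 *	u32 in_va, out_va;
 *	struct panthor_kernel_bo *bo;
 *
 *	bo = panthor_fw_alloc_queue_iface_mem(ptdev, &in, &out, &in_va, &out_va);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	// in/in_va: host-written half, out/out_va: FW-written half
 */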

/**
 * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
 * @ptdev: Device.
 * @size: Size of the suspend buffer.
 *
 * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
 */
struct panthor_kernel_bo *
panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
{
	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
					DRM_PANTHOR_BO_NO_MMAP,
					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
					PANTHOR_VM_KERNEL_AUTO_VA);
}

static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
					 const struct firmware *fw,
					 struct panthor_fw_binary_iter *iter,
					 u32 ehdr)
{
	ssize_t vm_pgsz = panthor_vm_page_size(ptdev->fw->vm);
	struct panthor_fw_binary_section_entry_hdr hdr;
	struct panthor_fw_section *section;
	u32 section_size;
	u32 name_len;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.data.end < hdr.data.start) {
		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
			hdr.data.end, hdr.data.start);
		return -EINVAL;
	}

	if (hdr.va.end < hdr.va.start) {
		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
			hdr.va.end, hdr.va.start);
		return -EINVAL;
	}

	if (hdr.data.end > fw->size) {
		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
			hdr.data.end, fw->size);
		return -EINVAL;
	}

	if (!IS_ALIGNED(hdr.va.start, vm_pgsz) || !IS_ALIGNED(hdr.va.end, vm_pgsz)) {
		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
			hdr.va.start, hdr.va.end);
		return -EINVAL;
	}

	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
			hdr.flags);
		return -EINVAL;
	}

	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
		drm_warn(&ptdev->base,
			 "Firmware protected mode entry not supported, ignoring");
		return 0;
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
		drm_err(&ptdev->base,
			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
		return -EINVAL;
	}

	name_len = iter->size - iter->offset;

	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
	if (!section)
		return -ENOMEM;

	list_add_tail(&section->node, &ptdev->fw->sections);
	section->flags = hdr.flags;
	section->data.size = hdr.data.end - hdr.data.start;

	if (section->data.size > 0) {
		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);

		if (!data)
			return -ENOMEM;

		memcpy(data, fw->data + hdr.data.start, section->data.size);
		section->data.buf = data;
	}

	if (name_len > 0) {
		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);

		if (!name)
			return -ENOMEM;

		memcpy(name, iter->data + iter->offset, name_len);
		name[name_len] = '\0';
		section->name = name;
	}

	section_size = hdr.va.end - hdr.va.start;
	if (section_size) {
		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
		struct panthor_gem_object *bo;
		u32 vm_map_flags = 0;
		struct sg_table *sgt;
		u64 va = hdr.va.start;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;

		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;

		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
		 * non-cacheable for now. We might want to introduce a new
		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
		 * memory and is currently not used by our driver) for
		 * AS_MEMATTR_AARCH64_SHARED memory, so we can benefit from
		 * IO-coherent systems.
		 */
		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;

		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
							section_size,
							DRM_PANTHOR_BO_NO_MMAP,
							vm_map_flags, va);
		if (IS_ERR(section->mem))
			return PTR_ERR(section->mem);

		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
			return -EINVAL;

		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
			ret = panthor_kernel_bo_vmap(section->mem);
			if (ret)
				return ret;
		}

		panthor_fw_init_section_mem(ptdev, section);

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;
}

static int panthor_fw_read_build_info(struct panthor_device *ptdev,
				      const struct firmware *fw,
				      struct panthor_fw_binary_iter *iter,
				      u32 ehdr)
{
	struct panthor_fw_build_info_hdr hdr;
	char header[9];
	const char git_sha_header[sizeof(header)] = "git_sha: ";
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
	if (ret)
		return ret;

	if (hdr.meta_start > fw->size ||
	    hdr.meta_start + hdr.meta_size > fw->size) {
		drm_err(&ptdev->base, "Firmware build info corrupt\n");
		/* We don't need the build info, so continue */
		return 0;
	}

	if (memcmp(git_sha_header, fw->data + hdr.meta_start,
		   sizeof(git_sha_header))) {
		/* Not the expected header, this isn't metadata we understand */
		return 0;
	}

	/* Check that the git SHA is NULL terminated as expected */
	if (fw->data[hdr.meta_start + hdr.meta_size - 1] != '\0') {
		drm_warn(&ptdev->base, "Firmware's git sha is not NULL terminated\n");
		/* Don't treat as fatal */
		return 0;
	}

	drm_info(&ptdev->base, "Firmware git sha: %s\n",
		 fw->data + hdr.meta_start + sizeof(git_sha_header));

	return 0;
}

static void
panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
{
	struct panthor_fw_section *section;

	list_for_each_entry(section, &ptdev->fw->sections, node) {
		struct sg_table *sgt;

		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
			continue;

		panthor_fw_init_section_mem(ptdev, section);
		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}
}

static int panthor_fw_load_entry(struct panthor_device *ptdev,
				 const struct firmware *fw,
				 struct panthor_fw_binary_iter *iter)
{
	struct panthor_fw_binary_iter eiter;
	u32 ehdr;
	int ret;

	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
	if (ret)
		return ret;

	if ((iter->offset % sizeof(u32)) ||
	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
		return -EINVAL;
	}

	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
		return -EINVAL;

	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
	case CSF_FW_BINARY_ENTRY_TYPE_BUILD_INFO_METADATA:
		return panthor_fw_read_build_info(ptdev, fw, &eiter, ehdr);

	/* FIXME: handle those entry types? */
	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
		return 0;
	default:
		break;
	}

	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
		return 0;

	drm_err(&ptdev->base,
		"Unsupported non-optional entry type %u in firmware\n",
		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
	return -EINVAL;
}

static int panthor_fw_load(struct panthor_device *ptdev)
{
	const struct firmware *fw = NULL;
	struct panthor_fw_binary_iter iter = {};
	struct panthor_fw_binary_hdr hdr;
	char fw_path[128];
	int ret;

	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
		 CSF_FW_NAME);

	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
	if (ret) {
		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
			fw_path);
		return ret;
	}

	iter.data = fw->data;
	iter.size = fw->size;
	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
	if (ret)
		goto out;

	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Invalid firmware magic\n");
		goto out;
	}

	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
		goto out;
	}

	if (hdr.size > iter.size) {
		ret = -EINVAL;
		drm_err(&ptdev->base, "Firmware image is truncated\n");
		goto out;
	}

	iter.size = hdr.size;

	while (iter.offset < hdr.size) {
		ret = panthor_fw_load_entry(ptdev, fw, &iter);
		if (ret)
			goto out;
	}

	if (!ptdev->fw->shared_section) {
		drm_err(&ptdev->base, "Shared interface region not found\n");
		ret = -EINVAL;
		goto out;
	}

out:
	release_firmware(fw);
	return ret;
}

/**
 * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
 * @ptdev: Device.
 * @mcu_va: MCU address.
 *
 * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
 */
static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
{
	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
	u64 shared_mem_end = shared_mem_start +
			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);

	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
		return NULL;

	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
}
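
/*
 * Worked example (illustrative, not driver code): assuming the shared
 * section starts at CSF_MCU_SHARED_REGION_START (0x04000000) and spans
 * the whole CSF_MCU_SHARED_REGION_SIZE, an MCU VA of 0x04000100 resolves
 * to:
 *
 *	shared_section->mem->kmap + (0x04000100 - 0x04000000)
 *	== kmap + 0x100
 *
 * while anything outside [0x04000000, 0x08000000) returns NULL, which the
 * interface-init code treats as a firmware-provided VA it must reject.
 */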

static int panthor_init_cs_iface(struct panthor_device *ptdev,
				 unsigned int csg_idx, unsigned int cs_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
			   (csg_idx * glb_iface->control->group_stride) +
			   CSF_STREAM_CONTROL_OFFSET +
			   (cs_idx * csg_iface->control->stream_stride);
	struct panthor_fw_cs_iface *first_cs_iface =
		panthor_fw_get_cs_iface(ptdev, 0, 0);

	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&cs_iface->lock);
	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);

	if (!cs_iface->input || !cs_iface->output) {
		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
		return -EINVAL;
	}

	if (cs_iface != first_cs_iface) {
		if (cs_iface->control->features != first_cs_iface->control->features) {
			drm_err(&ptdev->base, "Expecting identical CS slots");
			return -EINVAL;
		}
	} else {
		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);

		ptdev->csif_info.cs_reg_count = reg_count;
		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
	}

	return 0;
}
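
/*
 * Shared-section layout sketch (derived from the offset math above;
 * group_stride and stream_stride come from the FW, so the spacing is only
 * illustrative):
 *
 *	0x0000                   global control interface
 *	0x1000                   CSG 0 control (CSF_GROUP_CONTROL_OFFSET)
 *	0x1000 + 0x40            CSG 0 / CS 0 control (CSF_STREAM_CONTROL_OFFSET)
 *	0x1000 + 0x40 + stride   CSG 0 / CS 1 control (stride == stream_stride)
 *	0x1000 + group_stride    CSG 1 control, and so on
 *
 * so a CS control interface lives at:
 *
 *	CSF_GROUP_CONTROL_OFFSET + csg_idx * group_stride +
 *	CSF_STREAM_CONTROL_OFFSET + cs_idx * stream_stride
 */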

static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
			const struct panthor_fw_csg_control_iface *b)
{
	if (a->features != b->features)
		return false;
	if (a->suspend_size != b->suspend_size)
		return false;
	if (a->protm_suspend_size != b->protm_suspend_size)
		return false;
	if (a->stream_num != b->stream_num)
		return false;
	return true;
}

static int panthor_init_csg_iface(struct panthor_device *ptdev,
				  unsigned int csg_idx)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
	unsigned int i;

	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
		return -EINVAL;

	spin_lock_init(&csg_iface->lock);
	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);

	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
		return -EINVAL;

	if (!csg_iface->input || !csg_iface->output) {
		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
		return -EINVAL;
	}

	if (csg_idx > 0) {
		struct panthor_fw_csg_iface *first_csg_iface =
			panthor_fw_get_csg_iface(ptdev, 0);

		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
			drm_err(&ptdev->base, "Expecting identical CSG slots");
			return -EINVAL;
		}
	}

	for (i = 0; i < csg_iface->control->stream_num; i++) {
		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);

		if (ret)
			return ret;
	}

	return 0;
}

static u32 panthor_get_instr_features(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
		return 0;

	return glb_iface->control->instr_features;
}

static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
	unsigned int i;

	if (!ptdev->fw->shared_section->mem->kmap)
		return -EINVAL;

	spin_lock_init(&glb_iface->lock);
	glb_iface->control = ptdev->fw->shared_section->mem->kmap;

	if (!glb_iface->control->version) {
		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
		return -EINVAL;
	}

	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
	if (!glb_iface->input || !glb_iface->output) {
		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
		return -EINVAL;
	}

	if (glb_iface->control->group_num > MAX_CSGS ||
	    glb_iface->control->group_num < MIN_CSGS) {
		drm_err(&ptdev->base, "Invalid number of control groups");
		return -EINVAL;
	}

	for (i = 0; i < glb_iface->control->group_num; i++) {
		int ret = panthor_init_csg_iface(ptdev, i);

		if (ret)
			return ret;
	}

	drm_info(&ptdev->base, "CSF FW using interface v%d.%d.%d, Features %#x Instrumentation features %#x",
		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
		 glb_iface->control->features,
		 panthor_get_instr_features(ptdev));
	return 0;
}

static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* Enable all cores. */
	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;

	/* Setup timers. */
	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);

	/* Enable interrupts we care about. */
	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
					 GLB_PING |
					 GLB_CFG_PROGRESS_TIMER |
					 GLB_CFG_POWEROFF_TIMER |
					 GLB_IDLE_EN |
					 GLB_IDLE;

	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
	panthor_fw_toggle_reqs(glb_iface, req, ack,
			       GLB_CFG_ALLOC_EN |
			       GLB_CFG_POWEROFF_TIMER |
			       GLB_CFG_PROGRESS_TIMER);

	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* Kick the watchdog. */
	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
			 msecs_to_jiffies(PING_INTERVAL_MS));
}

static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
{
	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
		ptdev->fw->booted = true;

	wake_up_all(&ptdev->fw->req_waitqueue);

	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
	if (!ptdev->fw->booted)
		return;

	panthor_sched_report_fw_events(ptdev, status);
}
PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);

static int panthor_fw_start(struct panthor_device *ptdev)
{
	bool timedout = false;

	ptdev->fw->booted = false;
	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);

	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
				ptdev->fw->booted,
				msecs_to_jiffies(1000))) {
		if (!ptdev->fw->booted &&
		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
			timedout = true;
	}

	if (timedout) {
		static const char * const status_str[] = {
			[MCU_STATUS_DISABLED] = "disabled",
			[MCU_STATUS_ENABLED] = "enabled",
			[MCU_STATUS_HALT] = "halt",
			[MCU_STATUS_FATAL] = "fatal",
		};
		u32 status = gpu_read(ptdev, MCU_STATUS);

		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
		return -ETIMEDOUT;
	}

	return 0;
}

static void panthor_fw_stop(struct panthor_device *ptdev)
{
	u32 status;

	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
			       status == MCU_STATUS_DISABLED, 10, 100000))
		drm_err(&ptdev->base, "Failed to stop MCU");
}

/**
 * panthor_fw_pre_reset() - Call before a reset.
 * @ptdev: Device.
 * @on_hang: true if the reset was triggered on a GPU hang.
 *
 * If the reset is not triggered on a hang, we try to gracefully halt the
 * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
 */
void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
{
	/* Make sure we won't be woken up by a ping. */
	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	ptdev->fw->fast_reset = false;

	if (!on_hang) {
		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
		u32 status;

		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
					status == MCU_STATUS_HALT, 10, 100000) &&
		    glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
			ptdev->fw->fast_reset = true;
		} else {
			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
		}

		/* The FW detects 0 -> 1 transitions. Make sure we reset
		 * the HALT bit before the FW is rebooted.
		 */
		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
	}

	panthor_job_irq_suspend(&ptdev->fw->irq);
}

/**
 * panthor_fw_post_reset() - Call after a reset.
 * @ptdev: Device.
 *
 * Start the FW. If this is not a fast reset, all FW sections are reloaded to
 * make sure we can recover from a memory corruption.
 */
int panthor_fw_post_reset(struct panthor_device *ptdev)
{
	int ret;

	/* Make the MCU VM active. */
	ret = panthor_vm_active(ptdev->fw->vm);
	if (ret)
		return ret;

	/* If this is a fast reset, try to start the MCU without reloading
	 * the FW sections. If it fails, go for a full reset.
	 */
	if (ptdev->fw->fast_reset) {
		ret = panthor_fw_start(ptdev);
		if (!ret)
			goto out;

		/* Forcibly reset the MCU and force a slow reset, so we get a
		 * fresh boot on the next panthor_fw_start() call.
		 */
		panthor_fw_stop(ptdev);
		ptdev->fw->fast_reset = false;
		drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");

		ret = panthor_vm_flush_all(ptdev->fw->vm);
		if (ret) {
			drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)");
			return ret;
		}
	}

	/* Reload all sections, including RO ones. We're not supposed
	 * to end up here anyway, let's just assume the overhead of
	 * reloading everything is acceptable.
	 */
	panthor_reload_fw_sections(ptdev, true);

	ret = panthor_fw_start(ptdev);
	if (ret) {
		drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW)");
		return ret;
	}

out:
	/* We must re-initialize the global interface even on fast-reset. */
	panthor_fw_init_global_iface(ptdev);
	return 0;
}

/**
 * panthor_fw_unplug() - Called when the device is unplugged.
 * @ptdev: Device.
 *
 * This function must make sure all pending operations are flushed before
 * it releases device resources, thus preventing any interaction with
 * the HW.
 *
 * If there is still FW-related work running after this function returns,
 * it must use drm_dev_{enter,exit}() and skip any HW access when
 * drm_dev_enter() returns false.
 */
void panthor_fw_unplug(struct panthor_device *ptdev)
{
	struct panthor_fw_section *section;

	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);

	/* Make sure the IRQ handler cannot be called after that point. */
	if (ptdev->fw->irq.irq)
		panthor_job_irq_suspend(&ptdev->fw->irq);

	panthor_fw_stop(ptdev);

	list_for_each_entry(section, &ptdev->fw->sections, node)
		panthor_kernel_bo_destroy(section->mem);

	/* We intentionally don't call panthor_vm_idle() and let
	 * panthor_mmu_unplug() release the AS we acquired with
	 * panthor_vm_active() so we don't have to track the VM active/idle
	 * state to keep the active_refcnt balanced.
	 */
	panthor_vm_put(ptdev->fw->vm);
	ptdev->fw->vm = NULL;

	panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
}

/**
 * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
 * @req_ptr: Pointer to the req register.
 * @ack_ptr: Pointer to the ack register.
 * @wq: Wait queue to use for the sleeping wait.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
				wait_queue_head_t *wq,
				u32 req_mask, u32 *acked,
				u32 timeout_ms)
{
	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
	int ret;

	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
	*acked = req_mask;
	ret = read_poll_timeout_atomic(READ_ONCE, ack,
				       (ack & req_mask) == req,
				       0, 10, 0,
				       *ack_ptr);
	if (!ret)
		return 0;

	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
			       msecs_to_jiffies(timeout_ms)))
		return 0;

	/* Check one last time, in case we were not woken up for some reason. */
	ack = READ_ONCE(*ack_ptr);
	if ((ack & req_mask) == req)
		return 0;

	*acked = ~(req ^ ack) & req_mask;
	return -ETIMEDOUT;
}
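
/*
 * Worked example of the req/ack protocol (illustrative, not driver code):
 * the host toggles a request bit in "req" and the FW echoes the new value
 * in "ack" once it has processed the request. On timeout, a bit is
 * considered acked only where req and ack already agree:
 *
 *	req = 0b0110, ack = 0b0010, req_mask = 0b0110
 *	*acked = ~(req ^ ack) & req_mask = ~(0b0100) & 0b0110 = 0b0010
 *
 * i.e. BIT(1) was acknowledged, BIT(2) timed out.
 */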

/**
 * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
 * @ptdev: Device.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
			     u32 req_mask, u32 *acked,
			     u32 timeout_ms)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	/* GLB_HALT doesn't get acked through the FW interface. */
	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
		return -EINVAL;

	return panthor_fw_wait_acks(&glb_iface->input->req,
				    &glb_iface->output->ack,
				    &ptdev->fw->req_waitqueue,
				    req_mask, acked, timeout_ms);
}

/**
 * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
 * @ptdev: Device.
 * @csg_slot: CSG slot ID.
 * @req_mask: Mask of requests to wait for.
 * @acked: Pointer to field that's updated with the acked requests.
 * If the function returns 0, *acked == req_mask.
 * @timeout_ms: Timeout expressed in milliseconds.
 *
 * Return: 0 on success, -ETIMEDOUT otherwise.
 */
int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
			     u32 req_mask, u32 *acked, u32 timeout_ms)
{
	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
	int ret;

	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
		return -EINVAL;

	ret = panthor_fw_wait_acks(&csg_iface->input->req,
				   &csg_iface->output->ack,
				   &ptdev->fw->req_waitqueue,
				   req_mask, acked, timeout_ms);

	/*
	 * Check that all bits in the state field were updated, if any mismatch
	 * then clear all bits in the state field. This allows code to do
	 * (acked & CSG_STATE_MASK) and get the right value.
	 */
	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
		*acked &= ~CSG_STATE_MASK;

	return ret;
}

/**
 * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
 * @ptdev: Device.
 * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
 *
 * This function toggles bits in the doorbell_req and rings the
 * global doorbell. It doesn't require a user doorbell to be attached to
 * the group.
 */
void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
{
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);

	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
}
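
/*
 * Illustrative call (not driver code): ringing the doorbells of CSG slots
 * 0 and 2 in one shot:
 *
 *	panthor_fw_ring_csg_doorbells(ptdev, BIT(0) | BIT(2));
 *
 * Only the global doorbell register is written; the FW derives the
 * per-group doorbells from the toggled doorbell_req bits.
 */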

static void panthor_fw_ping_work(struct work_struct *work)
{
	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
	struct panthor_device *ptdev = fw->irq.ptdev;
	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
	u32 acked;
	int ret;

	if (panthor_device_reset_is_pending(ptdev))
		return;

	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
	if (ret) {
		panthor_device_schedule_reset(ptdev);
		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
	} else {
		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
				 msecs_to_jiffies(PING_INTERVAL_MS));
	}
}

/**
 * panthor_fw_init() - Initialize FW related data.
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_fw_init(struct panthor_device *ptdev)
{
	struct panthor_fw *fw;
	int ret, irq;

	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
	if (!fw)
		return -ENOMEM;

	ptdev->fw = fw;
	init_waitqueue_head(&fw->req_waitqueue);
	INIT_LIST_HEAD(&fw->sections);
	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);

	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
	if (irq <= 0)
		return -ENODEV;

	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
	if (ret) {
		drm_err(&ptdev->base, "failed to request job irq");
		return ret;
	}

	ret = panthor_gpu_l2_power_on(ptdev);
	if (ret)
		return ret;

	fw->vm = panthor_vm_create(ptdev, true,
				   0, SZ_4G,
				   CSF_MCU_SHARED_REGION_START,
				   CSF_MCU_SHARED_REGION_SIZE);
	if (IS_ERR(fw->vm)) {
		ret = PTR_ERR(fw->vm);
		fw->vm = NULL;
		goto err_unplug_fw;
	}

	ret = panthor_fw_load(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_vm_active(fw->vm);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_start(ptdev);
	if (ret)
		goto err_unplug_fw;

	ret = panthor_fw_init_ifaces(ptdev);
	if (ret)
		goto err_unplug_fw;

	panthor_fw_init_global_iface(ptdev);
	return 0;

err_unplug_fw:
	panthor_fw_unplug(ptdev);
	return ret;
}

MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");