   1// SPDX-License-Identifier: GPL-2.0 or MIT
   2/* Copyright 2023 Collabora ltd. */
   3
   4#include <drm/drm_drv.h>
   5#include <drm/drm_exec.h>
   6#include <drm/drm_gem_shmem_helper.h>
   7#include <drm/drm_managed.h>
   8#include <drm/gpu_scheduler.h>
   9#include <drm/panthor_drm.h>
  10
  11#include <linux/build_bug.h>
  12#include <linux/clk.h>
  13#include <linux/delay.h>
  14#include <linux/dma-mapping.h>
  15#include <linux/dma-resv.h>
  16#include <linux/firmware.h>
  17#include <linux/interrupt.h>
  18#include <linux/io.h>
  19#include <linux/iopoll.h>
  20#include <linux/iosys-map.h>
  21#include <linux/module.h>
  22#include <linux/platform_device.h>
  23#include <linux/pm_runtime.h>
  24
  25#include "panthor_devfreq.h"
  26#include "panthor_device.h"
  27#include "panthor_fw.h"
  28#include "panthor_gem.h"
  29#include "panthor_gpu.h"
  30#include "panthor_heap.h"
  31#include "panthor_mmu.h"
  32#include "panthor_regs.h"
  33#include "panthor_sched.h"
  34
  35/**
  36 * DOC: Scheduler
  37 *
  38 * Mali CSF hardware adopts a firmware-assisted scheduling model, where
  39 * the firmware takes care of scheduling aspects, to some extent.
  40 *
   41 * The scheduling happens at the scheduling group level: each group
   42 * contains 1 to N queues (N is FW/hardware dependent, and exposed
  43 * through the firmware interface). Each queue is assigned a command
  44 * stream ring buffer, which serves as a way to get jobs submitted to
  45 * the GPU, among other things.
  46 *
  47 * The firmware can schedule a maximum of M groups (M is FW/hardware
   48 * dependent, and exposed through the firmware interface). Beyond
   49 * this maximum number of groups, the kernel must take care of
   50 * rotating the groups passed to the firmware so every group gets
   51 * a chance to have its queues scheduled for execution.
  52 *
   53 * The current implementation only supports kernel-mode queues.
   54 * In other words, userspace doesn't have access to the ring-buffer.
  55 * Instead, userspace passes indirect command stream buffers that are
  56 * called from the queue ring-buffer by the kernel using a pre-defined
  57 * sequence of command stream instructions to ensure the userspace driver
  58 * always gets consistent results (cache maintenance,
  59 * synchronization, ...).
  60 *
  61 * We rely on the drm_gpu_scheduler framework to deal with job
  62 * dependencies and submission. As any other driver dealing with a
  63 * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each
  64 * entity has its own job scheduler. When a job is ready to be executed
  65 * (all its dependencies are met), it is pushed to the appropriate
  66 * queue ring-buffer, and the group is scheduled for execution if it
  67 * wasn't already active.
  68 *
   69 * Kernel-side group scheduling is timeslice-based. When we have fewer
   70 * groups than there are slots, the periodic tick is disabled and we
   71 * just let the FW schedule the active groups. When there are more
   72 * groups than slots, we give each group a chance to execute for
   73 * a given amount of time, and then re-evaluate and pick new groups
  74 * to schedule. The group selection algorithm is based on
  75 * priority+round-robin.
  76 *
   77 * Even though user-mode queues are out of scope right now, the
   78 * current design takes them into account by avoiding any guess on the
   79 * group/queue state that would be based on information we wouldn't have
   80 * if userspace was in charge of the ring-buffer. That's also one of the
   81 * reasons we don't do 'cooperative' scheduling (encoding FW group slot
   82 * reservation as a dma_fence that would be returned from the
  83 * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as
  84 * a queue of waiters, ordered by job submission order). This approach
  85 * would work for kernel-mode queues, but would make user-mode queues a
  86 * lot more complicated to retrofit.
  87 */
  88
  89#define JOB_TIMEOUT_MS				5000
  90
  91#define MIN_CS_PER_CSG				8
  92
  93#define MIN_CSGS				3
  94#define MAX_CSG_PRIO				0xf
  95
  96#define NUM_INSTRS_PER_CACHE_LINE		(64 / sizeof(u64))
  97#define MAX_INSTRS_PER_JOB			24
  98
  99struct panthor_group;
 100
 101/**
 102 * struct panthor_csg_slot - Command stream group slot
 103 *
 104 * This represents a FW slot for a scheduling group.
 105 */
 106struct panthor_csg_slot {
 107	/** @group: Scheduling group bound to this slot. */
 108	struct panthor_group *group;
 109
 110	/** @priority: Group priority. */
 111	u8 priority;
 112
 113	/**
 114	 * @idle: True if the group bound to this slot is idle.
 115	 *
 116	 * A group is idle when it has nothing waiting for execution on
 117	 * all its queues, or when queues are blocked waiting for something
 118	 * to happen (synchronization object).
 119	 */
 120	bool idle;
 121};
 122
 123/**
 124 * enum panthor_csg_priority - Group priority
 125 */
 126enum panthor_csg_priority {
 127	/** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */
 128	PANTHOR_CSG_PRIORITY_LOW = 0,
 129
 130	/** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */
 131	PANTHOR_CSG_PRIORITY_MEDIUM,
 132
 133	/** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */
 134	PANTHOR_CSG_PRIORITY_HIGH,
 135
 136	/**
 137	 * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group.
 138	 *
 139	 * Real-time priority allows one to preempt scheduling of other
 140	 * non-real-time groups. When such a group becomes executable,
 141	 * it will evict the group with the lowest non-rt priority if
 142	 * there's no free group slot available.
 143	 */
 144	PANTHOR_CSG_PRIORITY_RT,
 145
 146	/** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */
 147	PANTHOR_CSG_PRIORITY_COUNT,
 148};
 149
 150/**
 151 * struct panthor_scheduler - Object used to manage the scheduler
 152 */
 153struct panthor_scheduler {
 154	/** @ptdev: Device. */
 155	struct panthor_device *ptdev;
 156
 157	/**
 158	 * @wq: Workqueue used by our internal scheduler logic and
 159	 * drm_gpu_scheduler.
 160	 *
  161	 * Used for the scheduler tick, group update or other kinds of FW
 162	 * event processing that can't be handled in the threaded interrupt
 163	 * path. Also passed to the drm_gpu_scheduler instances embedded
 164	 * in panthor_queue.
 165	 */
 166	struct workqueue_struct *wq;
 167
 168	/**
 169	 * @heap_alloc_wq: Workqueue used to schedule tiler_oom works.
 170	 *
 171	 * We have a queue dedicated to heap chunk allocation works to avoid
 172	 * blocking the rest of the scheduler if the allocation tries to
 173	 * reclaim memory.
 174	 */
 175	struct workqueue_struct *heap_alloc_wq;
 176
 177	/** @tick_work: Work executed on a scheduling tick. */
 178	struct delayed_work tick_work;
 179
 180	/**
 181	 * @sync_upd_work: Work used to process synchronization object updates.
 182	 *
 183	 * We use this work to unblock queues/groups that were waiting on a
 184	 * synchronization object.
 185	 */
 186	struct work_struct sync_upd_work;
 187
 188	/**
 189	 * @fw_events_work: Work used to process FW events outside the interrupt path.
 190	 *
 191	 * Even if the interrupt is threaded, we need any event processing
  192	 * that requires taking the panthor_scheduler::lock to be processed
 193	 * outside the interrupt path so we don't block the tick logic when
 194	 * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the
 195	 * event processing requires taking this lock, we just delegate all
 196	 * FW event processing to the scheduler workqueue.
 197	 */
 198	struct work_struct fw_events_work;
 199
 200	/**
 201	 * @fw_events: Bitmask encoding pending FW events.
 202	 */
 203	atomic_t fw_events;
 204
 205	/**
 206	 * @resched_target: When the next tick should occur.
 207	 *
 208	 * Expressed in jiffies.
 209	 */
 210	u64 resched_target;
 211
 212	/**
 213	 * @last_tick: When the last tick occurred.
 214	 *
 215	 * Expressed in jiffies.
 216	 */
 217	u64 last_tick;
 218
 219	/** @tick_period: Tick period in jiffies. */
 220	u64 tick_period;
 221
 222	/**
 223	 * @lock: Lock protecting access to all the scheduler fields.
 224	 *
 225	 * Should be taken in the tick work, the irq handler, and anywhere the @groups
 226	 * fields are touched.
 227	 */
 228	struct mutex lock;
 229
 230	/** @groups: Various lists used to classify groups. */
 231	struct {
 232		/**
 233		 * @runnable: Runnable group lists.
 234		 *
 235		 * When a group has queues that want to execute something,
 236		 * its panthor_group::run_node should be inserted here.
 237		 *
 238		 * One list per-priority.
 239		 */
 240		struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT];
 241
 242		/**
 243		 * @idle: Idle group lists.
 244		 *
 245		 * When all queues of a group are idle (either because they
 246		 * have nothing to execute, or because they are blocked), the
 247		 * panthor_group::run_node field should be inserted here.
 248		 *
 249		 * One list per-priority.
 250		 */
 251		struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT];
 252
 253		/**
 254		 * @waiting: List of groups whose queues are blocked on a
 255		 * synchronization object.
 256		 *
 257		 * Insert panthor_group::wait_node here when a group is waiting
 258		 * for synchronization objects to be signaled.
 259		 *
 260		 * This list is evaluated in the @sync_upd_work work.
 261		 */
 262		struct list_head waiting;
 263	} groups;
 264
 265	/**
 266	 * @csg_slots: FW command stream group slots.
 267	 */
 268	struct panthor_csg_slot csg_slots[MAX_CSGS];
 269
 270	/** @csg_slot_count: Number of command stream group slots exposed by the FW. */
 271	u32 csg_slot_count;
 272
  273	/** @cs_slot_count: Number of command stream slots per group slot exposed by the FW. */
 274	u32 cs_slot_count;
 275
 276	/** @as_slot_count: Number of address space slots supported by the MMU. */
 277	u32 as_slot_count;
 278
  279	/** @used_csg_slot_count: Number of command stream group slots currently used. */
 280	u32 used_csg_slot_count;
 281
 282	/** @sb_slot_count: Number of scoreboard slots. */
 283	u32 sb_slot_count;
 284
 285	/**
 286	 * @might_have_idle_groups: True if an active group might have become idle.
 287	 *
 288	 * This will force a tick, so other runnable groups can be scheduled if one
 289	 * or more active groups became idle.
 290	 */
 291	bool might_have_idle_groups;
 292
 293	/** @pm: Power management related fields. */
 294	struct {
 295		/** @has_ref: True if the scheduler owns a runtime PM reference. */
 296		bool has_ref;
 297	} pm;
 298
 299	/** @reset: Reset related fields. */
 300	struct {
 301		/** @lock: Lock protecting the other reset fields. */
 302		struct mutex lock;
 303
 304		/**
 305		 * @in_progress: True if a reset is in progress.
 306		 *
 307		 * Set to true in panthor_sched_pre_reset() and back to false in
 308		 * panthor_sched_post_reset().
 309		 */
 310		atomic_t in_progress;
 311
 312		/**
 313		 * @stopped_groups: List containing all groups that were stopped
 314		 * before a reset.
 315		 *
 316		 * Insert panthor_group::run_node in the pre_reset path.
 317		 */
 318		struct list_head stopped_groups;
 319	} reset;
 320};
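
/*
 * Illustrative note (an assumption, not the driver's exact code): the tick
 * bookkeeping above is all expressed in jiffies, so re-arming the tick after
 * a scheduling pass boils down to something like:
 *
 *	sched->last_tick = jiffies;
 *	sched->resched_target = sched->last_tick + sched->tick_period;
 *	sched_queue_delayed_work(sched, tick, sched->resched_target - jiffies);
 *
 * The actual computation lives in the tick logic further down in this file.
 */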
 321
 322/**
 323 * struct panthor_syncobj_32b - 32-bit FW synchronization object
 324 */
 325struct panthor_syncobj_32b {
 326	/** @seqno: Sequence number. */
 327	u32 seqno;
 328
 329	/**
 330	 * @status: Status.
 331	 *
  332	 * Non-zero on failure.
 333	 */
 334	u32 status;
 335};
 336
 337/**
 338 * struct panthor_syncobj_64b - 64-bit FW synchronization object
 339 */
 340struct panthor_syncobj_64b {
 341	/** @seqno: Sequence number. */
 342	u64 seqno;
 343
 344	/**
 345	 * @status: Status.
 346	 *
  347	 * Non-zero on failure.
 348	 */
 349	u32 status;
 350
 351	/** @pad: MBZ. */
 352	u32 pad;
 353};
 354
 355/**
 356 * struct panthor_queue - Execution queue
 357 */
 358struct panthor_queue {
 359	/** @scheduler: DRM scheduler used for this queue. */
 360	struct drm_gpu_scheduler scheduler;
 361
 362	/** @entity: DRM scheduling entity used for this queue. */
 363	struct drm_sched_entity entity;
 364
 365	/**
 366	 * @remaining_time: Time remaining before the job timeout expires.
 367	 *
 368	 * The job timeout is suspended when the queue is not scheduled by the
 369	 * FW. Every time we suspend the timer, we need to save the remaining
 370	 * time so we can restore it later on.
 371	 */
 372	unsigned long remaining_time;
 373
 374	/** @timeout_suspended: True if the job timeout was suspended. */
 375	bool timeout_suspended;
 376
 377	/**
 378	 * @doorbell_id: Doorbell assigned to this queue.
 379	 *
  380	 * Right now, all queues of a group share the same doorbell, which is
  381	 * set to group_slot + 1 when the group is assigned a slot. But we
  382	 * might decide to provide fine-grained doorbell assignment at some
  383	 * point, so we don't have to wake up all queues in a group every time
  384	 * one of them is updated.
 385	 */
 386	u8 doorbell_id;
 387
 388	/**
 389	 * @priority: Priority of the queue inside the group.
 390	 *
 391	 * Must be less than 16 (Only 4 bits available).
 392	 */
 393	u8 priority;
 394#define CSF_MAX_QUEUE_PRIO	GENMASK(3, 0)
 395
 396	/** @ringbuf: Command stream ring-buffer. */
 397	struct panthor_kernel_bo *ringbuf;
 398
 399	/** @iface: Firmware interface. */
 400	struct {
 401		/** @mem: FW memory allocated for this interface. */
 402		struct panthor_kernel_bo *mem;
 403
 404		/** @input: Input interface. */
 405		struct panthor_fw_ringbuf_input_iface *input;
 406
 407		/** @output: Output interface. */
 408		const struct panthor_fw_ringbuf_output_iface *output;
 409
 410		/** @input_fw_va: FW virtual address of the input interface buffer. */
 411		u32 input_fw_va;
 412
 413		/** @output_fw_va: FW virtual address of the output interface buffer. */
 414		u32 output_fw_va;
 415	} iface;
 416
 417	/**
 418	 * @syncwait: Stores information about the synchronization object this
 419	 * queue is waiting on.
 420	 */
 421	struct {
 422		/** @gpu_va: GPU address of the synchronization object. */
 423		u64 gpu_va;
 424
 425		/** @ref: Reference value to compare against. */
 426		u64 ref;
 427
 428		/** @gt: True if this is a greater-than test. */
 429		bool gt;
 430
 431		/** @sync64: True if this is a 64-bit sync object. */
 432		bool sync64;
 433
  434		/** @obj: Buffer object holding the synchronization object. */
  435		struct drm_gem_object *obj;
  436
  437		/** @offset: Offset of the synchronization object inside @obj. */
  438		u64 offset;
 439
 440		/**
 441		 * @kmap: Kernel mapping of the buffer object holding the
 442		 * synchronization object.
 443		 */
 444		void *kmap;
 445	} syncwait;
 446
 447	/** @fence_ctx: Fence context fields. */
 448	struct {
 449		/** @lock: Used to protect access to all fences allocated by this context. */
 450		spinlock_t lock;
 451
 452		/**
 453		 * @id: Fence context ID.
 454		 *
 455		 * Allocated with dma_fence_context_alloc().
 456		 */
 457		u64 id;
 458
 459		/** @seqno: Sequence number of the last initialized fence. */
 460		atomic64_t seqno;
 461
 462		/**
 463		 * @last_fence: Fence of the last submitted job.
 464		 *
 465		 * We return this fence when we get an empty command stream.
  466		 * This way, we are guaranteed that all earlier jobs have completed
  467		 * when drm_sched_job::s_fence::finished is signaled, without feeding
  468		 * the CS ring buffer with a dummy job that only signals the fence.
 469		 */
 470		struct dma_fence *last_fence;
 471
 472		/**
 473		 * @in_flight_jobs: List containing all in-flight jobs.
 474		 *
 475		 * Used to keep track and signal panthor_job::done_fence when the
 476		 * synchronization object attached to the queue is signaled.
 477		 */
 478		struct list_head in_flight_jobs;
 479	} fence_ctx;
 480
 481	/** @profiling: Job profiling data slots and access information. */
 482	struct {
 483		/** @slots: Kernel BO holding the slots. */
 484		struct panthor_kernel_bo *slots;
 485
  486		/** @slot_count: Number of jobs the ringbuffer can hold at once. */
 487		u32 slot_count;
 488
 489		/** @seqno: Index of the next available profiling information slot. */
 490		u32 seqno;
 491	} profiling;
 492};
 493
 494/**
 495 * enum panthor_group_state - Scheduling group state.
 496 */
 497enum panthor_group_state {
 498	/** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */
 499	PANTHOR_CS_GROUP_CREATED,
 500
 501	/** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
 502	PANTHOR_CS_GROUP_ACTIVE,
 503
 504	/**
 505	 * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
 506	 * inactive/suspended right now.
 507	 */
 508	PANTHOR_CS_GROUP_SUSPENDED,
 509
 510	/**
 511	 * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
 512	 *
 513	 * Can no longer be scheduled. The only allowed action is a destruction.
 514	 */
 515	PANTHOR_CS_GROUP_TERMINATED,
 516
 517	/**
  518	 * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
 519	 *
 520	 * The FW returned an inconsistent state. The group is flagged unusable
 521	 * and can no longer be scheduled. The only allowed action is a
 522	 * destruction.
 523	 *
 524	 * When that happens, we also schedule a FW reset, to start from a fresh
 525	 * state.
 526	 */
 527	PANTHOR_CS_GROUP_UNKNOWN_STATE,
 528};
 529
 530/**
 531 * struct panthor_group - Scheduling group object
 532 */
 533struct panthor_group {
 534	/** @refcount: Reference count */
 535	struct kref refcount;
 536
 537	/** @ptdev: Device. */
 538	struct panthor_device *ptdev;
 539
 540	/** @vm: VM bound to the group. */
 541	struct panthor_vm *vm;
 542
 543	/** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
 544	u64 compute_core_mask;
 545
 546	/** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
 547	u64 fragment_core_mask;
 548
 549	/** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
 550	u64 tiler_core_mask;
 551
 552	/** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
 553	u8 max_compute_cores;
 554
 555	/** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
 556	u8 max_fragment_cores;
 557
 558	/** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
 559	u8 max_tiler_cores;
 560
  561	/** @priority: Group priority (see enum panthor_csg_priority). */
 562	u8 priority;
 563
 564	/** @blocked_queues: Bitmask reflecting the blocked queues. */
 565	u32 blocked_queues;
 566
 567	/** @idle_queues: Bitmask reflecting the idle queues. */
 568	u32 idle_queues;
 569
 570	/** @fatal_lock: Lock used to protect access to fatal fields. */
 571	spinlock_t fatal_lock;
 572
 573	/** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
 574	u32 fatal_queues;
 575
 576	/** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
 577	atomic_t tiler_oom;
 578
 579	/** @queue_count: Number of queues in this group. */
 580	u32 queue_count;
 581
 582	/** @queues: Queues owned by this group. */
 583	struct panthor_queue *queues[MAX_CS_PER_CSG];
 584
 585	/**
 586	 * @csg_id: ID of the FW group slot.
 587	 *
 588	 * -1 when the group is not scheduled/active.
 589	 */
 590	int csg_id;
 591
 592	/**
 593	 * @destroyed: True when the group has been destroyed.
 594	 *
 595	 * If a group is destroyed it becomes useless: no further jobs can be submitted
 596	 * to its queues. We simply wait for all references to be dropped so we can
 597	 * release the group object.
 598	 */
 599	bool destroyed;
 600
 601	/**
 602	 * @timedout: True when a timeout occurred on any of the queues owned by
 603	 * this group.
 604	 *
 605	 * Timeouts can be reported by drm_sched or by the FW. If a reset is required,
 606	 * and the group can't be suspended, this also leads to a timeout. In any case,
 607	 * any timeout situation is unrecoverable, and the group becomes useless. We
 608	 * simply wait for all references to be dropped so we can release the group
 609	 * object.
 610	 */
 611	bool timedout;
 612
 613	/**
 614	 * @syncobjs: Pool of per-queue synchronization objects.
 615	 *
 616	 * One sync object per queue. The position of the sync object is
 617	 * determined by the queue index.
 618	 */
 619	struct panthor_kernel_bo *syncobjs;
 620
 621	/** @fdinfo: Per-file total cycle and timestamp values reference. */
 622	struct {
 623		/** @data: Total sampled values for jobs in queues from this group. */
 624		struct panthor_gpu_usage data;
 625
 626		/**
 627		 * @lock: Mutex to govern concurrent access from drm file's fdinfo callback
  628		 * and job post-completion processing function.
 629		 */
 630		struct mutex lock;
 631	} fdinfo;
 632
 633	/** @state: Group state. */
 634	enum panthor_group_state state;
 635
 636	/**
 637	 * @suspend_buf: Suspend buffer.
 638	 *
 639	 * Stores the state of the group and its queues when a group is suspended.
 640	 * Used at resume time to restore the group in its previous state.
 641	 *
 642	 * The size of the suspend buffer is exposed through the FW interface.
 643	 */
 644	struct panthor_kernel_bo *suspend_buf;
 645
 646	/**
 647	 * @protm_suspend_buf: Protection mode suspend buffer.
 648	 *
 649	 * Stores the state of the group and its queues when a group that's in
 650	 * protection mode is suspended.
 651	 *
 652	 * Used at resume time to restore the group in its previous state.
 653	 *
 654	 * The size of the protection mode suspend buffer is exposed through the
 655	 * FW interface.
 656	 */
 657	struct panthor_kernel_bo *protm_suspend_buf;
 658
 659	/** @sync_upd_work: Work used to check/signal job fences. */
 660	struct work_struct sync_upd_work;
 661
 662	/** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */
 663	struct work_struct tiler_oom_work;
 664
 665	/** @term_work: Work used to finish the group termination procedure. */
 666	struct work_struct term_work;
 667
 668	/**
 669	 * @release_work: Work used to release group resources.
 670	 *
 671	 * We need to postpone the group release to avoid a deadlock when
 672	 * the last ref is released in the tick work.
 673	 */
 674	struct work_struct release_work;
 675
 676	/**
 677	 * @run_node: Node used to insert the group in the
  678	 * panthor_scheduler::groups::{runnable,idle} and
  679	 * panthor_scheduler::reset::stopped_groups lists.
 680	 */
 681	struct list_head run_node;
 682
 683	/**
 684	 * @wait_node: Node used to insert the group in the
  685	 * panthor_scheduler::groups::waiting list.
 686	 */
 687	struct list_head wait_node;
 688};
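
/*
 * Illustrative sketch only, with a hypothetical helper name; it is not used
 * by the driver. It shows a simplified priority-ordered walk of the runnable
 * lists declared in panthor_scheduler::groups. The real tick logic (further
 * down in this file) additionally handles round-robin rotation within a
 * priority level, idle-group eviction and CSG slot programming.
 */
static __maybe_unused struct panthor_group *
example_pick_runnable_group(struct panthor_scheduler *sched)
{
	int prio;

	lockdep_assert_held(&sched->lock);

	/* Walk priorities from highest (RT) to lowest (LOW). */
	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
		struct panthor_group *group;

		list_for_each_entry(group, &sched->groups.runnable[prio], run_node) {
			/* Skip groups that already own a CSG slot. */
			if (group->csg_id >= 0)
				continue;

			return group;
		}
	}

	return NULL;
}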
 689
 690struct panthor_job_profiling_data {
 691	struct {
 692		u64 before;
 693		u64 after;
 694	} cycles;
 695
 696	struct {
 697		u64 before;
 698		u64 after;
 699	} time;
 700};
 701
 702/**
 703 * group_queue_work() - Queue a group work
 704 * @group: Group to queue the work for.
 705 * @wname: Work name.
 706 *
  707 * Grabs a ref and queues a work item to the scheduler workqueue. If
 708 * the work was already queued, we release the reference we grabbed.
 709 *
 710 * Work callbacks must release the reference we grabbed here.
 711 */
 712#define group_queue_work(group, wname) \
 713	do { \
 714		group_get(group); \
 715		if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \
 716			group_put(group); \
 717	} while (0)
 718
 719/**
 720 * sched_queue_work() - Queue a scheduler work.
 721 * @sched: Scheduler object.
 722 * @wname: Work name.
 723 *
 724 * Conditionally queues a scheduler work if no reset is pending/in-progress.
 725 */
 726#define sched_queue_work(sched, wname) \
 727	do { \
 728		if (!atomic_read(&(sched)->reset.in_progress) && \
 729		    !panthor_device_reset_is_pending((sched)->ptdev)) \
 730			queue_work((sched)->wq, &(sched)->wname ## _work); \
 731	} while (0)
 732
 733/**
 734 * sched_queue_delayed_work() - Queue a scheduler delayed work.
 735 * @sched: Scheduler object.
 736 * @wname: Work name.
 737 * @delay: Work delay in jiffies.
 738 *
 739 * Conditionally queues a scheduler delayed work if no reset is
 740 * pending/in-progress.
 741 */
 742#define sched_queue_delayed_work(sched, wname, delay) \
 743	do { \
  744		if (!atomic_read(&(sched)->reset.in_progress) && \
 745		    !panthor_device_reset_is_pending((sched)->ptdev)) \
 746			mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
 747	} while (0)
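
/*
 * Usage examples, mirroring call sites later in this file:
 *
 *	group_queue_work(group, sync_upd);		// queues group->sync_upd_work
 *	sched_queue_work(ptdev->scheduler, fw_events);	// queues sched->fw_events_work
 *	sched_queue_delayed_work(sched, tick, 0);	// (re)arms sched->tick_work
 */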
 748
 749/*
  750 * We currently set the maximum number of groups per file to an arbitrarily
  751 * low value. But this can be updated if we need more.
 752 */
 753#define MAX_GROUPS_PER_POOL 128
 754
 755/**
 756 * struct panthor_group_pool - Group pool
 757 *
  758 * Each file gets assigned a group pool.
 759 */
 760struct panthor_group_pool {
 761	/** @xa: Xarray used to manage group handles. */
 762	struct xarray xa;
 763};
 764
 765/**
  766 * struct panthor_job - Used to manage a GPU job
 767 */
 768struct panthor_job {
 769	/** @base: Inherit from drm_sched_job. */
 770	struct drm_sched_job base;
 771
 772	/** @refcount: Reference count. */
 773	struct kref refcount;
 774
 775	/** @group: Group of the queue this job will be pushed to. */
 776	struct panthor_group *group;
 777
 778	/** @queue_idx: Index of the queue inside @group. */
 779	u32 queue_idx;
 780
 781	/** @call_info: Information about the userspace command stream call. */
 782	struct {
 783		/** @start: GPU address of the userspace command stream. */
 784		u64 start;
 785
 786		/** @size: Size of the userspace command stream. */
 787		u32 size;
 788
 789		/**
 790		 * @latest_flush: Flush ID at the time the userspace command
 791		 * stream was built.
 792		 *
 793		 * Needed for the flush reduction mechanism.
 794		 */
 795		u32 latest_flush;
 796	} call_info;
 797
  798	/** @ringbuf: Position of this job in the ring buffer. */
 799	struct {
 800		/** @start: Start offset. */
 801		u64 start;
 802
 803		/** @end: End offset. */
 804		u64 end;
 805	} ringbuf;
 806
 807	/**
 808	 * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs
 809	 * list.
 810	 */
 811	struct list_head node;
 812
 813	/** @done_fence: Fence signaled when the job is finished or cancelled. */
 814	struct dma_fence *done_fence;
 815
 816	/** @profiling: Job profiling information. */
 817	struct {
 818		/** @mask: Current device job profiling enablement bitmask. */
 819		u32 mask;
 820
 821		/** @slot: Job index in the profiling slots BO. */
 822		u32 slot;
 823	} profiling;
 824};
 825
 826static void
 827panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
 828{
 829	if (queue->syncwait.kmap) {
 830		struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
 831
 832		drm_gem_vunmap_unlocked(queue->syncwait.obj, &map);
 833		queue->syncwait.kmap = NULL;
 834	}
 835
 836	drm_gem_object_put(queue->syncwait.obj);
 837	queue->syncwait.obj = NULL;
 838}
 839
 840static void *
 841panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue)
 842{
 843	struct panthor_device *ptdev = group->ptdev;
 844	struct panthor_gem_object *bo;
 845	struct iosys_map map;
 846	int ret;
 847
 848	if (queue->syncwait.kmap)
 849		return queue->syncwait.kmap + queue->syncwait.offset;
 850
 851	bo = panthor_vm_get_bo_for_va(group->vm,
 852				      queue->syncwait.gpu_va,
 853				      &queue->syncwait.offset);
 854	if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo)))
 855		goto err_put_syncwait_obj;
 856
 857	queue->syncwait.obj = &bo->base.base;
 858	ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map);
 859	if (drm_WARN_ON(&ptdev->base, ret))
 860		goto err_put_syncwait_obj;
 861
 862	queue->syncwait.kmap = map.vaddr;
 863	if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
 864		goto err_put_syncwait_obj;
 865
 866	return queue->syncwait.kmap + queue->syncwait.offset;
 867
 868err_put_syncwait_obj:
 869	panthor_queue_put_syncwait_obj(queue);
 870	return NULL;
 871}
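
/*
 * Illustrative sketch only, with a hypothetical helper name; it is not used
 * by the driver. It shows how the greater-than flavour of the wait condition
 * tracked in panthor_queue::syncwait could be checked against the CPU pointer
 * returned by panthor_queue_get_syncwait_obj(). The real evaluation, covering
 * the other wait conditions and error handling as well, is done in the
 * sync_upd work further down in this file.
 */
static __maybe_unused bool
example_syncwait_gt_signaled(struct panthor_queue *queue, const void *sync_ptr)
{
	u64 val;

	if (queue->syncwait.sync64) {
		const struct panthor_syncobj_64b *obj = sync_ptr;

		val = obj->seqno;
	} else {
		const struct panthor_syncobj_32b *obj = sync_ptr;

		val = obj->seqno;
	}

	return queue->syncwait.gt && val > queue->syncwait.ref;
}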
 872
 873static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue)
 874{
 875	if (IS_ERR_OR_NULL(queue))
 876		return;
 877
 878	if (queue->entity.fence_context)
 879		drm_sched_entity_destroy(&queue->entity);
 880
 881	if (queue->scheduler.ops)
 882		drm_sched_fini(&queue->scheduler);
 883
 884	panthor_queue_put_syncwait_obj(queue);
 885
 886	panthor_kernel_bo_destroy(queue->ringbuf);
 887	panthor_kernel_bo_destroy(queue->iface.mem);
 888	panthor_kernel_bo_destroy(queue->profiling.slots);
 889
 890	/* Release the last_fence we were holding, if any. */
 891	dma_fence_put(queue->fence_ctx.last_fence);
 892
 893	kfree(queue);
 894}
 895
 896static void group_release_work(struct work_struct *work)
 897{
 898	struct panthor_group *group = container_of(work,
 899						   struct panthor_group,
 900						   release_work);
 901	u32 i;
 902
 903	mutex_destroy(&group->fdinfo.lock);
 904
 905	for (i = 0; i < group->queue_count; i++)
 906		group_free_queue(group, group->queues[i]);
 907
 908	panthor_kernel_bo_destroy(group->suspend_buf);
 909	panthor_kernel_bo_destroy(group->protm_suspend_buf);
 910	panthor_kernel_bo_destroy(group->syncobjs);
 911
 912	panthor_vm_put(group->vm);
 913	kfree(group);
 914}
 915
 916static void group_release(struct kref *kref)
 917{
 918	struct panthor_group *group = container_of(kref,
 919						   struct panthor_group,
 920						   refcount);
 921	struct panthor_device *ptdev = group->ptdev;
 922
 923	drm_WARN_ON(&ptdev->base, group->csg_id >= 0);
 924	drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node));
 925	drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node));
 926
 927	queue_work(panthor_cleanup_wq, &group->release_work);
 928}
 929
 930static void group_put(struct panthor_group *group)
 931{
 932	if (group)
 933		kref_put(&group->refcount, group_release);
 934}
 935
 936static struct panthor_group *
 937group_get(struct panthor_group *group)
 938{
 939	if (group)
 940		kref_get(&group->refcount);
 941
 942	return group;
 943}
 944
 945/**
 946 * group_bind_locked() - Bind a group to a group slot
 947 * @group: Group.
 948 * @csg_id: Slot.
 949 *
 950 * Return: 0 on success, a negative error code otherwise.
 951 */
 952static int
 953group_bind_locked(struct panthor_group *group, u32 csg_id)
 954{
 955	struct panthor_device *ptdev = group->ptdev;
 956	struct panthor_csg_slot *csg_slot;
 957	int ret;
 958
 959	lockdep_assert_held(&ptdev->scheduler->lock);
 960
 961	if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
 962			ptdev->scheduler->csg_slots[csg_id].group))
 963		return -EINVAL;
 964
 965	ret = panthor_vm_active(group->vm);
 966	if (ret)
 967		return ret;
 968
 969	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
 970	group_get(group);
 971	group->csg_id = csg_id;
 972
 973	/* Dummy doorbell allocation: doorbell is assigned to the group and
 974	 * all queues use the same doorbell.
 975	 *
 976	 * TODO: Implement LRU-based doorbell assignment, so the most often
 977	 * updated queues get their own doorbell, thus avoiding useless checks
 978	 * on queues belonging to the same group that are rarely updated.
 979	 */
 980	for (u32 i = 0; i < group->queue_count; i++)
 981		group->queues[i]->doorbell_id = csg_id + 1;
 982
 983	csg_slot->group = group;
 984
 985	return 0;
 986}
 987
 988/**
 989 * group_unbind_locked() - Unbind a group from a slot.
 990 * @group: Group to unbind.
 991 *
 992 * Return: 0 on success, a negative error code otherwise.
 993 */
 994static int
 995group_unbind_locked(struct panthor_group *group)
 996{
 997	struct panthor_device *ptdev = group->ptdev;
 998	struct panthor_csg_slot *slot;
 999
1000	lockdep_assert_held(&ptdev->scheduler->lock);
1001
1002	if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
1003		return -EINVAL;
1004
1005	if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
1006		return -EINVAL;
1007
1008	slot = &ptdev->scheduler->csg_slots[group->csg_id];
1009	panthor_vm_idle(group->vm);
1010	group->csg_id = -1;
1011
1012	/* Tiler OOM events will be re-issued next time the group is scheduled. */
1013	atomic_set(&group->tiler_oom, 0);
1014	cancel_work(&group->tiler_oom_work);
1015
1016	for (u32 i = 0; i < group->queue_count; i++)
1017		group->queues[i]->doorbell_id = -1;
1018
1019	slot->group = NULL;
1020
1021	group_put(group);
1022	return 0;
1023}
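
/*
 * Illustrative usage of the two helpers above (the actual slot assignment is
 * driven by the tick logic further down in this file): both are called with
 * the scheduler lock held, and unbinding is only legal once the group is no
 * longer active.
 *
 *	mutex_lock(&ptdev->scheduler->lock);
 *	ret = group_bind_locked(group, csg_id);
 *	...
 *	group_unbind_locked(group);
 *	mutex_unlock(&ptdev->scheduler->lock);
 */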
1024
1025/**
1026 * cs_slot_prog_locked() - Program a queue slot
1027 * @ptdev: Device.
1028 * @csg_id: Group slot ID.
1029 * @cs_id: Queue slot ID.
1030 *
1031 * Program a queue slot with the queue information so things can start being
1032 * executed on this queue.
1033 *
1034 * The group slot must have a group bound to it already (group_bind_locked()).
1035 */
1036static void
1037cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1038{
1039	struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
1040	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1041
1042	lockdep_assert_held(&ptdev->scheduler->lock);
1043
1044	queue->iface.input->extract = queue->iface.output->extract;
1045	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
1046
1047	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
1048	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
1049	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
1050	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
1051	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
1052				  CS_CONFIG_DOORBELL(queue->doorbell_id);
1053	cs_iface->input->ack_irq_mask = ~0;
1054	panthor_fw_update_reqs(cs_iface, req,
1055			       CS_IDLE_SYNC_WAIT |
1056			       CS_IDLE_EMPTY |
1057			       CS_STATE_START |
1058			       CS_EXTRACT_EVENT,
1059			       CS_IDLE_SYNC_WAIT |
1060			       CS_IDLE_EMPTY |
1061			       CS_STATE_MASK |
1062			       CS_EXTRACT_EVENT);
1063	if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
1064		drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
1065		queue->timeout_suspended = false;
1066	}
1067}
1068
1069/**
1070 * cs_slot_reset_locked() - Reset a queue slot
1071 * @ptdev: Device.
1072 * @csg_id: Group slot.
1073 * @cs_id: Queue slot.
1074 *
1075 * Change the queue slot state to STOP and suspend the queue timeout if
1076 * the queue is not blocked.
1077 *
1078 * The group slot must have a group bound to it (group_bind_locked()).
1079 */
1080static int
1081cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1082{
1083	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1084	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1085	struct panthor_queue *queue = group->queues[cs_id];
1086
1087	lockdep_assert_held(&ptdev->scheduler->lock);
1088
1089	panthor_fw_update_reqs(cs_iface, req,
1090			       CS_STATE_STOP,
1091			       CS_STATE_MASK);
1092
1093	/* If the queue is blocked, we want to keep the timeout running, so
1094	 * we can detect unbounded waits and kill the group when that happens.
1095	 */
1096	if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
1097		queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
1098		queue->timeout_suspended = true;
1099		WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
1100	}
1101
1102	return 0;
1103}
1104
1105/**
1106 * csg_slot_sync_priority_locked() - Synchronize the group slot priority
1107 * @ptdev: Device.
1108 * @csg_id: Group slot ID.
1109 *
1110 * Group slot priority update happens asynchronously. When we receive a
 1111 * %CSG_ENDPOINT_CONFIG acknowledgment, we know the update is effective, and can
1112 * reflect it to our panthor_csg_slot object.
1113 */
1114static void
1115csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
1116{
1117	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1118	struct panthor_fw_csg_iface *csg_iface;
1119
1120	lockdep_assert_held(&ptdev->scheduler->lock);
1121
1122	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1123	csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
1124}
1125
1126/**
 1127 * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
1128 * @ptdev: Device.
1129 * @csg_id: Group slot.
1130 * @cs_id: Queue slot.
1131 *
1132 * Queue state is updated on group suspend or STATUS_UPDATE event.
1133 */
1134static void
1135cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
1136{
1137	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
1138	struct panthor_queue *queue = group->queues[cs_id];
1139	struct panthor_fw_cs_iface *cs_iface =
1140		panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
1141
1142	u32 status_wait_cond;
1143
1144	switch (cs_iface->output->status_blocked_reason) {
1145	case CS_STATUS_BLOCKED_REASON_UNBLOCKED:
1146		if (queue->iface.input->insert == queue->iface.output->extract &&
1147		    cs_iface->output->status_scoreboards == 0)
1148			group->idle_queues |= BIT(cs_id);
1149		break;
1150
1151	case CS_STATUS_BLOCKED_REASON_SYNC_WAIT:
1152		if (list_empty(&group->wait_node)) {
1153			list_move_tail(&group->wait_node,
1154				       &group->ptdev->scheduler->groups.waiting);
1155		}
1156
1157		/* The queue is only blocked if there's no deferred operation
1158		 * pending, which can be checked through the scoreboard status.
1159		 */
1160		if (!cs_iface->output->status_scoreboards)
1161			group->blocked_queues |= BIT(cs_id);
1162
1163		queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr;
1164		queue->syncwait.ref = cs_iface->output->status_wait_sync_value;
1165		status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK;
1166		queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT;
1167		if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) {
1168			u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi;
1169
1170			queue->syncwait.sync64 = true;
1171			queue->syncwait.ref |= sync_val_hi << 32;
1172		} else {
1173			queue->syncwait.sync64 = false;
1174		}
1175		break;
1176
1177	default:
1178		/* Other reasons are not blocking. Consider the queue as runnable
1179		 * in those cases.
1180		 */
1181		break;
1182	}
1183}
1184
1185static void
1186csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id)
1187{
1188	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1189	struct panthor_group *group = csg_slot->group;
1190	u32 i;
1191
1192	lockdep_assert_held(&ptdev->scheduler->lock);
1193
1194	group->idle_queues = 0;
1195	group->blocked_queues = 0;
1196
1197	for (i = 0; i < group->queue_count; i++) {
1198		if (group->queues[i])
1199			cs_slot_sync_queue_state_locked(ptdev, csg_id, i);
1200	}
1201}
1202
1203static void
1204csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
1205{
1206	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1207	struct panthor_fw_csg_iface *csg_iface;
1208	struct panthor_group *group;
1209	enum panthor_group_state new_state, old_state;
1210	u32 csg_state;
1211
1212	lockdep_assert_held(&ptdev->scheduler->lock);
1213
1214	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1215	group = csg_slot->group;
1216
1217	if (!group)
1218		return;
1219
1220	old_state = group->state;
1221	csg_state = csg_iface->output->ack & CSG_STATE_MASK;
1222	switch (csg_state) {
1223	case CSG_STATE_START:
1224	case CSG_STATE_RESUME:
1225		new_state = PANTHOR_CS_GROUP_ACTIVE;
1226		break;
1227	case CSG_STATE_TERMINATE:
1228		new_state = PANTHOR_CS_GROUP_TERMINATED;
1229		break;
1230	case CSG_STATE_SUSPEND:
1231		new_state = PANTHOR_CS_GROUP_SUSPENDED;
1232		break;
1233	default:
1234		/* The unknown state might be caused by a FW state corruption,
1235		 * which means the group metadata can't be trusted anymore, and
1236		 * the SUSPEND operation might propagate the corruption to the
1237		 * suspend buffers. Flag the group state as unknown to make
1238		 * sure it's unusable after that point.
1239		 */
1240		drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
1241			csg_id, csg_state);
1242		new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE;
1243		break;
1244	}
1245
1246	if (old_state == new_state)
1247		return;
1248
1249	/* The unknown state might be caused by a FW issue, reset the FW to
1250	 * take a fresh start.
1251	 */
1252	if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE)
1253		panthor_device_schedule_reset(ptdev);
1254
1255	if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
1256		csg_slot_sync_queues_state_locked(ptdev, csg_id);
1257
1258	if (old_state == PANTHOR_CS_GROUP_ACTIVE) {
1259		u32 i;
1260
1261		/* Reset the queue slots so we start from a clean
1262		 * state when starting/resuming a new group on this
 1263		 * CSG slot. No wait needed here, and no doorbell
 1264		 * ring either, since the CS slot will only be re-used
1265		 * on the next CSG start operation.
1266		 */
1267		for (i = 0; i < group->queue_count; i++) {
1268			if (group->queues[i])
1269				cs_slot_reset_locked(ptdev, csg_id, i);
1270		}
1271	}
1272
1273	group->state = new_state;
1274}
1275
1276static int
1277csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
1278{
1279	struct panthor_fw_csg_iface *csg_iface;
1280	struct panthor_csg_slot *csg_slot;
1281	struct panthor_group *group;
1282	u32 queue_mask = 0, i;
1283
1284	lockdep_assert_held(&ptdev->scheduler->lock);
1285
1286	if (priority > MAX_CSG_PRIO)
1287		return -EINVAL;
1288
1289	if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS))
1290		return -EINVAL;
1291
1292	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1293	group = csg_slot->group;
1294	if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE)
1295		return 0;
1296
1297	csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id);
1298
1299	for (i = 0; i < group->queue_count; i++) {
1300		if (group->queues[i]) {
1301			cs_slot_prog_locked(ptdev, csg_id, i);
1302			queue_mask |= BIT(i);
1303		}
1304	}
1305
1306	csg_iface->input->allow_compute = group->compute_core_mask;
1307	csg_iface->input->allow_fragment = group->fragment_core_mask;
1308	csg_iface->input->allow_other = group->tiler_core_mask;
1309	csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
1310					 CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
1311					 CSG_EP_REQ_TILER(group->max_tiler_cores) |
1312					 CSG_EP_REQ_PRIORITY(priority);
1313	csg_iface->input->config = panthor_vm_as(group->vm);
1314
1315	if (group->suspend_buf)
1316		csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
1317	else
1318		csg_iface->input->suspend_buf = 0;
1319
1320	if (group->protm_suspend_buf) {
1321		csg_iface->input->protm_suspend_buf =
1322			panthor_kernel_bo_gpuva(group->protm_suspend_buf);
1323	} else {
1324		csg_iface->input->protm_suspend_buf = 0;
1325	}
1326
1327	csg_iface->input->ack_irq_mask = ~0;
1328	panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask);
1329	return 0;
1330}
1331
1332static void
1333cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
1334				   u32 csg_id, u32 cs_id)
1335{
1336	struct panthor_scheduler *sched = ptdev->scheduler;
1337	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1338	struct panthor_group *group = csg_slot->group;
1339	struct panthor_fw_cs_iface *cs_iface;
1340	u32 fatal;
1341	u64 info;
1342
1343	lockdep_assert_held(&sched->lock);
1344
1345	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1346	fatal = cs_iface->output->fatal;
1347	info = cs_iface->output->fatal_info;
1348
1349	if (group)
1350		group->fatal_queues |= BIT(cs_id);
1351
1352	if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
1353		/* If this exception is unrecoverable, queue a reset, and make
1354		 * sure we stop scheduling groups until the reset has happened.
1355		 */
1356		panthor_device_schedule_reset(ptdev);
1357		cancel_delayed_work(&sched->tick_work);
1358	} else {
1359		sched_queue_delayed_work(sched, tick, 0);
1360	}
1361
1362	drm_warn(&ptdev->base,
1363		 "CSG slot %d CS slot: %d\n"
1364		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
1365		 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
1366		 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
1367		 csg_id, cs_id,
1368		 (unsigned int)CS_EXCEPTION_TYPE(fatal),
1369		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)),
1370		 (unsigned int)CS_EXCEPTION_DATA(fatal),
1371		 info);
1372}
1373
1374static void
1375cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
1376				   u32 csg_id, u32 cs_id)
1377{
1378	struct panthor_scheduler *sched = ptdev->scheduler;
1379	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1380	struct panthor_group *group = csg_slot->group;
1381	struct panthor_queue *queue = group && cs_id < group->queue_count ?
1382				      group->queues[cs_id] : NULL;
1383	struct panthor_fw_cs_iface *cs_iface;
1384	u32 fault;
1385	u64 info;
1386
1387	lockdep_assert_held(&sched->lock);
1388
1389	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1390	fault = cs_iface->output->fault;
1391	info = cs_iface->output->fault_info;
1392
1393	if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) {
1394		u64 cs_extract = queue->iface.output->extract;
1395		struct panthor_job *job;
1396
1397		spin_lock(&queue->fence_ctx.lock);
1398		list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) {
1399			if (cs_extract >= job->ringbuf.end)
1400				continue;
1401
1402			if (cs_extract < job->ringbuf.start)
1403				break;
1404
1405			dma_fence_set_error(job->done_fence, -EINVAL);
1406		}
1407		spin_unlock(&queue->fence_ctx.lock);
1408	}
1409
1410	drm_warn(&ptdev->base,
1411		 "CSG slot %d CS slot: %d\n"
1412		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
1413		 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
1414		 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
1415		 csg_id, cs_id,
1416		 (unsigned int)CS_EXCEPTION_TYPE(fault),
1417		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)),
1418		 (unsigned int)CS_EXCEPTION_DATA(fault),
1419		 info);
1420}
1421
1422static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
1423{
1424	struct panthor_device *ptdev = group->ptdev;
1425	struct panthor_scheduler *sched = ptdev->scheduler;
1426	u32 renderpasses_in_flight, pending_frag_count;
1427	struct panthor_heap_pool *heaps = NULL;
1428	u64 heap_address, new_chunk_va = 0;
1429	u32 vt_start, vt_end, frag_end;
1430	int ret, csg_id;
1431
1432	mutex_lock(&sched->lock);
1433	csg_id = group->csg_id;
1434	if (csg_id >= 0) {
1435		struct panthor_fw_cs_iface *cs_iface;
1436
1437		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1438		heaps = panthor_vm_get_heap_pool(group->vm, false);
1439		heap_address = cs_iface->output->heap_address;
1440		vt_start = cs_iface->output->heap_vt_start;
1441		vt_end = cs_iface->output->heap_vt_end;
1442		frag_end = cs_iface->output->heap_frag_end;
1443		renderpasses_in_flight = vt_start - frag_end;
1444		pending_frag_count = vt_end - frag_end;
1445	}
1446	mutex_unlock(&sched->lock);
1447
1448	/* The group got scheduled out, we stop here. We will get a new tiler OOM event
1449	 * when it's scheduled again.
1450	 */
1451	if (unlikely(csg_id < 0))
1452		return 0;
1453
1454	if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) {
1455		ret = -EINVAL;
1456	} else {
1457		/* We do the allocation without holding the scheduler lock to avoid
1458		 * blocking the scheduling.
1459		 */
1460		ret = panthor_heap_grow(heaps, heap_address,
1461					renderpasses_in_flight,
1462					pending_frag_count, &new_chunk_va);
1463	}
1464
1465	/* If the heap context doesn't have memory for us, we want to let the
1466	 * FW try to reclaim memory by waiting for fragment jobs to land or by
1467	 * executing the tiler OOM exception handler, which is supposed to
1468	 * implement incremental rendering.
1469	 */
1470	if (ret && ret != -ENOMEM) {
1471		drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
1472		group->fatal_queues |= BIT(cs_id);
1473		sched_queue_delayed_work(sched, tick, 0);
1474		goto out_put_heap_pool;
1475	}
1476
1477	mutex_lock(&sched->lock);
1478	csg_id = group->csg_id;
1479	if (csg_id >= 0) {
1480		struct panthor_fw_csg_iface *csg_iface;
1481		struct panthor_fw_cs_iface *cs_iface;
1482
1483		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1484		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1485
1486		cs_iface->input->heap_start = new_chunk_va;
1487		cs_iface->input->heap_end = new_chunk_va;
1488		panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM);
1489		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id));
1490		panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1491	}
1492	mutex_unlock(&sched->lock);
1493
 1494	/* We allocated a chunk, but couldn't link it to the heap
1495	 * context because the group was scheduled out while we were
1496	 * allocating memory. We need to return this chunk to the heap.
1497	 */
1498	if (unlikely(csg_id < 0 && new_chunk_va))
1499		panthor_heap_return_chunk(heaps, heap_address, new_chunk_va);
1500
1501	ret = 0;
1502
1503out_put_heap_pool:
1504	panthor_heap_pool_put(heaps);
1505	return ret;
1506}
1507
1508static void group_tiler_oom_work(struct work_struct *work)
1509{
1510	struct panthor_group *group =
1511		container_of(work, struct panthor_group, tiler_oom_work);
1512	u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0);
1513
1514	while (tiler_oom) {
1515		u32 cs_id = ffs(tiler_oom) - 1;
1516
1517		group_process_tiler_oom(group, cs_id);
1518		tiler_oom &= ~BIT(cs_id);
1519	}
1520
1521	group_put(group);
1522}
1523
1524static void
1525cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
1526				       u32 csg_id, u32 cs_id)
1527{
1528	struct panthor_scheduler *sched = ptdev->scheduler;
1529	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1530	struct panthor_group *group = csg_slot->group;
1531
1532	lockdep_assert_held(&sched->lock);
1533
1534	if (drm_WARN_ON(&ptdev->base, !group))
1535		return;
1536
1537	atomic_or(BIT(cs_id), &group->tiler_oom);
1538
1539	/* We don't use group_queue_work() here because we want to queue the
1540	 * work item to the heap_alloc_wq.
1541	 */
1542	group_get(group);
1543	if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work))
1544		group_put(group);
1545}
1546
1547static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
1548				       u32 csg_id, u32 cs_id)
1549{
1550	struct panthor_fw_cs_iface *cs_iface;
1551	u32 req, ack, events;
1552
1553	lockdep_assert_held(&ptdev->scheduler->lock);
1554
1555	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
1556	req = cs_iface->input->req;
1557	ack = cs_iface->output->ack;
1558	events = (req ^ ack) & CS_EVT_MASK;
1559
1560	if (events & CS_FATAL)
1561		cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id);
1562
1563	if (events & CS_FAULT)
1564		cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id);
1565
1566	if (events & CS_TILER_OOM)
1567		cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
1568
1569	/* We don't acknowledge the TILER_OOM event since its handling is
1570	 * deferred to a separate work.
1571	 */
1572	panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT);
1573
1574	return (events & (CS_FAULT | CS_TILER_OOM)) != 0;
1575}
1576
1577static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id)
1578{
1579	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1580	struct panthor_fw_csg_iface *csg_iface;
1581
1582	lockdep_assert_held(&ptdev->scheduler->lock);
1583
1584	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1585	csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE;
1586}
1587
1588static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id)
1589{
1590	struct panthor_scheduler *sched = ptdev->scheduler;
1591
1592	lockdep_assert_held(&sched->lock);
1593
1594	sched->might_have_idle_groups = true;
1595
1596	/* Schedule a tick so we can evict idle groups and schedule non-idle
1597	 * ones. This will also update runtime PM and devfreq busy/idle states,
1598	 * so the device can lower its frequency or get suspended.
1599	 */
1600	sched_queue_delayed_work(sched, tick, 0);
1601}
1602
1603static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
1604					u32 csg_id)
1605{
1606	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
1607	struct panthor_group *group = csg_slot->group;
1608
1609	lockdep_assert_held(&ptdev->scheduler->lock);
1610
1611	if (group)
1612		group_queue_work(group, sync_upd);
1613
1614	sched_queue_work(ptdev->scheduler, sync_upd);
1615}
1616
1617static void
1618csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id)
1619{
1620	struct panthor_scheduler *sched = ptdev->scheduler;
1621	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
1622	struct panthor_group *group = csg_slot->group;
1623
1624	lockdep_assert_held(&sched->lock);
1625
1626	drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);
1627
1629	if (!drm_WARN_ON(&ptdev->base, !group))
1630		group->timedout = true;
1631
1632	sched_queue_delayed_work(sched, tick, 0);
1633}
1634
1635static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id)
1636{
1637	u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events;
1638	struct panthor_fw_csg_iface *csg_iface;
1639	u32 ring_cs_db_mask = 0;
1640
1641	lockdep_assert_held(&ptdev->scheduler->lock);
1642
1643	if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1644		return;
1645
1646	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1647	req = READ_ONCE(csg_iface->input->req);
1648	ack = READ_ONCE(csg_iface->output->ack);
1649	cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req);
1650	cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack);
1651	csg_events = (req ^ ack) & CSG_EVT_MASK;
1652
1653	/* There may not be any pending CSG/CS interrupts to process */
1654	if (req == ack && cs_irq_req == cs_irq_ack)
1655		return;
1656
1657	/* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before
1658	 * examining the CS_ACK & CS_REQ bits. This would ensure that Host
1659	 * doesn't miss an interrupt for the CS in the race scenario where
1660	 * whilst Host is servicing an interrupt for the CS, firmware sends
1661	 * another interrupt for that CS.
1662	 */
1663	csg_iface->input->cs_irq_ack = cs_irq_req;
1664
1665	panthor_fw_update_reqs(csg_iface, req, ack,
1666			       CSG_SYNC_UPDATE |
1667			       CSG_IDLE |
1668			       CSG_PROGRESS_TIMER_EVENT);
1669
1670	if (csg_events & CSG_IDLE)
1671		csg_slot_process_idle_event_locked(ptdev, csg_id);
1672
1673	if (csg_events & CSG_PROGRESS_TIMER_EVENT)
1674		csg_slot_process_progress_timer_event_locked(ptdev, csg_id);
1675
1676	cs_irqs = cs_irq_req ^ cs_irq_ack;
1677	while (cs_irqs) {
1678		u32 cs_id = ffs(cs_irqs) - 1;
1679
1680		if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id))
1681			ring_cs_db_mask |= BIT(cs_id);
1682
1683		cs_irqs &= ~BIT(cs_id);
1684	}
1685
1686	if (csg_events & CSG_SYNC_UPDATE)
1687		csg_slot_sync_update_locked(ptdev, csg_id);
1688
1689	if (ring_cs_db_mask)
1690		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask);
1691
1692	panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
1693}
1694
1695static void sched_process_idle_event_locked(struct panthor_device *ptdev)
1696{
1697	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1698
1699	lockdep_assert_held(&ptdev->scheduler->lock);
1700
1701	/* Acknowledge the idle event and schedule a tick. */
1702	panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
1703	sched_queue_delayed_work(ptdev->scheduler, tick, 0);
1704}
1705
1706/**
1707 * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ
1708 * @ptdev: Device.
1709 */
1710static void sched_process_global_irq_locked(struct panthor_device *ptdev)
1711{
1712	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
1713	u32 req, ack, evts;
1714
1715	lockdep_assert_held(&ptdev->scheduler->lock);
1716
1717	req = READ_ONCE(glb_iface->input->req);
1718	ack = READ_ONCE(glb_iface->output->ack);
1719	evts = (req ^ ack) & GLB_EVT_MASK;
1720
1721	if (evts & GLB_IDLE)
1722		sched_process_idle_event_locked(ptdev);
1723}
1724
1725static void process_fw_events_work(struct work_struct *work)
1726{
1727	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
1728						      fw_events_work);
1729	u32 events = atomic_xchg(&sched->fw_events, 0);
1730	struct panthor_device *ptdev = sched->ptdev;
1731
1732	mutex_lock(&sched->lock);
1733
1734	if (events & JOB_INT_GLOBAL_IF) {
1735		sched_process_global_irq_locked(ptdev);
1736		events &= ~JOB_INT_GLOBAL_IF;
1737	}
1738
1739	while (events) {
1740		u32 csg_id = ffs(events) - 1;
1741
1742		sched_process_csg_irq_locked(ptdev, csg_id);
1743		events &= ~BIT(csg_id);
1744	}
1745
1746	mutex_unlock(&sched->lock);
1747}
1748
1749/**
1750 * panthor_sched_report_fw_events() - Report FW events to the scheduler.
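 * @ptdev: Device.
 * @events: Bitmask of FW events to process (JOB_INT_GLOBAL_IF and/or per-CSG bits).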
1751 */
1752void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
1753{
1754	if (!ptdev->scheduler)
1755		return;
1756
1757	atomic_or(events, &ptdev->scheduler->fw_events);
1758	sched_queue_work(ptdev->scheduler, fw_events);
1759}
1760
1761static const char *fence_get_driver_name(struct dma_fence *fence)
1762{
1763	return "panthor";
1764}
1765
1766static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
1767{
1768	return "queue-fence";
1769}
1770
1771static const struct dma_fence_ops panthor_queue_fence_ops = {
1772	.get_driver_name = fence_get_driver_name,
1773	.get_timeline_name = queue_fence_get_timeline_name,
1774};
1775
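/* Context used to batch CSG slot updates: requests are accumulated with
 * csgs_upd_ctx_queue_reqs() and applied in one go by
 * csgs_upd_ctx_apply_locked(), which rings the CSG doorbells once and then
 * waits for the firmware to acknowledge each queued request.
 */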
1776struct panthor_csg_slots_upd_ctx {
1777	u32 update_mask;
1778	u32 timedout_mask;
1779	struct {
1780		u32 value;
1781		u32 mask;
1782	} requests[MAX_CSGS];
1783};
1784
1785static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx)
1786{
1787	memset(ctx, 0, sizeof(*ctx));
1788}
1789
1790static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev,
1791				    struct panthor_csg_slots_upd_ctx *ctx,
1792				    u32 csg_id, u32 value, u32 mask)
1793{
1794	if (drm_WARN_ON(&ptdev->base, !mask) ||
1795	    drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
1796		return;
1797
1798	ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask);
1799	ctx->requests[csg_id].mask |= mask;
1800	ctx->update_mask |= BIT(csg_id);
1801}
1802
1803static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev,
1804				     struct panthor_csg_slots_upd_ctx *ctx)
1805{
1806	struct panthor_scheduler *sched = ptdev->scheduler;
1807	u32 update_slots = ctx->update_mask;
1808
1809	lockdep_assert_held(&sched->lock);
1810
1811	if (!ctx->update_mask)
1812		return 0;
1813
1814	while (update_slots) {
1815		struct panthor_fw_csg_iface *csg_iface;
1816		u32 csg_id = ffs(update_slots) - 1;
1817
1818		update_slots &= ~BIT(csg_id);
1819		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1820		panthor_fw_update_reqs(csg_iface, req,
1821				       ctx->requests[csg_id].value,
1822				       ctx->requests[csg_id].mask);
1823	}
1824
1825	panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask);
1826
1827	update_slots = ctx->update_mask;
1828	while (update_slots) {
1829		struct panthor_fw_csg_iface *csg_iface;
1830		u32 csg_id = ffs(update_slots) - 1;
1831		u32 req_mask = ctx->requests[csg_id].mask, acked;
1832		int ret;
1833
1834		update_slots &= ~BIT(csg_id);
1835		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
1836
1837		ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100);
1838
1839		if (acked & CSG_ENDPOINT_CONFIG)
1840			csg_slot_sync_priority_locked(ptdev, csg_id);
1841
1842		if (acked & CSG_STATE_MASK)
1843			csg_slot_sync_state_locked(ptdev, csg_id);
1844
1845		if (acked & CSG_STATUS_UPDATE) {
1846			csg_slot_sync_queues_state_locked(ptdev, csg_id);
1847			csg_slot_sync_idle_state_locked(ptdev, csg_id);
1848		}
1849
1850		if (ret && acked != req_mask &&
1851		    ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) {
1852			drm_err(&ptdev->base, "CSG %d update request timed out", csg_id);
1853			ctx->timedout_mask |= BIT(csg_id);
1854		}
1855	}
1856
1857	if (ctx->timedout_mask)
1858		return -ETIMEDOUT;
1859
1860	return 0;
1861}
1862
1863struct panthor_sched_tick_ctx {
1864	struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT];
1865	struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT];
1866	u32 idle_group_count;
1867	u32 group_count;
1868	enum panthor_csg_priority min_priority;
1869	struct panthor_vm *vms[MAX_CS_PER_CSG];
1870	u32 as_count;
1871	bool immediate_tick;
1872	u32 csg_upd_failed_mask;
1873};
1874
1875static bool
1876tick_ctx_is_full(const struct panthor_scheduler *sched,
1877		 const struct panthor_sched_tick_ctx *ctx)
1878{
1879	return ctx->group_count == sched->csg_slot_count;
1880}
1881
1882static bool
1883group_is_idle(struct panthor_group *group)
1884{
1885	struct panthor_device *ptdev = group->ptdev;
1886	u32 inactive_queues;
1887
1888	if (group->csg_id >= 0)
1889		return ptdev->scheduler->csg_slots[group->csg_id].idle;
1890
1891	inactive_queues = group->idle_queues | group->blocked_queues;
1892	return hweight32(inactive_queues) == group->queue_count;
1893}
1894
1895static bool
1896group_can_run(struct panthor_group *group)
1897{
1898	return group->state != PANTHOR_CS_GROUP_TERMINATED &&
1899	       group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE &&
1900	       !group->destroyed && group->fatal_queues == 0 &&
1901	       !group->timedout;
1902}
1903
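/* Move groups from @queue to the tick context until all CSG slots are
 * accounted for. Groups that can't run, idle groups (when @skip_idle_groups
 * is set), and groups whose VM can't be assigned one of the remaining
 * address-space slots are skipped. A reference is taken on every picked
 * group unless the list is already owned by the tick context.
 */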
1904static void
1905tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
1906			       struct panthor_sched_tick_ctx *ctx,
1907			       struct list_head *queue,
1908			       bool skip_idle_groups,
1909			       bool owned_by_tick_ctx)
1910{
1911	struct panthor_group *group, *tmp;
1912
1913	if (tick_ctx_is_full(sched, ctx))
1914		return;
1915
1916	list_for_each_entry_safe(group, tmp, queue, run_node) {
1917		u32 i;
1918
1919		if (!group_can_run(group))
1920			continue;
1921
1922		if (skip_idle_groups && group_is_idle(group))
1923			continue;
1924
1925		for (i = 0; i < ctx->as_count; i++) {
1926			if (ctx->vms[i] == group->vm)
1927				break;
1928		}
1929
1930		if (i == ctx->as_count && ctx->as_count == sched->as_slot_count)
1931			continue;
1932
1933		if (!owned_by_tick_ctx)
1934			group_get(group);
1935
1936		list_move_tail(&group->run_node, &ctx->groups[group->priority]);
1937		ctx->group_count++;
1938		if (group_is_idle(group))
1939			ctx->idle_group_count++;
1940
1941		if (i == ctx->as_count)
1942			ctx->vms[ctx->as_count++] = group->vm;
1943
1944		if (ctx->min_priority > group->priority)
1945			ctx->min_priority = group->priority;
1946
1947		if (tick_ctx_is_full(sched, ctx))
1948			return;
1949	}
1950}
1951
1952static void
1953tick_ctx_insert_old_group(struct panthor_scheduler *sched,
1954			  struct panthor_sched_tick_ctx *ctx,
1955			  struct panthor_group *group,
1956			  bool full_tick)
1957{
1958	struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id];
1959	struct panthor_group *other_group;
1960
1961	if (!full_tick) {
1962		list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1963		return;
1964	}
1965
1966	/* Rotate to make sure groups with lower CSG slot
1967	 * priorities have a chance to get a higher CSG slot
1968	 * priority next time they get picked. This priority
1969	 * has an impact on resource request ordering, so it's
1970	 * important to make sure we don't let one group starve
1971	 * all other groups with the same group priority.
1972	 */
1973	list_for_each_entry(other_group,
1974			    &ctx->old_groups[csg_slot->group->priority],
1975			    run_node) {
1976		struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id];
1977
1978		if (other_csg_slot->priority > csg_slot->priority) {
1979			list_add_tail(&csg_slot->group->run_node, &other_group->run_node);
1980			return;
1981		}
1982	}
1983
1984	list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
1985}
1986
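/* Prepare a tick: collect the groups currently bound to CSG slots into the
 * old_groups lists and request a STATUS_UPDATE from the firmware for each of
 * them, so the idle/blocked state is up-to-date before we pick the next set
 * of groups to schedule.
 */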
1987static void
1988tick_ctx_init(struct panthor_scheduler *sched,
1989	      struct panthor_sched_tick_ctx *ctx,
1990	      bool full_tick)
1991{
1992	struct panthor_device *ptdev = sched->ptdev;
1993	struct panthor_csg_slots_upd_ctx upd_ctx;
1994	int ret;
1995	u32 i;
1996
1997	memset(ctx, 0, sizeof(*ctx));
1998	csgs_upd_ctx_init(&upd_ctx);
1999
2000	ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT;
2001	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
2002		INIT_LIST_HEAD(&ctx->groups[i]);
2003		INIT_LIST_HEAD(&ctx->old_groups[i]);
2004	}
2005
2006	for (i = 0; i < sched->csg_slot_count; i++) {
2007		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2008		struct panthor_group *group = csg_slot->group;
2009		struct panthor_fw_csg_iface *csg_iface;
2010
2011		if (!group)
2012			continue;
2013
2014		csg_iface = panthor_fw_get_csg_iface(ptdev, i);
2015		group_get(group);
2016
2017		/* If there were unhandled faults on the VM, force processing of
2018		 * CSG IRQs so we can flag the faulty queue.
2019		 */
2020		if (panthor_vm_has_unhandled_faults(group->vm)) {
2021			sched_process_csg_irq_locked(ptdev, i);
2022
2023			/* No fatal fault reported, flag all queues as faulty. */
2024			if (!group->fatal_queues)
2025				group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
2026		}
2027
2028		tick_ctx_insert_old_group(sched, ctx, group, full_tick);
2029		csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
2030					csg_iface->output->ack ^ CSG_STATUS_UPDATE,
2031					CSG_STATUS_UPDATE);
2032	}
2033
2034	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2035	if (ret) {
2036		panthor_device_schedule_reset(ptdev);
2037		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2038	}
2039}
2040
2041static void
2042group_term_post_processing(struct panthor_group *group)
2043{
2044	struct panthor_job *job, *tmp;
2045	LIST_HEAD(faulty_jobs);
2046	bool cookie;
2047	u32 i = 0;
2048
2049	if (drm_WARN_ON(&group->ptdev->base, group_can_run(group)))
2050		return;
2051
2052	cookie = dma_fence_begin_signalling();
2053	for (i = 0; i < group->queue_count; i++) {
2054		struct panthor_queue *queue = group->queues[i];
2055		struct panthor_syncobj_64b *syncobj;
2056		int err;
2057
2058		if (group->fatal_queues & BIT(i))
2059			err = -EINVAL;
2060		else if (group->timedout)
2061			err = -ETIMEDOUT;
2062		else
2063			err = -ECANCELED;
2064
2065		if (!queue)
2066			continue;
2067
2068		spin_lock(&queue->fence_ctx.lock);
2069		list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
2070			list_move_tail(&job->node, &faulty_jobs);
2071			dma_fence_set_error(job->done_fence, err);
2072			dma_fence_signal_locked(job->done_fence);
2073		}
2074		spin_unlock(&queue->fence_ctx.lock);
2075
2076		/* Manually update the syncobj seqno to unblock waiters. */
2077		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
2078		syncobj->status = ~0;
2079		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
2080		sched_queue_work(group->ptdev->scheduler, sync_upd);
2081	}
2082	dma_fence_end_signalling(cookie);
2083
2084	list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
2085		list_del_init(&job->node);
2086		panthor_job_put(&job->base);
2087	}
2088}
2089
2090static void group_term_work(struct work_struct *work)
2091{
2092	struct panthor_group *group =
2093		container_of(work, struct panthor_group, term_work);
2094
2095	group_term_post_processing(group);
2096	group_put(group);
2097}
2098
2099static void
2100tick_ctx_cleanup(struct panthor_scheduler *sched,
2101		 struct panthor_sched_tick_ctx *ctx)
2102{
2103	struct panthor_device *ptdev = sched->ptdev;
2104	struct panthor_group *group, *tmp;
2105	u32 i;
2106
2107	for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) {
2108		list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) {
2109			/* If everything went fine, we should only have groups
2110			 * to be terminated in the old_groups lists.
2111			 */
2112			drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask &&
2113				    group_can_run(group));
2114
2115			if (!group_can_run(group)) {
2116				list_del_init(&group->run_node);
2117				list_del_init(&group->wait_node);
2118				group_queue_work(group, term);
2119			} else if (group->csg_id >= 0) {
2120				list_del_init(&group->run_node);
2121			} else {
2122				list_move(&group->run_node,
2123					  group_is_idle(group) ?
2124					  &sched->groups.idle[group->priority] :
2125					  &sched->groups.runnable[group->priority]);
2126			}
2127			group_put(group);
2128		}
2129	}
2130
2131	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
2132		/* If everything went fine, the groups to schedule lists should
2133		 * be empty.
2134		 */
2135		drm_WARN_ON(&ptdev->base,
2136			    !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
2137
2138		list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) {
2139			if (group->csg_id >= 0) {
2140				list_del_init(&group->run_node);
2141			} else {
2142				list_move(&group->run_node,
2143					  group_is_idle(group) ?
2144					  &sched->groups.idle[group->priority] :
2145					  &sched->groups.runnable[group->priority]);
2146			}
2147			group_put(group);
2148		}
2149	}
2150}
2151
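/* Apply the scheduling decisions taken during a tick:
 * 1. suspend or terminate the evicted groups,
 * 2. adjust the CSG slot priority of groups that keep running,
 * 3. bind and start the newly picked groups on the freed slots.
 * A CSG update timeout at any step escalates to a device reset.
 */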
2152static void
2153tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
2154{
2155	struct panthor_group *group, *tmp;
2156	struct panthor_device *ptdev = sched->ptdev;
2157	struct panthor_csg_slot *csg_slot;
2158	int prio, new_csg_prio = MAX_CSG_PRIO, i;
2159	u32 free_csg_slots = 0;
2160	struct panthor_csg_slots_upd_ctx upd_ctx;
2161	int ret;
2162
2163	csgs_upd_ctx_init(&upd_ctx);
2164
2165	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2166		/* Suspend or terminate evicted groups. */
2167		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2168			bool term = !group_can_run(group);
2169			int csg_id = group->csg_id;
2170
2171			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2172				continue;
2173
2174			csg_slot = &sched->csg_slots[csg_id];
2175			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2176						term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND,
2177						CSG_STATE_MASK);
2178		}
2179
2180		/* Update priorities on already running groups. */
2181		list_for_each_entry(group, &ctx->groups[prio], run_node) {
2182			struct panthor_fw_csg_iface *csg_iface;
2183			int csg_id = group->csg_id;
2184
2185			if (csg_id < 0) {
2186				new_csg_prio--;
2187				continue;
2188			}
2189
2190			csg_slot = &sched->csg_slots[csg_id];
2191			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2192			if (csg_slot->priority == new_csg_prio) {
2193				new_csg_prio--;
2194				continue;
2195			}
2196
2197			panthor_fw_update_reqs(csg_iface, endpoint_req,
2198					       CSG_EP_REQ_PRIORITY(new_csg_prio),
2199					       CSG_EP_REQ_PRIORITY_MASK);
2200			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2201						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2202						CSG_ENDPOINT_CONFIG);
2203			new_csg_prio--;
2204		}
2205	}
2206
2207	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2208	if (ret) {
2209		panthor_device_schedule_reset(ptdev);
2210		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2211		return;
2212	}
2213
2214	/* Unbind evicted groups. */
2215	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2216		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
2217			/* This group is gone. Process any pending
2218			 * interrupts on its slot before we start the
2219			 * new group there.
2220			 */
2221			if (group->csg_id >= 0)
2222				sched_process_csg_irq_locked(ptdev, group->csg_id);
2223
2224			group_unbind_locked(group);
2225		}
2226	}
2227
2228	for (i = 0; i < sched->csg_slot_count; i++) {
2229		if (!sched->csg_slots[i].group)
2230			free_csg_slots |= BIT(i);
2231	}
2232
2233	csgs_upd_ctx_init(&upd_ctx);
2234	new_csg_prio = MAX_CSG_PRIO;
2235
2236	/* Start new groups. */
2237	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2238		list_for_each_entry(group, &ctx->groups[prio], run_node) {
2239			int csg_id = group->csg_id;
2240			struct panthor_fw_csg_iface *csg_iface;
2241
2242			if (csg_id >= 0) {
2243				new_csg_prio--;
2244				continue;
2245			}
2246
2247			csg_id = ffs(free_csg_slots) - 1;
2248			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
2249				break;
2250
2251			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
2252			csg_slot = &sched->csg_slots[csg_id];
2253			group_bind_locked(group, csg_id);
2254			csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
2255			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2256						group->state == PANTHOR_CS_GROUP_SUSPENDED ?
2257						CSG_STATE_RESUME : CSG_STATE_START,
2258						CSG_STATE_MASK);
2259			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2260						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
2261						CSG_ENDPOINT_CONFIG);
2262			free_csg_slots &= ~BIT(csg_id);
2263		}
2264	}
2265
2266	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2267	if (ret) {
2268		panthor_device_schedule_reset(ptdev);
2269		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
2270		return;
2271	}
2272
2273	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
2274		list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) {
2275			list_del_init(&group->run_node);
2276
2277			/* If the group has been destroyed while we were
2278			 * scheduling, ask for an immediate tick to
2279			 * re-evaluate as soon as possible and get rid of
2280			 * this dangling group.
2281			 */
2282			if (group->destroyed)
2283				ctx->immediate_tick = true;
2284			group_put(group);
2285		}
2286
2287		/* Return evicted groups to the idle or run queues. Groups
2288		 * that can no longer be run (because they've been destroyed
2289		 * or experienced an unrecoverable error) will be scheduled
2290		 * for destruction in tick_ctx_cleanup().
2291		 */
2292		list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) {
2293			if (!group_can_run(group))
2294				continue;
2295
2296			if (group_is_idle(group))
2297				list_move_tail(&group->run_node, &sched->groups.idle[prio]);
2298			else
2299				list_move_tail(&group->run_node, &sched->groups.runnable[prio]);
2300			group_put(group);
2301		}
2302	}
2303
2304	sched->used_csg_slot_count = ctx->group_count;
2305	sched->might_have_idle_groups = ctx->idle_group_count > 0;
2306}
2307
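/* Pick the delay (in jiffies) before the next tick, or U64_MAX if no
 * periodic tick is needed (free slots left, idle groups scheduled, or no
 * runnable group waiting at the lowest scheduled priority).
 */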
2308static u64
2309tick_ctx_update_resched_target(struct panthor_scheduler *sched,
2310			       const struct panthor_sched_tick_ctx *ctx)
2311{
2312	/* We had space left, no need to reschedule until some external event happens. */
2313	if (!tick_ctx_is_full(sched, ctx))
2314		goto no_tick;
2315
2316	/* If idle groups were scheduled, no need to wake up until some external
2317	 * event happens (group unblocked, new job submitted, ...).
2318	 */
2319	if (ctx->idle_group_count)
2320		goto no_tick;
2321
2322	if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT))
2323		goto no_tick;
2324
2325	/* If there are groups of the same priority waiting, we need to
2326	 * keep the scheduler ticking, otherwise, we'll just wait for
2327	 * new groups with higher priority to be queued.
2328	 */
2329	if (!list_empty(&sched->groups.runnable[ctx->min_priority])) {
2330		u64 resched_target = sched->last_tick + sched->tick_period;
2331
2332		if (time_before64(sched->resched_target, sched->last_tick) ||
2333		    time_before64(resched_target, sched->resched_target))
2334			sched->resched_target = resched_target;
2335
2336		return sched->resched_target - sched->last_tick;
2337	}
2338
2339no_tick:
2340	sched->resched_target = U64_MAX;
2341	return U64_MAX;
2342}
2343
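/* Main scheduler tick: evaluate which groups should occupy the CSG slots,
 * apply the new mapping, update the devfreq/PM state depending on whether
 * any scheduled group is busy, and re-arm the tick if needed.
 */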
2344static void tick_work(struct work_struct *work)
2345{
2346	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
2347						      tick_work.work);
2348	struct panthor_device *ptdev = sched->ptdev;
2349	struct panthor_sched_tick_ctx ctx;
2350	u64 remaining_jiffies = 0, resched_delay;
2351	u64 now = get_jiffies_64();
2352	int prio, ret, cookie;
2353
2354	if (!drm_dev_enter(&ptdev->base, &cookie))
2355		return;
2356
2357	ret = pm_runtime_resume_and_get(ptdev->base.dev);
2358	if (drm_WARN_ON(&ptdev->base, ret))
2359		goto out_dev_exit;
2360
2361	if (time_before64(now, sched->resched_target))
2362		remaining_jiffies = sched->resched_target - now;
2363
2364	mutex_lock(&sched->lock);
2365	if (panthor_device_reset_is_pending(sched->ptdev))
2366		goto out_unlock;
2367
2368	tick_ctx_init(sched, &ctx, remaining_jiffies != 0);
2369	if (ctx.csg_upd_failed_mask)
2370		goto out_cleanup_ctx;
2371
2372	if (remaining_jiffies) {
2373		/* Scheduling forced in the middle of a tick. Only RT groups
2374		 * can preempt non-RT ones. Currently running RT groups can't be
2375		 * preempted.
2376		 */
2377		for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2378		     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2379		     prio--) {
2380			tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
2381						       true, true);
2382			if (prio == PANTHOR_CSG_PRIORITY_RT) {
2383				tick_ctx_pick_groups_from_list(sched, &ctx,
2384							       &sched->groups.runnable[prio],
2385							       true, false);
2386			}
2387		}
2388	}
2389
2390	/* First pick non-idle groups */
2391	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2392	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2393	     prio--) {
2394		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio],
2395					       true, false);
2396		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
2397	}
2398
2399	/* If we have free CSG slots left, pick idle groups */
2400	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
2401	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
2402	     prio--) {
2403		/* Check the old_group queue first to avoid reprogramming the slots */
2404		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true);
2405		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio],
2406					       false, false);
2407	}
2408
2409	tick_ctx_apply(sched, &ctx);
2410	if (ctx.csg_upd_failed_mask)
2411		goto out_cleanup_ctx;
2412
2413	if (ctx.idle_group_count == ctx.group_count) {
2414		panthor_devfreq_record_idle(sched->ptdev);
2415		if (sched->pm.has_ref) {
2416			pm_runtime_put_autosuspend(ptdev->base.dev);
2417			sched->pm.has_ref = false;
2418		}
2419	} else {
2420		panthor_devfreq_record_busy(sched->ptdev);
2421		if (!sched->pm.has_ref) {
2422			pm_runtime_get(ptdev->base.dev);
2423			sched->pm.has_ref = true;
2424		}
2425	}
2426
2427	sched->last_tick = now;
2428	resched_delay = tick_ctx_update_resched_target(sched, &ctx);
2429	if (ctx.immediate_tick)
2430		resched_delay = 0;
2431
2432	if (resched_delay != U64_MAX)
2433		sched_queue_delayed_work(sched, tick, resched_delay);
2434
2435out_cleanup_ctx:
2436	tick_ctx_cleanup(sched, &ctx);
2437
2438out_unlock:
2439	mutex_unlock(&sched->lock);
2440	pm_runtime_mark_last_busy(ptdev->base.dev);
2441	pm_runtime_put_autosuspend(ptdev->base.dev);
2442
2443out_dev_exit:
2444	drm_dev_exit(cookie);
2445}
2446
2447static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx)
2448{
2449	struct panthor_queue *queue = group->queues[queue_idx];
2450	union {
2451		struct panthor_syncobj_64b sync64;
2452		struct panthor_syncobj_32b sync32;
2453	} *syncobj;
2454	bool result;
2455	u64 value;
2456
2457	syncobj = panthor_queue_get_syncwait_obj(group, queue);
2458	if (!syncobj)
2459		return -EINVAL;
2460
2461	value = queue->syncwait.sync64 ?
2462		syncobj->sync64.seqno :
2463		syncobj->sync32.seqno;
2464
2465	if (queue->syncwait.gt)
2466		result = value > queue->syncwait.ref;
2467	else
2468		result = value <= queue->syncwait.ref;
2469
2470	if (result)
2471		panthor_queue_put_syncwait_obj(queue);
2472
2473	return result;
2474}
2475
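/* Re-evaluate the sync-wait objects of all blocked queues. Queues whose wait
 * condition is now satisfied get unblocked, and their group is moved back to
 * the runnable list, with an immediate tick if it's an RT group.
 */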
2476static void sync_upd_work(struct work_struct *work)
2477{
2478	struct panthor_scheduler *sched = container_of(work,
2479						      struct panthor_scheduler,
2480						      sync_upd_work);
2481	struct panthor_group *group, *tmp;
2482	bool immediate_tick = false;
2483
2484	mutex_lock(&sched->lock);
2485	list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) {
2486		u32 tested_queues = group->blocked_queues;
2487		u32 unblocked_queues = 0;
2488
2489		while (tested_queues) {
2490			u32 cs_id = ffs(tested_queues) - 1;
2491			int ret;
2492
2493			ret = panthor_queue_eval_syncwait(group, cs_id);
2494			drm_WARN_ON(&group->ptdev->base, ret < 0);
2495			if (ret)
2496				unblocked_queues |= BIT(cs_id);
2497
2498			tested_queues &= ~BIT(cs_id);
2499		}
2500
2501		if (unblocked_queues) {
2502			group->blocked_queues &= ~unblocked_queues;
2503
2504			if (group->csg_id < 0) {
2505				list_move(&group->run_node,
2506					  &sched->groups.runnable[group->priority]);
2507				if (group->priority == PANTHOR_CSG_PRIORITY_RT)
2508					immediate_tick = true;
2509			}
2510		}
2511
2512		if (!group->blocked_queues)
2513			list_del_init(&group->wait_node);
2514	}
2515	mutex_unlock(&sched->lock);
2516
2517	if (immediate_tick)
2518		sched_queue_delayed_work(sched, tick, 0);
2519}
2520
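/* Called when new jobs land on the queues in @queue_mask: mark those queues
 * as non-idle and decide whether the scheduler must be kicked immediately
 * (RT group, possibly-idle groups, free CSG slots) or at the next periodic
 * tick.
 */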
2521static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
2522{
2523	struct panthor_device *ptdev = group->ptdev;
2524	struct panthor_scheduler *sched = ptdev->scheduler;
2525	struct list_head *queue = &sched->groups.runnable[group->priority];
2526	u64 delay_jiffies = 0;
2527	bool was_idle;
2528	u64 now;
2529
2530	if (!group_can_run(group))
2531		return;
2532
2533	/* All updated queues are blocked, no need to wake up the scheduler. */
2534	if ((queue_mask & group->blocked_queues) == queue_mask)
2535		return;
2536
2537	was_idle = group_is_idle(group);
2538	group->idle_queues &= ~queue_mask;
2539
2540	/* Don't mess with the lists if we're in the middle of a reset. */
2541	if (atomic_read(&sched->reset.in_progress))
2542		return;
2543
2544	if (was_idle && !group_is_idle(group))
2545		list_move_tail(&group->run_node, queue);
2546
2547	/* RT groups are preemptive. */
2548	if (group->priority == PANTHOR_CSG_PRIORITY_RT) {
2549		sched_queue_delayed_work(sched, tick, 0);
2550		return;
2551	}
2552
2553	/* Some groups might be idle, force an immediate tick to
2554	 * re-evaluate.
2555	 */
2556	if (sched->might_have_idle_groups) {
2557		sched_queue_delayed_work(sched, tick, 0);
2558		return;
2559	}
2560
2561	/* Scheduler is ticking, nothing to do. */
2562	if (sched->resched_target != U64_MAX) {
2563		/* If there are free slots, force an immediate tick. */
2564		if (sched->used_csg_slot_count < sched->csg_slot_count)
2565			sched_queue_delayed_work(sched, tick, 0);
2566
2567		return;
2568	}
2569
2570	/* Scheduler tick was off, recalculate the resched_target based on the
2571	 * last tick event, and queue the scheduler work.
2572	 */
2573	now = get_jiffies_64();
2574	sched->resched_target = sched->last_tick + sched->tick_period;
2575	if (sched->used_csg_slot_count == sched->csg_slot_count &&
2576	    time_before64(now, sched->resched_target))
2577		delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
2578
2579	sched_queue_delayed_work(sched, tick, delay_jiffies);
2580}
2581
2582static void queue_stop(struct panthor_queue *queue,
2583		       struct panthor_job *bad_job)
2584{
2585	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
2586}
2587
2588static void queue_start(struct panthor_queue *queue)
2589{
2590	struct panthor_job *job;
2591
2592	/* Re-assign the parent fences. */
2593	list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
2594		job->base.s_fence->parent = dma_fence_get(job->done_fence);
2595
2596	drm_sched_start(&queue->scheduler, 0);
2597}
2598
2599static void panthor_group_stop(struct panthor_group *group)
2600{
2601	struct panthor_scheduler *sched = group->ptdev->scheduler;
2602
2603	lockdep_assert_held(&sched->reset.lock);
2604
2605	for (u32 i = 0; i < group->queue_count; i++)
2606		queue_stop(group->queues[i], NULL);
2607
2608	group_get(group);
2609	list_move_tail(&group->run_node, &sched->reset.stopped_groups);
2610}
2611
2612static void panthor_group_start(struct panthor_group *group)
2613{
2614	struct panthor_scheduler *sched = group->ptdev->scheduler;
2615
2616	lockdep_assert_held(&group->ptdev->scheduler->reset.lock);
2617
2618	for (u32 i = 0; i < group->queue_count; i++)
2619		queue_start(group->queues[i]);
2620
2621	if (group_can_run(group)) {
2622		list_move_tail(&group->run_node,
2623			       group_is_idle(group) ?
2624			       &sched->groups.idle[group->priority] :
2625			       &sched->groups.runnable[group->priority]);
2626	} else {
2627		list_del_init(&group->run_node);
2628		list_del_init(&group->wait_node);
2629		group_queue_work(group, term);
2630	}
2631
2632	group_put(group);
2633}
2634
2635static void panthor_sched_immediate_tick(struct panthor_device *ptdev)
2636{
2637	struct panthor_scheduler *sched = ptdev->scheduler;
2638
2639	sched_queue_delayed_work(sched, tick, 0);
2640}
2641
2642/**
2643 * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler.
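 * @ptdev: Device.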
2644 */
2645void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
2646{
2647	/* Force a tick to immediately kill faulty groups. */
2648	if (ptdev->scheduler)
2649		panthor_sched_immediate_tick(ptdev);
2650}
2651
2652void panthor_sched_resume(struct panthor_device *ptdev)
2653{
2654	/* Force a tick to re-evaluate after a resume. */
2655	panthor_sched_immediate_tick(ptdev);
2656}
2657
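/* Suspend the scheduler: ask the firmware to suspend (or terminate, if they
 * can't run anymore) all resident groups, escalate suspension timeouts to
 * termination, flush the GPU caches so the suspend buffers are coherent, and
 * finally unbind every group from its CSG slot.
 */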
2658void panthor_sched_suspend(struct panthor_device *ptdev)
2659{
2660	struct panthor_scheduler *sched = ptdev->scheduler;
2661	struct panthor_csg_slots_upd_ctx upd_ctx;
2662	struct panthor_group *group;
2663	u32 suspended_slots;
2664	u32 i;
2665
2666	mutex_lock(&sched->lock);
2667	csgs_upd_ctx_init(&upd_ctx);
2668	for (i = 0; i < sched->csg_slot_count; i++) {
2669		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2670
2671		if (csg_slot->group) {
2672			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
2673						group_can_run(csg_slot->group) ?
2674						CSG_STATE_SUSPEND : CSG_STATE_TERMINATE,
2675						CSG_STATE_MASK);
2676		}
2677	}
2678
2679	suspended_slots = upd_ctx.update_mask;
2680
2681	csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2682	suspended_slots &= ~upd_ctx.timedout_mask;
2683
2684	if (upd_ctx.timedout_mask) {
2685		u32 slot_mask = upd_ctx.timedout_mask;
2686
2687		drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
2688		csgs_upd_ctx_init(&upd_ctx);
2689		while (slot_mask) {
2690			u32 csg_id = ffs(slot_mask) - 1;
2691			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2692
2693			/* We consider group suspension failures as fatal and flag the
2694			 * group as unusable by setting timedout=true.
2695			 */
2696			csg_slot->group->timedout = true;
2697
2698			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
2699						CSG_STATE_TERMINATE,
2700						CSG_STATE_MASK);
2701			slot_mask &= ~BIT(csg_id);
2702		}
2703
2704		csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
2705
2706		slot_mask = upd_ctx.timedout_mask;
2707		while (slot_mask) {
2708			u32 csg_id = ffs(slot_mask) - 1;
2709			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2710
2711			/* The terminate command timed out, but the soft-reset
2712			 * will automatically terminate all active groups, so
2713			 * let's force the state to terminated here.
2714			 */
2715			if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED)
2716				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2717			slot_mask &= ~BIT(csg_id);
2718		}
2719	}
2720
2721	/* Flush L2 and LSC caches to make sure suspend state is up-to-date.
2722	 * If the flush fails, flag all queues for termination.
2723	 */
2724	if (suspended_slots) {
2725		bool flush_caches_failed = false;
2726		u32 slot_mask = suspended_slots;
2727
2728		if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0))
2729			flush_caches_failed = true;
2730
2731		while (slot_mask) {
2732			u32 csg_id = ffs(slot_mask) - 1;
2733			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
2734
2735			if (flush_caches_failed)
2736				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
2737			else
2738				csg_slot_sync_update_locked(ptdev, csg_id);
2739
2740			slot_mask &= ~BIT(csg_id);
2741		}
2742	}
2743
2744	for (i = 0; i < sched->csg_slot_count; i++) {
2745		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
2746
2747		group = csg_slot->group;
2748		if (!group)
2749			continue;
2750
2751		group_get(group);
2752
2753		if (group->csg_id >= 0)
2754			sched_process_csg_irq_locked(ptdev, group->csg_id);
2755
2756		group_unbind_locked(group);
2757
2758		drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
2759
2760		if (group_can_run(group)) {
2761			list_add(&group->run_node,
2762				 &sched->groups.idle[group->priority]);
2763		} else {
2764			/* We don't bother stopping the scheduler if the group is
2765			 * faulty; the group termination work will finish the job.
2766			 */
2767			list_del_init(&group->wait_node);
2768			group_queue_work(group, term);
2769		}
2770		group_put(group);
2771	}
2772	mutex_unlock(&sched->lock);
2773}
2774
2775void panthor_sched_pre_reset(struct panthor_device *ptdev)
2776{
2777	struct panthor_scheduler *sched = ptdev->scheduler;
2778	struct panthor_group *group, *group_tmp;
2779	u32 i;
2780
2781	mutex_lock(&sched->reset.lock);
2782	atomic_set(&sched->reset.in_progress, true);
2783
2784	/* Cancel all scheduler works. Once this is done, these works can't be
2785	 * scheduled again until the reset operation is complete.
2786	 */
2787	cancel_work_sync(&sched->sync_upd_work);
2788	cancel_delayed_work_sync(&sched->tick_work);
2789
2790	panthor_sched_suspend(ptdev);
2791
2792	/* Stop all groups that might still accept jobs, so we don't get new
2793	 * jobs passed to us while we're resetting.
2794	 */
2795	for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) {
2796		/* All groups should be in the idle lists. */
2797		drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i]));
2798		list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node)
2799			panthor_group_stop(group);
2800	}
2801
2802	for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) {
2803		list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node)
2804			panthor_group_stop(group);
2805	}
2806
2807	mutex_unlock(&sched->reset.lock);
2808}
2809
2810void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
2811{
2812	struct panthor_scheduler *sched = ptdev->scheduler;
2813	struct panthor_group *group, *group_tmp;
2814
2815	mutex_lock(&sched->reset.lock);
2816
2817	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) {
2818		/* Consider all previously running groups as terminated if the
2819		 * reset failed.
2820		 */
2821		if (reset_failed)
2822			group->state = PANTHOR_CS_GROUP_TERMINATED;
2823
2824		panthor_group_start(group);
2825	}
2826
2827	/* We're done resetting the GPU, clear the reset.in_progress bit so we can
2828	 * kick the scheduler.
2829	 */
2830	atomic_set(&sched->reset.in_progress, false);
2831	mutex_unlock(&sched->reset.lock);
2832
2833	/* No need to queue a tick and update syncs if the reset failed. */
2834	if (!reset_failed) {
2835		sched_queue_delayed_work(sched, tick, 0);
2836		sched_queue_work(sched, sync_upd);
2837	}
2838}
2839
2840static void update_fdinfo_stats(struct panthor_job *job)
2841{
2842	struct panthor_group *group = job->group;
2843	struct panthor_queue *queue = group->queues[job->queue_idx];
2844	struct panthor_gpu_usage *fdinfo = &group->fdinfo.data;
2845	struct panthor_job_profiling_data *slots = queue->profiling.slots->kmap;
2846	struct panthor_job_profiling_data *data = &slots[job->profiling.slot];
2847
2848	mutex_lock(&group->fdinfo.lock);
2849	if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES)
2850		fdinfo->cycles += data->cycles.after - data->cycles.before;
2851	if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP)
2852		fdinfo->time += data->time.after - data->time.before;
2853	mutex_unlock(&group->fdinfo.lock);
2854}
2855
2856void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile)
2857{
2858	struct panthor_group_pool *gpool = pfile->groups;
2859	struct panthor_group *group;
2860	unsigned long i;
2861
2862	if (IS_ERR_OR_NULL(gpool))
2863		return;
2864
2865	xa_for_each(&gpool->xa, i, group) {
2866		mutex_lock(&group->fdinfo.lock);
2867		pfile->stats.cycles += group->fdinfo.data.cycles;
2868		pfile->stats.time += group->fdinfo.data.time;
2869		group->fdinfo.data.cycles = 0;
2870		group->fdinfo.data.time = 0;
2871		mutex_unlock(&group->fdinfo.lock);
2872	}
2873}
2874
2875static void group_sync_upd_work(struct work_struct *work)
2876{
2877	struct panthor_group *group =
2878		container_of(work, struct panthor_group, sync_upd_work);
2879	struct panthor_job *job, *job_tmp;
2880	LIST_HEAD(done_jobs);
2881	u32 queue_idx;
2882	bool cookie;
2883
2884	cookie = dma_fence_begin_signalling();
2885	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
2886		struct panthor_queue *queue = group->queues[queue_idx];
2887		struct panthor_syncobj_64b *syncobj;
2888
2889		if (!queue)
2890			continue;
2891
2892		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
2893
2894		spin_lock(&queue->fence_ctx.lock);
2895		list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
2896			if (syncobj->seqno < job->done_fence->seqno)
2897				break;
2898
2899			list_move_tail(&job->node, &done_jobs);
2900			dma_fence_signal_locked(job->done_fence);
2901		}
2902		spin_unlock(&queue->fence_ctx.lock);
2903	}
2904	dma_fence_end_signalling(cookie);
2905
2906	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
2907		if (job->profiling.mask)
2908			update_fdinfo_stats(job);
2909		list_del_init(&job->node);
2910		panthor_job_put(&job->base);
2911	}
2912
2913	group_put(group);
2914}
2915
2916struct panthor_job_ringbuf_instrs {
2917	u64 buffer[MAX_INSTRS_PER_JOB];
2918	u32 count;
2919};
2920
2921struct panthor_job_instr {
2922	u32 profile_mask;
2923	u64 instr;
2924};
2925
2926#define JOB_INSTR(__prof, __instr) \
2927	{ \
2928		.profile_mask = __prof, \
2929		.instr = __instr, \
2930	}
2931
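/* Copy the generated instruction sequence to the queue ring buffer, wrapping
 * around at the end of the buffer if needed (the ring buffer size is a power
 * of two, so the wrap offset is a simple mask).
 */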
2932static void
2933copy_instrs_to_ringbuf(struct panthor_queue *queue,
2934		       struct panthor_job *job,
2935		       struct panthor_job_ringbuf_instrs *instrs)
2936{
2937	u64 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
2938	u64 start = job->ringbuf.start & (ringbuf_size - 1);
2939	u64 size, written;
2940
2941	/*
2942	 * We need to write a whole slot, including any trailing zeroes at the
2943	 * end of it. Also, because the padding area of instrs->buffer was
2944	 * zero-filled in prepare_job_instrs(), there's no need to pad it here.
2945	 */
2946	instrs->count = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE);
2947	size = instrs->count * sizeof(u64);
2948	WARN_ON(size > ringbuf_size);
2949	written = min(ringbuf_size - start, size);
2950
2951	memcpy(queue->ringbuf->kmap + start, instrs->buffer, written);
2952
2953	if (written < size)
2954		memcpy(queue->ringbuf->kmap,
2955		       &instrs->buffer[written / sizeof(u64)],
2956		       size - written);
2957}
2958
2959struct panthor_job_cs_params {
2960	u32 profile_mask;
2961	u64 addr_reg; u64 val_reg;
2962	u64 cycle_reg; u64 time_reg;
2963	u64 sync_addr; u64 times_addr;
2964	u64 cs_start; u64 cs_size;
2965	u32 last_flush; u32 waitall_mask;
2966};
2967
2968static void
2969get_job_cs_params(struct panthor_job *job, struct panthor_job_cs_params *params)
2970{
2971	struct panthor_group *group = job->group;
2972	struct panthor_queue *queue = group->queues[job->queue_idx];
2973	struct panthor_device *ptdev = group->ptdev;
2974	struct panthor_scheduler *sched = ptdev->scheduler;
2975
2976	params->addr_reg = ptdev->csif_info.cs_reg_count -
2977			   ptdev->csif_info.unpreserved_cs_reg_count;
2978	params->val_reg = params->addr_reg + 2;
2979	params->cycle_reg = params->addr_reg;
2980	params->time_reg = params->val_reg;
2981
2982	params->sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
2983			    job->queue_idx * sizeof(struct panthor_syncobj_64b);
2984	params->times_addr = panthor_kernel_bo_gpuva(queue->profiling.slots) +
2985			     (job->profiling.slot * sizeof(struct panthor_job_profiling_data));
2986	params->waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
2987
2988	params->cs_start = job->call_info.start;
2989	params->cs_size = job->call_info.size;
2990	params->last_flush = job->call_info.latest_flush;
2991
2992	params->profile_mask = job->profiling.mask;
2993}
2994
2995#define JOB_INSTR_ALWAYS(instr) \
2996	JOB_INSTR(PANTHOR_DEVICE_PROFILING_DISABLED, (instr))
2997#define JOB_INSTR_TIMESTAMP(instr) \
2998	JOB_INSTR(PANTHOR_DEVICE_PROFILING_TIMESTAMP, (instr))
2999#define JOB_INSTR_CYCLES(instr) \
3000	JOB_INSTR(PANTHOR_DEVICE_PROFILING_CYCLES, (instr))
3001
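/* Build the call sequence for a job: flush caches, optionally sample the
 * cycle/timestamp counters around the call, CALL into the user command
 * stream, then bump the queue syncobj and emit an ERROR_BARRIER. Instructions
 * whose profile_mask isn't enabled for this job are dropped, and the sequence
 * is padded with zeroes up to a cache-line multiple.
 */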
3002static void
3003prepare_job_instrs(const struct panthor_job_cs_params *params,
3004		   struct panthor_job_ringbuf_instrs *instrs)
3005{
3006	const struct panthor_job_instr instr_seq[] = {
3007		/* MOV32 rX+2, cs.latest_flush */
3008		JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->last_flush),
3009		/* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
3010		JOB_INSTR_ALWAYS((36ull << 56) | (0ull << 48) | (params->val_reg << 40) |
3011				 (0 << 16) | 0x233),
3012		/* MOV48 rX:rX+1, cycles_offset */
3013		JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) |
3014				 (params->times_addr +
3015				  offsetof(struct panthor_job_profiling_data, cycles.before))),
3016		/* STORE_STATE cycles */
3017		JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)),
3018		/* MOV48 rX:rX+1, time_offset */
3019		JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) |
3020				    (params->times_addr +
3021				     offsetof(struct panthor_job_profiling_data, time.before))),
3022		/* STORE_STATE timer */
3023		JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)),
3024		/* MOV48 rX:rX+1, cs.start */
3025		JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->cs_start),
3026		/* MOV32 rX+2, cs.size */
3027		JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->cs_size),
3028		/* WAIT(0) => waits for FLUSH_CACHE2 instruction */
3029		JOB_INSTR_ALWAYS((3ull << 56) | (1 << 16)),
3030		/* CALL rX:rX+1, rX+2 */
3031		JOB_INSTR_ALWAYS((32ull << 56) | (params->addr_reg << 40) |
3032				 (params->val_reg << 32)),
3033		/* MOV48 rX:rX+1, cycles_offset */
3034		JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) |
3035				 (params->times_addr +
3036				  offsetof(struct panthor_job_profiling_data, cycles.after))),
3037		/* STORE_STATE cycles */
3038		JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)),
3039		/* MOV48 rX:rX+1, time_offset */
3040		JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) |
3041			  (params->times_addr +
3042			   offsetof(struct panthor_job_profiling_data, time.after))),
3043		/* STORE_STATE timer */
3044		JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)),
3045		/* MOV48 rX:rX+1, sync_addr */
3046		JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->sync_addr),
3047		/* MOV48 rX+2, #1 */
3048		JOB_INSTR_ALWAYS((1ull << 56) | (params->val_reg << 48) | 1),
3049		/* WAIT(all) */
3050		JOB_INSTR_ALWAYS((3ull << 56) | (params->waitall_mask << 16)),
3051		/* SYNC_ADD64.system_scope.propagate_err.nowait rX:rX+1, rX+2 */
3052		JOB_INSTR_ALWAYS((51ull << 56) | (0ull << 48) | (params->addr_reg << 40) |
3053				 (params->val_reg << 32) | (0 << 16) | 1),
3054		/* ERROR_BARRIER, so we can recover from faults at job boundaries. */
3055		JOB_INSTR_ALWAYS((47ull << 56)),
3056	};
3057	u32 pad;
3058
3059	instrs->count = 0;
3060
3061	/* Needs to be cacheline-aligned to please the prefetcher. */
3062	static_assert(sizeof(instrs->buffer) % 64 == 0,
3063		      "panthor_job_ringbuf_instrs::buffer is not aligned on a cacheline");
3064
3065	/* Make sure we have enough storage to store the whole sequence. */
3066	static_assert(ALIGN(ARRAY_SIZE(instr_seq), NUM_INSTRS_PER_CACHE_LINE) ==
3067		      ARRAY_SIZE(instrs->buffer),
3068		      "instr_seq vs panthor_job_ringbuf_instrs::buffer size mismatch");
3069
3070	for (u32 i = 0; i < ARRAY_SIZE(instr_seq); i++) {
3071		/* If the profile mask of this instruction is not enabled, skip it. */
3072		if (instr_seq[i].profile_mask &&
3073		    !(instr_seq[i].profile_mask & params->profile_mask))
3074			continue;
3075
3076		instrs->buffer[instrs->count++] = instr_seq[i].instr;
3077	}
3078
3079	pad = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE);
3080	memset(&instrs->buffer[instrs->count], 0,
3081	       (pad - instrs->count) * sizeof(instrs->buffer[0]));
3082	instrs->count = pad;
3083}
3084
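/* A job's credit cost is the number of instructions it occupies in the ring
 * buffer, which depends on the profiling flags enabled for that job.
 */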
3085static u32 calc_job_credits(u32 profile_mask)
3086{
3087	struct panthor_job_ringbuf_instrs instrs;
3088	struct panthor_job_cs_params params = {
3089		.profile_mask = profile_mask,
3090	};
3091
3092	prepare_job_instrs(&params, &instrs);
3093	return instrs.count;
3094}
3095
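/* drm_sched run_job() hook: turn a panthor_job into a ring-buffer call
 * sequence, push it to the queue, then either ring the CS doorbell (group
 * already resident on a CSG slot) or ask the scheduler to schedule the
 * group. The returned done_fence is signalled from group_sync_upd_work()
 * once the syncobj written at the end of the sequence reaches the job's
 * seqno.
 */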
3096static struct dma_fence *
3097queue_run_job(struct drm_sched_job *sched_job)
3098{
3099	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3100	struct panthor_group *group = job->group;
3101	struct panthor_queue *queue = group->queues[job->queue_idx];
3102	struct panthor_device *ptdev = group->ptdev;
3103	struct panthor_scheduler *sched = ptdev->scheduler;
3104	struct panthor_job_ringbuf_instrs instrs;
3105	struct panthor_job_cs_params cs_params;
3106	struct dma_fence *done_fence;
3107	int ret;
3108
3109	/* Stream size is zero, nothing to do except making sure all previously
3110	 * submitted jobs are done before we signal the
3111	 * drm_sched_job::s_fence::finished fence.
3112	 */
3113	if (!job->call_info.size) {
3114		job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
3115		return dma_fence_get(job->done_fence);
3116	}
3117
3118	ret = pm_runtime_resume_and_get(ptdev->base.dev);
3119	if (drm_WARN_ON(&ptdev->base, ret))
3120		return ERR_PTR(ret);
3121
3122	mutex_lock(&sched->lock);
3123	if (!group_can_run(group)) {
3124		done_fence = ERR_PTR(-ECANCELED);
3125		goto out_unlock;
3126	}
3127
3128	dma_fence_init(job->done_fence,
3129		       &panthor_queue_fence_ops,
3130		       &queue->fence_ctx.lock,
3131		       queue->fence_ctx.id,
3132		       atomic64_inc_return(&queue->fence_ctx.seqno));
3133
3134	job->profiling.slot = queue->profiling.seqno++;
3135	if (queue->profiling.seqno == queue->profiling.slot_count)
3136		queue->profiling.seqno = 0;
3137
3138	job->ringbuf.start = queue->iface.input->insert;
3139
3140	get_job_cs_params(job, &cs_params);
3141	prepare_job_instrs(&cs_params, &instrs);
3142	copy_instrs_to_ringbuf(queue, job, &instrs);
3143
3144	job->ringbuf.end = job->ringbuf.start + (instrs.count * sizeof(u64));
3145
3146	panthor_job_get(&job->base);
3147	spin_lock(&queue->fence_ctx.lock);
3148	list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
3149	spin_unlock(&queue->fence_ctx.lock);
3150
3151	/* Make sure the ring buffer is updated before the INSERT
3152	 * register.
3153	 */
3154	wmb();
3155
3156	queue->iface.input->extract = queue->iface.output->extract;
3157	queue->iface.input->insert = job->ringbuf.end;
3158
3159	if (group->csg_id < 0) {
3160		/* If the queue is blocked, we want to keep the timeout running, so we
3161		 * can detect unbounded waits and kill the group when that happens.
3162		 * Otherwise, we suspend the timeout so the time we spend waiting for
3163		 * a CSG slot is not counted.
3164		 */
3165		if (!(group->blocked_queues & BIT(job->queue_idx)) &&
3166		    !queue->timeout_suspended) {
3167			queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
3168			queue->timeout_suspended = true;
3169		}
3170
3171		group_schedule_locked(group, BIT(job->queue_idx));
3172	} else {
3173		gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
3174		if (!sched->pm.has_ref &&
3175		    !(group->blocked_queues & BIT(job->queue_idx))) {
3176			pm_runtime_get(ptdev->base.dev);
3177			sched->pm.has_ref = true;
3178		}
3179		panthor_devfreq_record_busy(sched->ptdev);
3180	}
3181
3182	/* Update the last fence. */
3183	dma_fence_put(queue->fence_ctx.last_fence);
3184	queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
3185
3186	done_fence = dma_fence_get(job->done_fence);
3187
3188out_unlock:
3189	mutex_unlock(&sched->lock);
3190	pm_runtime_mark_last_busy(ptdev->base.dev);
3191	pm_runtime_put_autosuspend(ptdev->base.dev);
3192
3193	return done_fence;
3194}
3195
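/* drm_sched timedout_job() hook: flag the group as timed out so it can't be
 * rescheduled, then either force a tick (group still resident on a CSG slot)
 * or queue the termination work directly.
 */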
3196static enum drm_gpu_sched_stat
3197queue_timedout_job(struct drm_sched_job *sched_job)
3198{
3199	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3200	struct panthor_group *group = job->group;
3201	struct panthor_device *ptdev = group->ptdev;
3202	struct panthor_scheduler *sched = ptdev->scheduler;
3203	struct panthor_queue *queue = group->queues[job->queue_idx];
3204
3205	drm_warn(&ptdev->base, "job timeout\n");
3206
3207	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));
3208
3209	queue_stop(queue, job);
3210
3211	mutex_lock(&sched->lock);
3212	group->timedout = true;
3213	if (group->csg_id >= 0) {
3214		sched_queue_delayed_work(ptdev->scheduler, tick, 0);
3215	} else {
3216		/* Remove from the run queues, so the scheduler can't
3217		 * pick the group on the next tick.
3218		 */
3219		list_del_init(&group->run_node);
3220		list_del_init(&group->wait_node);
3221
3222		group_queue_work(group, term);
3223	}
3224	mutex_unlock(&sched->lock);
3225
3226	queue_start(queue);
3227
3228	return DRM_GPU_SCHED_STAT_NOMINAL;
3229}
3230
3231static void queue_free_job(struct drm_sched_job *sched_job)
3232{
3233	drm_sched_job_cleanup(sched_job);
3234	panthor_job_put(sched_job);
3235}
3236
3237static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
3238	.run_job = queue_run_job,
3239	.timedout_job = queue_timedout_job,
3240	.free_job = queue_free_job,
3241};
3242
3243static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev,
3244					    u32 cs_ringbuf_size)
3245{
3246	u32 min_profiled_job_instrs = U32_MAX;
3247	u32 last_flag = fls(PANTHOR_DEVICE_PROFILING_ALL);
3248
3249	/*
3250	 * We want to calculate the minimum size of a profiled job's CS:
3251	 * because profiled jobs need additional instructions to sample
3252	 * performance metrics, they take up more slots in the queue's
3253	 * ring buffer, which means we might not need as many job slots
3254	 * for keeping track of their profiling information. What we
3255	 * need is the maximum number of slots we should allocate to this end,
3256	 * which matches the maximum number of profiled jobs we can place
3257	 * simultaneously in the queue's ring buffer.
3258	 * That has to be calculated separately for every single job profiling
3259	 * flag, but not when job profiling is disabled, since unprofiled
3260	 * jobs don't need to keep track of this at all.
3261	 */
3262	for (u32 i = 0; i < last_flag; i++) {
3263		min_profiled_job_instrs =
3264			min(min_profiled_job_instrs, calc_job_credits(BIT(i)));
3265	}
3266
3267	return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64));
3268}
3269
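/* Create one queue of a group: allocate the ring buffer, the FW queue
 * interface and the profiling slots, then set up a dedicated drm_gpu_scheduler
 * and entity for the queue, with a credit limit expressed in ring-buffer
 * instructions.
 */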
3270static struct panthor_queue *
3271group_create_queue(struct panthor_group *group,
3272		   const struct drm_panthor_queue_create *args)
3273{
3274	struct drm_gpu_scheduler *drm_sched;
3275	struct panthor_queue *queue;
3276	int ret;
3277
3278	if (args->pad[0] || args->pad[1] || args->pad[2])
3279		return ERR_PTR(-EINVAL);
3280
3281	if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K ||
3282	    !is_power_of_2(args->ringbuf_size))
3283		return ERR_PTR(-EINVAL);
3284
3285	if (args->priority > CSF_MAX_QUEUE_PRIO)
3286		return ERR_PTR(-EINVAL);
3287
3288	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
3289	if (!queue)
3290		return ERR_PTR(-ENOMEM);
3291
3292	queue->fence_ctx.id = dma_fence_context_alloc(1);
3293	spin_lock_init(&queue->fence_ctx.lock);
3294	INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
3295
3296	queue->priority = args->priority;
3297
3298	queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
3299						  args->ringbuf_size,
3300						  DRM_PANTHOR_BO_NO_MMAP,
3301						  DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
3302						  DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
3303						  PANTHOR_VM_KERNEL_AUTO_VA);
3304	if (IS_ERR(queue->ringbuf)) {
3305		ret = PTR_ERR(queue->ringbuf);
3306		goto err_free_queue;
3307	}
3308
3309	ret = panthor_kernel_bo_vmap(queue->ringbuf);
3310	if (ret)
3311		goto err_free_queue;
3312
3313	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
3314							    &queue->iface.input,
3315							    &queue->iface.output,
3316							    &queue->iface.input_fw_va,
3317							    &queue->iface.output_fw_va);
3318	if (IS_ERR(queue->iface.mem)) {
3319		ret = PTR_ERR(queue->iface.mem);
3320		goto err_free_queue;
3321	}
3322
3323	queue->profiling.slot_count =
3324		calc_profiling_ringbuf_num_slots(group->ptdev, args->ringbuf_size);
3325
3326	queue->profiling.slots =
3327		panthor_kernel_bo_create(group->ptdev, group->vm,
3328					 queue->profiling.slot_count *
3329					 sizeof(struct panthor_job_profiling_data),
3330					 DRM_PANTHOR_BO_NO_MMAP,
3331					 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
3332					 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
3333					 PANTHOR_VM_KERNEL_AUTO_VA);
3334
3335	if (IS_ERR(queue->profiling.slots)) {
3336		ret = PTR_ERR(queue->profiling.slots);
3337		goto err_free_queue;
3338	}
3339
3340	ret = panthor_kernel_bo_vmap(queue->profiling.slots);
3341	if (ret)
3342		goto err_free_queue;
3343
3344	/*
3345	 * The credit limit argument tells us the total number of instructions
3346	 * across all CS slots in the ring buffer, with some jobs requiring
3347	 * twice as many as others, depending on their profiling status.
3348	 */
3349	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
3350			     group->ptdev->scheduler->wq, 1,
3351			     args->ringbuf_size / sizeof(u64),
3352			     0, msecs_to_jiffies(JOB_TIMEOUT_MS),
3353			     group->ptdev->reset.wq,
3354			     NULL, "panthor-queue", group->ptdev->base.dev);
3355	if (ret)
3356		goto err_free_queue;
3357
3358	drm_sched = &queue->scheduler;
3359	ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
	if (ret)
		goto err_free_queue;
3360
3361	return queue;
3362
3363err_free_queue:
3364	group_free_queue(group, queue);
3365	return ERR_PTR(ret);
3366}
3367
3368#define MAX_GROUPS_PER_POOL		128
3369
3370int panthor_group_create(struct panthor_file *pfile,
3371			 const struct drm_panthor_group_create *group_args,
3372			 const struct drm_panthor_queue_create *queue_args)
3373{
3374	struct panthor_device *ptdev = pfile->ptdev;
3375	struct panthor_group_pool *gpool = pfile->groups;
3376	struct panthor_scheduler *sched = ptdev->scheduler;
3377	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3378	struct panthor_group *group = NULL;
3379	u32 gid, i, suspend_size;
3380	int ret;
3381
3382	if (group_args->pad)
3383		return -EINVAL;
3384
3385	if (group_args->priority >= PANTHOR_CSG_PRIORITY_COUNT)
3386		return -EINVAL;
3387
3388	if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) ||
3389	    (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) ||
3390	    (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present))
3391		return -EINVAL;
3392
3393	if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores ||
3394	    hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores ||
3395	    hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores)
3396		return -EINVAL;
3397
3398	group = kzalloc(sizeof(*group), GFP_KERNEL);
3399	if (!group)
3400		return -ENOMEM;
3401
3402	spin_lock_init(&group->fatal_lock);
3403	kref_init(&group->refcount);
3404	group->state = PANTHOR_CS_GROUP_CREATED;
3405	group->csg_id = -1;
3406
3407	group->ptdev = ptdev;
3408	group->max_compute_cores = group_args->max_compute_cores;
3409	group->compute_core_mask = group_args->compute_core_mask;
3410	group->max_fragment_cores = group_args->max_fragment_cores;
3411	group->fragment_core_mask = group_args->fragment_core_mask;
3412	group->max_tiler_cores = group_args->max_tiler_cores;
3413	group->tiler_core_mask = group_args->tiler_core_mask;
3414	group->priority = group_args->priority;
3415
3416	INIT_LIST_HEAD(&group->wait_node);
3417	INIT_LIST_HEAD(&group->run_node);
3418	INIT_WORK(&group->term_work, group_term_work);
3419	INIT_WORK(&group->sync_upd_work, group_sync_upd_work);
3420	INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
3421	INIT_WORK(&group->release_work, group_release_work);
3422
3423	group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
3424	if (!group->vm) {
3425		ret = -EINVAL;
3426		goto err_put_group;
3427	}
3428
3429	suspend_size = csg_iface->control->suspend_size;
3430	group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3431	if (IS_ERR(group->suspend_buf)) {
3432		ret = PTR_ERR(group->suspend_buf);
3433		group->suspend_buf = NULL;
3434		goto err_put_group;
3435	}
3436
3437	suspend_size = csg_iface->control->protm_suspend_size;
3438	group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
3439	if (IS_ERR(group->protm_suspend_buf)) {
3440		ret = PTR_ERR(group->protm_suspend_buf);
3441		group->protm_suspend_buf = NULL;
3442		goto err_put_group;
3443	}
3444
3445	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
3446						   group_args->queues.count *
3447						   sizeof(struct panthor_syncobj_64b),
3448						   DRM_PANTHOR_BO_NO_MMAP,
3449						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
3450						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
3451						   PANTHOR_VM_KERNEL_AUTO_VA);
3452	if (IS_ERR(group->syncobjs)) {
3453		ret = PTR_ERR(group->syncobjs);
3454		goto err_put_group;
3455	}
3456
3457	ret = panthor_kernel_bo_vmap(group->syncobjs);
3458	if (ret)
3459		goto err_put_group;
3460
3461	memset(group->syncobjs->kmap, 0,
3462	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
3463
3464	for (i = 0; i < group_args->queues.count; i++) {
3465		group->queues[i] = group_create_queue(group, &queue_args[i]);
3466		if (IS_ERR(group->queues[i])) {
3467			ret = PTR_ERR(group->queues[i]);
3468			group->queues[i] = NULL;
3469			goto err_put_group;
3470		}
3471
3472		group->queue_count++;
3473	}
3474
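	/* All queues are idle until jobs get pushed to their ring buffers. */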
3475	group->idle_queues = GENMASK(group->queue_count - 1, 0);
3476
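	/* Allocate a group handle. The range starts at 1, so zero is never a
	 * valid handle.
	 */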
3477	ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
3478	if (ret)
3479		goto err_put_group;
3480
3481	mutex_lock(&sched->reset.lock);
3482	if (atomic_read(&sched->reset.in_progress)) {
3483		panthor_group_stop(group);
3484	} else {
3485		mutex_lock(&sched->lock);
3486		list_add_tail(&group->run_node,
3487			      &sched->groups.idle[group->priority]);
3488		mutex_unlock(&sched->lock);
3489	}
3490	mutex_unlock(&sched->reset.lock);
3491
3492	mutex_init(&group->fdinfo.lock);
3493
3494	return gid;
3495
3496err_put_group:
3497	group_put(group);
3498	return ret;
3499}
3500
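/**
 * panthor_group_destroy() - Destroy a scheduling group
 * @pfile: File the group was created on.
 * @group_handle: Handle returned by panthor_group_create().
 *
 * Queues the group for termination and drops the reference held by the
 * group pool.
 *
 * Return: 0 on success, -EINVAL if the handle doesn't match any group.
 */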
3501int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
3502{
3503	struct panthor_group_pool *gpool = pfile->groups;
3504	struct panthor_device *ptdev = pfile->ptdev;
3505	struct panthor_scheduler *sched = ptdev->scheduler;
3506	struct panthor_group *group;
3507
3508	group = xa_erase(&gpool->xa, group_handle);
3509	if (!group)
3510		return -EINVAL;
3511
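	/* Destroy the scheduler entities first, so jobs that were already
	 * queued are flushed before the group is torn down.
	 */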
3512	for (u32 i = 0; i < group->queue_count; i++) {
3513		if (group->queues[i])
3514			drm_sched_entity_destroy(&group->queues[i]->entity);
3515	}
3516
3517	mutex_lock(&sched->reset.lock);
3518	mutex_lock(&sched->lock);
3519	group->destroyed = true;
3520	if (group->csg_id >= 0) {
3521		sched_queue_delayed_work(sched, tick, 0);
3522	} else if (!atomic_read(&sched->reset.in_progress)) {
3523		/* Remove from the run queues, so the scheduler can't
3524		 * pick the group on the next tick.
3525		 */
3526		list_del_init(&group->run_node);
3527		list_del_init(&group->wait_node);
3528		group_queue_work(group, term);
3529	}
3530	mutex_unlock(&sched->lock);
3531	mutex_unlock(&sched->reset.lock);
3532
3533	group_put(group);
3534	return 0;
3535}
3536
3537static struct panthor_group *group_from_handle(struct panthor_group_pool *pool,
3538					       u32 group_handle)
3539{
3540	struct panthor_group *group;
3541
3542	xa_lock(&pool->xa);
3543	group = group_get(xa_load(&pool->xa, group_handle));
3544	xa_unlock(&pool->xa);
3545
3546	return group;
3547}
3548
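/**
 * panthor_group_get_state() - Collect the state of a scheduling group
 * @pfile: File the group was created on.
 * @get_state: Query arguments. group_handle is an input, the state fields
 * are filled on return.
 *
 * Return: 0 on success, -EINVAL otherwise.
 */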
3549int panthor_group_get_state(struct panthor_file *pfile,
3550			    struct drm_panthor_group_get_state *get_state)
3551{
3552	struct panthor_group_pool *gpool = pfile->groups;
3553	struct panthor_device *ptdev = pfile->ptdev;
3554	struct panthor_scheduler *sched = ptdev->scheduler;
3555	struct panthor_group *group;
3556
3557	if (get_state->pad)
3558		return -EINVAL;
3559
3560	group = group_from_handle(gpool, get_state->group_handle);
3561	if (!group)
3562		return -EINVAL;
3563
3564	memset(get_state, 0, sizeof(*get_state));
3565
3566	mutex_lock(&sched->lock);
3567	if (group->timedout)
3568		get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT;
3569	if (group->fatal_queues) {
3570		get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
3571		get_state->fatal_queues = group->fatal_queues;
3572	}
3573	mutex_unlock(&sched->lock);
3574
3575	group_put(group);
3576	return 0;
3577}
3578
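/**
 * panthor_group_pool_create() - Create a group pool
 * @pfile: File to attach the pool to.
 *
 * Return: 0 on success, -ENOMEM otherwise.
 */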
3579int panthor_group_pool_create(struct panthor_file *pfile)
3580{
3581	struct panthor_group_pool *gpool;
3582
3583	gpool = kzalloc(sizeof(*gpool), GFP_KERNEL);
3584	if (!gpool)
3585		return -ENOMEM;
3586
3587	xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1);
3588	pfile->groups = gpool;
3589	return 0;
3590}
3591
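/**
 * panthor_group_pool_destroy() - Destroy a group pool
 * @pfile: File the pool is attached to.
 *
 * Destroys any group that's still in the pool before freeing the pool itself.
 */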
3592void panthor_group_pool_destroy(struct panthor_file *pfile)
3593{
3594	struct panthor_group_pool *gpool = pfile->groups;
3595	struct panthor_group *group;
3596	unsigned long i;
3597
3598	if (IS_ERR_OR_NULL(gpool))
3599		return;
3600
3601	xa_for_each(&gpool->xa, i, group)
3602		panthor_group_destroy(pfile, i);
3603
3604	xa_destroy(&gpool->xa);
3605	kfree(gpool);
3606	pfile->groups = NULL;
3607}
3608
3609static void job_release(struct kref *ref)
3610{
3611	struct panthor_job *job = container_of(ref, struct panthor_job, refcount);
3612
3613	drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node));
3614
3615	if (job->base.s_fence)
3616		drm_sched_job_cleanup(&job->base);
3617
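	/* If the done fence was never initialized (no ops), it can't be
	 * released with dma_fence_put() and must be freed directly.
	 */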
3618	if (job->done_fence && job->done_fence->ops)
3619		dma_fence_put(job->done_fence);
3620	else
3621		dma_fence_free(job->done_fence);
3622
3623	group_put(job->group);
3624
3625	kfree(job);
3626}
3627
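/**
 * panthor_job_get() - Get a reference on a job
 * @sched_job: Job to get a reference on. Can be NULL.
 *
 * Return: the job passed as argument.
 */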
3628struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job)
3629{
3630	if (sched_job) {
3631		struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3632
3633		kref_get(&job->refcount);
3634	}
3635
3636	return sched_job;
3637}
3638
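/**
 * panthor_job_put() - Release a reference on a job
 * @sched_job: Job to release the reference on.
 */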
3639void panthor_job_put(struct drm_sched_job *sched_job)
3640{
3641	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3642
3643	if (sched_job)
3644		kref_put(&job->refcount, job_release);
3645}
3646
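/**
 * panthor_job_vm() - Get the VM targeted by a job
 * @sched_job: Job to get the VM of.
 *
 * Return: the VM of the group the job belongs to.
 */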
3647struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
3648{
3649	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3650
3651	return job->group->vm;
3652}
3653
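/**
 * panthor_job_create() - Create a job
 * @pfile: File the job is submitted on.
 * @group_handle: Handle of the group to queue the job on.
 * @qsubmit: Submission arguments.
 *
 * Return: a valid &struct drm_sched_job pointer on success, an ERR_PTR()
 * otherwise.
 */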
3654struct drm_sched_job *
3655panthor_job_create(struct panthor_file *pfile,
3656		   u16 group_handle,
3657		   const struct drm_panthor_queue_submit *qsubmit)
3658{
3659	struct panthor_group_pool *gpool = pfile->groups;
3660	struct panthor_job *job;
3661	u32 credits;
3662	int ret;
3663
3664	if (qsubmit->pad)
3665		return ERR_PTR(-EINVAL);
3666
	/* stream_addr and stream_size must both be zero or both be non-zero. */
3668	if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0))
3669		return ERR_PTR(-EINVAL);
3670
	/* Make sure the address is aligned on 64 bytes (cache line) and the
	 * size is a multiple of 8 bytes (instruction size).
	 */
3674	if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7))
3675		return ERR_PTR(-EINVAL);
3676
3677	/* bits 24:30 must be zero. */
3678	if (qsubmit->latest_flush & GENMASK(30, 24))
3679		return ERR_PTR(-EINVAL);
3680
3681	job = kzalloc(sizeof(*job), GFP_KERNEL);
3682	if (!job)
3683		return ERR_PTR(-ENOMEM);
3684
3685	kref_init(&job->refcount);
3686	job->queue_idx = qsubmit->queue_index;
3687	job->call_info.size = qsubmit->stream_size;
3688	job->call_info.start = qsubmit->stream_addr;
3689	job->call_info.latest_flush = qsubmit->latest_flush;
3690	INIT_LIST_HEAD(&job->node);
3691
3692	job->group = group_from_handle(gpool, group_handle);
3693	if (!job->group) {
3694		ret = -EINVAL;
3695		goto err_put_job;
3696	}
3697
3698	if (!group_can_run(job->group)) {
3699		ret = -EINVAL;
3700		goto err_put_job;
3701	}
3702
3703	if (job->queue_idx >= job->group->queue_count ||
3704	    !job->group->queues[job->queue_idx]) {
3705		ret = -EINVAL;
3706		goto err_put_job;
3707	}
3708
	/* Empty command streams don't need a fence; they'll pick the one from
	 * the previously submitted job.
	 */
3712	if (job->call_info.size) {
3713		job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
3714		if (!job->done_fence) {
3715			ret = -ENOMEM;
3716			goto err_put_job;
3717		}
3718	}
3719
3720	job->profiling.mask = pfile->ptdev->profile_mask;
3721	credits = calc_job_credits(job->profiling.mask);
3722	if (credits == 0) {
3723		ret = -EINVAL;
3724		goto err_put_job;
3725	}
3726
3727	ret = drm_sched_job_init(&job->base,
3728				 &job->group->queues[job->queue_idx]->entity,
3729				 credits, job->group);
3730	if (ret)
3731		goto err_put_job;
3732
3733	return &job->base;
3734
3735err_put_job:
3736	panthor_job_put(&job->base);
3737	return ERR_PTR(ret);
3738}
3739
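/**
 * panthor_job_update_resvs() - Attach the job done fence to the VM resvs
 * @exec: drm_exec context used to lock the GEM objects.
 * @sched_job: Job to register.
 *
 * Adds the job finished fence to the reservation objects of the VM the job
 * targets, with a BOOKKEEP usage.
 */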
3740void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
3741{
3742	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
3743
3744	panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
3745				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
3746}
3747
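/**
 * panthor_sched_unplug() - Stop the scheduler on device unplug
 * @ptdev: Device being unplugged.
 *
 * Cancels the scheduling tick and releases the runtime PM reference held by
 * the scheduler, if any.
 */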
3748void panthor_sched_unplug(struct panthor_device *ptdev)
3749{
3750	struct panthor_scheduler *sched = ptdev->scheduler;
3751
3752	cancel_delayed_work_sync(&sched->tick_work);
3753
3754	mutex_lock(&sched->lock);
3755	if (sched->pm.has_ref) {
3756		pm_runtime_put(ptdev->base.dev);
3757		sched->pm.has_ref = false;
3758	}
3759	mutex_unlock(&sched->lock);
3760}
3761
3762static void panthor_sched_fini(struct drm_device *ddev, void *res)
3763{
3764	struct panthor_scheduler *sched = res;
3765	int prio;
3766
3767	if (!sched || !sched->csg_slot_count)
3768		return;
3769
3770	cancel_delayed_work_sync(&sched->tick_work);
3771
3772	if (sched->wq)
3773		destroy_workqueue(sched->wq);
3774
3775	if (sched->heap_alloc_wq)
3776		destroy_workqueue(sched->heap_alloc_wq);
3777
3778	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3779		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
3780		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
3781	}
3782
3783	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
3784}
3785
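/**
 * panthor_sched_init() - Initialize the CSF scheduler
 * @ptdev: Device to initialize the scheduler of.
 *
 * Return: 0 on success, a negative error code otherwise.
 */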
3786int panthor_sched_init(struct panthor_device *ptdev)
3787{
3788	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
3789	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
3790	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
3791	struct panthor_scheduler *sched;
3792	u32 gpu_as_count, num_groups;
3793	int prio, ret;
3794
3795	sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
3796	if (!sched)
3797		return -ENOMEM;
3798
	/* The highest bit in JOB_INT_* is reserved for global IRQs. That
	 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
	 */
3802	num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
3803
	/* The FW-side scheduler might deadlock if two groups with the same
	 * priority try to access overlapping sets of resources: each group
	 * ends up holding part of the resources while waiting for the
	 * remaining ones to be released by the other group. To avoid that,
	 * it is recommended to assign each CSG a different priority. In
	 * theory we could allow several groups to share a CSG priority if
	 * they don't request the same resources, but that makes the
	 * scheduling logic more complicated, so let's clamp the number of
	 * CSG slots to MAX_CSG_PRIO + 1 for now.
	 */
3814	num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
3815
3816	/* We need at least one AS for the MCU and one for the GPU contexts. */
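	/* AS0 is reserved for the MCU, hence the GENMASK(31, 1) mask below. */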
3817	gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
3818	if (!gpu_as_count) {
3819		drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
3820			gpu_as_count + 1);
3821		return -EINVAL;
3822	}
3823
3824	sched->ptdev = ptdev;
3825	sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
3826	sched->csg_slot_count = num_groups;
3827	sched->cs_slot_count = csg_iface->control->stream_num;
3828	sched->as_slot_count = gpu_as_count;
3829	ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
3830	ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
3831	ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
3832
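	/* No tick has run yet, and no re-scheduling point is set. The
	 * scheduling tick period is 10ms.
	 */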
3833	sched->last_tick = 0;
3834	sched->resched_target = U64_MAX;
3835	sched->tick_period = msecs_to_jiffies(10);
3836	INIT_DELAYED_WORK(&sched->tick_work, tick_work);
3837	INIT_WORK(&sched->sync_upd_work, sync_upd_work);
3838	INIT_WORK(&sched->fw_events_work, process_fw_events_work);
3839
3840	ret = drmm_mutex_init(&ptdev->base, &sched->lock);
3841	if (ret)
3842		return ret;
3843
3844	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
3845		INIT_LIST_HEAD(&sched->groups.runnable[prio]);
3846		INIT_LIST_HEAD(&sched->groups.idle[prio]);
3847	}
3848	INIT_LIST_HEAD(&sched->groups.waiting);
3849
3850	ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
3851	if (ret)
3852		return ret;
3853
3854	INIT_LIST_HEAD(&sched->reset.stopped_groups);
3855
	/* sched->heap_alloc_wq will be used for heap chunk allocation on
	 * tiler OOM events, which means it can't be shared with the scheduler
	 * workqueue: work items queued by the scheduler are in the
	 * dma-signalling path. Allocate a dedicated heap_alloc_wq to work
	 * around this limitation.
	 *
	 * FIXME: Ultimately, what we need is a failable/non-blocking GEM
	 * allocation path that we can call when a heap OOM is reported. The
	 * FW is smart enough to fall back on other methods if the kernel
	 * can't allocate memory, and to fail the tiling job if none of these
	 * countermeasures work.
	 *
	 * WQ_MEM_RECLAIM is set on sched->wq to unblock the situation when
	 * the system is running out of memory.
	 */
3871	sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
3872	sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
3873	if (!sched->wq || !sched->heap_alloc_wq) {
3874		panthor_sched_fini(&ptdev->base, sched);
3875		drm_err(&ptdev->base, "Failed to allocate the workqueues");
3876		return -ENOMEM;
3877	}
3878
3879	ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
3880	if (ret)
3881		return ret;
3882
3883	ptdev->scheduler = sched;
3884	return 0;
3885}