   1// SPDX-License-Identifier: GPL-2.0-only OR MIT
   2/* Copyright (c) 2023 Imagination Technologies Ltd. */
   3
   4#include <drm/drm_managed.h>
   5#include <drm/gpu_scheduler.h>
   6
   7#include "pvr_cccb.h"
   8#include "pvr_context.h"
   9#include "pvr_device.h"
  10#include "pvr_drv.h"
  11#include "pvr_job.h"
  12#include "pvr_queue.h"
  13#include "pvr_vm.h"
  14
  15#include "pvr_rogue_fwif_client.h"
  16
  17#define MAX_DEADLINE_MS 30000
  18
  19#define CTX_COMPUTE_CCCB_SIZE_LOG2 15
  20#define CTX_FRAG_CCCB_SIZE_LOG2 15
  21#define CTX_GEOM_CCCB_SIZE_LOG2 15
  22#define CTX_TRANSFER_CCCB_SIZE_LOG2 15
  23
  24static int get_xfer_ctx_state_size(struct pvr_device *pvr_dev)
  25{
  26	u32 num_isp_store_registers;
  27
  28	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
  29		num_isp_store_registers = 1;
  30	} else {
  31		int err;
  32
  33		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
  34		if (WARN_ON(err))
  35			return err;
  36	}
  37
  38	return sizeof(struct rogue_fwif_frag_ctx_state) +
  39	       (num_isp_store_registers *
  40		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
  41}
  42
  43static int get_frag_ctx_state_size(struct pvr_device *pvr_dev)
  44{
  45	u32 num_isp_store_registers;
  46	int err;
  47
  48	if (PVR_HAS_FEATURE(pvr_dev, xe_memory_hierarchy)) {
  49		err = PVR_FEATURE_VALUE(pvr_dev, num_raster_pipes, &num_isp_store_registers);
  50		if (WARN_ON(err))
  51			return err;
  52
  53		if (PVR_HAS_FEATURE(pvr_dev, gpu_multicore_support)) {
  54			u32 xpu_max_slaves;
  55
  56			err = PVR_FEATURE_VALUE(pvr_dev, xpu_max_slaves, &xpu_max_slaves);
  57			if (WARN_ON(err))
  58				return err;
  59
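			/* Account for every core: the primary one plus up to
			 * xpu_max_slaves secondary cores.
			 */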
  60			num_isp_store_registers *= (1 + xpu_max_slaves);
  61		}
  62	} else {
  63		err = PVR_FEATURE_VALUE(pvr_dev, num_isp_ipp_pipes, &num_isp_store_registers);
  64		if (WARN_ON(err))
  65			return err;
  66	}
  67
  68	return sizeof(struct rogue_fwif_frag_ctx_state) +
  69	       (num_isp_store_registers *
  70		sizeof(((struct rogue_fwif_frag_ctx_state *)0)->frag_reg_isp_store[0]));
  71}
  72
  73static int get_ctx_state_size(struct pvr_device *pvr_dev, enum drm_pvr_job_type type)
  74{
  75	switch (type) {
  76	case DRM_PVR_JOB_TYPE_GEOMETRY:
  77		return sizeof(struct rogue_fwif_geom_ctx_state);
  78	case DRM_PVR_JOB_TYPE_FRAGMENT:
  79		return get_frag_ctx_state_size(pvr_dev);
  80	case DRM_PVR_JOB_TYPE_COMPUTE:
  81		return sizeof(struct rogue_fwif_compute_ctx_state);
  82	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
  83		return get_xfer_ctx_state_size(pvr_dev);
  84	}
  85
  86	WARN(1, "Invalid queue type");
  87	return -EINVAL;
  88}
  89
  90static u32 get_ctx_offset(enum drm_pvr_job_type type)
  91{
  92	switch (type) {
  93	case DRM_PVR_JOB_TYPE_GEOMETRY:
  94		return offsetof(struct rogue_fwif_fwrendercontext, geom_context);
  95	case DRM_PVR_JOB_TYPE_FRAGMENT:
  96		return offsetof(struct rogue_fwif_fwrendercontext, frag_context);
  97	case DRM_PVR_JOB_TYPE_COMPUTE:
  98		return offsetof(struct rogue_fwif_fwcomputecontext, cdm_context);
  99	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
 100		return offsetof(struct rogue_fwif_fwtransfercontext, tq_context);
 101	}
 102
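	/* All valid job types are handled above; only reached for an
	 * out-of-range type.
	 */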
 103	return 0;
 104}
 105
 106static const char *
 107pvr_queue_fence_get_driver_name(struct dma_fence *f)
 108{
 109	return PVR_DRIVER_NAME;
 110}
 111
 112static void pvr_queue_fence_release(struct dma_fence *f)
 113{
 114	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
 115
 116	pvr_context_put(fence->queue->ctx);
 117	dma_fence_free(f);
 118}
 119
 120static const char *
 121pvr_queue_job_fence_get_timeline_name(struct dma_fence *f)
 122{
 123	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
 124
 125	switch (fence->queue->type) {
 126	case DRM_PVR_JOB_TYPE_GEOMETRY:
 127		return "geometry";
 128
 129	case DRM_PVR_JOB_TYPE_FRAGMENT:
 130		return "fragment";
 131
 132	case DRM_PVR_JOB_TYPE_COMPUTE:
 133		return "compute";
 134
 135	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
 136		return "transfer";
 137	}
 138
 139	WARN(1, "Invalid queue type");
 140	return "invalid";
 141}
 142
 143static const char *
 144pvr_queue_cccb_fence_get_timeline_name(struct dma_fence *f)
 145{
 146	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
 147
 148	switch (fence->queue->type) {
 149	case DRM_PVR_JOB_TYPE_GEOMETRY:
 150		return "geometry-cccb";
 151
 152	case DRM_PVR_JOB_TYPE_FRAGMENT:
 153		return "fragment-cccb";
 154
 155	case DRM_PVR_JOB_TYPE_COMPUTE:
 156		return "compute-cccb";
 157
 158	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
 159		return "transfer-cccb";
 160	}
 161
 162	WARN(1, "Invalid queue type");
 163	return "invalid";
 164}
 165
 166static const struct dma_fence_ops pvr_queue_job_fence_ops = {
 167	.get_driver_name = pvr_queue_fence_get_driver_name,
 168	.get_timeline_name = pvr_queue_job_fence_get_timeline_name,
 169	.release = pvr_queue_fence_release,
 170};
 171
 172/**
 173 * to_pvr_queue_job_fence() - Return a pvr_queue_fence object if the fence is
 174 * backed by a UFO.
 175 * @f: The dma_fence to turn into a pvr_queue_fence.
 176 *
 177 * Return:
 178 *  * A non-NULL pvr_queue_fence object if the dma_fence is backed by a UFO, or
 179 *  * NULL otherwise.
 180 */
 181static struct pvr_queue_fence *
 182to_pvr_queue_job_fence(struct dma_fence *f)
 183{
 184	struct drm_sched_fence *sched_fence = to_drm_sched_fence(f);
 185
 186	if (sched_fence)
 187		f = sched_fence->parent;
 188
 189	if (f && f->ops == &pvr_queue_job_fence_ops)
 190		return container_of(f, struct pvr_queue_fence, base);
 191
 192	return NULL;
 193}
 194
 195static const struct dma_fence_ops pvr_queue_cccb_fence_ops = {
 196	.get_driver_name = pvr_queue_fence_get_driver_name,
 197	.get_timeline_name = pvr_queue_cccb_fence_get_timeline_name,
 198	.release = pvr_queue_fence_release,
 199};
 200
 201/**
 202 * pvr_queue_fence_put() - Put wrapper for pvr_queue_fence objects.
 203 * @f: The dma_fence object to put.
 204 *
 205 * If the pvr_queue_fence has been initialized, we call dma_fence_put(),
 206 * otherwise we free the object with dma_fence_free(). This allows us
  207 * to do the right thing before and after pvr_queue_fence_init() has been
 208 * called.
 209 */
 210static void pvr_queue_fence_put(struct dma_fence *f)
 211{
 212	if (!f)
 213		return;
 214
 215	if (WARN_ON(f->ops &&
 216		    f->ops != &pvr_queue_cccb_fence_ops &&
 217		    f->ops != &pvr_queue_job_fence_ops))
 218		return;
 219
 220	/* If the fence hasn't been initialized yet, free the object directly. */
 221	if (f->ops)
 222		dma_fence_put(f);
 223	else
 224		dma_fence_free(f);
 225}
 226
 227/**
 228 * pvr_queue_fence_alloc() - Allocate a pvr_queue_fence fence object
 229 *
 230 * Call this function to allocate job CCCB and done fences. This only
 231 * allocates the objects. Initialization happens when the underlying
 232 * dma_fence object is to be returned to drm_sched (in prepare_job() or
 233 * run_job()).
 234 *
 235 * Return:
 236 *  * A valid pointer if the allocation succeeds, or
 237 *  * NULL if the allocation fails.
 238 */
 239static struct dma_fence *
 240pvr_queue_fence_alloc(void)
 241{
 242	struct pvr_queue_fence *fence;
 243
 244	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
 245	if (!fence)
 246		return NULL;
 247
 248	return &fence->base;
 249}
 250
 251/**
 252 * pvr_queue_fence_init() - Initializes a pvr_queue_fence object.
 253 * @f: The fence to initialize
 254 * @queue: The queue this fence belongs to.
 255 * @fence_ops: The fence operations.
 256 * @fence_ctx: The fence context.
 257 *
 258 * Wrapper around dma_fence_init() that takes care of initializing the
 259 * pvr_queue_fence::queue field too.
 260 */
 261static void
 262pvr_queue_fence_init(struct dma_fence *f,
 263		     struct pvr_queue *queue,
 264		     const struct dma_fence_ops *fence_ops,
 265		     struct pvr_queue_fence_ctx *fence_ctx)
 266{
 267	struct pvr_queue_fence *fence = container_of(f, struct pvr_queue_fence, base);
 268
 269	pvr_context_get(queue->ctx);
 270	fence->queue = queue;
 271	dma_fence_init(&fence->base, fence_ops,
 272		       &fence_ctx->lock, fence_ctx->id,
 273		       atomic_inc_return(&fence_ctx->seqno));
 274}
 275
 276/**
 277 * pvr_queue_cccb_fence_init() - Initializes a CCCB fence object.
 278 * @fence: The fence to initialize.
 279 * @queue: The queue this fence belongs to.
 280 *
 281 * Initializes a fence that can be used to wait for CCCB space.
 282 *
 283 * Should be called in the ::prepare_job() path, so the fence returned to
 284 * drm_sched is valid.
 285 */
 286static void
 287pvr_queue_cccb_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
 288{
 289	pvr_queue_fence_init(fence, queue, &pvr_queue_cccb_fence_ops,
 290			     &queue->cccb_fence_ctx.base);
 291}
 292
 293/**
 294 * pvr_queue_job_fence_init() - Initializes a job done fence object.
 295 * @fence: The fence to initialize.
 296 * @queue: The queue this fence belongs to.
 297 *
 298 * Initializes a fence that will be signaled when the GPU is done executing
 299 * a job.
 300 *
  301 * Should be called *before* the ::run_job() path, so the fence is initialized
 302 * before being placed in the pending_list.
 303 */
 304static void
 305pvr_queue_job_fence_init(struct dma_fence *fence, struct pvr_queue *queue)
 306{
 307	pvr_queue_fence_init(fence, queue, &pvr_queue_job_fence_ops,
 308			     &queue->job_fence_ctx);
 309}
 310
 311/**
 312 * pvr_queue_fence_ctx_init() - Queue fence context initialization.
 313 * @fence_ctx: The context to initialize
 314 */
 315static void
 316pvr_queue_fence_ctx_init(struct pvr_queue_fence_ctx *fence_ctx)
 317{
 318	spin_lock_init(&fence_ctx->lock);
 319	fence_ctx->id = dma_fence_context_alloc(1);
 320	atomic_set(&fence_ctx->seqno, 0);
 321}
 322
 323static u32 ufo_cmds_size(u32 elem_count)
 324{
 325	/* We can pass at most ROGUE_FWIF_CCB_CMD_MAX_UFOS per UFO-related command. */
 326	u32 full_cmd_count = elem_count / ROGUE_FWIF_CCB_CMD_MAX_UFOS;
 327	u32 remaining_elems = elem_count % ROGUE_FWIF_CCB_CMD_MAX_UFOS;
 328	u32 size = full_cmd_count *
 329		   pvr_cccb_get_size_of_cmd_with_hdr(ROGUE_FWIF_CCB_CMD_MAX_UFOS *
 330						     sizeof(struct rogue_fwif_ufo));
 331
 332	if (remaining_elems) {
 333		size += pvr_cccb_get_size_of_cmd_with_hdr(remaining_elems *
 334							  sizeof(struct rogue_fwif_ufo));
 335	}
 336
 337	return size;
 338}
 339
 340static u32 job_cmds_size(struct pvr_job *job, u32 ufo_wait_count)
 341{
  342	/* One UFO cmd for the fence signaling, one UFO cmd per native fence wait,
 343	 * and a command for the job itself.
 344	 */
 345	return ufo_cmds_size(1) + ufo_cmds_size(ufo_wait_count) +
 346	       pvr_cccb_get_size_of_cmd_with_hdr(job->cmd_len);
 347}
 348
 349/**
 350 * job_count_remaining_native_deps() - Count the number of non-signaled native dependencies.
 351 * @job: Job to operate on.
 352 *
 353 * Returns: Number of non-signaled native deps remaining.
 354 */
 355static unsigned long job_count_remaining_native_deps(struct pvr_job *job)
 356{
 357	unsigned long remaining_count = 0;
 358	struct dma_fence *fence = NULL;
 359	unsigned long index;
 360
 361	xa_for_each(&job->base.dependencies, index, fence) {
 362		struct pvr_queue_fence *jfence;
 363
 364		jfence = to_pvr_queue_job_fence(fence);
 365		if (!jfence)
 366			continue;
 367
 368		if (!dma_fence_is_signaled(&jfence->base))
 369			remaining_count++;
 370	}
 371
 372	return remaining_count;
 373}
 374
 375/**
 376 * pvr_queue_get_job_cccb_fence() - Get the CCCB fence attached to a job.
 377 * @queue: The queue this job will be submitted to.
 378 * @job: The job to get the CCCB fence on.
 379 *
 380 * The CCCB fence is a synchronization primitive allowing us to delay job
 381 * submission until there's enough space in the CCCB to submit the job.
 382 *
 383 * Return:
 384 *  * NULL if there's enough space in the CCCB to submit this job, or
 385 *  * A valid dma_fence object otherwise.
 386 */
 387static struct dma_fence *
 388pvr_queue_get_job_cccb_fence(struct pvr_queue *queue, struct pvr_job *job)
 389{
 390	struct pvr_queue_fence *cccb_fence;
 391	unsigned int native_deps_remaining;
 392
 393	/* If the fence is NULL, that means we already checked that we had
  394	 * enough space in the CCCB for our job.
 395	 */
 396	if (!job->cccb_fence)
 397		return NULL;
 398
 399	mutex_lock(&queue->cccb_fence_ctx.job_lock);
 400
 401	/* Count remaining native dependencies and check if the job fits in the CCCB. */
 402	native_deps_remaining = job_count_remaining_native_deps(job);
 403	if (pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
 404		pvr_queue_fence_put(job->cccb_fence);
 405		job->cccb_fence = NULL;
 406		goto out_unlock;
 407	}
 408
 409	/* There should be no job attached to the CCCB fence context:
 410	 * drm_sched_entity guarantees that jobs are submitted one at a time.
 411	 */
 412	if (WARN_ON(queue->cccb_fence_ctx.job))
 413		pvr_job_put(queue->cccb_fence_ctx.job);
 414
 415	queue->cccb_fence_ctx.job = pvr_job_get(job);
 416
 417	/* Initialize the fence before returning it. */
 418	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
 419	if (!WARN_ON(cccb_fence->queue))
 420		pvr_queue_cccb_fence_init(job->cccb_fence, queue);
 421
 422out_unlock:
 423	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
 424
 425	return dma_fence_get(job->cccb_fence);
 426}
 427
 428/**
 429 * pvr_queue_get_job_kccb_fence() - Get the KCCB fence attached to a job.
 430 * @queue: The queue this job will be submitted to.
 431 * @job: The job to get the KCCB fence on.
 432 *
 433 * The KCCB fence is a synchronization primitive allowing us to delay job
 434 * submission until there's enough space in the KCCB to submit the job.
 435 *
 436 * Return:
 437 *  * NULL if there's enough space in the KCCB to submit this job, or
 438 *  * A valid dma_fence object otherwise.
 439 */
 440static struct dma_fence *
 441pvr_queue_get_job_kccb_fence(struct pvr_queue *queue, struct pvr_job *job)
 442{
 443	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
 444	struct dma_fence *kccb_fence = NULL;
 445
 446	/* If the fence is NULL, that means we already checked that we had
 447	 * enough space in the KCCB for our job.
 448	 */
 449	if (!job->kccb_fence)
 450		return NULL;
 451
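	/* The KCCB fence must still be uninitialized (ops == NULL) at this point.
	 * It is handed over to pvr_kccb_reserve_slot(), which returns a fence to
	 * wait on when no KCCB slot is immediately available.
	 */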
 452	if (!WARN_ON(job->kccb_fence->ops)) {
 453		kccb_fence = pvr_kccb_reserve_slot(pvr_dev, job->kccb_fence);
 454		job->kccb_fence = NULL;
 455	}
 456
 457	return kccb_fence;
 458}
 459
 460static struct dma_fence *
 461pvr_queue_get_paired_frag_job_dep(struct pvr_queue *queue, struct pvr_job *job)
 462{
 463	struct pvr_job *frag_job = job->type == DRM_PVR_JOB_TYPE_GEOMETRY ?
 464				   job->paired_job : NULL;
 465	struct dma_fence *f;
 466	unsigned long index;
 467
 468	if (!frag_job)
 469		return NULL;
 470
 471	xa_for_each(&frag_job->base.dependencies, index, f) {
 472		/* Skip already signaled fences. */
 473		if (dma_fence_is_signaled(f))
 474			continue;
 475
 476		/* Skip our own fence. */
 477		if (f == &job->base.s_fence->scheduled)
 478			continue;
 479
 480		return dma_fence_get(f);
 481	}
 482
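	/* All external dependencies of the fragment job are met: query its
	 * internal dependencies (CCCB/KCCB space) through its own prepare_job()
	 * path.
	 */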
 483	return frag_job->base.sched->ops->prepare_job(&frag_job->base, &queue->entity);
 484}
 485
 486/**
  487 * pvr_queue_prepare_job() - Return the next internal dependency expressed as a dma_fence.
  488 * @sched_job: The job to query the next internal dependency on
  489 * @s_entity: The entity this job is queued on.
  490 *
  491 * After iterating over drm_sched_job::dependencies, drm_sched lets the driver return
  492 * its own internal dependencies. We use this function to return our internal dependencies.
 493 */
 494static struct dma_fence *
 495pvr_queue_prepare_job(struct drm_sched_job *sched_job,
 496		      struct drm_sched_entity *s_entity)
 497{
 498	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
 499	struct pvr_queue *queue = container_of(s_entity, struct pvr_queue, entity);
 500	struct dma_fence *internal_dep = NULL;
 501
 502	/*
 503	 * Initialize the done_fence, so we can signal it. This must be done
 504	 * here because otherwise by the time of run_job() the job will end up
 505	 * in the pending list without a valid fence.
 506	 */
 507	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
 508		/*
 509		 * This will be called on a paired fragment job after being
  510		 * submitted to firmware. We can tell if this is the case, and bail
  511		 * out early, by checking whether run_job() has been called on the
  512		 * geometry job, which would have taken a PM ref.
 513		 */
 514		if (job->paired_job->has_pm_ref)
 515			return NULL;
 516
 517		/*
 518		 * In this case we need to use the job's own ctx to initialise
 519		 * the done_fence.  The other steps are done in the ctx of the
 520		 * paired geometry job.
 521		 */
 522		pvr_queue_job_fence_init(job->done_fence,
 523					 job->ctx->queues.fragment);
 524	} else {
 525		pvr_queue_job_fence_init(job->done_fence, queue);
 526	}
 527
 528	/* CCCB fence is used to make sure we have enough space in the CCCB to
 529	 * submit our commands.
 530	 */
 531	internal_dep = pvr_queue_get_job_cccb_fence(queue, job);
 532
 533	/* KCCB fence is used to make sure we have a KCCB slot to queue our
 534	 * CMD_KICK.
 535	 */
 536	if (!internal_dep)
 537		internal_dep = pvr_queue_get_job_kccb_fence(queue, job);
 538
 539	/* Any extra internal dependency should be added here, using the following
 540	 * pattern:
 541	 *
 542	 *	if (!internal_dep)
 543	 *		internal_dep = pvr_queue_get_job_xxxx_fence(queue, job);
 544	 */
 545
 546	/* The paired job fence should come last, when everything else is ready. */
 547	if (!internal_dep)
 548		internal_dep = pvr_queue_get_paired_frag_job_dep(queue, job);
 549
 550	return internal_dep;
 551}
 552
 553/**
 554 * pvr_queue_update_active_state_locked() - Update the queue active state.
 555 * @queue: Queue to update the state on.
 556 *
 557 * Locked version of pvr_queue_update_active_state(). Must be called with
 558 * pvr_device::queue::lock held.
 559 */
 560static void pvr_queue_update_active_state_locked(struct pvr_queue *queue)
 561{
 562	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
 563
 564	lockdep_assert_held(&pvr_dev->queues.lock);
 565
  566	/* The queue is temporarily out of any list when it's being reset,
 567	 * we don't want a call to pvr_queue_update_active_state_locked()
 568	 * to re-insert it behind our back.
 569	 */
 570	if (list_empty(&queue->node))
 571		return;
 572
 573	if (!atomic_read(&queue->in_flight_job_count))
 574		list_move_tail(&queue->node, &pvr_dev->queues.idle);
 575	else
 576		list_move_tail(&queue->node, &pvr_dev->queues.active);
 577}
 578
 579/**
 580 * pvr_queue_update_active_state() - Update the queue active state.
 581 * @queue: Queue to update the state on.
 582 *
 583 * Active state is based on the in_flight_job_count value.
 584 *
 585 * Updating the active state implies moving the queue in or out of the
 586 * active queue list, which also defines whether the queue is checked
 587 * or not when a FW event is received.
 588 *
  589 * This function should be called any time a job is submitted or its done
 590 * fence is signaled.
 591 */
 592static void pvr_queue_update_active_state(struct pvr_queue *queue)
 593{
 594	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
 595
 596	mutex_lock(&pvr_dev->queues.lock);
 597	pvr_queue_update_active_state_locked(queue);
 598	mutex_unlock(&pvr_dev->queues.lock);
 599}
 600
 601static void pvr_queue_submit_job_to_cccb(struct pvr_job *job)
 602{
 603	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
 604	struct rogue_fwif_ufo ufos[ROGUE_FWIF_CCB_CMD_MAX_UFOS];
 605	struct pvr_cccb *cccb = &queue->cccb;
 606	struct pvr_queue_fence *jfence;
 607	struct dma_fence *fence;
 608	unsigned long index;
 609	u32 ufo_count = 0;
 610
 611	/* We need to add the queue to the active list before updating the CCCB,
 612	 * otherwise we might miss the FW event informing us that something
 613	 * happened on this queue.
 614	 */
 615	atomic_inc(&queue->in_flight_job_count);
 616	pvr_queue_update_active_state(queue);
 617
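	/* Turn every non-signaled native dependency into a FENCE_PR wait command,
	 * batching at most ROGUE_FWIF_CCB_CMD_MAX_UFOS UFOs per command.
	 */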
 618	xa_for_each(&job->base.dependencies, index, fence) {
 619		jfence = to_pvr_queue_job_fence(fence);
 620		if (!jfence)
 621			continue;
 622
 623		/* Skip the partial render fence, we will place it at the end. */
 624		if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job &&
 625		    &job->paired_job->base.s_fence->scheduled == fence)
 626			continue;
 627
 628		if (dma_fence_is_signaled(&jfence->base))
 629			continue;
 630
 631		pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
 632					  &ufos[ufo_count].addr);
 633		ufos[ufo_count++].value = jfence->base.seqno;
 634
 635		if (ufo_count == ARRAY_SIZE(ufos)) {
 636			pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
 637							   sizeof(ufos), ufos, 0, 0);
 638			ufo_count = 0;
 639		}
 640	}
 641
 642	/* Partial render fence goes last. */
 643	if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) {
 644		jfence = to_pvr_queue_job_fence(job->paired_job->done_fence);
 645		if (!WARN_ON(!jfence)) {
 646			pvr_fw_object_get_fw_addr(jfence->queue->timeline_ufo.fw_obj,
 647						  &ufos[ufo_count].addr);
 648			ufos[ufo_count++].value = job->paired_job->done_fence->seqno;
 649		}
 650	}
 651
 652	if (ufo_count) {
 653		pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_FENCE_PR,
 654						   sizeof(ufos[0]) * ufo_count, ufos, 0, 0);
 655	}
 656
 657	if (job->type == DRM_PVR_JOB_TYPE_GEOMETRY && job->paired_job) {
 658		struct rogue_fwif_cmd_geom *cmd = job->cmd;
 659
 660		/* Reference value for the partial render test is the current queue fence
 661		 * seqno minus one.
 662		 */
 663		pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj,
 664					  &cmd->partial_render_geom_frag_fence.addr);
 665		cmd->partial_render_geom_frag_fence.value = job->done_fence->seqno - 1;
 666	}
 667
 668	/* Submit job to FW */
 669	pvr_cccb_write_command_with_header(cccb, job->fw_ccb_cmd_type, job->cmd_len, job->cmd,
 670					   job->id, job->id);
 671
 672	/* Signal the job fence. */
 673	pvr_fw_object_get_fw_addr(queue->timeline_ufo.fw_obj, &ufos[0].addr);
 674	ufos[0].value = job->done_fence->seqno;
 675	pvr_cccb_write_command_with_header(cccb, ROGUE_FWIF_CCB_CMD_TYPE_UPDATE,
 676					   sizeof(ufos[0]), ufos, 0, 0);
 677}
 678
 679/**
 680 * pvr_queue_run_job() - Submit a job to the FW.
 681 * @sched_job: The job to submit.
 682 *
 683 * This function is called when all non-native dependencies have been met and
 684 * when the commands resulting from this job are guaranteed to fit in the CCCB.
 685 */
 686static struct dma_fence *pvr_queue_run_job(struct drm_sched_job *sched_job)
 687{
 688	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
 689	struct pvr_device *pvr_dev = job->pvr_dev;
 690	int err;
 691
  692	/* The fragment job is issued along with the geometry job when we use combined
 693	 * geom+frag kicks. When we get there, we should simply return the
 694	 * done_fence that's been initialized earlier.
 695	 */
 696	if (job->paired_job && job->type == DRM_PVR_JOB_TYPE_FRAGMENT &&
 697	    job->done_fence->ops) {
 698		return dma_fence_get(job->done_fence);
 699	}
 700
  701	/* The only kinds of jobs that can be paired are geometry and fragment, and
  702	 * we bail out early if we see a fragment job that's paired with a geometry
  703	 * job.
 704	 * Paired jobs must also target the same context and point to the same
 705	 * HWRT.
 706	 */
 707	if (WARN_ON(job->paired_job &&
 708		    (job->type != DRM_PVR_JOB_TYPE_GEOMETRY ||
 709		     job->paired_job->type != DRM_PVR_JOB_TYPE_FRAGMENT ||
 710		     job->hwrt != job->paired_job->hwrt ||
 711		     job->ctx != job->paired_job->ctx)))
 712		return ERR_PTR(-EINVAL);
 713
 714	err = pvr_job_get_pm_ref(job);
 715	if (WARN_ON(err))
 716		return ERR_PTR(err);
 717
 718	if (job->paired_job) {
 719		err = pvr_job_get_pm_ref(job->paired_job);
 720		if (WARN_ON(err))
 721			return ERR_PTR(err);
 722	}
 723
 724	/* Submit our job to the CCCB */
 725	pvr_queue_submit_job_to_cccb(job);
 726
 727	if (job->paired_job) {
 728		struct pvr_job *geom_job = job;
 729		struct pvr_job *frag_job = job->paired_job;
 730		struct pvr_queue *geom_queue = job->ctx->queues.geometry;
 731		struct pvr_queue *frag_queue = job->ctx->queues.fragment;
 732
  733		/* Submit the fragment job along with the geometry job and send a combined kick. */
 734		pvr_queue_submit_job_to_cccb(frag_job);
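		/* The last argument flags whether the fragment command is a
		 * partial-render (FRAG_PR) command.
		 */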
 735		pvr_cccb_send_kccb_combined_kick(pvr_dev,
 736						 &geom_queue->cccb, &frag_queue->cccb,
 737						 pvr_context_get_fw_addr(geom_job->ctx) +
 738						 geom_queue->ctx_offset,
 739						 pvr_context_get_fw_addr(frag_job->ctx) +
 740						 frag_queue->ctx_offset,
 741						 job->hwrt,
 742						 frag_job->fw_ccb_cmd_type ==
 743						 ROGUE_FWIF_CCB_CMD_TYPE_FRAG_PR);
 744	} else {
 745		struct pvr_queue *queue = container_of(job->base.sched,
 746						       struct pvr_queue, scheduler);
 747
 748		pvr_cccb_send_kccb_kick(pvr_dev, &queue->cccb,
 749					pvr_context_get_fw_addr(job->ctx) + queue->ctx_offset,
 750					job->hwrt);
 751	}
 752
 753	return dma_fence_get(job->done_fence);
 754}
 755
 756static void pvr_queue_stop(struct pvr_queue *queue, struct pvr_job *bad_job)
 757{
 758	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
 759}
 760
 761static void pvr_queue_start(struct pvr_queue *queue)
 762{
 763	struct pvr_job *job;
 764
 765	/* Make sure we CPU-signal the UFO object, so other queues don't get
 766	 * blocked waiting on it.
 767	 */
 768	*queue->timeline_ufo.value = atomic_read(&queue->job_fence_ctx.seqno);
 769
 770	list_for_each_entry(job, &queue->scheduler.pending_list, base.list) {
 771		if (dma_fence_is_signaled(job->done_fence)) {
 772			/* Jobs might have completed after drm_sched_stop() was called.
 773			 * In that case, re-assign the parent field to the done_fence.
 774			 */
 775			WARN_ON(job->base.s_fence->parent);
 776			job->base.s_fence->parent = dma_fence_get(job->done_fence);
 777		} else {
 778			/* If we had unfinished jobs, flag the entity as guilty so no
 779			 * new job can be submitted.
 780			 */
 781			atomic_set(&queue->ctx->faulty, 1);
 782		}
 783	}
 784
 785	drm_sched_start(&queue->scheduler, true);
 786}
 787
 788/**
 789 * pvr_queue_timedout_job() - Handle a job timeout event.
 790 * @s_job: The job this timeout occurred on.
 791 *
 792 * FIXME: We don't do anything here to unblock the situation, we just stop+start
 793 * the scheduler, and re-assign parent fences in the middle.
 794 *
 795 * Return:
 796 *  * DRM_GPU_SCHED_STAT_NOMINAL.
 797 */
 798static enum drm_gpu_sched_stat
 799pvr_queue_timedout_job(struct drm_sched_job *s_job)
 800{
 801	struct drm_gpu_scheduler *sched = s_job->sched;
 802	struct pvr_queue *queue = container_of(sched, struct pvr_queue, scheduler);
 803	struct pvr_device *pvr_dev = queue->ctx->pvr_dev;
 804	struct pvr_job *job;
 805	u32 job_count = 0;
 806
 807	dev_err(sched->dev, "Job timeout\n");
 808
 809	/* Before we stop the scheduler, make sure the queue is out of any list, so
 810	 * any call to pvr_queue_update_active_state_locked() that might happen
 811	 * until the scheduler is really stopped doesn't end up re-inserting the
 812	 * queue in the active list. This would cause
 813	 * pvr_queue_signal_done_fences() and drm_sched_stop() to race with each
 814	 * other when accessing the pending_list, since drm_sched_stop() doesn't
 815	 * grab the job_list_lock when modifying the list (it's assuming the
 816	 * only other accessor is the scheduler, and it's safe to not grab the
 817	 * lock since it's stopped).
 818	 */
 819	mutex_lock(&pvr_dev->queues.lock);
 820	list_del_init(&queue->node);
 821	mutex_unlock(&pvr_dev->queues.lock);
 822
 823	drm_sched_stop(sched, s_job);
 824
 825	/* Re-assign job parent fences. */
 826	list_for_each_entry(job, &sched->pending_list, base.list) {
 827		job->base.s_fence->parent = dma_fence_get(job->done_fence);
 828		job_count++;
 829	}
 830	WARN_ON(atomic_read(&queue->in_flight_job_count) != job_count);
 831
 832	/* Re-insert the queue in the proper list, and kick a queue processing
 833	 * operation if there were jobs pending.
 834	 */
 835	mutex_lock(&pvr_dev->queues.lock);
 836	if (!job_count) {
 837		list_move_tail(&queue->node, &pvr_dev->queues.idle);
 838	} else {
 839		atomic_set(&queue->in_flight_job_count, job_count);
 840		list_move_tail(&queue->node, &pvr_dev->queues.active);
 841		pvr_queue_process(queue);
 842	}
 843	mutex_unlock(&pvr_dev->queues.lock);
 844
 845	drm_sched_start(sched, true);
 846
 847	return DRM_GPU_SCHED_STAT_NOMINAL;
 848}
 849
 850/**
 851 * pvr_queue_free_job() - Release the reference the scheduler had on a job object.
 852 * @sched_job: Job object to free.
 853 */
 854static void pvr_queue_free_job(struct drm_sched_job *sched_job)
 855{
 856	struct pvr_job *job = container_of(sched_job, struct pvr_job, base);
 857
 858	drm_sched_job_cleanup(sched_job);
 859	job->paired_job = NULL;
 860	pvr_job_put(job);
 861}
 862
 863static const struct drm_sched_backend_ops pvr_queue_sched_ops = {
 864	.prepare_job = pvr_queue_prepare_job,
 865	.run_job = pvr_queue_run_job,
 866	.timedout_job = pvr_queue_timedout_job,
 867	.free_job = pvr_queue_free_job,
 868};
 869
 870/**
 871 * pvr_queue_fence_is_ufo_backed() - Check if a dma_fence is backed by a UFO object
 872 * @f: Fence to test.
 873 *
 874 * A UFO-backed fence is a fence that can be signaled or waited upon FW-side.
 875 * pvr_job::done_fence objects are backed by the timeline UFO attached to the queue
 876 * they are pushed to, but those fences are not directly exposed to the outside
 877 * world, so we also need to check if the fence we're being passed is a
  878 * drm_sched_fence that came from our driver.
 879 */
 880bool pvr_queue_fence_is_ufo_backed(struct dma_fence *f)
 881{
 882	struct drm_sched_fence *sched_fence = f ? to_drm_sched_fence(f) : NULL;
 883
 884	if (sched_fence &&
 885	    sched_fence->sched->ops == &pvr_queue_sched_ops)
 886		return true;
 887
 888	if (f && f->ops == &pvr_queue_job_fence_ops)
 889		return true;
 890
 891	return false;
 892}
 893
 894/**
 895 * pvr_queue_signal_done_fences() - Signal done fences.
 896 * @queue: Queue to check.
 897 *
  898 * Signal done fences of jobs whose seqno is less than or equal to the current value of
 899 * the UFO object attached to the queue.
 900 */
 901static void
 902pvr_queue_signal_done_fences(struct pvr_queue *queue)
 903{
 904	struct pvr_job *job, *tmp_job;
 905	u32 cur_seqno;
 906
 907	spin_lock(&queue->scheduler.job_list_lock);
 908	cur_seqno = *queue->timeline_ufo.value;
 909	list_for_each_entry_safe(job, tmp_job, &queue->scheduler.pending_list, base.list) {
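		/* Wrap-around safe comparison: stop at the first job whose seqno is
		 * still ahead of the current UFO value.
		 */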
 910		if ((int)(cur_seqno - lower_32_bits(job->done_fence->seqno)) < 0)
 911			break;
 912
 913		if (!dma_fence_is_signaled(job->done_fence)) {
 914			dma_fence_signal(job->done_fence);
 915			pvr_job_release_pm_ref(job);
 916			atomic_dec(&queue->in_flight_job_count);
 917		}
 918	}
 919	spin_unlock(&queue->scheduler.job_list_lock);
 920}
 921
 922/**
 923 * pvr_queue_check_job_waiting_for_cccb_space() - Check if the job waiting for CCCB space
  924 * can be unblocked and
  925 * pushed to the CCCB
 926 * @queue: Queue to check
 927 *
 928 * If we have a job waiting for CCCB, and this job now fits in the CCCB, we signal
 929 * its CCCB fence, which should kick drm_sched.
 930 */
 931static void
 932pvr_queue_check_job_waiting_for_cccb_space(struct pvr_queue *queue)
 933{
 934	struct pvr_queue_fence *cccb_fence;
 935	u32 native_deps_remaining;
 936	struct pvr_job *job;
 937
 938	mutex_lock(&queue->cccb_fence_ctx.job_lock);
 939	job = queue->cccb_fence_ctx.job;
 940	if (!job)
 941		goto out_unlock;
 942
 943	/* If we have a job attached to the CCCB fence context, its CCCB fence
 944	 * shouldn't be NULL.
 945	 */
 946	if (WARN_ON(!job->cccb_fence)) {
 947		job = NULL;
 948		goto out_unlock;
 949	}
 950
  951	/* If we get here, the CCCB fence has to be initialized. */
 952	cccb_fence = container_of(job->cccb_fence, struct pvr_queue_fence, base);
 953	if (WARN_ON(!cccb_fence->queue)) {
 954		job = NULL;
 955		goto out_unlock;
 956	}
 957
  958	/* Ignore signaled native dependencies when counting, then check for CCCB
  959	 * space. If the job fits, signal the CCCB fence; this should unblock
  960	 * the drm_sched_entity.
 961	 */
 962	native_deps_remaining = job_count_remaining_native_deps(job);
 963	if (!pvr_cccb_cmdseq_fits(&queue->cccb, job_cmds_size(job, native_deps_remaining))) {
 964		job = NULL;
 965		goto out_unlock;
 966	}
 967
 968	dma_fence_signal(job->cccb_fence);
 969	pvr_queue_fence_put(job->cccb_fence);
 970	job->cccb_fence = NULL;
 971	queue->cccb_fence_ctx.job = NULL;
 972
 973out_unlock:
 974	mutex_unlock(&queue->cccb_fence_ctx.job_lock);
 975
 976	pvr_job_put(job);
 977}
 978
 979/**
 980 * pvr_queue_process() - Process events that happened on a queue.
 981 * @queue: Queue to check
 982 *
 983 * Signal job fences and check if jobs waiting for CCCB space can be unblocked.
 984 */
 985void pvr_queue_process(struct pvr_queue *queue)
 986{
 987	lockdep_assert_held(&queue->ctx->pvr_dev->queues.lock);
 988
 989	pvr_queue_check_job_waiting_for_cccb_space(queue);
 990	pvr_queue_signal_done_fences(queue);
 991	pvr_queue_update_active_state_locked(queue);
 992}
 993
 994static u32 get_dm_type(struct pvr_queue *queue)
 995{
 996	switch (queue->type) {
 997	case DRM_PVR_JOB_TYPE_GEOMETRY:
 998		return PVR_FWIF_DM_GEOM;
 999	case DRM_PVR_JOB_TYPE_TRANSFER_FRAG:
1000	case DRM_PVR_JOB_TYPE_FRAGMENT:
1001		return PVR_FWIF_DM_FRAG;
1002	case DRM_PVR_JOB_TYPE_COMPUTE:
1003		return PVR_FWIF_DM_CDM;
1004	}
1005
1006	return ~0;
1007}
1008
1009/**
1010 * init_fw_context() - Initializes the queue part of a FW context.
1011 * @queue: Queue object to initialize the FW context for.
1012 * @fw_ctx_map: The FW context CPU mapping.
1013 *
 1014 * FW contexts contain various pieces of state, one of them being a per-queue state
 1015 * that needs to be initialized for each queue exposed by a context. This
1016 * function takes care of that.
1017 */
1018static void init_fw_context(struct pvr_queue *queue, void *fw_ctx_map)
1019{
1020	struct pvr_context *ctx = queue->ctx;
1021	struct pvr_fw_object *fw_mem_ctx_obj = pvr_vm_get_fw_mem_context(ctx->vm_ctx);
1022	struct rogue_fwif_fwcommoncontext *cctx_fw;
1023	struct pvr_cccb *cccb = &queue->cccb;
1024
1025	cctx_fw = fw_ctx_map + queue->ctx_offset;
1026	cctx_fw->ccbctl_fw_addr = cccb->ctrl_fw_addr;
1027	cctx_fw->ccb_fw_addr = cccb->cccb_fw_addr;
1028
1029	cctx_fw->dm = get_dm_type(queue);
1030	cctx_fw->priority = ctx->priority;
1031	cctx_fw->priority_seq_num = 0;
1032	cctx_fw->max_deadline_ms = MAX_DEADLINE_MS;
1033	cctx_fw->pid = task_tgid_nr(current);
1034	cctx_fw->server_common_context_id = ctx->ctx_id;
1035
1036	pvr_fw_object_get_fw_addr(fw_mem_ctx_obj, &cctx_fw->fw_mem_context_fw_addr);
1037
1038	pvr_fw_object_get_fw_addr(queue->reg_state_obj, &cctx_fw->context_state_addr);
1039}
1040
1041/**
1042 * pvr_queue_cleanup_fw_context() - Wait for the FW context to be idle and clean it up.
 1043 * @queue: Queue whose FW context should be cleaned up.
1044 *
1045 * Return:
1046 *  * 0 on success,
1047 *  * Any error returned by pvr_fw_structure_cleanup() otherwise.
1048 */
1049static int pvr_queue_cleanup_fw_context(struct pvr_queue *queue)
1050{
1051	if (!queue->ctx->fw_obj)
1052		return 0;
1053
1054	return pvr_fw_structure_cleanup(queue->ctx->pvr_dev,
1055					ROGUE_FWIF_CLEANUP_FWCOMMONCONTEXT,
1056					queue->ctx->fw_obj, queue->ctx_offset);
1057}
1058
1059/**
1060 * pvr_queue_job_init() - Initialize queue related fields in a pvr_job object.
1061 * @job: The job to initialize.
1062 *
1063 * Bind the job to a queue and allocate memory to guarantee pvr_queue_job_arm()
1064 * and pvr_queue_job_push() can't fail. We also make sure the context type is
1065 * valid and the job can fit in the CCCB.
1066 *
1067 * Return:
1068 *  * 0 on success, or
1069 *  * An error code if something failed.
1070 */
1071int pvr_queue_job_init(struct pvr_job *job)
1072{
1073	/* Fragment jobs need at least one native fence wait on the geometry job fence. */
1074	u32 min_native_dep_count = job->type == DRM_PVR_JOB_TYPE_FRAGMENT ? 1 : 0;
1075	struct pvr_queue *queue;
1076	int err;
1077
1078	if (atomic_read(&job->ctx->faulty))
1079		return -EIO;
1080
1081	queue = pvr_context_get_queue_for_job(job->ctx, job->type);
1082	if (!queue)
1083		return -EINVAL;
1084
1085	if (!pvr_cccb_cmdseq_can_fit(&queue->cccb, job_cmds_size(job, min_native_dep_count)))
1086		return -E2BIG;
1087
1088	err = drm_sched_job_init(&job->base, &queue->entity, 1, THIS_MODULE);
1089	if (err)
1090		return err;
1091
1092	job->cccb_fence = pvr_queue_fence_alloc();
1093	job->kccb_fence = pvr_kccb_fence_alloc();
1094	job->done_fence = pvr_queue_fence_alloc();
1095	if (!job->cccb_fence || !job->kccb_fence || !job->done_fence)
1096		return -ENOMEM;
1097
1098	return 0;
1099}
1100
1101/**
1102 * pvr_queue_job_arm() - Arm a job object.
1103 * @job: The job to arm.
1104 *
 1105 * Initializes fences and returns the drm_sched finished fence so it can
1106 * be exposed to the outside world. Once this function is called, you should
1107 * make sure the job is pushed using pvr_queue_job_push(), or guarantee that
1108 * no one grabbed a reference to the returned fence. The latter can happen if
1109 * we do multi-job submission, and something failed when creating/initializing
1110 * a job. In that case, we know the fence didn't leave the driver, and we
 1111 * can thus guarantee nobody will wait on a dead fence object.
1112 *
1113 * Return:
1114 *  * A dma_fence object.
1115 */
1116struct dma_fence *pvr_queue_job_arm(struct pvr_job *job)
1117{
1118	drm_sched_job_arm(&job->base);
1119
1120	return &job->base.s_fence->finished;
1121}
1122
1123/**
1124 * pvr_queue_job_cleanup() - Cleanup fence/scheduler related fields in the job object.
1125 * @job: The job to cleanup.
1126 *
1127 * Should be called in the job release path.
1128 */
1129void pvr_queue_job_cleanup(struct pvr_job *job)
1130{
1131	pvr_queue_fence_put(job->done_fence);
1132	pvr_queue_fence_put(job->cccb_fence);
1133	pvr_kccb_fence_put(job->kccb_fence);
1134
1135	if (job->base.s_fence)
1136		drm_sched_job_cleanup(&job->base);
1137}
1138
1139/**
1140 * pvr_queue_job_push() - Push a job to its queue.
1141 * @job: The job to push.
1142 *
1143 * Must be called after pvr_queue_job_init() and after all dependencies
1144 * have been added to the job. This will effectively queue the job to
1145 * the drm_sched_entity attached to the queue. We grab a reference on
1146 * the job object, so the caller is free to drop its reference when it's
1147 * done accessing the job object.
1148 */
1149void pvr_queue_job_push(struct pvr_job *job)
1150{
1151	struct pvr_queue *queue = container_of(job->base.sched, struct pvr_queue, scheduler);
1152
1153	/* Keep track of the last queued job scheduled fence for combined submit. */
1154	dma_fence_put(queue->last_queued_job_scheduled_fence);
1155	queue->last_queued_job_scheduled_fence = dma_fence_get(&job->base.s_fence->scheduled);
1156
1157	pvr_job_get(job);
1158	drm_sched_entity_push_job(&job->base);
1159}
1160
1161static void reg_state_init(void *cpu_ptr, void *priv)
1162{
1163	struct pvr_queue *queue = priv;
1164
1165	if (queue->type == DRM_PVR_JOB_TYPE_GEOMETRY) {
1166		struct rogue_fwif_geom_ctx_state *geom_ctx_state_fw = cpu_ptr;
1167
1168		geom_ctx_state_fw->geom_core[0].geom_reg_vdm_call_stack_pointer_init =
1169			queue->callstack_addr;
1170	}
1171}
1172
1173/**
1174 * pvr_queue_create() - Create a queue object.
1175 * @ctx: The context this queue will be attached to.
1176 * @type: The type of jobs being pushed to this queue.
1177 * @args: The arguments passed to the context creation function.
1178 * @fw_ctx_map: CPU mapping of the FW context object.
1179 *
1180 * Create a queue object that will be used to queue and track jobs.
1181 *
1182 * Return:
1183 *  * A valid pointer to a pvr_queue object, or
1184 *  * An error pointer if the creation/initialization failed.
1185 */
1186struct pvr_queue *pvr_queue_create(struct pvr_context *ctx,
1187				   enum drm_pvr_job_type type,
1188				   struct drm_pvr_ioctl_create_context_args *args,
1189				   void *fw_ctx_map)
1190{
1191	static const struct {
1192		u32 cccb_size;
1193		const char *name;
1194	} props[] = {
1195		[DRM_PVR_JOB_TYPE_GEOMETRY] = {
1196			.cccb_size = CTX_GEOM_CCCB_SIZE_LOG2,
1197			.name = "geometry",
1198		},
1199		[DRM_PVR_JOB_TYPE_FRAGMENT] = {
1200			.cccb_size = CTX_FRAG_CCCB_SIZE_LOG2,
1201			.name = "fragment"
1202		},
1203		[DRM_PVR_JOB_TYPE_COMPUTE] = {
1204			.cccb_size = CTX_COMPUTE_CCCB_SIZE_LOG2,
1205			.name = "compute"
1206		},
1207		[DRM_PVR_JOB_TYPE_TRANSFER_FRAG] = {
1208			.cccb_size = CTX_TRANSFER_CCCB_SIZE_LOG2,
1209			.name = "transfer_frag"
1210		},
1211	};
1212	struct pvr_device *pvr_dev = ctx->pvr_dev;
1213	struct drm_gpu_scheduler *sched;
1214	struct pvr_queue *queue;
1215	int ctx_state_size, err;
1216	void *cpu_map;
1217
 1218	if (WARN_ON(type >= ARRAY_SIZE(props)))
1219		return ERR_PTR(-EINVAL);
1220
1221	switch (ctx->type) {
1222	case DRM_PVR_CTX_TYPE_RENDER:
1223		if (type != DRM_PVR_JOB_TYPE_GEOMETRY &&
1224		    type != DRM_PVR_JOB_TYPE_FRAGMENT)
1225			return ERR_PTR(-EINVAL);
1226		break;
1227	case DRM_PVR_CTX_TYPE_COMPUTE:
1228		if (type != DRM_PVR_JOB_TYPE_COMPUTE)
1229			return ERR_PTR(-EINVAL);
1230		break;
1231	case DRM_PVR_CTX_TYPE_TRANSFER_FRAG:
1232		if (type != DRM_PVR_JOB_TYPE_TRANSFER_FRAG)
1233			return ERR_PTR(-EINVAL);
1234		break;
1235	default:
1236		return ERR_PTR(-EINVAL);
1237	}
1238
1239	ctx_state_size = get_ctx_state_size(pvr_dev, type);
1240	if (ctx_state_size < 0)
1241		return ERR_PTR(ctx_state_size);
1242
1243	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1244	if (!queue)
1245		return ERR_PTR(-ENOMEM);
1246
1247	queue->type = type;
1248	queue->ctx_offset = get_ctx_offset(type);
1249	queue->ctx = ctx;
1250	queue->callstack_addr = args->callstack_addr;
1251	sched = &queue->scheduler;
1252	INIT_LIST_HEAD(&queue->node);
1253	mutex_init(&queue->cccb_fence_ctx.job_lock);
1254	pvr_queue_fence_ctx_init(&queue->cccb_fence_ctx.base);
1255	pvr_queue_fence_ctx_init(&queue->job_fence_ctx);
1256
1257	err = pvr_cccb_init(pvr_dev, &queue->cccb, props[type].cccb_size, props[type].name);
1258	if (err)
1259		goto err_free_queue;
1260
1261	err = pvr_fw_object_create(pvr_dev, ctx_state_size,
1262				   PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1263				   reg_state_init, queue, &queue->reg_state_obj);
1264	if (err)
1265		goto err_cccb_fini;
1266
1267	init_fw_context(queue, fw_ctx_map);
1268
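	/* A call-stack address is only meaningful for render (geometry/fragment)
	 * queues; reject it anywhere else.
	 */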
1269	if (type != DRM_PVR_JOB_TYPE_GEOMETRY && type != DRM_PVR_JOB_TYPE_FRAGMENT &&
1270	    args->callstack_addr) {
1271		err = -EINVAL;
1272		goto err_release_reg_state;
1273	}
1274
1275	cpu_map = pvr_fw_object_create_and_map(pvr_dev, sizeof(*queue->timeline_ufo.value),
1276					       PVR_BO_FW_FLAGS_DEVICE_UNCACHED,
1277					       NULL, NULL, &queue->timeline_ufo.fw_obj);
1278	if (IS_ERR(cpu_map)) {
1279		err = PTR_ERR(cpu_map);
1280		goto err_release_reg_state;
1281	}
1282
1283	queue->timeline_ufo.value = cpu_map;
1284
1285	err = drm_sched_init(&queue->scheduler,
1286			     &pvr_queue_sched_ops,
1287			     pvr_dev->sched_wq, 1, 64 * 1024, 1,
1288			     msecs_to_jiffies(500),
1289			     pvr_dev->sched_wq, NULL, "pvr-queue",
1290			     pvr_dev->base.dev);
1291	if (err)
1292		goto err_release_ufo;
1293
1294	err = drm_sched_entity_init(&queue->entity,
1295				    DRM_SCHED_PRIORITY_KERNEL,
1296				    &sched, 1, &ctx->faulty);
1297	if (err)
1298		goto err_sched_fini;
1299
1300	mutex_lock(&pvr_dev->queues.lock);
1301	list_add_tail(&queue->node, &pvr_dev->queues.idle);
1302	mutex_unlock(&pvr_dev->queues.lock);
1303
1304	return queue;
1305
1306err_sched_fini:
1307	drm_sched_fini(&queue->scheduler);
1308
1309err_release_ufo:
1310	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1311
1312err_release_reg_state:
1313	pvr_fw_object_destroy(queue->reg_state_obj);
1314
1315err_cccb_fini:
1316	pvr_cccb_fini(&queue->cccb);
1317
1318err_free_queue:
1319	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1320	kfree(queue);
1321
1322	return ERR_PTR(err);
1323}
1324
1325void pvr_queue_device_pre_reset(struct pvr_device *pvr_dev)
1326{
1327	struct pvr_queue *queue;
1328
1329	mutex_lock(&pvr_dev->queues.lock);
1330	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1331		pvr_queue_stop(queue, NULL);
1332	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1333		pvr_queue_stop(queue, NULL);
1334	mutex_unlock(&pvr_dev->queues.lock);
1335}
1336
1337void pvr_queue_device_post_reset(struct pvr_device *pvr_dev)
1338{
1339	struct pvr_queue *queue;
1340
1341	mutex_lock(&pvr_dev->queues.lock);
1342	list_for_each_entry(queue, &pvr_dev->queues.active, node)
1343		pvr_queue_start(queue);
1344	list_for_each_entry(queue, &pvr_dev->queues.idle, node)
1345		pvr_queue_start(queue);
1346	mutex_unlock(&pvr_dev->queues.lock);
1347}
1348
1349/**
1350 * pvr_queue_kill() - Kill a queue.
1351 * @queue: The queue to kill.
1352 *
1353 * Kill the queue so no new jobs can be pushed. Should be called when the
1354 * context handle is destroyed. The queue object might last longer if jobs
1355 * are still in flight and holding a reference to the context this queue
1356 * belongs to.
1357 */
1358void pvr_queue_kill(struct pvr_queue *queue)
1359{
1360	drm_sched_entity_destroy(&queue->entity);
1361	dma_fence_put(queue->last_queued_job_scheduled_fence);
1362	queue->last_queued_job_scheduled_fence = NULL;
1363}
1364
1365/**
1366 * pvr_queue_destroy() - Destroy a queue.
1367 * @queue: The queue to destroy.
1368 *
1369 * Cleanup the queue and free the resources attached to it. Should be
1370 * called from the context release function.
1371 */
1372void pvr_queue_destroy(struct pvr_queue *queue)
1373{
1374	if (!queue)
1375		return;
1376
1377	mutex_lock(&queue->ctx->pvr_dev->queues.lock);
1378	list_del_init(&queue->node);
1379	mutex_unlock(&queue->ctx->pvr_dev->queues.lock);
1380
1381	drm_sched_fini(&queue->scheduler);
1382	drm_sched_entity_fini(&queue->entity);
1383
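	/* pvr_queue_kill() should have dropped this fence already. */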
1384	if (WARN_ON(queue->last_queued_job_scheduled_fence))
1385		dma_fence_put(queue->last_queued_job_scheduled_fence);
1386
1387	pvr_queue_cleanup_fw_context(queue);
1388
1389	pvr_fw_object_unmap_and_destroy(queue->timeline_ufo.fw_obj);
1390	pvr_fw_object_destroy(queue->reg_state_obj);
1391	pvr_cccb_fini(&queue->cccb);
1392	mutex_destroy(&queue->cccb_fence_ctx.job_lock);
1393	kfree(queue);
1394}
1395
1396/**
1397 * pvr_queue_device_init() - Device-level initialization of queue related fields.
1398 * @pvr_dev: The device to initialize.
1399 *
1400 * Initializes all fields related to queue management in pvr_device.
1401 *
1402 * Return:
1403 *  * 0 on success, or
1404 *  * An error code on failure.
1405 */
1406int pvr_queue_device_init(struct pvr_device *pvr_dev)
1407{
1408	int err;
1409
1410	INIT_LIST_HEAD(&pvr_dev->queues.active);
1411	INIT_LIST_HEAD(&pvr_dev->queues.idle);
1412	err = drmm_mutex_init(from_pvr_device(pvr_dev), &pvr_dev->queues.lock);
1413	if (err)
1414		return err;
1415
1416	pvr_dev->sched_wq = alloc_workqueue("powervr-sched", WQ_UNBOUND, 0);
1417	if (!pvr_dev->sched_wq)
1418		return -ENOMEM;
1419
1420	return 0;
1421}
1422
1423/**
1424 * pvr_queue_device_fini() - Device-level cleanup of queue related fields.
1425 * @pvr_dev: The device to cleanup.
1426 *
1427 * Cleanup/free all queue-related resources attached to a pvr_device object.
1428 */
1429void pvr_queue_device_fini(struct pvr_device *pvr_dev)
1430{
1431	destroy_workqueue(pvr_dev->sched_wq);
1432}